git.basschouten.com Git - openhab-addons.git/commitdiff
[mimictts] Add LRU cache (#14564)
author Gwendal Roulleau <dalgwen@users.noreply.github.com>
Wed, 12 Jul 2023 19:59:48 +0000 (21:59 +0200)
committer GitHub <noreply@github.com>
Wed, 12 Jul 2023 19:59:48 +0000 (21:59 +0200)
* [mimictts] Add LRU cache

And simplifies code with new core capabilities (no more need to create temporary files implementing FixedLengthAudioStream)

---------

Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
bundles/org.openhab.voice.mimictts/README.md
bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java [deleted file]
bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java
bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicConfiguration.java
bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java
bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/config/config.xml
bundles/org.openhab.voice.mimictts/src/main/resources/OH-INF/i18n/mimictts.properties

index b5d572a25e7da0dd03ba87bc2a6a286d0f1feeb3..93e9e84b3d76e89839c45cf23387b2028f942823 100644 (file)
@@ -17,7 +17,6 @@ It supports a subset of SSML, and if you want to use it, be sure to start your t
 Using your favorite configuration UI to edit **Settings / Other Services - Mimic Text-to-Speech** and set:
 
 * **url** - Mimic URL. Default to `http://localhost:59125`
-* **workaroundServletSink** - A boolean activating a workaround for audiosink using the openHAB servlet. It stores audio file temporarily on disk, allowing the servlet to get a cloned stream as needed. Default false.
 * **speakingRate** - Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less than 1 is faster, and more than 1 is slower.
 * **audioVolatility** - The amount of noise added to the generated audio (0-1). Can help mask audio artifacts from the voice model. Multi-speaker models tend to sound better with a lower amount of noise than single speaker models.
 * **phonemeVolatility** - The amount of noise used to generate phoneme durations (0-1). Allows for variable speaking cadance, with a value closer to 1 being more variable. Multi-speaker models tend to sound better with a lower amount of phoneme variability than single speaker models.
@@ -48,3 +47,7 @@ In case you would like to setup these settings via a text file, you can edit the
 org.openhab.voice:defaultTTS=mimictts
 org.openhab.voice:defaultVoice=mimictts:fr_FR_siwis_low
 ```
+
+## Caching
+
+The mimic TTS service uses the openHAB TTS cache to cache audio files produced from the most recent queries in order to reduce traffic, improve performance and reduce number of requests.
diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/AutoDeleteFileAudioStream.java
deleted file mode 100644 (file)
index 465a2b2..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Copyright (c) 2010-2023 Contributors to the openHAB project
- *
- * See the NOTICE file(s) distributed with this work for additional
- * information.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0
- *
- * SPDX-License-Identifier: EPL-2.0
- */
-package org.openhab.voice.mimic.internal;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.eclipse.jdt.annotation.NonNullByDefault;
-import org.openhab.core.audio.AudioException;
-import org.openhab.core.audio.AudioFormat;
-import org.openhab.core.audio.FileAudioStream;
-
-/**
- * A FileAudioStream that autodelete after it and its clone are closed
- * Useful to not congest temporary directory
- *
- * @author Gwendal Roulleau - Initial contribution
- */
-@NonNullByDefault
-public class AutoDeleteFileAudioStream extends FileAudioStream {
-
-    private final File file;
-    private final AudioFormat audioFormat;
-    private final List<ClonedFileInputStream> clonedAudioStreams = new ArrayList<>(1);
-    private boolean isOpen = true;
-
-    public AutoDeleteFileAudioStream(File file, AudioFormat format) throws AudioException {
-        super(file, format);
-        this.file = file;
-        this.audioFormat = format;
-    }
-
-    @Override
-    public void close() throws IOException {
-        super.close();
-        this.isOpen = false;
-        deleteIfPossible();
-    }
-
-    protected void deleteIfPossible() {
-        boolean aClonedStreamIsOpen = clonedAudioStreams.stream().anyMatch(as -> as.isOpen);
-        if (!isOpen && !aClonedStreamIsOpen) {
-            file.delete();
-        }
-    }
-
-    @Override
-    public InputStream getClonedStream() throws AudioException {
-        ClonedFileInputStream clonedInputStream = new ClonedFileInputStream(this, file, audioFormat);
-        clonedAudioStreams.add(clonedInputStream);
-        return clonedInputStream;
-    }
-
-    private static class ClonedFileInputStream extends FileAudioStream {
-        protected boolean isOpen = true;
-        private final AutoDeleteFileAudioStream parent;
-
-        public ClonedFileInputStream(AutoDeleteFileAudioStream parent, File file, AudioFormat audioFormat)
-                throws AudioException {
-            super(file, audioFormat);
-            this.parent = parent;
-        }
-
-        @Override
-        public void close() throws IOException {
-            super.close();
-            this.isOpen = false;
-            parent.deleteIfPossible();
-        }
-    }
-}
index 144ba4334de5758c69cc1f161b09f4890102b18d..6d88cafea8ea610da19ad10bebbb232118e07cbc 100644 (file)
@@ -18,9 +18,9 @@ import java.io.OutputStream;
 
 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
-import org.openhab.core.audio.AudioException;
 import org.openhab.core.audio.AudioFormat;
-import org.openhab.core.audio.FixedLengthAudioStream;
+import org.openhab.core.audio.AudioStream;
+import org.openhab.core.audio.SizeableAudioStream;
 
 /**
  * An AudioStream with an {@link InputStream} inside
@@ -28,7 +28,7 @@ import org.openhab.core.audio.FixedLengthAudioStream;
  * @author Gwendal Roulleau - Initial contribution
  */
 @NonNullByDefault
-public class InputStreamAudioStream extends FixedLengthAudioStream {
+public class InputStreamAudioStream extends AudioStream implements SizeableAudioStream {
 
     public InputStream innerInputStream;
     public AudioFormat audioFormat;
@@ -115,9 +115,4 @@ public class InputStreamAudioStream extends FixedLengthAudioStream {
     public long length() {
         return length;
     }
-
-    @Override
-    public InputStream getClonedStream() throws AudioException {
-        throw new AudioException("Operation not supported");
-    }
 }
index e35064e39558db5b29cb15caf486735ef4466c42..e8c56c14635d76e542928789d5075ee58fce815c 100644 (file)
@@ -25,5 +25,4 @@ public class MimicConfiguration {
     public Double speakingRate = 1.0;
     public Double audioVolatility = 0.667;
     public Double phonemeVolatility = 0.8;
-    public Boolean workaroundServletSink = false;
 }
index 39364035c8fd44d97f77629354e93f2e4882dc29..5b26dbd99eea9c5054b02b8a81cfb88f49cdebd4 100644 (file)
  */
 package org.openhab.voice.mimic.internal;
 
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
+import java.math.BigInteger;
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.StandardCopyOption;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
-import java.util.UUID;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
@@ -38,13 +37,13 @@ import org.eclipse.jetty.client.util.InputStreamResponseListener;
 import org.eclipse.jetty.client.util.StringContentProvider;
 import org.eclipse.jetty.http.HttpHeader;
 import org.eclipse.jetty.http.HttpStatus;
-import org.openhab.core.OpenHAB;
-import org.openhab.core.audio.AudioException;
 import org.openhab.core.audio.AudioFormat;
 import org.openhab.core.audio.AudioStream;
 import org.openhab.core.config.core.ConfigurableService;
 import org.openhab.core.io.net.http.HttpClientFactory;
 import org.openhab.core.io.net.http.HttpRequestBuilder;
+import org.openhab.core.voice.AbstractCachedTTSService;
+import org.openhab.core.voice.TTSCache;
 import org.openhab.core.voice.TTSException;
 import org.openhab.core.voice.TTSService;
 import org.openhab.core.voice.Voice;
@@ -67,11 +66,11 @@ import com.google.gson.JsonSyntaxException;
  * @author Gwendal Roulleau - Initial contribution
  */
 @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
-        + MimicTTSService.SERVICE_PID)
+        + MimicTTSService.SERVICE_PID, service = TTSService.class)
 @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
         + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
 @NonNullByDefault
-public class MimicTTSService implements TTSService {
+public class MimicTTSService extends AbstractCachedTTSService {
 
     private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
 
@@ -84,7 +83,6 @@ public class MimicTTSService implements TTSService {
      * Configuration parameters
      */
     private static final String PARAM_URL = "url";
-    private static final String PARAM_WORKAROUNDSERVLETSINK = "workaroundServletSink";
     private static final String PARAM_SPEAKINGRATE = "speakingRate";
     private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
     private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
@@ -108,7 +106,9 @@ public class MimicTTSService implements TTSService {
     private final HttpClient httpClient;
 
     @Activate
-    public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
+    public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
+            Map<String, Object> config) {
+        super(ttsCache);
         updateConfig(config);
         this.httpClient = httpClientFactory.getCommonHttpClient();
     }
@@ -130,12 +130,6 @@ public class MimicTTSService implements TTSService {
             config.url = param.toString();
         }
 
-        // workaround
-        param = newConfig.get(PARAM_WORKAROUNDSERVLETSINK);
-        if (param != null) {
-            config.workaroundServletSink = Boolean.parseBoolean(param.toString());
-        }
-
         // audio volatility
         try {
             param = newConfig.get(PARAM_AUDIOVOLATITLITY);
@@ -227,8 +221,7 @@ public class MimicTTSService implements TTSService {
      * @throws TTSException in case the service is unavailable or a parameter is invalid.
      */
     @Override
-    public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
-
+    public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
         if (!availableVoices.contains(voice)) {
             // let a chance for the service to update :
             refreshVoices();
@@ -294,24 +287,7 @@ public class MimicTTSService implements TTSService {
                 }
 
                 InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream();
-                try {
-                    if (!config.workaroundServletSink) {
-                        return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
-                    } else {
-                        // Some audio sinks use the openHAB servlet to get audio. This servlet require the
-                        // getClonedStream()
-                        // method
-                        // So we cache the file on disk, thus implementing the method thanks to FileAudioStream.
-                        return createTemporaryFile(inputStreamFromMimic, AUDIO_FORMAT);
-                    }
-                } catch (TTSException e) {
-                    try {
-                        inputStreamFromMimic.close();
-                    } catch (IOException e1) {
-                    }
-                    throw e;
-                }
-
+                return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
             } else {
                 String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
                         + response.getStatus() + " for reason " + response.getReason();
@@ -325,16 +301,16 @@ public class MimicTTSService implements TTSService {
         }
     }
 
-    private AudioStream createTemporaryFile(InputStream inputStream, AudioFormat audioFormat) throws TTSException {
-        File mimicDirectory = new File(OpenHAB.getUserDataFolder(), "mimic");
-        mimicDirectory.mkdir();
+    @Override
+    public String getCacheKey(String text, Voice voice, AudioFormat requestedFormat) {
+        MessageDigest md;
         try {
-            File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".wav", mimicDirectory);
-            tempFile.deleteOnExit();
-            Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
-            return new AutoDeleteFileAudioStream(tempFile, audioFormat);
-        } catch (AudioException | IOException e) {
-            throw new TTSException("Cannot create temporary audio file", e);
+            md = MessageDigest.getInstance("MD5");
+        } catch (NoSuchAlgorithmException e) {
+            return "nomd5algorithm";
         }
+        byte[] binaryKey = ((text + voice.getUID() + requestedFormat.toString() + config.speakingRate
+                + config.audioVolatility + config.phonemeVolatility).getBytes());
+        return String.format("%032x", new BigInteger(1, md.digest(binaryKey)));
     }
 }
index 2107070d73499ed4368e38a396aa9cb8e07a72fd..ab8619a39e25825714a0dce115eb93b7fafa8284 100644 (file)
                        <description>Mimic 3 URL.</description>
                        <default>http://localhost:59125</default>
                </parameter>
-               <parameter name="workaroundServletSink" type="boolean" required="false">
-                       <label>Workaround For Servlet-Based Audiosink</label>
-                       <description>Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on
-                               the openHAB audio servlet.</description>
-                       <default>false</default>
-               </parameter>
                <parameter name="speakingRate" min="0" max="1" type="decimal" required="false">
                        <label>Speaking Rate</label>
                        <description>Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less
index 2472d5b8916cb5f5405a2e165a7cb1571b30c94e..f34268e20aa1eebb41745216221b0456ce28852c 100644 (file)
@@ -4,8 +4,6 @@ voice.config.mimictts.phonemeVolatility.label = Phoneme Volatility
 voice.config.mimictts.phonemeVolatility.description = The amount of noise used to generate phoneme durations (0-1). Allows for variable speaking cadance, with a value closer to 1 being more variable. Multi-speaker models tend to sound better with a lower amount of phoneme variability than single speaker models.
 voice.config.mimictts.speakingRate.label = Speaking Rate
 voice.config.mimictts.speakingRate.description = Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less than 1 is faster, and more than 1 is slower.
-voice.config.mimictts.workaroundServletSink.label= Workaround For Servlet-Based Audiosink
-voice.config.mimictts.workaroundServletSink.description= Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on the openHAB audio servlet.
 voice.config.mimictts.url.label = URL
 voice.config.mimictts.url.description = Mimic 3 URL.