]> git.basschouten.com Git - openhab-addons.git/blob
f02fb498ad25cdae7f0cb558b4b52152c5b2716b
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2023 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.voskstt.internal;
14
15 import static org.openhab.voice.voskstt.internal.VoskSTTConstants.*;
16
17 import java.io.File;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.nio.file.Path;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.concurrent.Future;
25 import java.util.concurrent.ScheduledExecutorService;
26 import java.util.concurrent.atomic.AtomicBoolean;
27
28 import org.eclipse.jdt.annotation.NonNullByDefault;
29 import org.eclipse.jdt.annotation.Nullable;
30 import org.openhab.core.OpenHAB;
31 import org.openhab.core.audio.AudioFormat;
32 import org.openhab.core.audio.AudioStream;
33 import org.openhab.core.common.ThreadPoolManager;
34 import org.openhab.core.config.core.ConfigurableService;
35 import org.openhab.core.config.core.Configuration;
36 import org.openhab.core.io.rest.LocaleService;
37 import org.openhab.core.voice.RecognitionStartEvent;
38 import org.openhab.core.voice.RecognitionStopEvent;
39 import org.openhab.core.voice.STTException;
40 import org.openhab.core.voice.STTListener;
41 import org.openhab.core.voice.STTService;
42 import org.openhab.core.voice.STTServiceHandle;
43 import org.openhab.core.voice.SpeechRecognitionErrorEvent;
44 import org.openhab.core.voice.SpeechRecognitionEvent;
45 import org.osgi.framework.Constants;
46 import org.osgi.service.component.annotations.Activate;
47 import org.osgi.service.component.annotations.Component;
48 import org.osgi.service.component.annotations.Deactivate;
49 import org.osgi.service.component.annotations.Modified;
50 import org.osgi.service.component.annotations.Reference;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53 import org.vosk.LibVosk;
54 import org.vosk.LogLevel;
55 import org.vosk.Model;
56 import org.vosk.Recognizer;
57
58 import com.fasterxml.jackson.databind.ObjectMapper;
59 import com.sun.jna.NativeLibrary;
60
61 /**
62  * The {@link VoskSTTService} class is a service implementation to use Vosk-API for Speech-to-Text.
63  *
64  * @author Miguel Álvarez - Initial contribution
65  */
66 @NonNullByDefault
67 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
68 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
69         + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
70 public class VoskSTTService implements STTService {
71     private static final String VOSK_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "vosk").toString();
72     private static final String MODEL_PATH = Path.of(VOSK_FOLDER, "model").toString();
73     static {
74         Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
75         File directory = new File(VOSK_FOLDER);
76         if (!directory.exists()) {
77             if (directory.mkdir()) {
78                 logger.info("vosk dir created {}", VOSK_FOLDER);
79             }
80         }
81     }
82     private final Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
83     private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-voskstt");
84     private final LocaleService localeService;
85     private VoskSTTConfiguration config = new VoskSTTConfiguration();
86     private @Nullable Model model;
87
88     @Activate
89     public VoskSTTService(@Reference LocaleService localeService) {
90         this.localeService = localeService;
91     }
92
93     @Activate
94     protected void activate(Map<String, Object> config) {
95         try {
96             String osName = System.getProperty("os.name", "generic").toLowerCase();
97             String osArch = System.getProperty("os.arch", "").toLowerCase();
98             if (osName.contains("linux") && ("arm".equals(osArch) || "armv7l".equals(osArch))) {
99                 // workaround for loading required shared libraries
100                 loadSharedLibrariesArmv7l();
101             }
102             LibVosk.setLogLevel(LogLevel.WARNINGS);
103             configChange(config);
104         } catch (LinkageError e) {
105             logger.warn("LinkageError, service will not work: {}", e.getMessage());
106         }
107     }
108
109     @Modified
110     protected void modified(Map<String, Object> config) {
111         configChange(config);
112     }
113
114     @Deactivate
115     protected void deactivate(Map<String, Object> config) {
116         try {
117             unloadModel();
118         } catch (IOException e) {
119             logger.warn("IOException unloading model: {}", e.getMessage());
120         }
121     }
122
123     private void configChange(Map<String, Object> config) {
124         this.config = new Configuration(config).as(VoskSTTConfiguration.class);
125         if (this.config.preloadModel) {
126             try {
127                 loadModel();
128             } catch (IOException e) {
129                 logger.warn("IOException loading model: {}", e.getMessage());
130             } catch (UnsatisfiedLinkError e) {
131                 logger.warn("Missing native dependency: {}", e.getMessage());
132             }
133         } else {
134             try {
135                 unloadModel();
136             } catch (IOException e) {
137                 logger.warn("IOException unloading model: {}", e.getMessage());
138             }
139         }
140     }
141
142     @Override
143     public String getId() {
144         return SERVICE_ID;
145     }
146
147     @Override
148     public String getLabel(@Nullable Locale locale) {
149         return SERVICE_NAME;
150     }
151
152     @Override
153     public Set<Locale> getSupportedLocales() {
154         // as it is not possible to determine the language of the model that was downloaded and setup by the user, it is
155         // assumed the language of the model is matching the locale of the openHAB server
156         return Set.of(localeService.getLocale(null));
157     }
158
159     @Override
160     public Set<AudioFormat> getSupportedFormats() {
161         return Set.of(
162                 new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L));
163     }
164
165     @Override
166     public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
167             throws STTException {
168         AtomicBoolean aborted = new AtomicBoolean(false);
169         try {
170             var frequency = audioStream.getFormat().getFrequency();
171             if (frequency == null) {
172                 throw new IOException("missing audio stream frequency");
173             }
174             backgroundRecognize(sttListener, audioStream, frequency, aborted);
175         } catch (IOException e) {
176             throw new STTException(e);
177         }
178         return () -> {
179             aborted.set(true);
180         };
181     }
182
183     private Model getModel() throws IOException, UnsatisfiedLinkError {
184         var model = this.model;
185         if (model != null) {
186             return model;
187         }
188         return loadModel();
189     }
190
191     private Model loadModel() throws IOException, UnsatisfiedLinkError {
192         unloadModel();
193         var modelFile = new File(MODEL_PATH);
194         if (!modelFile.exists() || !modelFile.isDirectory()) {
195             throw new IOException("missing model dir: " + MODEL_PATH);
196         }
197         logger.debug("loading model");
198         var model = new Model(MODEL_PATH);
199         if (config.preloadModel) {
200             this.model = model;
201         }
202         return model;
203     }
204
205     private void unloadModel() throws IOException {
206         var model = this.model;
207         if (model != null) {
208             logger.debug("unloading model");
209             model.close();
210             this.model = null;
211         }
212     }
213
214     private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
215             AtomicBoolean aborted) {
216         StringBuilder transcriptBuilder = new StringBuilder();
217         long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
218         long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
219         long startTime = System.currentTimeMillis();
220         return executor.submit(() -> {
221             Recognizer recognizer = null;
222             Model model = null;
223             try {
224                 model = getModel();
225                 recognizer = new Recognizer(model, frequency);
226                 long lastInputTime = System.currentTimeMillis();
227                 int nbytes;
228                 byte[] b = new byte[4096];
229                 sttListener.sttEventReceived(new RecognitionStartEvent());
230                 while (!aborted.get()) {
231                     nbytes = audioStream.read(b);
232                     if (aborted.get()) {
233                         break;
234                     }
235                     if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
236                         logger.debug("Stops listening, max transcription time reached");
237                         break;
238                     }
239                     if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
240                         logger.debug("Stops listening, max silence time reached");
241                         break;
242                     }
243                     if (nbytes == 0) {
244                         trySleep(100);
245                         continue;
246                     }
247                     if (recognizer.acceptWaveForm(b, nbytes)) {
248                         lastInputTime = System.currentTimeMillis();
249                         var result = recognizer.getResult();
250                         logger.debug("Result: {}", result);
251                         ObjectMapper mapper = new ObjectMapper();
252                         var json = mapper.readTree(result);
253                         transcriptBuilder.append(json.get("text").asText()).append(" ");
254                         if (config.singleUtteranceMode) {
255                             break;
256                         }
257                     } else {
258                         logger.debug("Partial: {}", recognizer.getPartialResult());
259                     }
260                 }
261                 if (!aborted.get()) {
262                     sttListener.sttEventReceived(new RecognitionStopEvent());
263                     var transcript = transcriptBuilder.toString().trim();
264                     logger.debug("Final: {}", transcript);
265                     if (!transcript.isBlank()) {
266                         sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
267                     } else {
268                         if (!config.noResultsMessage.isBlank()) {
269                             sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
270                         } else {
271                             sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
272                         }
273                     }
274                 }
275             } catch (IOException e) {
276                 logger.warn("Error running speech to text: {}", e.getMessage());
277                 if (config.errorMessage.isBlank()) {
278                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
279                 } else {
280                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
281                 }
282             } catch (UnsatisfiedLinkError e) {
283                 logger.warn("Missing native dependency: {}", e.getMessage());
284                 if (config.errorMessage.isBlank()) {
285                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
286                 } else {
287                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
288                 }
289             } finally {
290                 if (recognizer != null) {
291                     recognizer.close();
292                 }
293                 if (!config.preloadModel && model != null) {
294                     model.close();
295                 }
296             }
297             try {
298                 audioStream.close();
299             } catch (IOException e) {
300                 logger.warn("IOException on close: {}", e.getMessage());
301             }
302         });
303     }
304
305     private void trySleep(long ms) {
306         try {
307             Thread.sleep(ms);
308         } catch (InterruptedException ignored) {
309         }
310     }
311
312     private boolean isExpiredInterval(long interval, long referenceTime) {
313         return System.currentTimeMillis() - referenceTime > interval;
314     }
315
316     private void loadSharedLibrariesArmv7l() {
317         logger.debug("loading required shared libraries for linux arm");
318         var libatomicArmLibPath = Path.of("/usr/lib/arm-linux-gnueabihf/libatomic.so.1");
319         if (libatomicArmLibPath.toFile().exists()) {
320             var libatomicArmLibFolderPath = libatomicArmLibPath.getParent().toAbsolutePath();
321             String libraryPath = System.getProperty("jna.library.path", System.getProperty("java.library.path"));
322             if (!libraryPath.contains(libatomicArmLibFolderPath.toString())) {
323                 libraryPath = libatomicArmLibFolderPath + "/:" + libraryPath;
324                 System.setProperty("jna.library.path", libraryPath);
325                 logger.debug("jna library path updated: {}", libraryPath);
326             }
327             NativeLibrary.getInstance("libatomic");
328             logger.debug("loaded libatomic shared library");
329         } else {
330             throw new LinkageError("Required shared library libatomic is missing");
331         }
332     }
333 }