]> git.basschouten.com Git - openhab-addons.git/blob
b4d38392140bb691b920fc69d01de31d482632e6
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2024 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.voskstt.internal;
14
15 import static org.openhab.voice.voskstt.internal.VoskSTTConstants.*;
16
17 import java.io.File;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.nio.file.Path;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.concurrent.Future;
25 import java.util.concurrent.ScheduledExecutorService;
26 import java.util.concurrent.atomic.AtomicBoolean;
27
28 import org.eclipse.jdt.annotation.NonNullByDefault;
29 import org.eclipse.jdt.annotation.Nullable;
30 import org.openhab.core.OpenHAB;
31 import org.openhab.core.audio.AudioFormat;
32 import org.openhab.core.audio.AudioStream;
33 import org.openhab.core.audio.utils.AudioWaveUtils;
34 import org.openhab.core.common.ThreadPoolManager;
35 import org.openhab.core.config.core.ConfigurableService;
36 import org.openhab.core.config.core.Configuration;
37 import org.openhab.core.io.rest.LocaleService;
38 import org.openhab.core.voice.RecognitionStartEvent;
39 import org.openhab.core.voice.RecognitionStopEvent;
40 import org.openhab.core.voice.STTException;
41 import org.openhab.core.voice.STTListener;
42 import org.openhab.core.voice.STTService;
43 import org.openhab.core.voice.STTServiceHandle;
44 import org.openhab.core.voice.SpeechRecognitionErrorEvent;
45 import org.openhab.core.voice.SpeechRecognitionEvent;
46 import org.osgi.framework.Constants;
47 import org.osgi.service.component.annotations.Activate;
48 import org.osgi.service.component.annotations.Component;
49 import org.osgi.service.component.annotations.Deactivate;
50 import org.osgi.service.component.annotations.Modified;
51 import org.osgi.service.component.annotations.Reference;
52 import org.slf4j.Logger;
53 import org.slf4j.LoggerFactory;
54 import org.vosk.LibVosk;
55 import org.vosk.LogLevel;
56 import org.vosk.Model;
57 import org.vosk.Recognizer;
58
59 import com.fasterxml.jackson.databind.ObjectMapper;
60 import com.sun.jna.NativeLibrary;
61
62 /**
63  * The {@link VoskSTTService} class is a service implementation to use Vosk-API for Speech-to-Text.
64  *
65  * @author Miguel Álvarez - Initial contribution
66  */
67 @NonNullByDefault
68 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
69 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
70         + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
71 public class VoskSTTService implements STTService {
72     private static final String VOSK_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "vosk").toString();
73     private static final String MODEL_PATH = Path.of(VOSK_FOLDER, "model").toString();
74     static {
75         Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
76         File directory = new File(VOSK_FOLDER);
77         if (!directory.exists()) {
78             if (directory.mkdir()) {
79                 logger.info("vosk dir created {}", VOSK_FOLDER);
80             }
81         }
82     }
83     private final Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
84     private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-voskstt");
85     private final LocaleService localeService;
86     private VoskSTTConfiguration config = new VoskSTTConfiguration();
87     private @Nullable Model model;
88
89     @Activate
90     public VoskSTTService(@Reference LocaleService localeService) {
91         this.localeService = localeService;
92     }
93
94     @Activate
95     protected void activate(Map<String, Object> config) {
96         try {
97             String osName = System.getProperty("os.name", "generic").toLowerCase();
98             String osArch = System.getProperty("os.arch", "").toLowerCase();
99             if (osName.contains("linux") && ("arm".equals(osArch) || "armv7l".equals(osArch))) {
100                 // workaround for loading required shared libraries
101                 loadSharedLibrariesArmv7l();
102             }
103             LibVosk.setLogLevel(LogLevel.WARNINGS);
104             configChange(config);
105         } catch (LinkageError e) {
106             logger.warn("LinkageError, service will not work: {}", e.getMessage());
107         }
108     }
109
110     @Modified
111     protected void modified(Map<String, Object> config) {
112         configChange(config);
113     }
114
115     @Deactivate
116     protected void deactivate(Map<String, Object> config) {
117         try {
118             unloadModel();
119         } catch (IOException e) {
120             logger.warn("IOException unloading model: {}", e.getMessage());
121         }
122     }
123
124     private void configChange(Map<String, Object> config) {
125         this.config = new Configuration(config).as(VoskSTTConfiguration.class);
126         if (this.config.preloadModel) {
127             try {
128                 loadModel();
129             } catch (IOException e) {
130                 logger.warn("IOException loading model: {}", e.getMessage());
131             } catch (UnsatisfiedLinkError e) {
132                 logger.warn("Missing native dependency: {}", e.getMessage());
133             }
134         } else {
135             try {
136                 unloadModel();
137             } catch (IOException e) {
138                 logger.warn("IOException unloading model: {}", e.getMessage());
139             }
140         }
141     }
142
143     @Override
144     public String getId() {
145         return SERVICE_ID;
146     }
147
148     @Override
149     public String getLabel(@Nullable Locale locale) {
150         return SERVICE_NAME;
151     }
152
153     @Override
154     public Set<Locale> getSupportedLocales() {
155         // as it is not possible to determine the language of the model that was downloaded and setup by the user, it is
156         // assumed the language of the model is matching the locale of the openHAB server
157         return Set.of(localeService.getLocale(null));
158     }
159
160     @Override
161     public Set<AudioFormat> getSupportedFormats() {
162         return Set.of(
163                 new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L),
164                 new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L));
165     }
166
167     @Override
168     public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
169             throws STTException {
170         AtomicBoolean aborted = new AtomicBoolean(false);
171         try {
172             AudioFormat format = audioStream.getFormat();
173             var frequency = format.getFrequency();
174             if (frequency == null) {
175                 throw new IOException("missing audio stream frequency");
176             }
177             if (AudioFormat.CONTAINER_WAVE.equals(format.getContainer())) {
178                 AudioWaveUtils.removeFMT(audioStream);
179             }
180             backgroundRecognize(sttListener, audioStream, frequency, aborted);
181         } catch (IOException e) {
182             throw new STTException(e);
183         }
184         return () -> {
185             aborted.set(true);
186         };
187     }
188
189     private Model getModel() throws IOException, UnsatisfiedLinkError {
190         var model = this.model;
191         if (model != null) {
192             return model;
193         }
194         return loadModel();
195     }
196
197     private Model loadModel() throws IOException, UnsatisfiedLinkError {
198         unloadModel();
199         var modelFile = new File(MODEL_PATH);
200         if (!modelFile.exists() || !modelFile.isDirectory()) {
201             throw new IOException("missing model dir: " + MODEL_PATH);
202         }
203         logger.debug("loading model");
204         var model = new Model(MODEL_PATH);
205         if (config.preloadModel) {
206             this.model = model;
207         }
208         return model;
209     }
210
211     private void unloadModel() throws IOException {
212         var model = this.model;
213         if (model != null) {
214             logger.debug("unloading model");
215             model.close();
216             this.model = null;
217         }
218     }
219
220     private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
221             AtomicBoolean aborted) {
222         StringBuilder transcriptBuilder = new StringBuilder();
223         long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
224         long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
225         long startTime = System.currentTimeMillis();
226         return executor.submit(() -> {
227             Recognizer recognizer = null;
228             Model model = null;
229             try {
230                 model = getModel();
231                 recognizer = new Recognizer(model, frequency);
232                 long lastInputTime = System.currentTimeMillis();
233                 int nbytes;
234                 byte[] b = new byte[4096];
235                 sttListener.sttEventReceived(new RecognitionStartEvent());
236                 while (!aborted.get()) {
237                     nbytes = audioStream.read(b);
238                     if (aborted.get()) {
239                         break;
240                     }
241                     if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
242                         logger.debug("Stops listening, max transcription time reached");
243                         break;
244                     }
245                     if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
246                         logger.debug("Stops listening, max silence time reached");
247                         break;
248                     }
249                     if (nbytes == 0) {
250                         trySleep(100);
251                         continue;
252                     }
253                     if (recognizer.acceptWaveForm(b, nbytes)) {
254                         lastInputTime = System.currentTimeMillis();
255                         var result = recognizer.getResult();
256                         logger.debug("Result: {}", result);
257                         ObjectMapper mapper = new ObjectMapper();
258                         var json = mapper.readTree(result);
259                         transcriptBuilder.append(json.get("text").asText()).append(" ");
260                         if (config.singleUtteranceMode) {
261                             break;
262                         }
263                     } else {
264                         logger.debug("Partial: {}", recognizer.getPartialResult());
265                     }
266                 }
267                 if (!aborted.get()) {
268                     sttListener.sttEventReceived(new RecognitionStopEvent());
269                     var transcript = transcriptBuilder.toString().trim();
270                     logger.debug("Final: {}", transcript);
271                     if (!transcript.isBlank()) {
272                         sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
273                     } else {
274                         if (!config.noResultsMessage.isBlank()) {
275                             sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
276                         } else {
277                             sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
278                         }
279                     }
280                 }
281             } catch (IOException e) {
282                 logger.warn("Error running speech to text: {}", e.getMessage());
283                 if (config.errorMessage.isBlank()) {
284                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
285                 } else {
286                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
287                 }
288             } catch (UnsatisfiedLinkError e) {
289                 logger.warn("Missing native dependency: {}", e.getMessage());
290                 if (config.errorMessage.isBlank()) {
291                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
292                 } else {
293                     sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
294                 }
295             } finally {
296                 if (recognizer != null) {
297                     recognizer.close();
298                 }
299                 if (!config.preloadModel && model != null) {
300                     model.close();
301                 }
302             }
303             try {
304                 audioStream.close();
305             } catch (IOException e) {
306                 logger.warn("IOException on close: {}", e.getMessage());
307             }
308         });
309     }
310
311     private void trySleep(long ms) {
312         try {
313             Thread.sleep(ms);
314         } catch (InterruptedException ignored) {
315         }
316     }
317
318     private boolean isExpiredInterval(long interval, long referenceTime) {
319         return System.currentTimeMillis() - referenceTime > interval;
320     }
321
322     private void loadSharedLibrariesArmv7l() {
323         logger.debug("loading required shared libraries for linux arm");
324         var libatomicArmLibPath = Path.of("/usr/lib/arm-linux-gnueabihf/libatomic.so.1");
325         if (libatomicArmLibPath.toFile().exists()) {
326             var libatomicArmLibFolderPath = libatomicArmLibPath.getParent().toAbsolutePath();
327             String libraryPath = System.getProperty("jna.library.path", System.getProperty("java.library.path"));
328             if (!libraryPath.contains(libatomicArmLibFolderPath.toString())) {
329                 libraryPath = libatomicArmLibFolderPath + "/:" + libraryPath;
330                 System.setProperty("jna.library.path", libraryPath);
331                 logger.debug("jna library path updated: {}", libraryPath);
332             }
333             NativeLibrary.getInstance("libatomic");
334             logger.debug("loaded libatomic shared library");
335         } else {
336             throw new LinkageError("Required shared library libatomic is missing");
337         }
338     }
339 }