2 * Copyright (c) 2010-2024 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.voskstt.internal;
15 import static org.openhab.voice.voskstt.internal.VoskSTTConstants.*;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.nio.file.Path;
21 import java.util.Locale;
24 import java.util.concurrent.Future;
25 import java.util.concurrent.ScheduledExecutorService;
26 import java.util.concurrent.atomic.AtomicBoolean;
28 import org.eclipse.jdt.annotation.NonNullByDefault;
29 import org.eclipse.jdt.annotation.Nullable;
30 import org.openhab.core.OpenHAB;
31 import org.openhab.core.audio.AudioFormat;
32 import org.openhab.core.audio.AudioStream;
33 import org.openhab.core.audio.utils.AudioWaveUtils;
34 import org.openhab.core.common.ThreadPoolManager;
35 import org.openhab.core.config.core.ConfigurableService;
36 import org.openhab.core.config.core.Configuration;
37 import org.openhab.core.io.rest.LocaleService;
38 import org.openhab.core.voice.RecognitionStartEvent;
39 import org.openhab.core.voice.RecognitionStopEvent;
40 import org.openhab.core.voice.STTException;
41 import org.openhab.core.voice.STTListener;
42 import org.openhab.core.voice.STTService;
43 import org.openhab.core.voice.STTServiceHandle;
44 import org.openhab.core.voice.SpeechRecognitionErrorEvent;
45 import org.openhab.core.voice.SpeechRecognitionEvent;
46 import org.osgi.framework.Constants;
47 import org.osgi.service.component.annotations.Activate;
48 import org.osgi.service.component.annotations.Component;
49 import org.osgi.service.component.annotations.Deactivate;
50 import org.osgi.service.component.annotations.Modified;
51 import org.osgi.service.component.annotations.Reference;
52 import org.slf4j.Logger;
53 import org.slf4j.LoggerFactory;
54 import org.vosk.LibVosk;
55 import org.vosk.LogLevel;
56 import org.vosk.Model;
57 import org.vosk.Recognizer;
59 import com.fasterxml.jackson.databind.ObjectMapper;
60 import com.sun.jna.NativeLibrary;
63 * The {@link VoskSTTService} class is a service implementation to use Vosk-API for Speech-to-Text.
65 * @author Miguel Álvarez - Initial contribution
68 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
69 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
70 + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
71 public class VoskSTTService implements STTService {
/** Folder named "vosk" inside the openHAB user data folder. */
private static final String VOSK_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "vosk").toString();
/** Path of the "model" entry inside {@link #VOSK_FOLDER}. */
private static final String MODEL_PATH = Path.of(VOSK_FOLDER, "model").toString();

static {
    // Create the vosk folder once, when the class is initialized.
    Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
    File directory = new File(VOSK_FOLDER);
    if (!directory.exists()) {
        if (directory.mkdir()) {
            logger.info("vosk dir created {}", VOSK_FOLDER);
        } else if (!directory.isDirectory()) {
            // mkdir() reports failure only through its return value; make it visible in the log.
            // The isDirectory() check avoids a false warning when the folder appeared in the meantime.
            logger.warn("unable to create vosk dir {}", VOSK_FOLDER);
        }
    }
}
private final Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
/** Scheduled pool named "OH-voice-voskstt" on which recognition tasks are submitted. */
private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-voskstt");
/** Source of the locale reported by {@link #getSupportedLocales()}. */
private final LocaleService localeService;
/** Current configuration; replaced each time a configuration is applied. */
private VoskSTTConfiguration config = new VoskSTTConfiguration();
/** Model instance held by the service, or {@code null} when none is held. */
private @Nullable Model model;

/**
 * Creates the service with its injected dependency.
 *
 * @param localeService the locale service reference
 */
@Activate
public VoskSTTService(@Reference LocaleService localeService) {
    this.localeService = localeService;
}
95 protected void activate(Map<String, Object> config) {
97 String osName = System.getProperty("os.name", "generic").toLowerCase();
98 String osArch = System.getProperty("os.arch", "").toLowerCase();
99 if (osName.contains("linux") && ("arm".equals(osArch) || "armv7l".equals(osArch))) {
100 // workaround for loading required shared libraries
101 loadSharedLibrariesArmv7l();
103 LibVosk.setLogLevel(LogLevel.WARNINGS);
104 configChange(config);
105 } catch (LinkageError e) {
106 logger.warn("LinkageError, service will not work: {}", e.getMessage());
111 protected void modified(Map<String, Object> config) {
112 configChange(config);
116 protected void deactivate(Map<String, Object> config) {
119 } catch (IOException e) {
120 logger.warn("IOException unloading model: {}", e.getMessage());
124 private void configChange(Map<String, Object> config) {
125 this.config = new Configuration(config).as(VoskSTTConfiguration.class);
126 if (this.config.preloadModel) {
129 } catch (IOException e) {
130 logger.warn("IOException loading model: {}", e.getMessage());
131 } catch (UnsatisfiedLinkError e) {
132 logger.warn("Missing native dependency: {}", e.getMessage());
137 } catch (IOException e) {
138 logger.warn("IOException unloading model: {}", e.getMessage());
/**
 * Returns the identifier of this speech-to-text service.
 */
144 public String getId() {
/**
 * Returns the label of this speech-to-text service.
 *
 * @param locale the locale requested by the caller, may be {@code null}
 */
149 public String getLabel(@Nullable Locale locale) {
154 public Set<Locale> getSupportedLocales() {
155 // as it is not possible to determine the language of the model that was downloaded and setup by the user, it is
156 // assumed the language of the model is matching the locale of the openHAB server
157 return Set.of(localeService.getLocale(null));
161 public Set<AudioFormat> getSupportedFormats() {
163 new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L),
164 new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L));
168 public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
169 throws STTException {
170 AtomicBoolean aborted = new AtomicBoolean(false);
172 AudioFormat format = audioStream.getFormat();
173 var frequency = format.getFrequency();
174 if (frequency == null) {
175 throw new IOException("missing audio stream frequency");
177 if (AudioFormat.CONTAINER_WAVE.equals(format.getContainer())) {
178 AudioWaveUtils.removeFMT(audioStream);
180 backgroundRecognize(sttListener, audioStream, frequency, aborted);
181 } catch (IOException e) {
182 throw new STTException(e);
/**
 * Provides the Vosk {@link Model} to use for a recognition.
 * NOTE(review): presumably returns the instance held in the {@code model} field when there is one and loads
 * the model otherwise - confirm.
 *
 * @throws IOException see the signature; presumably propagated from loading the model - confirm
 * @throws UnsatisfiedLinkError see the signature; presumably raised when a native dependency is missing -
 *             confirm
 */
189 private Model getModel() throws IOException, UnsatisfiedLinkError {
// read the nullable field once into a local variable
190 var model = this.model;
/**
 * Creates a Vosk {@link Model} from the directory at {@code MODEL_PATH}.
 *
 * @throws IOException if {@code MODEL_PATH} does not exist or is not a directory
 * @throws UnsatisfiedLinkError see the signature; presumably raised when a native dependency is missing -
 *             confirm
 */
197 private Model loadModel() throws IOException, UnsatisfiedLinkError {
199 var modelFile = new File(MODEL_PATH);
// the model has to be an existing directory
200 if (!modelFile.exists() || !modelFile.isDirectory()) {
201 throw new IOException("missing model dir: " + MODEL_PATH);
203 logger.debug("loading model");
204 var model = new Model(MODEL_PATH);
// NOTE(review): presumably the new instance is only kept in the field when preloading is enabled - confirm
205 if (config.preloadModel) {
/**
 * Releases the model referenced by the {@code model} field.
 * NOTE(review): presumably closes the model and clears the field when one is held - confirm.
 *
 * @throws IOException see the signature; presumably raised while closing the model - confirm
 */
211 private void unloadModel() throws IOException {
// read the nullable field once into a local variable
212 var model = this.model;
214 logger.debug("unloading model");
/**
 * Submits the recognition task to the executor.
 * <p>
 * The task creates a {@link Recognizer} for the given frequency, reads the audio stream into a 4096 byte
 * buffer and feeds the data to the recognizer. The "text" field of every completed result is appended to the
 * transcript. The listener receives a {@link RecognitionStartEvent} before reading starts and, unless the
 * recognition was aborted, a {@link RecognitionStopEvent} followed by either a {@link SpeechRecognitionEvent}
 * with the transcript or a {@link SpeechRecognitionErrorEvent}.
 *
 * @param sttListener receiver of the recognition events
 * @param audioStream the audio data to read
 * @param frequency value passed to the {@link Recognizer} constructor
 * @param aborted flag checked by the task; once set, reading stops and no stop/result events are sent
 * @return the future returned by the executor for the submitted task
 */
220 private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
221 AtomicBoolean aborted) {
222 StringBuilder transcriptBuilder = new StringBuilder();
// the configured limits are given in seconds; convert them to milliseconds
223 long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
224 long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
// reference time for the overall transcription limit, taken before the task is submitted
225 long startTime = System.currentTimeMillis();
226 return executor.submit(() -> {
227 Recognizer recognizer = null;
// NOTE(review): presumably `model` is the instance obtained through getModel() - confirm
231 recognizer = new Recognizer(model, frequency);
// reference time for the silence limit; refreshed whenever a result is completed
232 long lastInputTime = System.currentTimeMillis();
234 byte[] b = new byte[4096];
235 sttListener.sttEventReceived(new RecognitionStartEvent());
236 while (!aborted.get()) {
237 nbytes = audioStream.read(b);
// NOTE(review): presumably each of the two time limit checks below leaves the loop - confirm
241 if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
242 logger.debug("Stops listening, max transcription time reached");
// the silence limit is not evaluated in single utterance mode
245 if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
246 logger.debug("Stops listening, max silence time reached");
// NOTE(review): InputStream.read returns -1 at end of stream; verify that this value is handled before it
// can reach acceptWaveForm
253 if (recognizer.acceptWaveForm(b, nbytes)) {
254 lastInputTime = System.currentTimeMillis();
255 var result = recognizer.getResult();
256 logger.debug("Result: {}", result);
// NOTE(review): a new ObjectMapper is created for every result; a single reused instance would avoid the
// repeated setup
257 ObjectMapper mapper = new ObjectMapper();
258 var json = mapper.readTree(result);
// results are concatenated, each followed by a single space (trimmed from the final transcript)
259 transcriptBuilder.append(json.get("text").asText()).append(" ");
260 if (config.singleUtteranceMode) {
264 logger.debug("Partial: {}", recognizer.getPartialResult());
// stop and result events are only sent when the recognition was not aborted
267 if (!aborted.get()) {
268 sttListener.sttEventReceived(new RecognitionStopEvent());
269 var transcript = transcriptBuilder.toString().trim();
270 logger.debug("Final: {}", transcript);
271 if (!transcript.isBlank()) {
// the transcript is reported with a confidence of 1F
272 sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
// report the configured no-results message when it is not blank, the literal "No results" otherwise
274 if (!config.noResultsMessage.isBlank()) {
275 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
277 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
281 } catch (IOException e) {
282 logger.warn("Error running speech to text: {}", e.getMessage());
// report the literal "Error" when no error message is configured, the configured message otherwise
283 if (config.errorMessage.isBlank()) {
284 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
286 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
288 } catch (UnsatisfiedLinkError e) {
289 logger.warn("Missing native dependency: {}", e.getMessage());
// same error reporting as for the IOException case above
290 if (config.errorMessage.isBlank()) {
291 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
293 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
// NOTE(review): presumably the recognizer is closed here when it was created - confirm
296 if (recognizer != null) {
// the model is only handled here when preloading is disabled; NOTE(review): presumably it is closed - confirm
299 if (!config.preloadModel && model != null) {
// NOTE(review): presumably guards the closing of the audio stream - confirm
305 } catch (IOException e) {
306 logger.warn("IOException on close: {}", e.getMessage());
311 private void trySleep(long ms) {
314 } catch (InterruptedException ignored) {
318 private boolean isExpiredInterval(long interval, long referenceTime) {
319 return System.currentTimeMillis() - referenceTime > interval;
322 private void loadSharedLibrariesArmv7l() {
323 logger.debug("loading required shared libraries for linux arm");
324 var libatomicArmLibPath = Path.of("/usr/lib/arm-linux-gnueabihf/libatomic.so.1");
325 if (libatomicArmLibPath.toFile().exists()) {
326 var libatomicArmLibFolderPath = libatomicArmLibPath.getParent().toAbsolutePath();
327 String libraryPath = System.getProperty("jna.library.path", System.getProperty("java.library.path"));
328 if (!libraryPath.contains(libatomicArmLibFolderPath.toString())) {
329 libraryPath = libatomicArmLibFolderPath + "/:" + libraryPath;
330 System.setProperty("jna.library.path", libraryPath);
331 logger.debug("jna library path updated: {}", libraryPath);
333 NativeLibrary.getInstance("libatomic");
334 logger.debug("loaded libatomic shared library");
336 throw new LinkageError("Required shared library libatomic is missing");