/**
 * Copyright (c) 2010-2023 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
package org.openhab.voice.voskstt.internal;

import static org.openhab.voice.voskstt.internal.VoskSTTConstants.*;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.common.ThreadPoolManager;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.config.core.Configuration;
import org.openhab.core.io.rest.LocaleService;
import org.openhab.core.voice.RecognitionStartEvent;
import org.openhab.core.voice.RecognitionStopEvent;
import org.openhab.core.voice.STTException;
import org.openhab.core.voice.STTListener;
import org.openhab.core.voice.STTService;
import org.openhab.core.voice.STTServiceHandle;
import org.openhab.core.voice.SpeechRecognitionErrorEvent;
import org.openhab.core.voice.SpeechRecognitionEvent;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.sun.jna.NativeLibrary;
62 * The {@link VoskSTTService} class is a service implementation to use Vosk-API for Speech-to-Text.
64 * @author Miguel Álvarez - Initial contribution
67 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
68 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
69 + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
70 public class VoskSTTService implements STTService {
71 private static final String VOSK_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "vosk").toString();
72 private static final String MODEL_PATH = Path.of(VOSK_FOLDER, "model").toString();
74 Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
75 File directory = new File(VOSK_FOLDER);
76 if (!directory.exists()) {
77 if (directory.mkdir()) {
78 logger.info("vosk dir created {}", VOSK_FOLDER);
82 private final Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
83 private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-voskstt");
84 private final LocaleService localeService;
85 private VoskSTTConfiguration config = new VoskSTTConfiguration();
86 private @Nullable Model model;
89 public VoskSTTService(@Reference LocaleService localeService) {
90 this.localeService = localeService;
94 protected void activate(Map<String, Object> config) {
96 String osName = System.getProperty("os.name", "generic").toLowerCase();
97 String osArch = System.getProperty("os.arch", "").toLowerCase();
98 if (osName.contains("linux") && (osArch.equals("arm") || osArch.equals("armv7l"))) {
99 // workaround for loading required shared libraries
100 loadSharedLibrariesArmv7l();
102 LibVosk.setLogLevel(LogLevel.WARNINGS);
103 configChange(config);
104 } catch (LinkageError e) {
105 logger.warn("LinkageError, service will not work: {}", e.getMessage());
110 protected void modified(Map<String, Object> config) {
111 configChange(config);
115 protected void deactivate(Map<String, Object> config) {
118 } catch (IOException e) {
119 logger.warn("IOException unloading model: {}", e.getMessage());
123 private void configChange(Map<String, Object> config) {
124 this.config = new Configuration(config).as(VoskSTTConfiguration.class);
125 if (this.config.preloadModel) {
128 } catch (IOException e) {
129 logger.warn("IOException loading model: {}", e.getMessage());
130 } catch (UnsatisfiedLinkError e) {
131 logger.warn("Missing native dependency: {}", e.getMessage());
136 } catch (IOException e) {
137 logger.warn("IOException unloading model: {}", e.getMessage());
143 public String getId() {
148 public String getLabel(@Nullable Locale locale) {
153 public Set<Locale> getSupportedLocales() {
154 // as it is not possible to determine the language of the model that was downloaded and setup by the user, it is
155 // assumed the language of the model is matching the locale of the openHAB server
156 return Set.of(localeService.getLocale(null));
160 public Set<AudioFormat> getSupportedFormats() {
162 new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L));
166 public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
167 throws STTException {
168 AtomicBoolean aborted = new AtomicBoolean(false);
170 var frequency = audioStream.getFormat().getFrequency();
171 if (frequency == null) {
172 throw new IOException("missing audio stream frequency");
174 backgroundRecognize(sttListener, audioStream, frequency, aborted);
175 } catch (IOException e) {
176 throw new STTException(e);
183 private Model getModel() throws IOException, UnsatisfiedLinkError {
184 var model = this.model;
191 private Model loadModel() throws IOException, UnsatisfiedLinkError {
193 var modelFile = new File(MODEL_PATH);
194 if (!modelFile.exists() || !modelFile.isDirectory()) {
195 throw new IOException("missing model dir: " + MODEL_PATH);
197 logger.debug("loading model");
198 var model = new Model(MODEL_PATH);
199 if (config.preloadModel) {
205 private void unloadModel() throws IOException {
206 var model = this.model;
208 logger.debug("unloading model");
214 private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
215 AtomicBoolean aborted) {
216 StringBuilder transcriptBuilder = new StringBuilder();
217 long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
218 long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
219 long startTime = System.currentTimeMillis();
220 return executor.submit(() -> {
221 Recognizer recognizer = null;
225 recognizer = new Recognizer(model, frequency);
226 long lastInputTime = System.currentTimeMillis();
228 byte[] b = new byte[4096];
229 sttListener.sttEventReceived(new RecognitionStartEvent());
230 while (!aborted.get()) {
231 nbytes = audioStream.read(b);
235 if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
236 logger.debug("Stops listening, max transcription time reached");
239 if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
240 logger.debug("Stops listening, max silence time reached");
247 if (recognizer.acceptWaveForm(b, nbytes)) {
248 lastInputTime = System.currentTimeMillis();
249 var result = recognizer.getResult();
250 logger.debug("Result: {}", result);
251 ObjectMapper mapper = new ObjectMapper();
252 var json = mapper.readTree(result);
253 transcriptBuilder.append(json.get("text").asText()).append(" ");
254 if (config.singleUtteranceMode) {
258 logger.debug("Partial: {}", recognizer.getPartialResult());
261 if (!aborted.get()) {
262 sttListener.sttEventReceived(new RecognitionStopEvent());
263 var transcript = transcriptBuilder.toString().trim();
264 logger.debug("Final: {}", transcript);
265 if (!transcript.isBlank()) {
266 sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
268 if (!config.noResultsMessage.isBlank()) {
269 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
271 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
275 } catch (IOException e) {
276 logger.warn("Error running speech to text: {}", e.getMessage());
277 if (config.errorMessage.isBlank()) {
278 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
280 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
282 } catch (UnsatisfiedLinkError e) {
283 logger.warn("Missing native dependency: {}", e.getMessage());
284 if (config.errorMessage.isBlank()) {
285 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
287 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
290 if (recognizer != null) {
293 if (!config.preloadModel && model != null) {
299 } catch (IOException e) {
300 logger.warn("IOException on close: {}", e.getMessage());
305 private void trySleep(long ms) {
308 } catch (InterruptedException ignored) {
312 private boolean isExpiredInterval(long interval, long referenceTime) {
313 return System.currentTimeMillis() - referenceTime > interval;
316 private void loadSharedLibrariesArmv7l() {
317 logger.debug("loading required shared libraries for linux arm");
318 var libatomicArmLibPath = Path.of("/usr/lib/arm-linux-gnueabihf/libatomic.so.1");
319 if (libatomicArmLibPath.toFile().exists()) {
320 var libatomicArmLibFolderPath = libatomicArmLibPath.getParent().toAbsolutePath();
321 String libraryPath = System.getProperty("jna.library.path", System.getProperty("java.library.path"));
322 if (!libraryPath.contains(libatomicArmLibFolderPath.toString())) {
323 libraryPath = libatomicArmLibFolderPath + "/:" + libraryPath;
324 System.setProperty("jna.library.path", libraryPath);
325 logger.debug("jna library path updated: {}", libraryPath);
327 NativeLibrary.getInstance("libatomic");
328 logger.debug("loaded libatomic shared library");
330 throw new LinkageError("Required shared library libatomic is missing");