/**
 * Copyright (c) 2010-2023 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
package org.openhab.voice.voskstt.internal;

import static org.openhab.voice.voskstt.internal.VoskSTTConstants.*;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.common.ThreadPoolManager;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.config.core.Configuration;
import org.openhab.core.io.rest.LocaleService;
import org.openhab.core.voice.RecognitionStartEvent;
import org.openhab.core.voice.RecognitionStopEvent;
import org.openhab.core.voice.STTException;
import org.openhab.core.voice.STTListener;
import org.openhab.core.voice.STTService;
import org.openhab.core.voice.STTServiceHandle;
import org.openhab.core.voice.SpeechRecognitionErrorEvent;
import org.openhab.core.voice.SpeechRecognitionEvent;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.sun.jna.NativeLibrary;
62 * The {@link VoskSTTService} class is a service implementation to use Vosk-API for Speech-to-Text.
64 * @author Miguel Álvarez - Initial contribution
67 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
68 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
69 + " Speech-to-Text", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
70 public class VoskSTTService implements STTService {
71 private static final String VOSK_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "vosk").toString();
72 private static final String MODEL_PATH = Path.of(VOSK_FOLDER, "model").toString();
74 Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
75 File directory = new File(VOSK_FOLDER);
76 if (!directory.exists()) {
77 if (directory.mkdir()) {
78 logger.info("vosk dir created {}", VOSK_FOLDER);
82 private final Logger logger = LoggerFactory.getLogger(VoskSTTService.class);
83 private final ScheduledExecutorService executor = ThreadPoolManager.getScheduledPool("OH-voice-voskstt");
84 private final LocaleService localeService;
85 private VoskSTTConfiguration config = new VoskSTTConfiguration();
86 private @Nullable Model model;
89 public VoskSTTService(@Reference LocaleService localeService) {
90 this.localeService = localeService;
94 protected void activate(Map<String, Object> config) {
96 String osName = System.getProperty("os.name", "generic").toLowerCase();
97 String osArch = System.getProperty("os.arch", "").toLowerCase();
98 if (osName.contains("linux") && (osArch.equals("arm") || osArch.equals("armv7l"))) {
99 // workaround for loading required shared libraries
100 loadSharedLibrariesArmv7l();
102 LibVosk.setLogLevel(LogLevel.WARNINGS);
103 configChange(config);
104 } catch (LinkageError e) {
105 logger.warn("LinkageError, service will not work: {}", e.getMessage());
110 protected void modified(Map<String, Object> config) {
111 configChange(config);
115 protected void deactivate(Map<String, Object> config) {
118 } catch (IOException e) {
119 logger.warn("IOException unloading model: {}", e.getMessage());
123 private void configChange(Map<String, Object> config) {
124 this.config = new Configuration(config).as(VoskSTTConfiguration.class);
125 if (this.config.preloadModel) {
128 } catch (IOException e) {
129 logger.warn("IOException loading model: {}", e.getMessage());
130 } catch (UnsatisfiedLinkError e) {
131 logger.warn("Missing native dependency: {}", e.getMessage());
136 } catch (IOException e) {
137 logger.warn("IOException unloading model: {}", e.getMessage());
143 public String getId() {
148 public String getLabel(@Nullable Locale locale) {
153 public Set<Locale> getSupportedLocales() {
154 // as it is not possible to determine the language of the model that was downloaded and setup by the user, it is
155 // assumed the language of the model is matching the locale of the openHAB server
156 return Set.of(localeService.getLocale(null));
160 public Set<AudioFormat> getSupportedFormats() {
162 new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, 16000L));
166 public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
167 throws STTException {
168 AtomicBoolean aborted = new AtomicBoolean(false);
170 var frequency = audioStream.getFormat().getFrequency();
171 if (frequency == null) {
172 throw new IOException("missing audio stream frequency");
174 backgroundRecognize(sttListener, audioStream, frequency, aborted);
175 } catch (IOException e) {
176 throw new STTException(e);
183 private Model getModel() throws IOException, UnsatisfiedLinkError {
184 var model = this.model;
191 private Model loadModel() throws IOException, UnsatisfiedLinkError {
193 var modelFile = new File(MODEL_PATH);
194 if (!modelFile.exists() || !modelFile.isDirectory()) {
195 throw new IOException("missing model dir: " + MODEL_PATH);
197 logger.debug("loading model");
198 var model = new Model(MODEL_PATH);
199 if (config.preloadModel) {
205 private void unloadModel() throws IOException {
206 var model = this.model;
208 logger.debug("unloading model");
214 private Future<?> backgroundRecognize(STTListener sttListener, InputStream audioStream, long frequency,
215 AtomicBoolean aborted) {
216 StringBuilder transcriptBuilder = new StringBuilder();
217 long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
218 long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
219 long startTime = System.currentTimeMillis();
220 return executor.submit(() -> {
221 Recognizer recognizer = null;
225 recognizer = new Recognizer(model, frequency);
226 long lastInputTime = System.currentTimeMillis();
228 byte[] b = new byte[4096];
229 sttListener.sttEventReceived(new RecognitionStartEvent());
230 while (!aborted.get()) {
231 nbytes = audioStream.read(b);
235 if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
236 logger.debug("Stops listening, max transcription time reached");
239 if (!config.singleUtteranceMode && isExpiredInterval(maxSilenceMillis, lastInputTime)) {
240 logger.debug("Stops listening, max silence time reached");
247 if (recognizer.acceptWaveForm(b, nbytes)) {
248 lastInputTime = System.currentTimeMillis();
249 var result = recognizer.getResult();
250 logger.debug("Result: {}", result);
251 ObjectMapper mapper = new ObjectMapper();
252 var json = mapper.readTree(result);
253 transcriptBuilder.append(json.get("text").asText()).append(" ");
254 if (config.singleUtteranceMode) {
258 logger.debug("Partial: {}", recognizer.getPartialResult());
261 if (!aborted.get()) {
262 sttListener.sttEventReceived(new RecognitionStopEvent());
263 var transcript = transcriptBuilder.toString().trim();
264 logger.debug("Final: {}", transcript);
265 if (!transcript.isBlank()) {
266 sttListener.sttEventReceived(new SpeechRecognitionEvent(transcript, 1F));
268 if (!config.noResultsMessage.isBlank()) {
269 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.noResultsMessage));
271 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No results"));
275 } catch (IOException e) {
276 logger.warn("Error running speech to text: {}", e.getMessage());
277 if (config.errorMessage.isBlank()) {
278 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
280 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
282 } catch (UnsatisfiedLinkError e) {
283 logger.warn("Missing native dependency: {}", e.getMessage());
284 if (config.errorMessage.isBlank()) {
285 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error"));
287 sttListener.sttEventReceived(new SpeechRecognitionErrorEvent(config.errorMessage));
290 if (recognizer != null) {
293 if (!config.preloadModel && model != null) {
299 } catch (IOException e) {
300 logger.warn("IOException on close: {}", e.getMessage());
305 private void trySleep(long ms) {
308 } catch (InterruptedException ignored) {
312 private boolean isExpiredInterval(long interval, long referenceTime) {
313 return System.currentTimeMillis() - referenceTime > interval;
316 private void loadSharedLibrariesArmv7l() {
317 logger.debug("loading required shared libraries for linux arm");
318 var libatomicArmLibPath = Path.of("/usr/lib/arm-linux-gnueabihf/libatomic.so.1");
319 if (libatomicArmLibPath.toFile().exists()) {
320 var libatomicArmLibFolderPath = libatomicArmLibPath.getParent().toAbsolutePath();
321 String libraryPath = System.getProperty("jna.library.path", System.getProperty("java.library.path"));
322 if (!libraryPath.contains(libatomicArmLibFolderPath.toString())) {
323 libraryPath = libatomicArmLibFolderPath + "/:" + libraryPath;
324 System.setProperty("jna.library.path", libraryPath);
325 logger.debug("jna library path updated: {}", libraryPath);
327 NativeLibrary.getInstance("libatomic");
328 logger.debug("loaded libatomic shared library");
330 throw new LinkageError("Required shared library libatomic is missing");