git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2023 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.mimic.internal;
  14
  15 import java.io.File;
  16 import java.io.IOException;
  17 import java.io.InputStream;
  18 import java.io.UnsupportedEncodingException;
  19 import java.net.URLEncoder;
  20 import java.nio.charset.StandardCharsets;
  21 import java.nio.file.Files;
  22 import java.nio.file.StandardCopyOption;
  23 import java.util.HashSet;
  24 import java.util.List;
  25 import java.util.Locale;
  26 import java.util.Map;
  27 import java.util.Set;
  28 import java.util.UUID;
  29 import java.util.concurrent.ExecutionException;
  30 import java.util.concurrent.TimeUnit;
  31 import java.util.concurrent.TimeoutException;
  32
  33 import org.eclipse.jdt.annotation.NonNullByDefault;
  34 import org.eclipse.jdt.annotation.Nullable;
  35 import org.eclipse.jetty.client.HttpClient;
  36 import org.eclipse.jetty.client.api.Response;
  37 import org.eclipse.jetty.client.util.InputStreamResponseListener;
  38 import org.eclipse.jetty.client.util.StringContentProvider;
  39 import org.eclipse.jetty.http.HttpHeader;
  40 import org.eclipse.jetty.http.HttpStatus;
  41 import org.openhab.core.OpenHAB;
  42 import org.openhab.core.audio.AudioException;
  43 import org.openhab.core.audio.AudioFormat;
  44 import org.openhab.core.audio.AudioStream;
  45 import org.openhab.core.config.core.ConfigurableService;
  46 import org.openhab.core.io.net.http.HttpClientFactory;
  47 import org.openhab.core.io.net.http.HttpRequestBuilder;
  48 import org.openhab.core.voice.TTSException;
  49 import org.openhab.core.voice.TTSService;
  50 import org.openhab.core.voice.Voice;
  51 import org.openhab.voice.mimic.internal.dto.VoiceDto;
  52 import org.osgi.framework.Constants;
  53 import org.osgi.service.component.annotations.Activate;
  54 import org.osgi.service.component.annotations.Component;
  55 import org.osgi.service.component.annotations.Modified;
  56 import org.osgi.service.component.annotations.Reference;
  57 import org.slf4j.Logger;
  58 import org.slf4j.LoggerFactory;
  59
  60 import com.google.gson.Gson;
  61 import com.google.gson.GsonBuilder;
  62 import com.google.gson.JsonSyntaxException;
  63
  64 /**
  65  * Mimic Voice service implementation.
  66  *
  67  * @author Gwendal Roulleau - Initial contribution
  68  */
  69 @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
  70         + MimicTTSService.SERVICE_PID)
  71 @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
  72         + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
  73 @NonNullByDefault
  74 public class MimicTTSService implements TTSService {
  75
  76     private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
  77
  78     static final String SERVICE_CATEGORY = "voice";
  79     static final String SERVICE_ID = "mimictts";
  80     static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
  81     static final String SERVICE_NAME = "Mimic";
  82
  83     /**
  84      * Configuration parameters
  85      */
  86     private static final String PARAM_URL = "url";
  87     private static final String PARAM_WORKAROUNDSERVLETSINK = "workaroundServletSink";
  88     private static final String PARAM_SPEAKINGRATE = "speakingRate";
  89     private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
  90     private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
  91
  92     /**
  93      * Url
  94      */
  95     private static final String LIST_VOICES_URL = "/api/voices";
  96     private static final String SYNTHETIZE_URL = "/api/tts";
  97
  98     /** The only wave format supported */
  99     private static final AudioFormat AUDIO_FORMAT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
 100             AudioFormat.CODEC_PCM_SIGNED, false, 16, 52000, 22050L, 1);
 101
 102     private Set<Voice> availableVoices = new HashSet<>();
 103
 104     private final MimicConfiguration config = new MimicConfiguration();
 105
 106     private final Gson gson = new GsonBuilder().create();
 107
 108     private final HttpClient httpClient;
 109
 110     @Activate
 111     public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
 112         updateConfig(config);
 113         this.httpClient = httpClientFactory.getCommonHttpClient();
 114     }
 115
 116     /**
 117      * Called by the framework when the configuration was updated.
 118      *
 119      * @param newConfig Updated configuration
 120      */
 121     @Modified
 122     private void updateConfig(Map<String, Object> newConfig) {
 123         logger.debug("Updating configuration");
 124
 125         // client id
 126         Object param = newConfig.get(PARAM_URL);
 127         if (param == null) {
 128             logger.warn("Missing URL to access Mimic TTS API. Using localhost");
 129         } else {
 130             config.url = param.toString();
 131         }
 132
 133         // workaround
 134         param = newConfig.get(PARAM_WORKAROUNDSERVLETSINK);
 135         if (param != null) {
 136             config.workaroundServletSink = Boolean.parseBoolean(param.toString());
 137         }
 138
 139         // audio volatility
 140         try {
 141             param = newConfig.get(PARAM_AUDIOVOLATITLITY);
 142             if (param != null) {
 143                 config.audioVolatility = Double.parseDouble(param.toString());
 144             }
 145         } catch (NumberFormatException e) {
 146             logger.warn("Cannot parse audioVolatility parameter. Using default");
 147         }
 148
 149         // phoneme volatility
 150         try {
 151             param = newConfig.get(PARAM_PHONEMEVOLATITLITY);
 152             if (param != null) {
 153                 config.phonemeVolatility = Double.parseDouble(param.toString());
 154             }
 155         } catch (NumberFormatException e) {
 156             logger.warn("Cannot parse phonemeVolatility parameter. Using default");
 157         }
 158
 159         // speakingRate
 160         try {
 161             param = newConfig.get(PARAM_SPEAKINGRATE);
 162             if (param != null) {
 163                 config.speakingRate = Double.parseDouble(param.toString());
 164             }
 165         } catch (NumberFormatException e) {
 166             logger.warn("Cannot parse speakingRate parameter. Using default");
 167         }
 168
 169         refreshVoices();
 170     }
 171
 172     @Override
 173     public String getId() {
 174         return SERVICE_ID;
 175     }
 176
 177     @Override
 178     public String getLabel(@Nullable Locale locale) {
 179         return SERVICE_NAME;
 180     }
 181
 182     @Override
 183     public Set<Voice> getAvailableVoices() {
 184         return availableVoices;
 185     }
 186
 187     public void refreshVoices() {
 188         String url = config.url + LIST_VOICES_URL;
 189         availableVoices.clear();
 190         try {
 191             String responseVoices = HttpRequestBuilder.getFrom(url).getContentAsString();
 192             VoiceDto[] mimicVoiceResponse = gson.fromJson(responseVoices, VoiceDto[].class);
 193             if (mimicVoiceResponse == null) {
 194                 logger.warn("Cannot get mimic voices from the URL {}", url);
 195                 return;
 196             } else if (mimicVoiceResponse.length == 0) {
 197                 logger.debug("Voice set response from Mimic is empty ?!");
 198                 return;
 199             }
 200             for (VoiceDto voiceDto : mimicVoiceResponse) {
 201                 List<String> speakers = voiceDto.speakers;
 202                 if (speakers != null && !speakers.isEmpty()) {
 203                     for (String speaker : speakers) {
 204                         availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
 205                     }
 206                 } else {
 207                     availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, null));
 208                 }
 209             }
 210         } catch (IOException | JsonSyntaxException e) {
 211             logger.warn("Cannot get mimic voices from the URL {}, error {}", url, e.getMessage());
 212         }
 213     }
 214
 215     @Override
 216     public Set<AudioFormat> getSupportedFormats() {
 217         return Set.<AudioFormat> of(AUDIO_FORMAT);
 218     }
 219
 220     /**
 221      * Checks parameters and calls the API to synthesize voice.
 222      *
 223      * @param text Input text.
 224      * @param voice Selected voice.
 225      * @param requestedFormat Format that is supported by the target sink as well.
 226      * @return Output audio stream
 227      * @throws TTSException in case the service is unavailable or a parameter is invalid.
 228      */
 229     @Override
 230     public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 231
 232         if (!availableVoices.contains(voice)) {
 233             // let a chance for the service to update :
 234             refreshVoices();
 235             if (!availableVoices.contains(voice)) {
 236                 throw new TTSException("Voice " + voice.getUID() + " not available for MimicTTS");
 237             }
 238         }
 239
 240         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 241         // Validate arguments
 242         // trim text
 243         String trimmedText = text.trim();
 244         if (trimmedText.isEmpty()) {
 245             throw new TTSException("The passed text is empty");
 246         }
 247         if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
 248             throw new TTSException("The passed AudioFormat is unsupported");
 249         }
 250
 251         String encodedVoice;
 252         try {
 253             encodedVoice = URLEncoder.encode(((MimicVoice) voice).getTechnicalName(),
 254                     StandardCharsets.UTF_8.toString());
 255         } catch (UnsupportedEncodingException e) {
 256             throw new IllegalArgumentException("Cannot encode voice in URL " + ((MimicVoice) voice).getTechnicalName());
 257         }
 258
 259         // create the url for given locale, format
 260         String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + encodedVoice + "&noiseScale=" + config.audioVolatility
 261                 + "&noiseW=" + config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client";
 262         logger.debug("Querying mimic with URL {}", urlTTS);
 263
 264         // prepare the response as an inputstream
 265         InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener();
 266         // we will use a POST method for the text
 267         StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8);
 268         if (text.startsWith("<speak>")) {
 269             httpClient.POST(urlTTS).header("Content-Type", "application/ssml+xml").content(textContentProvider)
 270                     .accept("audio/wav").send(inputStreamResponseListener);
 271         } else {
 272             httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener);
 273         }
 274
 275         // compute the estimated timeout using a "stupid" method based on text length, as the response time depends on
 276         // the requested text. Average speaker speed estimated to 10/second.
 277         // Will use a safe margin multiplicator (x5) to accept very slow mimic server
 278         // So the constant chosen is 5 * 10 = /2
 279         int timeout = text.length() / 2;
 280
 281         // check response status and return AudioStream
 282         Response response;
 283         try {
 284             response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS);
 285             if (response.getStatus() == HttpStatus.OK_200) {
 286                 String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH);
 287                 long length;
 288                 try {
 289                     length = Long.parseLong(lengthHeader);
 290                 } catch (NumberFormatException e) {
 291                     throw new TTSException(
 292                             "Cannot get Content-Length header from mimic response. Are you sure to query a mimic TTS server at "
 293                                     + urlTTS + " ?");
 294                 }
 295
 296                 InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream();
 297                 try {
 298                     if (!config.workaroundServletSink) {
 299                         return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
 300                     } else {
 301                         // Some audio sinks use the openHAB servlet to get audio. This servlet require the
 302                         // getClonedStream()
 303                         // method
 304                         // So we cache the file on disk, thus implementing the method thanks to FileAudioStream.
 305                         return createTemporaryFile(inputStreamFromMimic, AUDIO_FORMAT);
 306                     }
 307                 } catch (TTSException e) {
 308                     try {
 309                         inputStreamFromMimic.close();
 310                     } catch (IOException e1) {
 311                     }
 312                     throw e;
 313                 }
 314
 315             } else {
 316                 String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
 317                         + response.getStatus() + " for reason " + response.getReason();
 318                 TTSException ttsException = new TTSException(errorMessage);
 319                 response.abort(ttsException);
 320                 throw ttsException;
 321             }
 322         } catch (InterruptedException | TimeoutException | ExecutionException e) {
 323             String errorMessage = "Cannot get wav from mimic url " + urlTTS;
 324             throw new TTSException(errorMessage, e);
 325         }
 326     }
 327
 328     private AudioStream createTemporaryFile(InputStream inputStream, AudioFormat audioFormat) throws TTSException {
 329         File mimicDirectory = new File(OpenHAB.getUserDataFolder(), "mimic");
 330         mimicDirectory.mkdir();
 331         try {
 332             File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".wav", mimicDirectory);
 333             tempFile.deleteOnExit();
 334             Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
 335             return new AutoDeleteFileAudioStream(tempFile, audioFormat);
 336         } catch (AudioException | IOException e) {
 337             throw new TTSException("Cannot create temporary audio file", e);
 338         }
 339     }
 340 }