git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2023 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.mimic.internal;
  14
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.api.Response;
import org.eclipse.jetty.client.util.InputStreamResponseListener;
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.io.net.http.HttpRequestBuilder;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
import org.openhab.voice.mimic.internal.dto.VoiceDto;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonSyntaxException;
  54
  55 /**
  56  * Mimic Voice service implementation.
  57  *
  58  * @author Gwendal Roulleau - Initial contribution
  59  */
  60 @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
  61         + MimicTTSService.SERVICE_PID)
  62 @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
  63         + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
  64 @NonNullByDefault
  65 public class MimicTTSService implements TTSService {
  66
  67     private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
  68
  69     static final String SERVICE_CATEGORY = "voice";
  70     static final String SERVICE_ID = "mimictts";
  71     static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
  72     static final String SERVICE_NAME = "Mimic";
  73
  74     /**
  75      * Configuration parameters
  76      */
  77     private static final String PARAM_URL = "url";
  78     private static final String PARAM_SPEAKINGRATE = "speakingRate";
  79     private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
  80     private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
  81
  82     /**
  83      * Url
  84      */
  85     private static final String LIST_VOICES_URL = "/api/voices";
  86     private static final String SYNTHETIZE_URL = "/api/tts";
  87
  88     /** The only wave format supported */
  89     private static final AudioFormat AUDIO_FORMAT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
  90             AudioFormat.CODEC_PCM_SIGNED, false, 16, 52000, 22050L, 1);
  91
  92     private Set<Voice> availableVoices = new HashSet<>();
  93
  94     private final MimicConfiguration config = new MimicConfiguration();
  95
  96     private final Gson gson = new GsonBuilder().create();
  97
  98     private final HttpClient httpClient;
  99
 100     @Activate
 101     public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
 102         updateConfig(config);
 103         this.httpClient = httpClientFactory.getCommonHttpClient();
 104     }
 105
 106     /**
 107      * Called by the framework when the configuration was updated.
 108      *
 109      * @param newConfig Updated configuration
 110      */
 111     @Modified
 112     private void updateConfig(Map<String, Object> newConfig) {
 113         logger.debug("Updating configuration");
 114
 115         // client id
 116         Object param = newConfig.get(PARAM_URL);
 117         if (param == null) {
 118             logger.warn("Missing URL to access Mimic TTS API. Using localhost");
 119         } else {
 120             config.url = param.toString();
 121         }
 122
 123         // audio volatility
 124         try {
 125             param = newConfig.get(PARAM_AUDIOVOLATITLITY);
 126             if (param != null) {
 127                 config.audioVolatility = Double.parseDouble(param.toString());
 128             }
 129         } catch (NumberFormatException e) {
 130             logger.warn("Cannot parse audioVolatility parameter. Using default");
 131         }
 132
 133         // phoneme volatility
 134         try {
 135             param = newConfig.get(PARAM_PHONEMEVOLATITLITY);
 136             if (param != null) {
 137                 config.phonemeVolatility = Double.parseDouble(param.toString());
 138             }
 139         } catch (NumberFormatException e) {
 140             logger.warn("Cannot parse phonemeVolatility parameter. Using default");
 141         }
 142
 143         // speakingRate
 144         try {
 145             param = newConfig.get(PARAM_SPEAKINGRATE);
 146             if (param != null) {
 147                 config.speakingRate = Double.parseDouble(param.toString());
 148             }
 149         } catch (NumberFormatException e) {
 150             logger.warn("Cannot parse speakingRate parameter. Using default");
 151         }
 152
 153         refreshVoices();
 154     }
 155
 156     @Override
 157     public String getId() {
 158         return SERVICE_ID;
 159     }
 160
 161     @Override
 162     public String getLabel(@Nullable Locale locale) {
 163         return SERVICE_NAME;
 164     }
 165
 166     @Override
 167     public Set<Voice> getAvailableVoices() {
 168         return availableVoices;
 169     }
 170
 171     public void refreshVoices() {
 172         String url = config.url + LIST_VOICES_URL;
 173         availableVoices.clear();
 174         try {
 175             String responseVoices = HttpRequestBuilder.getFrom(url).getContentAsString();
 176             VoiceDto[] mimicVoiceResponse = gson.fromJson(responseVoices, VoiceDto[].class);
 177             if (mimicVoiceResponse == null) {
 178                 logger.warn("Cannot get mimic voices from the URL {}", url);
 179                 return;
 180             } else if (mimicVoiceResponse.length == 0) {
 181                 logger.debug("Voice set response from Mimic is empty ?!");
 182                 return;
 183             }
 184             for (VoiceDto voiceDto : mimicVoiceResponse) {
 185                 List<String> speakers = voiceDto.speakers;
 186                 if (speakers != null && !speakers.isEmpty()) {
 187                     for (String speaker : speakers) {
 188                         availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
 189                     }
 190                 } else {
 191                     availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, null));
 192                 }
 193             }
 194         } catch (IOException | JsonSyntaxException e) {
 195             logger.warn("Cannot get mimic voices from the URL {}, error {}", url, e.getMessage());
 196         }
 197     }
 198
 199     @Override
 200     public Set<AudioFormat> getSupportedFormats() {
 201         return Set.<AudioFormat> of(AUDIO_FORMAT);
 202     }
 203
 204     /**
 205      * Checks parameters and calls the API to synthesize voice.
 206      *
 207      * @param text Input text.
 208      * @param voice Selected voice.
 209      * @param requestedFormat Format that is supported by the target sink as well.
 210      * @return Output audio stream
 211      * @throws TTSException in case the service is unavailable or a parameter is invalid.
 212      */
 213     @Override
 214     public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 215
 216         if (!availableVoices.contains(voice)) {
 217             // let a chance for the service to update :
 218             refreshVoices();
 219             if (!availableVoices.contains(voice)) {
 220                 throw new TTSException("Voice " + voice.getUID() + " not available for MimicTTS");
 221             }
 222         }
 223
 224         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 225         // Validate arguments
 226         // trim text
 227         String trimmedText = text.trim();
 228         if (trimmedText.isEmpty()) {
 229             throw new TTSException("The passed text is empty");
 230         }
 231         if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
 232             throw new TTSException("The passed AudioFormat is unsupported");
 233         }
 234
 235         String ssml = "";
 236         if (text.startsWith("<speak>")) {
 237             ssml = "&ssml=true";
 238         }
 239
 240         // create the url for given locale, format
 241         String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + ((MimicVoice) voice).getTechnicalName() + ssml
 242                 + "&noiseScale=" + config.audioVolatility + "&noiseW=" + config.phonemeVolatility + "&lengthScale="
 243                 + config.speakingRate + "&audioTarget=client";
 244         logger.debug("Querying mimic with URL {}", urlTTS);
 245
 246         // prepare the response as an inputstream
 247         InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener();
 248         // we will use a POST method for the text
 249         StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8);
 250         httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener);
 251
 252         // compute the estimated timeout using a "stupid" method based on text length, as the response time depends on
 253         // the requested text. Average speaker speed estimated to 10/second.
 254         // Will use a safe margin multiplicator (x5) to accept very slow mimic server
 255         // So the constant chosen is 5 * 10 = /2
 256         int timeout = text.length() / 2;
 257
 258         // check response status and return AudioStream
 259         Response response;
 260         try {
 261             response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS);
 262             if (response.getStatus() == HttpStatus.OK_200) {
 263                 String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH);
 264                 long length;
 265                 try {
 266                     length = Long.parseLong(lengthHeader);
 267                 } catch (NumberFormatException e) {
 268                     throw new TTSException(
 269                             "Cannot get Content-Length header from mimic response. Are you sure to query a mimic TTS server at "
 270                                     + urlTTS + " ?");
 271                 }
 272                 return new InputStreamAudioStream(inputStreamResponseListener.getInputStream(), AUDIO_FORMAT, length);
 273             } else {
 274                 String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
 275                         + response.getStatus() + " for reason " + response.getReason();
 276                 TTSException ttsException = new TTSException(errorMessage);
 277                 response.abort(ttsException);
 278                 throw ttsException;
 279             }
 280         } catch (InterruptedException | TimeoutException | ExecutionException e) {
 281             String errorMessage = "Cannot get wav from mimic url " + urlTTS;
 282             throw new TTSException(errorMessage, e);
 283         }
 284     }
 285 }