git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2022 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.mimic.internal;
  14
  15 import java.io.IOException;
  16 import java.io.UnsupportedEncodingException;
  17 import java.net.URLEncoder;
  18 import java.nio.charset.StandardCharsets;
  19 import java.util.HashSet;
  20 import java.util.Locale;
  21 import java.util.Map;
  22 import java.util.Set;
  23
  24 import org.eclipse.jdt.annotation.NonNullByDefault;
  25 import org.eclipse.jdt.annotation.Nullable;
  26 import org.openhab.core.audio.AudioFormat;
  27 import org.openhab.core.audio.AudioStream;
  28 import org.openhab.core.audio.ByteArrayAudioStream;
  29 import org.openhab.core.config.core.ConfigurableService;
  30 import org.openhab.core.io.net.http.HttpRequestBuilder;
  31 import org.openhab.core.io.net.http.HttpUtil;
  32 import org.openhab.core.library.types.RawType;
  33 import org.openhab.core.voice.TTSException;
  34 import org.openhab.core.voice.TTSService;
  35 import org.openhab.core.voice.Voice;
  36 import org.openhab.voice.mimic.internal.dto.VoiceDto;
  37 import org.osgi.framework.Constants;
  38 import org.osgi.service.component.annotations.Activate;
  39 import org.osgi.service.component.annotations.Component;
  40 import org.osgi.service.component.annotations.Modified;
  41 import org.slf4j.Logger;
  42 import org.slf4j.LoggerFactory;
  43
  44 import com.google.gson.Gson;
  45 import com.google.gson.GsonBuilder;
  46 import com.google.gson.JsonSyntaxException;
  47
  48 /**
  49  * Mimic Voice service implementation.
  50  *
  51  * @author Gwendal Roulleau - Initial contribution
  52  */
  53 @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
  54         + MimicTTSService.SERVICE_PID)
  55 @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
  56         + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
  57 @NonNullByDefault
  58 public class MimicTTSService implements TTSService {
  59
  60     static final String SERVICE_CATEGORY = "voice";
  61     static final String SERVICE_ID = "mimictts";
  62     static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
  63     static final String SERVICE_NAME = "Mimic";
  64
  65     /**
  66      * Configuration parameters
  67      */
  68     private static final String PARAM_URL = "url";
  69     private static final String PARAM_SPEAKINGRATE = "speakingRate";
  70     private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
  71     private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
  72
  73     /**
  74      * Url
  75      */
  76     private static final String LIST_VOICES_URL = "/api/voices";
  77     private static final String SYNTHETIZE_URL = "/api/tts";
  78
  79     /** The only wave format supported */
  80     private static final AudioFormat AUDIO_FORMAT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
  81             AudioFormat.CODEC_PCM_SIGNED, false, 16, 52000, 22050L, 1);
  82
  83     private Set<Voice> availableVoices = new HashSet<>();
  84
  85     /**
  86      * Logger.
  87      */
  88     private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
  89
  90     private final MimicConfiguration config = new MimicConfiguration();
  91
  92     private final Gson gson = new GsonBuilder().create();
  93
  94     @Activate
  95     protected void activate(Map<String, Object> config) {
  96         updateConfig(config);
  97     }
  98
  99     /**
 100      * Called by the framework when the configuration was updated.
 101      *
 102      * @param newConfig Updated configuration
 103      */
 104     @Modified
 105     private void updateConfig(Map<String, Object> newConfig) {
 106         logger.debug("Updating configuration");
 107
 108         // client id
 109         Object param = newConfig.get(PARAM_URL);
 110         if (param == null) {
 111             logger.warn("Missing URL to access Mimic TTS API. Using localhost");
 112         } else {
 113             config.url = param.toString();
 114         }
 115
 116         // audio volatility
 117         try {
 118             param = newConfig.get(PARAM_AUDIOVOLATITLITY);
 119             if (param != null) {
 120                 config.audioVolatility = Double.parseDouble(param.toString());
 121             }
 122         } catch (NumberFormatException e) {
 123             logger.warn("Cannot parse audioVolatility parameter. Using default");
 124         }
 125
 126         // phoneme volatility
 127         try {
 128             param = newConfig.get(PARAM_PHONEMEVOLATITLITY);
 129             if (param != null) {
 130                 config.phonemeVolatility = Double.parseDouble(param.toString());
 131             }
 132         } catch (NumberFormatException e) {
 133             logger.warn("Cannot parse phonemeVolatility parameter. Using default");
 134         }
 135
 136         // speakingRate
 137         try {
 138             param = newConfig.get(PARAM_SPEAKINGRATE);
 139             if (param != null) {
 140                 config.speakingRate = Double.parseDouble(param.toString());
 141             }
 142         } catch (NumberFormatException e) {
 143             logger.warn("Cannot parse speakingRate parameter. Using default");
 144         }
 145
 146         refreshVoices();
 147     }
 148
 149     @Override
 150     public String getId() {
 151         return SERVICE_ID;
 152     }
 153
 154     @Override
 155     public String getLabel(@Nullable Locale locale) {
 156         return SERVICE_NAME;
 157     }
 158
 159     @Override
 160     public Set<Voice> getAvailableVoices() {
 161         return availableVoices;
 162     }
 163
 164     public void refreshVoices() {
 165         String url = config.url + LIST_VOICES_URL;
 166         availableVoices.clear();
 167         try {
 168             String responseVoices = HttpRequestBuilder.getFrom(url).getContentAsString();
 169             VoiceDto[] mimicVoiceResponse = gson.fromJson(responseVoices, VoiceDto[].class);
 170             if (mimicVoiceResponse == null) {
 171                 logger.warn("Cannot get mimic voices from the URL {}", url);
 172                 return;
 173             } else if (mimicVoiceResponse.length == 0) {
 174                 logger.debug("Voice set response from Mimic is empty ?!");
 175                 return;
 176             }
 177             for (VoiceDto voiceDto : mimicVoiceResponse) {
 178                 if (voiceDto.speakers != null && voiceDto.speakers.size() > 0) {
 179                     for (String speaker : voiceDto.speakers) {
 180                         availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
 181                     }
 182                 } else {
 183                     availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, null));
 184                 }
 185             }
 186         } catch (IOException | JsonSyntaxException e) {
 187             logger.warn("Cannot get mimic voices from the URL {}, error {}", url, e.getMessage());
 188         }
 189     }
 190
 191     @Override
 192     public Set<AudioFormat> getSupportedFormats() {
 193         return Set.<AudioFormat> of(AUDIO_FORMAT);
 194     }
 195
 196     /**
 197      * Checks parameters and calls the API to synthesize voice.
 198      *
 199      * @param text Input text.
 200      * @param voice Selected voice.
 201      * @param requestedFormat Format that is supported by the target sink as well.
 202      * @return Output audio stream
 203      * @throws TTSException in case the service is unavailable or a parameter is invalid.
 204      */
 205     @Override
 206     public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 207
 208         if (!availableVoices.contains(voice)) {
 209             // let a chance for the service to update :
 210             refreshVoices();
 211             if (!availableVoices.contains(voice)) {
 212                 throw new TTSException("Voice " + voice.getUID() + " not available for MimicTTS");
 213             }
 214         }
 215
 216         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 217         // Validate arguments
 218         // trim text
 219         String trimmedText = text.trim();
 220         if (trimmedText.isEmpty()) {
 221             throw new TTSException("The passed text is empty");
 222         }
 223         if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
 224             throw new TTSException("The passed AudioFormat is unsupported");
 225         }
 226         String encodedText;
 227         try {
 228             encodedText = URLEncoder.encode(text, StandardCharsets.UTF_8.toString());
 229         } catch (UnsupportedEncodingException e) {
 230             throw new IllegalArgumentException("Cannot encode text in URL " + text);
 231         }
 232
 233         String ssml = "";
 234         if (text.startsWith("<speak>")) {
 235             ssml = "&ssml=true";
 236         }
 237
 238         // create the audio byte array for given text, locale, format
 239         String urlTTS = config.url + SYNTHETIZE_URL + "?text=" + encodedText + "&voice="
 240                 + ((MimicVoice) voice).getTechnicalName() + ssml + "&noiseScale=" + config.audioVolatility + "&noiseW="
 241                 + config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client";
 242         logger.debug("Querying mimic with URL {}", urlTTS);
 243         RawType responseWav = HttpUtil.downloadData(urlTTS, "audio/wav", false, -1);
 244         if (responseWav == null) {
 245             throw new TTSException("Cannot get wav from mimic url " + urlTTS);
 246         }
 247         return new ByteArrayAudioStream(responseWav.getBytes(), AUDIO_FORMAT);
 248     }
 249 }