2 * Copyright (c) 2010-2024 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.mimic.internal;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.UnsupportedEncodingException;
18 import java.math.BigInteger;
19 import java.net.URLEncoder;
20 import java.nio.charset.StandardCharsets;
21 import java.security.MessageDigest;
22 import java.security.NoSuchAlgorithmException;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Locale;
28 import java.util.concurrent.ExecutionException;
29 import java.util.concurrent.TimeUnit;
30 import java.util.concurrent.TimeoutException;
32 import org.eclipse.jdt.annotation.NonNullByDefault;
33 import org.eclipse.jdt.annotation.Nullable;
34 import org.eclipse.jetty.client.HttpClient;
35 import org.eclipse.jetty.client.api.Response;
36 import org.eclipse.jetty.client.util.InputStreamResponseListener;
37 import org.eclipse.jetty.client.util.StringContentProvider;
38 import org.eclipse.jetty.http.HttpHeader;
39 import org.eclipse.jetty.http.HttpStatus;
40 import org.openhab.core.audio.AudioFormat;
41 import org.openhab.core.audio.AudioStream;
42 import org.openhab.core.config.core.ConfigurableService;
43 import org.openhab.core.io.net.http.HttpClientFactory;
44 import org.openhab.core.io.net.http.HttpRequestBuilder;
45 import org.openhab.core.voice.AbstractCachedTTSService;
46 import org.openhab.core.voice.TTSCache;
47 import org.openhab.core.voice.TTSException;
48 import org.openhab.core.voice.TTSService;
49 import org.openhab.core.voice.Voice;
50 import org.openhab.voice.mimic.internal.dto.VoiceDto;
51 import org.osgi.framework.Constants;
52 import org.osgi.service.component.annotations.Activate;
53 import org.osgi.service.component.annotations.Component;
54 import org.osgi.service.component.annotations.Modified;
55 import org.osgi.service.component.annotations.Reference;
56 import org.slf4j.Logger;
57 import org.slf4j.LoggerFactory;
59 import com.google.gson.Gson;
60 import com.google.gson.GsonBuilder;
61 import com.google.gson.JsonSyntaxException;
64 * Mimic Voice service implementation.
66 * @author Gwendal Roulleau - Initial contribution
68 @Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
69 + MimicTTSService.SERVICE_PID, service = TTSService.class)
70 @ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
71 + " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
73 public class MimicTTSService extends AbstractCachedTTSService {
75 private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
77 static final String SERVICE_CATEGORY = "voice";
78 static final String SERVICE_ID = "mimictts";
79 static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
80 static final String SERVICE_NAME = "Mimic";
83 * Configuration parameters
85 private static final String PARAM_URL = "url";
86 private static final String PARAM_SPEAKINGRATE = "speakingRate";
87 private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
88 private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
93 private static final String LIST_VOICES_URL = "/api/voices";
94 private static final String SYNTHETIZE_URL = "/api/tts";
96 /** The only wave format supported */
97 private static final AudioFormat AUDIO_FORMAT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
98 AudioFormat.CODEC_PCM_SIGNED, false, 16, 52000, 22050L, 1);
100 private Set<Voice> availableVoices = new HashSet<>();
102 private final MimicConfiguration config = new MimicConfiguration();
104 private final Gson gson = new GsonBuilder().create();
106 private final HttpClient httpClient;
/**
 * Creates the service.
 * <p>
 * The cache is handed to the parent class, the initial configuration is applied and the common HTTP client
 * of the factory is kept for the synthesis requests.
 *
 * @param httpClientFactory factory providing the common HTTP client
 * @param ttsCache cache passed on to the parent class
 * @param config initial configuration, applied through {@link #updateConfig(Map)}
 */
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
        Map<String, Object> config) {
    super(ttsCache);
    // the parameter shadows the field of the same name: this applies the incoming map to the field
    updateConfig(config);
    this.httpClient = httpClientFactory.getCommonHttpClient();
}
117 * Called by the framework when the configuration was updated.
119 * @param newConfig Updated configuration
122 private void updateConfig(Map<String, Object> newConfig) {
123 logger.debug("Updating configuration");
126 Object param = newConfig.get(PARAM_URL);
128 logger.warn("Missing URL to access Mimic TTS API. Using localhost");
130 config.url = param.toString();
135 param = newConfig.get(PARAM_AUDIOVOLATITLITY);
137 config.audioVolatility = Double.parseDouble(param.toString());
139 } catch (NumberFormatException e) {
140 logger.warn("Cannot parse audioVolatility parameter. Using default");
143 // phoneme volatility
145 param = newConfig.get(PARAM_PHONEMEVOLATITLITY);
147 config.phonemeVolatility = Double.parseDouble(param.toString());
149 } catch (NumberFormatException e) {
150 logger.warn("Cannot parse phonemeVolatility parameter. Using default");
155 param = newConfig.get(PARAM_SPEAKINGRATE);
157 config.speakingRate = Double.parseDouble(param.toString());
159 } catch (NumberFormatException e) {
160 logger.warn("Cannot parse speakingRate parameter. Using default");
167 public String getId() {
172 public String getLabel(@Nullable Locale locale) {
177 public Set<Voice> getAvailableVoices() {
178 return availableVoices;
181 public void refreshVoices() {
182 String url = config.url + LIST_VOICES_URL;
183 availableVoices.clear();
185 String responseVoices = HttpRequestBuilder.getFrom(url).getContentAsString();
186 VoiceDto[] mimicVoiceResponse = gson.fromJson(responseVoices, VoiceDto[].class);
187 if (mimicVoiceResponse == null) {
188 logger.warn("Cannot get mimic voices from the URL {}", url);
190 } else if (mimicVoiceResponse.length == 0) {
191 logger.debug("Voice set response from Mimic is empty ?!");
194 for (VoiceDto voiceDto : mimicVoiceResponse) {
195 List<String> speakers = voiceDto.speakers;
196 if (speakers != null && !speakers.isEmpty()) {
197 for (String speaker : speakers) {
198 availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
201 availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, null));
204 } catch (IOException | JsonSyntaxException e) {
205 logger.warn("Cannot get mimic voices from the URL {}, error {}", url, e.getMessage());
210 public Set<AudioFormat> getSupportedFormats() {
211 return Set.<AudioFormat> of(AUDIO_FORMAT);
215 * Checks parameters and calls the API to synthesize voice.
217 * @param text Input text.
218 * @param voice Selected voice.
219 * @param requestedFormat Format that is supported by the target sink as well.
220 * @return Output audio stream
221 * @throws TTSException in case the service is unavailable or a parameter is invalid.
224 public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
225 if (!availableVoices.contains(voice)) {
226 // let a chance for the service to update :
228 if (!availableVoices.contains(voice)) {
229 throw new TTSException("Voice " + voice.getUID() + " not available for MimicTTS");
233 logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
234 // Validate arguments
236 String trimmedText = text.trim();
237 if (trimmedText.isEmpty()) {
238 throw new TTSException("The passed text is empty");
240 if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
241 throw new TTSException("The passed AudioFormat is unsupported");
246 encodedVoice = URLEncoder.encode(((MimicVoice) voice).getTechnicalName(),
247 StandardCharsets.UTF_8.toString());
248 } catch (UnsupportedEncodingException e) {
249 throw new IllegalArgumentException("Cannot encode voice in URL " + ((MimicVoice) voice).getTechnicalName());
252 // create the url for given locale, format
253 String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + encodedVoice + "&noiseScale=" + config.audioVolatility
254 + "&noiseW=" + config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client";
255 logger.debug("Querying mimic with URL {}", urlTTS);
257 // prepare the response as an inputstream
258 InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener();
259 // we will use a POST method for the text
260 StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8);
261 if (text.startsWith("<speak>")) {
262 httpClient.POST(urlTTS).header("Content-Type", "application/ssml+xml").content(textContentProvider)
263 .accept("audio/wav").send(inputStreamResponseListener);
265 httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener);
268 // compute the estimated timeout using a "stupid" method based on text length, as the response time depends on
269 // the requested text. Average speaker speed estimated to 10/second.
270 // Will use a safe margin multiplicator (x5) to accept very slow mimic server
271 // So the constant chosen is 5 * 10 = /2
272 int timeout = text.length() / 2;
274 // check response status and return AudioStream
277 response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS);
278 if (response.getStatus() == HttpStatus.OK_200) {
279 String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH);
282 length = Long.parseLong(lengthHeader);
283 } catch (NumberFormatException e) {
284 throw new TTSException(
285 "Cannot get Content-Length header from mimic response. Are you sure to query a mimic TTS server at "
289 InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream();
290 return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
292 String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
293 + response.getStatus() + " for reason " + response.getReason();
294 TTSException ttsException = new TTSException(errorMessage);
295 response.abort(ttsException);
298 } catch (InterruptedException | TimeoutException | ExecutionException e) {
299 String errorMessage = "Cannot get wav from mimic url " + urlTTS;
300 throw new TTSException(errorMessage, e);
305 public String getCacheKey(String text, Voice voice, AudioFormat requestedFormat) {
308 md = MessageDigest.getInstance("MD5");
309 } catch (NoSuchAlgorithmException e) {
310 return "nomd5algorithm";
312 byte[] binaryKey = ((text + voice.getUID() + requestedFormat.toString() + config.speakingRate
313 + config.audioVolatility + config.phonemeVolatility).getBytes());
314 return String.format("%032x", new BigInteger(1, md.digest(binaryKey)));