/**
 * Copyright (c) 2010-2023 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
13 package org.openhab.voice.googletts.internal;
15 import static org.openhab.voice.googletts.internal.GoogleTTSService.*;
17 import java.io.ByteArrayInputStream;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.math.BigInteger;
21 import java.nio.charset.StandardCharsets;
22 import java.security.MessageDigest;
23 import java.security.NoSuchAlgorithmException;
24 import java.util.Collections;
25 import java.util.HashSet;
26 import java.util.Locale;
30 import org.eclipse.jdt.annotation.NonNull;
31 import org.eclipse.jdt.annotation.NonNullByDefault;
32 import org.eclipse.jdt.annotation.Nullable;
33 import org.openhab.core.audio.AudioFormat;
34 import org.openhab.core.audio.AudioStream;
35 import org.openhab.core.audio.ByteArrayAudioStream;
36 import org.openhab.core.audio.utils.AudioWaveUtils;
37 import org.openhab.core.auth.client.oauth2.OAuthFactory;
38 import org.openhab.core.config.core.ConfigurableService;
39 import org.openhab.core.voice.AbstractCachedTTSService;
40 import org.openhab.core.voice.TTSCache;
41 import org.openhab.core.voice.TTSException;
42 import org.openhab.core.voice.TTSService;
43 import org.openhab.core.voice.Voice;
44 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
45 import org.osgi.framework.Constants;
46 import org.osgi.service.cm.ConfigurationAdmin;
47 import org.osgi.service.component.annotations.Activate;
48 import org.osgi.service.component.annotations.Component;
49 import org.osgi.service.component.annotations.Deactivate;
50 import org.osgi.service.component.annotations.Modified;
51 import org.osgi.service.component.annotations.Reference;
52 import org.slf4j.Logger;
53 import org.slf4j.LoggerFactory;
/**
 * Voice service implementation.
 *
 * @author Gabor Bicskei - Initial contribution
 */
60 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "="
61 + SERVICE_PID, service = TTSService.class)
62 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
63 + " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
64 public class GoogleTTSService extends AbstractCachedTTSService {
68 static final String SERVICE_NAME = "Google Cloud";
73 static final String SERVICE_ID = "googletts";
78 static final String SERVICE_CATEGORY = "voice";
83 static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
86 * Configuration parameters
88 private static final String PARAM_CLIENT_ID = "clientId";
89 private static final String PARAM_CLIEND_SECRET = "clientSecret";
90 static final String PARAM_AUTHCODE = "authcode";
91 private static final String PARAM_PITCH = "pitch";
92 private static final String PARAM_SPEAKING_RATE = "speakingRate";
93 private static final String PARAM_VOLUME_GAIN_DB = "volumeGainDb";
98 private final Logger logger = LoggerFactory.getLogger(GoogleTTSService.class);
101 * Set of supported audio formats
103 private Set<AudioFormat> audioFormats = new HashSet<>();
106 * Google Cloud TTS API implementation
108 private @NonNullByDefault({}) GoogleCloudAPI apiImpl;
109 private final ConfigurationAdmin configAdmin;
110 private final OAuthFactory oAuthFactory;
113 * All voices for all supported locales
115 private Set<Voice> allVoices = new HashSet<>();
117 private final GoogleTTSConfig config = new GoogleTTSConfig();
/**
 * Creates the service and keeps the injected collaborators that {@code activate} later passes to the
 * Google Cloud API client.
 *
 * @param configAdmin OSGi configuration admin service
 * @param oAuthFactory factory for OAuth client services
 * @param ttsCache cache handed to the {@link AbstractCachedTTSService} super class
 * @param config component configuration; not read by the constructor
 */
@Activate
public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin, final @Reference OAuthFactory oAuthFactory,
        @Reference TTSCache ttsCache, Map<String, Object> config) {
    // the cache is owned by the super class; this class only keeps the two services it needs later
    super(ttsCache);
    this.configAdmin = configAdmin;
    this.oAuthFactory = oAuthFactory;
}
128 * DS activate, with access to ConfigAdmin
131 protected void activate(Map<String, Object> config) {
132 apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory);
133 updateConfig(config);
137 protected void dispose() {
139 audioFormats = new HashSet<>();
140 allVoices = new HashSet<>();
144 * Initializing audio formats. Google supports 3 formats:
146 * Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16
147 * also contains a WAV header.
151 * Opus encoded audio wrapped in an ogg container. This is not supported by openHAB.
153 * @return Set of supported AudioFormats
155 private Set<AudioFormat> initAudioFormats() {
156 logger.trace("Initializing audio formats");
157 Set<AudioFormat> result = new HashSet<>();
158 for (String format : apiImpl.getSupportedAudioFormats()) {
159 AudioFormat audioFormat = getAudioFormat(format);
160 if (audioFormat != null) {
161 result.add(audioFormat);
162 logger.trace("Audio format supported: {}", format);
164 logger.trace("Audio format not supported: {}", format);
167 return Collections.unmodifiableSet(result);
171 * Loads available voices from Google API
173 * @return Set of available voices.
175 private Set<Voice> initVoices() {
176 logger.trace("Initializing voices");
177 Set<Voice> result = new HashSet<>();
178 for (Locale locale : apiImpl.getSupportedLocales()) {
179 result.addAll(apiImpl.getVoicesForLocale(locale));
181 if (logger.isTraceEnabled()) {
182 for (Voice voice : result) {
183 logger.trace("Google Cloud TTS voice: {}", voice.getLabel());
186 return Collections.unmodifiableSet(result);
190 * Called by the framework when the configuration was updated.
192 * @param newConfig Updated configuration
195 private void updateConfig(Map<String, Object> newConfig) {
196 logger.debug("Updating configuration");
197 if (newConfig != null) {
199 String param = newConfig.containsKey(PARAM_CLIENT_ID) ? newConfig.get(PARAM_CLIENT_ID).toString() : null;
200 config.clientId = param;
202 logger.warn("Missing client id configuration to access Google Cloud TTS API.");
205 param = newConfig.containsKey(PARAM_CLIEND_SECRET) ? newConfig.get(PARAM_CLIEND_SECRET).toString() : null;
206 config.clientSecret = param;
208 logger.warn("Missing client secret configuration to access Google Cloud TTS API.");
211 param = newConfig.containsKey(PARAM_AUTHCODE) ? newConfig.get(PARAM_AUTHCODE).toString() : null;
212 config.authcode = param;
215 param = newConfig.containsKey(PARAM_PITCH) ? newConfig.get(PARAM_PITCH).toString() : null;
217 config.pitch = Double.parseDouble(param);
221 param = newConfig.containsKey(PARAM_SPEAKING_RATE) ? newConfig.get(PARAM_SPEAKING_RATE).toString() : null;
223 config.speakingRate = Double.parseDouble(param);
227 param = newConfig.containsKey(PARAM_VOLUME_GAIN_DB) ? newConfig.get(PARAM_VOLUME_GAIN_DB).toString() : null;
229 config.volumeGainDb = Double.parseDouble(param);
232 if (config.clientId != null && !config.clientId.isEmpty() && config.clientSecret != null
233 && !config.clientSecret.isEmpty()) {
234 apiImpl.setConfig(config);
235 if (apiImpl.isInitialized()) {
236 allVoices = initVoices();
237 audioFormats = initAudioFormats();
241 logger.warn("Missing Google Cloud TTS configuration.");
246 public String getId() {
251 public String getLabel(@Nullable Locale locale) {
256 public Set<Voice> getAvailableVoices() {
261 public Set<AudioFormat> getSupportedFormats() {
266 * Helper to create AudioFormat objects from Google names.
268 * @param format Google audio format.
269 * @return Audio format object.
271 private @Nullable AudioFormat getAudioFormat(String format) {
272 Integer bitDepth = 16;
273 Long frequency = 44100L;
275 AudioEncoding encoding = AudioEncoding.valueOf(format);
279 // we use by default: MP3, 44khz_16bit_mono with bitrate 64 kbps
280 return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, bitDepth, 64000,
283 // we use by default: wav, 44khz_16bit_mono
284 return new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, bitDepth, null,
287 logger.warn("Audio format {} is not yet supported.", format);
293 * Checks parameters and calls the API to synthesize voice.
295 * @param text Input text.
296 * @param voice Selected voice.
297 * @param requestedFormat Format that is supported by the target sink as well.
298 * @return Output audio stream
299 * @throws TTSException in case the service is unavailable or a parameter is invalid.
302 public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
303 logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
304 // Validate known api key
305 if (!apiImpl.isInitialized()) {
306 throw new TTSException("Missing service configuration.");
308 // Validate arguments
310 String trimmedText = text.trim();
311 if (trimmedText.isEmpty()) {
312 throw new TTSException("The passed text is null or empty");
314 if (!this.allVoices.contains(voice)) {
315 throw new TTSException("The passed voice is unsupported");
317 boolean isAudioFormatSupported = false;
318 for (AudioFormat currentAudioFormat : this.audioFormats) {
319 if (currentAudioFormat.isCompatible(requestedFormat)) {
320 isAudioFormatSupported = true;
324 if (!isAudioFormatSupported) {
325 throw new TTSException("The passed AudioFormat is unsupported");
328 // create the audio byte array for given text, locale, format
329 byte[] audio = apiImpl.synthesizeSpeech(trimmedText, (GoogleTTSVoice) voice, requestedFormat.getCodec());
331 throw new TTSException("Could not synthesize text via Google Cloud TTS Service");
334 // compute the real format returned by google if wave file
335 AudioFormat finalFormat = requestedFormat;
336 if (AudioFormat.CONTAINER_WAVE.equals(requestedFormat.getContainer())) {
337 finalFormat = parseAudioFormat(audio);
340 return new ByteArrayAudioStream(audio, finalFormat);
343 private AudioFormat parseAudioFormat(byte[] audio) throws TTSException {
344 try (InputStream inputStream = new ByteArrayInputStream(audio)) {
345 return AudioWaveUtils.parseWavFormat(inputStream);
346 } catch (IOException e) {
347 throw new TTSException("Cannot parse WAV format", e);
352 public @NonNull String getCacheKey(@NonNull String text, @NonNull Voice voice,
353 @NonNull AudioFormat requestedFormat) {
355 MessageDigest md = MessageDigest.getInstance("MD5");
356 byte[] bytesOfMessage = (config.toConfigString() + text + requestedFormat).getBytes(StandardCharsets.UTF_8);
357 String hash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
358 return ((GoogleTTSVoice) voice).getTechnicalName() + "_" + hash;
359 } catch (NoSuchAlgorithmException e) {
361 logger.warn("Could not create MD5 hash for '{}'", text, e);
362 return "nomd5algorithm";