/**
 * Copyright (c) 2010-2022 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
package org.openhab.voice.googletts.internal;

import static org.openhab.voice.googletts.internal.GoogleTTSService.*;

import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.ByteArrayAudioStream;
import org.openhab.core.auth.client.oauth2.OAuthFactory;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
import org.openhab.voice.googletts.internal.dto.AudioEncoding;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationAdmin;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

46 * Voice service implementation.
48 * @author Gabor Bicskei - Initial contribution
50 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
51 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
52 + " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
53 public class GoogleTTSService implements TTSService {
57 static final String SERVICE_NAME = "Google Cloud";
62 static final String SERVICE_ID = "googletts";
67 static final String SERVICE_CATEGORY = "voice";
72 static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
75 * Cache folder under $userdata
77 private static final String CACHE_FOLDER_NAME = "cache";
80 * Configuration parameters
82 private static final String PARAM_CLIENT_ID = "clientId";
83 private static final String PARAM_CLIEND_SECRET = "clientSecret";
84 static final String PARAM_AUTHCODE = "authcode";
85 private static final String PARAM_PITCH = "pitch";
86 private static final String PARAM_SPEAKING_RATE = "speakingRate";
87 private static final String PARAM_VOLUME_GAIN_DB = "volumeGainDb";
88 private static final String PARAM_PURGE_CACHE = "purgeCache";
93 private final Logger logger = LoggerFactory.getLogger(GoogleTTSService.class);
96 * Set of supported audio formats
98 private Set<AudioFormat> audioFormats = new HashSet<>();
101 * Google Cloud TTS API implementation
103 private @NonNullByDefault({}) GoogleCloudAPI apiImpl;
104 private final ConfigurationAdmin configAdmin;
105 private final OAuthFactory oAuthFactory;
108 * All voices for all supported locales
110 private Set<Voice> allVoices = new HashSet<>();
112 private final GoogleTTSConfig config = new GoogleTTSConfig();
115 public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin,
116 final @Reference OAuthFactory oAuthFactory) {
117 this.configAdmin = configAdmin;
118 this.oAuthFactory = oAuthFactory;
122 * DS activate, with access to ConfigAdmin
125 protected void activate(Map<String, Object> config) {
126 // create cache folder
127 File userData = new File(OpenHAB.getUserDataFolder());
128 File cacheFolder = new File(new File(userData, CACHE_FOLDER_NAME), SERVICE_PID);
129 if (!cacheFolder.exists()) {
130 cacheFolder.mkdirs();
132 logger.debug("Using cache folder {}", cacheFolder.getAbsolutePath());
134 apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory, cacheFolder);
135 updateConfig(config);
139 * Initializing audio formats. Google supports 3 formats:
141 * Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16
142 * also contains a WAV header.
146 * Opus encoded audio wrapped in an ogg container. This is not supported by openHAB.
148 * @return Set of supported AudioFormats
150 private Set<AudioFormat> initAudioFormats() {
151 logger.trace("Initializing audio formats");
152 Set<AudioFormat> result = new HashSet<>();
153 for (String format : apiImpl.getSupportedAudioFormats()) {
154 AudioFormat audioFormat = getAudioFormat(format);
155 if (audioFormat != null) {
156 result.add(audioFormat);
157 logger.trace("Audio format supported: {}", format);
159 logger.trace("Audio format not supported: {}", format);
162 return Collections.unmodifiableSet(result);
166 * Loads available voices from Google API
168 * @return Set of available voices.
170 private Set<Voice> initVoices() {
171 logger.trace("Initializing voices");
172 Set<Voice> result = new HashSet<>();
173 for (Locale locale : apiImpl.getSupportedLocales()) {
174 result.addAll(apiImpl.getVoicesForLocale(locale));
176 if (logger.isTraceEnabled()) {
177 for (Voice voice : result) {
178 logger.trace("Google Cloud TTS voice: {}", voice.getLabel());
181 return Collections.unmodifiableSet(result);
185 * Called by the framework when the configuration was updated.
187 * @param newConfig Updated configuration
190 private void updateConfig(Map<String, Object> newConfig) {
191 logger.debug("Updating configuration");
192 if (newConfig != null) {
194 String param = newConfig.containsKey(PARAM_CLIENT_ID) ? newConfig.get(PARAM_CLIENT_ID).toString() : null;
195 config.clientId = param;
197 logger.warn("Missing client id configuration to access Google Cloud TTS API.");
200 param = newConfig.containsKey(PARAM_CLIEND_SECRET) ? newConfig.get(PARAM_CLIEND_SECRET).toString() : null;
201 config.clientSecret = param;
203 logger.warn("Missing client secret configuration to access Google Cloud TTS API.");
206 param = newConfig.containsKey(PARAM_AUTHCODE) ? newConfig.get(PARAM_AUTHCODE).toString() : null;
207 config.authcode = param;
210 param = newConfig.containsKey(PARAM_PITCH) ? newConfig.get(PARAM_PITCH).toString() : null;
212 config.pitch = Double.parseDouble(param);
216 param = newConfig.containsKey(PARAM_SPEAKING_RATE) ? newConfig.get(PARAM_SPEAKING_RATE).toString() : null;
218 config.speakingRate = Double.parseDouble(param);
222 param = newConfig.containsKey(PARAM_VOLUME_GAIN_DB) ? newConfig.get(PARAM_VOLUME_GAIN_DB).toString() : null;
224 config.volumeGainDb = Double.parseDouble(param);
228 param = newConfig.containsKey(PARAM_PURGE_CACHE) ? newConfig.get(PARAM_PURGE_CACHE).toString() : null;
230 config.purgeCache = Boolean.parseBoolean(param);
232 logger.trace("New configuration: {}", config.toString());
234 if (config.clientId != null && !config.clientId.isEmpty() && config.clientSecret != null
235 && !config.clientSecret.isEmpty()) {
236 apiImpl.setConfig(config);
237 if (apiImpl.isInitialized()) {
238 allVoices = initVoices();
239 audioFormats = initAudioFormats();
243 logger.warn("Missing Google Cloud TTS configuration.");
248 public String getId() {
253 public String getLabel(@Nullable Locale locale) {
258 public Set<Voice> getAvailableVoices() {
263 public Set<AudioFormat> getSupportedFormats() {
268 * Helper to create AudioFormat objects from Google names.
270 * @param format Google audio format.
271 * @return Audio format object.
273 private @Nullable AudioFormat getAudioFormat(String format) {
274 Integer bitDepth = 16;
275 Long frequency = 44100L;
277 AudioEncoding encoding = AudioEncoding.valueOf(format);
281 // we use by default: MP3, 44khz_16bit_mono with bitrate 64 kbps
282 return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, bitDepth, 64000,
285 // we use by default: wav, 44khz_16bit_mono
286 return new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, bitDepth, null,
289 logger.warn("Audio format {} is not yet supported.", format);
295 * Checks parameters and calls the API to synthesize voice.
297 * @param text Input text.
298 * @param voice Selected voice.
299 * @param requestedFormat Format that is supported by the target sink as well.
300 * @return Output audio stream
301 * @throws TTSException in case the service is unavailable or a parameter is invalid.
304 public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
305 logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
306 // Validate known api key
307 if (!apiImpl.isInitialized()) {
308 throw new TTSException("Missing service configuration.");
310 // Validate arguments
312 String trimmedText = text.trim();
313 if (trimmedText.isEmpty()) {
314 throw new TTSException("The passed text is null or empty");
316 if (!this.allVoices.contains(voice)) {
317 throw new TTSException("The passed voice is unsupported");
319 boolean isAudioFormatSupported = false;
320 for (AudioFormat currentAudioFormat : this.audioFormats) {
321 if (currentAudioFormat.isCompatible(requestedFormat)) {
322 isAudioFormatSupported = true;
326 if (!isAudioFormatSupported) {
327 throw new TTSException("The passed AudioFormat is unsupported");
330 // create the audio byte array for given text, locale, format
331 byte[] audio = apiImpl.synthesizeSpeech(trimmedText, (GoogleTTSVoice) voice, requestedFormat.getCodec());
333 throw new TTSException("Could not synthesize text via Google Cloud TTS Service");
335 return new ByteArrayAudioStream(audio, requestedFormat);