2 * Copyright (c) 2010-2023 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.googletts.internal;
16 import java.io.FileNotFoundException;
17 import java.io.FileOutputStream;
18 import java.io.IOException;
19 import java.math.BigInteger;
20 import java.nio.charset.StandardCharsets;
21 import java.nio.file.Files;
22 import java.security.MessageDigest;
23 import java.security.NoSuchAlgorithmException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Base64;
27 import java.util.Dictionary;
28 import java.util.HashMap;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Locale;
35 import org.eclipse.jdt.annotation.Nullable;
36 import org.eclipse.jetty.http.HttpHeader;
37 import org.eclipse.jetty.http.MimeTypes;
38 import org.openhab.core.audio.AudioFormat;
39 import org.openhab.core.auth.AuthenticationException;
40 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
41 import org.openhab.core.auth.client.oauth2.OAuthClientService;
42 import org.openhab.core.auth.client.oauth2.OAuthException;
43 import org.openhab.core.auth.client.oauth2.OAuthFactory;
44 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
45 import org.openhab.core.i18n.CommunicationException;
46 import org.openhab.core.io.net.http.HttpRequestBuilder;
47 import org.openhab.voice.googletts.internal.dto.AudioConfig;
48 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
49 import org.openhab.voice.googletts.internal.dto.ListVoicesResponse;
50 import org.openhab.voice.googletts.internal.dto.SsmlVoiceGender;
51 import org.openhab.voice.googletts.internal.dto.SynthesisInput;
52 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechRequest;
53 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechResponse;
54 import org.openhab.voice.googletts.internal.dto.Voice;
55 import org.openhab.voice.googletts.internal.dto.VoiceSelectionParams;
56 import org.osgi.service.cm.Configuration;
57 import org.osgi.service.cm.ConfigurationAdmin;
58 import org.slf4j.Logger;
59 import org.slf4j.LoggerFactory;
61 import com.google.gson.Gson;
62 import com.google.gson.GsonBuilder;
63 import com.google.gson.JsonSyntaxException;
66 * Google Cloud TTS API call implementation.
68 * @author Gabor Bicskei - Initial contribution and API
70 class GoogleCloudAPI {
72 private static final char EXTENSION_SEPARATOR = '.';
73 private static final char UNIX_SEPARATOR = '/';
74 private static final char WINDOWS_SEPARATOR = '\\';
76 private static final String BEARER = "Bearer ";
78 private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
79 private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
80 private static final String GCP_REDIRECT_URI = "https://www.google.com";
82 * Google Cloud Platform authorization scope
84 private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
87 * URL used for retrieving the list of available voices
89 private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
92 * URL used for synthesizing text to speech
94 private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
99 private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
102 * Supported voices and locales
104 private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
109 private File cacheFolder;
114 private @Nullable GoogleTTSConfig config;
116 private final Gson gson = new GsonBuilder().create();
117 private final ConfigurationAdmin configAdmin;
118 private final OAuthFactory oAuthFactory;
120 private @Nullable OAuthClientService oAuthService;
/**
 * Creates the API client. Only stores its collaborators.
 *
 * @param configAdmin Configuration admin used to look up the service configuration
 * @param oAuthFactory Factory used to create and release the OAuth client service
 * @param cacheFolder Service cache folder
 */
GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) {
    this.cacheFolder = cacheFolder;
    this.oAuthFactory = oAuthFactory;
    this.configAdmin = configAdmin;
}
// Applies a new configuration to this API client. Steps visible in this listing:
// 1. store the configuration;
// 2. release a previously held OAuth client service through the factory;
// 3. when clientId and clientSecret are both non-null and non-empty, create a new OAuth client service;
// 4. when config.purgeCache is set, delete the files listed directly in cacheFolder.
// NOTE(review): the embedded numbering jumps (142 -> 146, 151 -> 155, 158 -> 166), so statements are missing from
// this view - presumably the `try {` the catch below belongs to, its body, and closing braces. Confirm against the
// complete file before changing this method.
134 * Configuration update.
136 * @param config New configuration.
138 void setConfig(GoogleTTSConfig config) {
139 this.config = config;
// The service is registered under GoogleTTSService.SERVICE_PID; release the old handle before creating a new one.
141 if (oAuthService != null) {
142 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
146 String clientId = config.clientId;
147 String clientSecret = config.clientSecret;
// Without complete client credentials no OAuth client service is created.
148 if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
149 final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(GoogleTTSService.SERVICE_PID,
150 GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE, false);
151 this.oAuthService = oAuthService;
// On an authentication or communication failure only the message is logged (no stack trace), the service handle
// is released and the field is reset so that isInitialized() reports false.
155 } catch (AuthenticationException | CommunicationException e) {
156 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
157 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
158 this.oAuthService = null;
// Cache maintenance: listFiles() returns null when the folder cannot be listed, hence the null check.
166 if (config.purgeCache) {
167 File[] files = cacheFolder.listFiles();
168 if (files != null && files.length > 0) {
// The boolean result of File.delete is ignored, so entries that cannot be deleted are skipped silently.
169 Arrays.stream(files).forEach(File::delete);
171 logger.debug("Cache purged.");
// Releases the OAuth client service registered under GoogleTTSService.SERVICE_PID, if one is currently held.
// NOTE(review): the embedded numbering jumps from 177 to 184, so the remainder of this method is not visible in
// this listing; whether the field is reset afterwards cannot be told from here - confirm against the complete file.
175 public void dispose() {
176 if (oAuthService != null) {
177 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
// Exchanges the configured authorization code for OAuth2 tokens. Nothing happens when config.authcode is null or
// empty. After a successful exchange the auth code is cleared in memory and an empty value is written to the
// persisted service configuration.
// NOTE(review): the embedded numbering has gaps (194 -> 196, 196 -> 198, 207 -> 210, 210 -> 213, 219 -> 222). The
// second argument of the token call, the `try {` lines and the call that receives the final message string are not
// visible in this listing - confirm against the complete file.
184 * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
185 * the auth-code will be removed from the configuration.
187 * @throws AuthenticationException
188 * @throws CommunicationException
// "null" warnings are suppressed: config and oAuthService are @Nullable fields that are dereferenced unchecked.
190 @SuppressWarnings("null")
191 private void getAccessToken() throws AuthenticationException, CommunicationException {
192 String authcode = config.authcode;
193 if (authcode != null && !authcode.isEmpty()) {
194 logger.debug("Trying to get access and refresh tokens.");
196 AccessTokenResponse response = oAuthService.getAccessTokenResponseByAuthorizationCode(authcode,
// A response without a refresh token is treated as a failed authorization.
198 if (response.getRefreshToken() == null || response.getRefreshToken().isEmpty()) {
199 throw new AuthenticationException("Error fetching refresh token. Please reauthorize");
// OAuth failures are logged with their stack trace at debug level and rethrown as AuthenticationException; the
// original exception is not attached as cause.
201 } catch (OAuthException | OAuthResponseException e) {
202 logger.debug("Error fetching access token: {}", e.getMessage(), e);
203 throw new AuthenticationException(
204 "Error fetching access token. Invalid authcode? Please generate a new one.");
205 } catch (IOException e) {
206 throw new CommunicationException(
207 String.format("An unexpected IOException occurred: %s", e.getMessage()));
// The auth code is cleared in memory once the exchange above went through.
210 config.authcode = null;
// Overwrite the persisted auth code with an empty string; skipped when the configuration has no properties.
213 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
214 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
215 if (configProperties != null) {
216 configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
217 serviceConfig.update(configProperties);
// A failure to persist the change is not rethrown from what is visible here.
219 } catch (IOException e) {
222 "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
227 @SuppressWarnings("null")
228 private String getAuthorizationHeader() throws AuthenticationException, CommunicationException {
229 final AccessTokenResponse accessTokenResponse;
231 accessTokenResponse = oAuthService.getAccessTokenResponse();
232 } catch (OAuthException | OAuthResponseException e) {
233 logger.debug("Error fetching access token: {}", e.getMessage(), e);
234 throw new AuthenticationException(
235 "Error fetching access token. Invalid authcode? Please generate a new one.");
236 } catch (IOException e) {
237 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
239 if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
240 || accessTokenResponse.getAccessToken().isEmpty()) {
241 throw new AuthenticationException("No access token. Is this thing authorized?");
243 if (accessTokenResponse.getRefreshToken() == null || accessTokenResponse.getRefreshToken().isEmpty()) {
244 throw new AuthenticationException("No refresh token. Please reauthorize");
246 return BEARER + accessTokenResponse.getAccessToken();
250 * Loads supported audio formats
252 * @return Set of audio formats
254 Set<String> getSupportedAudioFormats() {
255 Set<String> formats = new HashSet<>();
256 for (AudioEncoding audioEncoding : AudioEncoding.values()) {
257 if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
258 formats.add(audioEncoding.toString());
267 * @return Set of locales
269 Set<Locale> getSupportedLocales() {
270 return voices.keySet();
274 * Supported voices for locale.
276 * @param locale Locale
277 * @return Set of voices
279 Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
280 Set<GoogleTTSVoice> localeVoices = voices.get(locale);
281 return localeVoices != null ? localeVoices : Set.of();
285 * Google API call to load locales and voices.
287 * @throws AuthenticationException
288 * @throws CommunicationException
290 private void initVoices() throws AuthenticationException, CommunicationException {
291 if (oAuthService != null) {
293 for (GoogleTTSVoice voice : listVoices()) {
294 Locale locale = voice.getLocale();
295 Set<GoogleTTSVoice> localeVoices;
296 if (!voices.containsKey(locale)) {
297 localeVoices = new HashSet<>();
298 voices.put(locale, localeVoices);
300 localeVoices = voices.get(locale);
302 localeVoices.add(voice);
305 logger.error("Google client is not initialized!");
309 @SuppressWarnings("null")
310 private List<GoogleTTSVoice> listVoices() throws AuthenticationException, CommunicationException {
311 HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
312 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
315 ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(),
316 ListVoicesResponse.class);
318 if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
322 List<GoogleTTSVoice> result = new ArrayList<>();
323 for (Voice voice : listVoicesResponse.getVoices()) {
324 for (String languageCode : voice.getLanguageCodes()) {
325 result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
326 voice.getSsmlGender().name()));
330 } catch (JsonSyntaxException e) {
332 } catch (IOException e) {
333 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
339 * Converts audio format to Google parameters.
341 * @param codec Requested codec
342 * @return String array of Google audio format and the file extension to use.
344 private String[] getFormatForCodec(String codec) {
346 case AudioFormat.CODEC_MP3:
347 return new String[] { AudioEncoding.MP3.toString(), "mp3" };
348 case AudioFormat.CODEC_PCM_SIGNED:
349 return new String[] { AudioEncoding.LINEAR16.toString(), "wav" };
351 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
// Returns the audio for the given text, voice and codec, using the cache folder as a file cache: the cache file name
// is derived from the text and the voice's technical name, the extension from the codec.
// An unsupported codec makes getFormatForCodec throw IllegalArgumentException, which none of the catch clauses below
// handles.
// NOTE(review): the embedded numbering has gaps (358 -> 361, 363 -> 366, 367 -> 369, 369 -> 372, 375 -> 379, and
// nothing after 382). The `try {` line, any guard around the cache write, the statements following the catch
// blocks and the final return are not visible in this listing - confirm against the complete file.
355 public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
// format[0] is the Google audio encoding name, format[1] the file extension.
356 String[] format = getFormatForCodec(codec);
357 String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName());
358 File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]);
// Cache hit: the file content is returned without calling Google.
361 if (audioFileInCache.exists()) {
362 logger.debug("Audio file {} was found in cache.", audioFileInCache.getName());
363 return Files.readAllBytes(audioFileInCache.toPath());
366 // if not in cache, get audio data and put to cache
367 byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]);
369 saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName());
// Authentication/communication failures are logged with their message only; a held OAuth service is released.
372 } catch (AuthenticationException | CommunicationException e) {
373 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
374 if (oAuthService != null) {
375 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
// FileNotFoundException is caught before the more general IOException and reported as a cache write problem.
379 } catch (FileNotFoundException e) {
380 logger.warn("Could not write file {} to cache: {}", audioFileInCache, e.getMessage());
381 } catch (IOException e) {
382 logger.debug("An unexpected IOException occurred: {}", e.getMessage());
// Writes a cache entry consisting of two files: the audio bytes go to cacheFile, and a UTF-8 text file with the same
// base name and the extension ".txt" is written into cacheFolder. Both streams are opened in try-with-resources.
// NOTE(review): the embedded numbering jumps inside the StringBuilder chain (411 -> 414 -> 417), so some append
// calls are not visible in this listing; the exact content of the text file cannot be told from here (the
// parameters text and voiceName are not used in any visible line) - confirm against the complete file.
388 * Create cache entry.
390 * @param text Converted text.
391 * @param cacheFile Cache entry file.
392 * @param audio Byte array of the audio.
393 * @param voiceName Used voice
394 * @throws FileNotFoundException
395 * @throws IOException in case of file handling exceptions
397 private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName)
398 throws IOException, FileNotFoundException {
399 logger.debug("Caching audio file {}", cacheFile.getName());
400 try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) {
401 audioFileOutputStream.write(audio);
404 // write text to file for transparency too
405 // this allows to know which contents is in which audio file
// The text file shares the audio file's base name; only the extension differs.
406 String textFileName = removeExtension(cacheFile.getName()) + ".txt";
407 logger.debug("Caching text file {}", textFileName);
408 try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) {
410 StringBuilder sb = new StringBuilder("Config: ")
411 .append(config.toConfigString())
414 .append(System.lineSeparator())
417 .append(System.lineSeparator());
419 textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8));
424 * Removes the extension of a file name.
426 * @param fileName the file name to remove the extension of
427 * @return the filename without the extension
429 private String removeExtension(String fileName) {
430 int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
431 int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR));
432 return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos);
436 * Call Google service to synthesize the required text
438 * @param text Text to synthesize
439 * @param voice Voice parameter
440 * @param audioFormat Audio encoding format
441 * @return Audio input stream or {@code null} when encoding exceptions occur
442 * @throws AuthenticationException
443 * @throws CommunicationException
445 @SuppressWarnings("null")
446 private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
447 throws AuthenticationException, CommunicationException {
448 AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
449 config.volumeGainDb);
450 SynthesisInput synthesisInput = new SynthesisInput(text);
451 VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
452 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
454 SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
455 voiceSelectionParams);
457 HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
458 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
459 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
462 SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
463 SynthesizeSpeechResponse.class);
465 if (synthesizeSpeechResponse == null) {
469 byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
470 return Base64.getDecoder().decode(encodedBytes);
471 } catch (JsonSyntaxException e) {
473 } catch (IOException e) {
474 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
480 * Gets a unique filename for a give text, by creating a MD5 hash of it. It
481 * will be preceded by the locale.
483 * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
485 private String getUniqueFilenameForText(String text, String voiceName) {
487 MessageDigest md = MessageDigest.getInstance("MD5");
488 byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8);
489 String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
490 return voiceName + "_" + fileNameHash;
491 } catch (NoSuchAlgorithmException e) {
493 logger.error("Could not create MD5 hash for '{}'", text, e);
498 boolean isInitialized() {
499 return oAuthService != null;