/**
 * Copyright (c) 2010-2023 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
13 package org.openhab.voice.googletts.internal;
15 import java.io.IOException;
16 import java.nio.charset.StandardCharsets;
17 import java.util.ArrayList;
18 import java.util.Base64;
19 import java.util.Dictionary;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Locale;
27 import org.eclipse.jdt.annotation.Nullable;
28 import org.eclipse.jetty.http.HttpHeader;
29 import org.eclipse.jetty.http.MimeTypes;
30 import org.openhab.core.audio.AudioFormat;
31 import org.openhab.core.auth.AuthenticationException;
32 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
33 import org.openhab.core.auth.client.oauth2.OAuthClientService;
34 import org.openhab.core.auth.client.oauth2.OAuthException;
35 import org.openhab.core.auth.client.oauth2.OAuthFactory;
36 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
37 import org.openhab.core.i18n.CommunicationException;
38 import org.openhab.core.io.net.http.HttpRequestBuilder;
39 import org.openhab.voice.googletts.internal.dto.AudioConfig;
40 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
41 import org.openhab.voice.googletts.internal.dto.ListVoicesResponse;
42 import org.openhab.voice.googletts.internal.dto.SsmlVoiceGender;
43 import org.openhab.voice.googletts.internal.dto.SynthesisInput;
44 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechRequest;
45 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechResponse;
46 import org.openhab.voice.googletts.internal.dto.Voice;
47 import org.openhab.voice.googletts.internal.dto.VoiceSelectionParams;
48 import org.osgi.service.cm.Configuration;
49 import org.osgi.service.cm.ConfigurationAdmin;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
53 import com.google.gson.Gson;
54 import com.google.gson.GsonBuilder;
55 import com.google.gson.JsonSyntaxException;
58 * Google Cloud TTS API call implementation.
60 * @author Gabor Bicskei - Initial contribution and API
62 class GoogleCloudAPI {
64 private static final String BEARER = "Bearer ";
66 private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
67 private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
68 private static final String GCP_REDIRECT_URI = "https://www.google.com";
70 * Google Cloud Platform authorization scope
72 private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
75 * URL used for retrieving the list of available voices
77 private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
80 * URL used for synthesizing text to speech
82 private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
87 private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
90 * Supported voices and locales
92 private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
97 private @Nullable GoogleTTSConfig config;
99 private final Gson gson = new GsonBuilder().create();
100 private final ConfigurationAdmin configAdmin;
101 private final OAuthFactory oAuthFactory;
103 private @Nullable OAuthClientService oAuthService;
109 GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory) {
110 this.configAdmin = configAdmin;
111 this.oAuthFactory = oAuthFactory;
115 * Configuration update.
117 * @param config New configuration.
119 void setConfig(GoogleTTSConfig config) {
120 this.config = config;
122 if (oAuthService != null) {
123 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
127 String clientId = config.clientId;
128 String clientSecret = config.clientSecret;
129 if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
130 final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(GoogleTTSService.SERVICE_PID,
131 GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE, false);
132 this.oAuthService = oAuthService;
136 } catch (AuthenticationException | CommunicationException e) {
137 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
138 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
139 this.oAuthService = null;
147 public void dispose() {
148 if (oAuthService != null) {
149 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
156 * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
157 * the auth-code will be removed from the configuration.
159 * @throws AuthenticationException
160 * @throws CommunicationException
162 @SuppressWarnings("null")
163 private void getAccessToken() throws AuthenticationException, CommunicationException {
164 String authcode = config.authcode;
165 if (authcode != null && !authcode.isEmpty()) {
166 logger.debug("Trying to get access and refresh tokens.");
168 AccessTokenResponse response = oAuthService.getAccessTokenResponseByAuthorizationCode(authcode,
170 if (response.getRefreshToken() == null || response.getRefreshToken().isEmpty()) {
171 throw new AuthenticationException("Error fetching refresh token. Please reauthorize");
173 } catch (OAuthException | OAuthResponseException e) {
174 logger.debug("Error fetching access token: {}", e.getMessage(), e);
175 throw new AuthenticationException(
176 "Error fetching access token. Invalid authcode? Please generate a new one.");
177 } catch (IOException e) {
178 throw new CommunicationException(
179 String.format("An unexpected IOException occurred: %s", e.getMessage()));
182 config.authcode = null;
185 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
186 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
187 if (configProperties != null) {
188 configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
189 serviceConfig.update(configProperties);
191 } catch (IOException e) {
194 "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
199 @SuppressWarnings("null")
200 private String getAuthorizationHeader() throws AuthenticationException, CommunicationException {
201 final AccessTokenResponse accessTokenResponse;
203 accessTokenResponse = oAuthService.getAccessTokenResponse();
204 } catch (OAuthException | OAuthResponseException e) {
205 logger.debug("Error fetching access token: {}", e.getMessage(), e);
206 throw new AuthenticationException(
207 "Error fetching access token. Invalid authcode? Please generate a new one.");
208 } catch (IOException e) {
209 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
211 if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
212 || accessTokenResponse.getAccessToken().isEmpty()) {
213 throw new AuthenticationException("No access token. Is this thing authorized?");
215 if (accessTokenResponse.getRefreshToken() == null || accessTokenResponse.getRefreshToken().isEmpty()) {
216 throw new AuthenticationException("No refresh token. Please reauthorize");
218 return BEARER + accessTokenResponse.getAccessToken();
222 * Loads supported audio formats
224 * @return Set of audio formats
226 Set<String> getSupportedAudioFormats() {
227 Set<String> formats = new HashSet<>();
228 for (AudioEncoding audioEncoding : AudioEncoding.values()) {
229 if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
230 formats.add(audioEncoding.toString());
239 * @return Set of locales
241 Set<Locale> getSupportedLocales() {
242 return voices.keySet();
246 * Supported voices for locale.
248 * @param locale Locale
249 * @return Set of voices
251 Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
252 Set<GoogleTTSVoice> localeVoices = voices.get(locale);
253 return localeVoices != null ? localeVoices : Set.of();
257 * Google API call to load locales and voices.
259 * @throws AuthenticationException
260 * @throws CommunicationException
262 private void initVoices() throws AuthenticationException, CommunicationException {
263 if (oAuthService != null) {
265 for (GoogleTTSVoice voice : listVoices()) {
266 Locale locale = voice.getLocale();
267 Set<GoogleTTSVoice> localeVoices;
268 if (!voices.containsKey(locale)) {
269 localeVoices = new HashSet<>();
270 voices.put(locale, localeVoices);
272 localeVoices = voices.get(locale);
274 localeVoices.add(voice);
277 logger.error("Google client is not initialized!");
281 @SuppressWarnings("null")
282 private List<GoogleTTSVoice> listVoices() throws AuthenticationException, CommunicationException {
283 HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
284 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
287 ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(),
288 ListVoicesResponse.class);
290 if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
294 List<GoogleTTSVoice> result = new ArrayList<>();
295 for (Voice voice : listVoicesResponse.getVoices()) {
296 for (String languageCode : voice.getLanguageCodes()) {
297 result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
298 voice.getSsmlGender().name()));
302 } catch (JsonSyntaxException e) {
304 } catch (IOException e) {
305 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
311 * Converts audio format to Google parameters.
313 * @param codec Requested codec
314 * @return String array of Google audio format and the file extension to use.
316 private String getFormatForCodec(String codec) {
318 case AudioFormat.CODEC_MP3:
319 return AudioEncoding.MP3.toString();
320 case AudioFormat.CODEC_PCM_SIGNED:
321 return AudioEncoding.LINEAR16.toString();
323 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
327 public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
328 String format = getFormatForCodec(codec);
330 return synthesizeSpeechByGoogle(text, voice, format);
331 } catch (AuthenticationException | CommunicationException e) {
332 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
333 if (oAuthService != null) {
334 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
343 * Call Google service to synthesize the required text
345 * @param text Text to synthesize
346 * @param voice Voice parameter
347 * @param audioFormat Audio encoding format
348 * @return Audio input stream or {@code null} when encoding exceptions occur
349 * @throws AuthenticationException
350 * @throws CommunicationException
352 @SuppressWarnings("null")
353 private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
354 throws AuthenticationException, CommunicationException {
355 AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
356 config.volumeGainDb);
357 SynthesisInput synthesisInput = new SynthesisInput(text);
358 VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
359 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
361 SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
362 voiceSelectionParams);
364 HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
365 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
366 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
369 SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
370 SynthesizeSpeechResponse.class);
372 if (synthesizeSpeechResponse == null) {
376 byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
377 return Base64.getDecoder().decode(encodedBytes);
378 } catch (JsonSyntaxException e) {
380 } catch (IOException e) {
381 throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
386 boolean isInitialized() {
387 return oAuthService != null;