]> git.basschouten.com Git - openhab-addons.git/blob
0a5ef5983e8fbe0bea5851a2c7f3524593edd3d6
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2023 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.googletts.internal;
14
15 import java.io.IOException;
16 import java.nio.charset.StandardCharsets;
17 import java.util.ArrayList;
18 import java.util.Base64;
19 import java.util.Dictionary;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.Set;
26
27 import org.eclipse.jdt.annotation.Nullable;
28 import org.eclipse.jetty.http.HttpHeader;
29 import org.eclipse.jetty.http.MimeTypes;
30 import org.openhab.core.audio.AudioFormat;
31 import org.openhab.core.auth.AuthenticationException;
32 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
33 import org.openhab.core.auth.client.oauth2.OAuthClientService;
34 import org.openhab.core.auth.client.oauth2.OAuthException;
35 import org.openhab.core.auth.client.oauth2.OAuthFactory;
36 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
37 import org.openhab.core.i18n.CommunicationException;
38 import org.openhab.core.io.net.http.HttpRequestBuilder;
39 import org.openhab.voice.googletts.internal.dto.AudioConfig;
40 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
41 import org.openhab.voice.googletts.internal.dto.ListVoicesResponse;
42 import org.openhab.voice.googletts.internal.dto.SsmlVoiceGender;
43 import org.openhab.voice.googletts.internal.dto.SynthesisInput;
44 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechRequest;
45 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechResponse;
46 import org.openhab.voice.googletts.internal.dto.Voice;
47 import org.openhab.voice.googletts.internal.dto.VoiceSelectionParams;
48 import org.osgi.service.cm.Configuration;
49 import org.osgi.service.cm.ConfigurationAdmin;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
52
53 import com.google.gson.Gson;
54 import com.google.gson.GsonBuilder;
55 import com.google.gson.JsonSyntaxException;
56
57 /**
58  * Google Cloud TTS API call implementation.
59  *
60  * @author Gabor Bicskei - Initial contribution and API
61  */
62 class GoogleCloudAPI {
63
64     private static final String BEARER = "Bearer ";
65
66     private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
67     private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
68     private static final String GCP_REDIRECT_URI = "https://www.google.com";
69     /**
70      * Google Cloud Platform authorization scope
71      */
72     private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
73
74     /**
75      * URL used for retrieving the list of available voices
76      */
77     private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
78
79     /**
80      * URL used for synthesizing text to speech
81      */
82     private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
83
84     /**
85      * Logger
86      */
87     private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
88
89     /**
90      * Supported voices and locales
91      */
92     private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
93
94     /**
95      * Configuration
96      */
97     private @Nullable GoogleTTSConfig config;
98
99     private final Gson gson = new GsonBuilder().create();
100     private final ConfigurationAdmin configAdmin;
101     private final OAuthFactory oAuthFactory;
102
103     private @Nullable OAuthClientService oAuthService;
104
105     /**
106      * Constructor.
107      *
108      */
109     GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory) {
110         this.configAdmin = configAdmin;
111         this.oAuthFactory = oAuthFactory;
112     }
113
114     /**
115      * Configuration update.
116      *
117      * @param config New configuration.
118      */
119     void setConfig(GoogleTTSConfig config) {
120         this.config = config;
121
122         if (oAuthService != null) {
123             oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
124             oAuthService = null;
125         }
126
127         String clientId = config.clientId;
128         String clientSecret = config.clientSecret;
129         if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
130             final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(GoogleTTSService.SERVICE_PID,
131                     GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE, false);
132             this.oAuthService = oAuthService;
133             try {
134                 getAccessToken();
135                 initVoices();
136             } catch (AuthenticationException | CommunicationException e) {
137                 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
138                 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
139                 this.oAuthService = null;
140                 voices.clear();
141             }
142         } else {
143             voices.clear();
144         }
145     }
146
147     public void dispose() {
148         if (oAuthService != null) {
149             oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
150             oAuthService = null;
151         }
152         voices.clear();
153     }
154
155     /**
156      * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
157      * the auth-code will be removed from the configuration.
158      *
159      * @throws AuthenticationException
160      * @throws CommunicationException
161      */
162     @SuppressWarnings("null")
163     private void getAccessToken() throws AuthenticationException, CommunicationException {
164         String authcode = config.authcode;
165         if (authcode != null && !authcode.isEmpty()) {
166             logger.debug("Trying to get access and refresh tokens.");
167             try {
168                 AccessTokenResponse response = oAuthService.getAccessTokenResponseByAuthorizationCode(authcode,
169                         GCP_REDIRECT_URI);
170                 if (response.getRefreshToken() == null || response.getRefreshToken().isEmpty()) {
171                     throw new AuthenticationException("Error fetching refresh token. Please reauthorize");
172                 }
173             } catch (OAuthException | OAuthResponseException e) {
174                 logger.debug("Error fetching access token: {}", e.getMessage(), e);
175                 throw new AuthenticationException(
176                         "Error fetching access token. Invalid authcode? Please generate a new one.");
177             } catch (IOException e) {
178                 throw new CommunicationException(
179                         String.format("An unexpected IOException occurred: %s", e.getMessage()));
180             }
181
182             config.authcode = null;
183
184             try {
185                 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
186                 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
187                 if (configProperties != null) {
188                     configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
189                     serviceConfig.update(configProperties);
190                 }
191             } catch (IOException e) {
192                 // should not happen
193                 logger.warn(
194                         "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
195             }
196         }
197     }
198
199     @SuppressWarnings("null")
200     private String getAuthorizationHeader() throws AuthenticationException, CommunicationException {
201         final AccessTokenResponse accessTokenResponse;
202         try {
203             accessTokenResponse = oAuthService.getAccessTokenResponse();
204         } catch (OAuthException | OAuthResponseException e) {
205             logger.debug("Error fetching access token: {}", e.getMessage(), e);
206             throw new AuthenticationException(
207                     "Error fetching access token. Invalid authcode? Please generate a new one.");
208         } catch (IOException e) {
209             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
210         }
211         if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
212                 || accessTokenResponse.getAccessToken().isEmpty()) {
213             throw new AuthenticationException("No access token. Is this thing authorized?");
214         }
215         if (accessTokenResponse.getRefreshToken() == null || accessTokenResponse.getRefreshToken().isEmpty()) {
216             throw new AuthenticationException("No refresh token. Please reauthorize");
217         }
218         return BEARER + accessTokenResponse.getAccessToken();
219     }
220
221     /**
222      * Loads supported audio formats
223      *
224      * @return Set of audio formats
225      */
226     Set<String> getSupportedAudioFormats() {
227         Set<String> formats = new HashSet<>();
228         for (AudioEncoding audioEncoding : AudioEncoding.values()) {
229             if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
230                 formats.add(audioEncoding.toString());
231             }
232         }
233         return formats;
234     }
235
236     /**
237      * Supported locales.
238      *
239      * @return Set of locales
240      */
241     Set<Locale> getSupportedLocales() {
242         return voices.keySet();
243     }
244
245     /**
246      * Supported voices for locale.
247      *
248      * @param locale Locale
249      * @return Set of voices
250      */
251     Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
252         Set<GoogleTTSVoice> localeVoices = voices.get(locale);
253         return localeVoices != null ? localeVoices : Set.of();
254     }
255
256     /**
257      * Google API call to load locales and voices.
258      *
259      * @throws AuthenticationException
260      * @throws CommunicationException
261      */
262     private void initVoices() throws AuthenticationException, CommunicationException {
263         if (oAuthService != null) {
264             voices.clear();
265             for (GoogleTTSVoice voice : listVoices()) {
266                 Locale locale = voice.getLocale();
267                 Set<GoogleTTSVoice> localeVoices;
268                 if (!voices.containsKey(locale)) {
269                     localeVoices = new HashSet<>();
270                     voices.put(locale, localeVoices);
271                 } else {
272                     localeVoices = voices.get(locale);
273                 }
274                 localeVoices.add(voice);
275             }
276         } else {
277             logger.error("Google client is not initialized!");
278         }
279     }
280
281     @SuppressWarnings("null")
282     private List<GoogleTTSVoice> listVoices() throws AuthenticationException, CommunicationException {
283         HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
284                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
285
286         try {
287             ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(),
288                     ListVoicesResponse.class);
289
290             if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
291                 return List.of();
292             }
293
294             List<GoogleTTSVoice> result = new ArrayList<>();
295             for (Voice voice : listVoicesResponse.getVoices()) {
296                 for (String languageCode : voice.getLanguageCodes()) {
297                     result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
298                             voice.getSsmlGender().name()));
299                 }
300             }
301             return result;
302         } catch (JsonSyntaxException e) {
303             // do nothing
304         } catch (IOException e) {
305             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
306         }
307         return List.of();
308     }
309
310     /**
311      * Converts audio format to Google parameters.
312      *
313      * @param codec Requested codec
314      * @return String array of Google audio format and the file extension to use.
315      */
316     private String getFormatForCodec(String codec) {
317         switch (codec) {
318             case AudioFormat.CODEC_MP3:
319                 return AudioEncoding.MP3.toString();
320             case AudioFormat.CODEC_PCM_SIGNED:
321                 return AudioEncoding.LINEAR16.toString();
322             default:
323                 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
324         }
325     }
326
327     public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
328         String format = getFormatForCodec(codec);
329         try {
330             return synthesizeSpeechByGoogle(text, voice, format);
331         } catch (AuthenticationException e) {
332             logger.warn("Error authenticating Google Cloud TTS service: {}", e.getMessage());
333             if (oAuthService != null) {
334                 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
335                 oAuthService = null;
336             }
337         } catch (CommunicationException e) {
338             logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
339         }
340         voices.clear();
341         return null;
342     }
343
344     /**
345      * Call Google service to synthesize the required text
346      *
347      * @param text Text to synthesize
348      * @param voice Voice parameter
349      * @param audioFormat Audio encoding format
350      * @return Audio input stream or {@code null} when encoding exceptions occur
351      * @throws AuthenticationException
352      * @throws CommunicationException
353      */
354     @SuppressWarnings("null")
355     private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
356             throws AuthenticationException, CommunicationException {
357         AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
358                 config.volumeGainDb);
359         SynthesisInput synthesisInput = new SynthesisInput(text);
360         VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
361                 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
362
363         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
364                 voiceSelectionParams);
365
366         HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
367                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
368                 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
369
370         try {
371             SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
372                     SynthesizeSpeechResponse.class);
373
374             if (synthesizeSpeechResponse == null) {
375                 return null;
376             }
377
378             byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
379             return Base64.getDecoder().decode(encodedBytes);
380         } catch (JsonSyntaxException e) {
381             // do nothing
382         } catch (IOException e) {
383             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
384         }
385         return null;
386     }
387
388     boolean isInitialized() {
389         return oAuthService != null;
390     }
391 }