]> git.basschouten.com Git - openhab-addons.git/blob
692d47170148941a6dd00fa1ffca5c20ff4af57c
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2023 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.googletts.internal;
14
15 import java.io.File;
16 import java.io.FileNotFoundException;
17 import java.io.FileOutputStream;
18 import java.io.IOException;
19 import java.math.BigInteger;
20 import java.nio.charset.StandardCharsets;
21 import java.nio.file.Files;
22 import java.security.MessageDigest;
23 import java.security.NoSuchAlgorithmException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Base64;
27 import java.util.Dictionary;
28 import java.util.HashMap;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Locale;
32 import java.util.Map;
33 import java.util.Set;
34
35 import org.eclipse.jdt.annotation.Nullable;
36 import org.eclipse.jetty.http.HttpHeader;
37 import org.eclipse.jetty.http.MimeTypes;
38 import org.openhab.core.audio.AudioFormat;
39 import org.openhab.core.auth.AuthenticationException;
40 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
41 import org.openhab.core.auth.client.oauth2.OAuthClientService;
42 import org.openhab.core.auth.client.oauth2.OAuthException;
43 import org.openhab.core.auth.client.oauth2.OAuthFactory;
44 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
45 import org.openhab.core.i18n.CommunicationException;
46 import org.openhab.core.io.net.http.HttpRequestBuilder;
47 import org.openhab.voice.googletts.internal.dto.AudioConfig;
48 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
49 import org.openhab.voice.googletts.internal.dto.ListVoicesResponse;
50 import org.openhab.voice.googletts.internal.dto.SsmlVoiceGender;
51 import org.openhab.voice.googletts.internal.dto.SynthesisInput;
52 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechRequest;
53 import org.openhab.voice.googletts.internal.dto.SynthesizeSpeechResponse;
54 import org.openhab.voice.googletts.internal.dto.Voice;
55 import org.openhab.voice.googletts.internal.dto.VoiceSelectionParams;
56 import org.osgi.service.cm.Configuration;
57 import org.osgi.service.cm.ConfigurationAdmin;
58 import org.slf4j.Logger;
59 import org.slf4j.LoggerFactory;
60
61 import com.google.gson.Gson;
62 import com.google.gson.GsonBuilder;
63 import com.google.gson.JsonSyntaxException;
64
65 /**
66  * Google Cloud TTS API call implementation.
67  *
68  * @author Gabor Bicskei - Initial contribution and API
69  */
70 class GoogleCloudAPI {
71
72     private static final char EXTENSION_SEPARATOR = '.';
73     private static final char UNIX_SEPARATOR = '/';
74     private static final char WINDOWS_SEPARATOR = '\\';
75
76     private static final String BEARER = "Bearer ";
77
78     private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
79     private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
80     private static final String GCP_REDIRECT_URI = "https://www.google.com";
81     /**
82      * Google Cloud Platform authorization scope
83      */
84     private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
85
86     /**
87      * URL used for retrieving the list of available voices
88      */
89     private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
90
91     /**
92      * URL used for synthesizing text to speech
93      */
94     private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
95
96     /**
97      * Logger
98      */
99     private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
100
101     /**
102      * Supported voices and locales
103      */
104     private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
105
106     /**
107      * Cache folder
108      */
109     private File cacheFolder;
110
111     /**
112      * Configuration
113      */
114     private @Nullable GoogleTTSConfig config;
115
116     private final Gson gson = new GsonBuilder().create();
117     private final ConfigurationAdmin configAdmin;
118     private final OAuthFactory oAuthFactory;
119
120     private @Nullable OAuthClientService oAuthService;
121
122     /**
123      * Constructor.
124      *
125      * @param cacheFolder Service cache folder
126      */
127     GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) {
128         this.configAdmin = configAdmin;
129         this.oAuthFactory = oAuthFactory;
130         this.cacheFolder = cacheFolder;
131     }
132
133     /**
134      * Configuration update.
135      *
136      * @param config New configuration.
137      */
138     void setConfig(GoogleTTSConfig config) {
139         this.config = config;
140
141         if (oAuthService != null) {
142             oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
143             oAuthService = null;
144         }
145
146         String clientId = config.clientId;
147         String clientSecret = config.clientSecret;
148         if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
149             final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(GoogleTTSService.SERVICE_PID,
150                     GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE, false);
151             this.oAuthService = oAuthService;
152             try {
153                 getAccessToken();
154                 initVoices();
155             } catch (AuthenticationException | CommunicationException e) {
156                 logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
157                 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
158                 this.oAuthService = null;
159                 voices.clear();
160             }
161         } else {
162             voices.clear();
163         }
164
165         // maintain cache
166         if (config.purgeCache) {
167             File[] files = cacheFolder.listFiles();
168             if (files != null && files.length > 0) {
169                 Arrays.stream(files).forEach(File::delete);
170             }
171             logger.debug("Cache purged.");
172         }
173     }
174
175     public void dispose() {
176         if (oAuthService != null) {
177             oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
178             oAuthService = null;
179         }
180         voices.clear();
181     }
182
183     /**
184      * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
185      * the auth-code will be removed from the configuration.
186      *
187      * @throws AuthenticationException
188      * @throws CommunicationException
189      */
190     @SuppressWarnings("null")
191     private void getAccessToken() throws AuthenticationException, CommunicationException {
192         String authcode = config.authcode;
193         if (authcode != null && !authcode.isEmpty()) {
194             logger.debug("Trying to get access and refresh tokens.");
195             try {
196                 AccessTokenResponse response = oAuthService.getAccessTokenResponseByAuthorizationCode(authcode,
197                         GCP_REDIRECT_URI);
198                 if (response.getRefreshToken() == null || response.getRefreshToken().isEmpty()) {
199                     throw new AuthenticationException("Error fetching refresh token. Please reauthorize");
200                 }
201             } catch (OAuthException | OAuthResponseException e) {
202                 logger.debug("Error fetching access token: {}", e.getMessage(), e);
203                 throw new AuthenticationException(
204                         "Error fetching access token. Invalid authcode? Please generate a new one.");
205             } catch (IOException e) {
206                 throw new CommunicationException(
207                         String.format("An unexpected IOException occurred: %s", e.getMessage()));
208             }
209
210             config.authcode = null;
211
212             try {
213                 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
214                 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
215                 if (configProperties != null) {
216                     configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
217                     serviceConfig.update(configProperties);
218                 }
219             } catch (IOException e) {
220                 // should not happen
221                 logger.warn(
222                         "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
223             }
224         }
225     }
226
227     @SuppressWarnings("null")
228     private String getAuthorizationHeader() throws AuthenticationException, CommunicationException {
229         final AccessTokenResponse accessTokenResponse;
230         try {
231             accessTokenResponse = oAuthService.getAccessTokenResponse();
232         } catch (OAuthException | OAuthResponseException e) {
233             logger.debug("Error fetching access token: {}", e.getMessage(), e);
234             throw new AuthenticationException(
235                     "Error fetching access token. Invalid authcode? Please generate a new one.");
236         } catch (IOException e) {
237             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
238         }
239         if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
240                 || accessTokenResponse.getAccessToken().isEmpty()) {
241             throw new AuthenticationException("No access token. Is this thing authorized?");
242         }
243         if (accessTokenResponse.getRefreshToken() == null || accessTokenResponse.getRefreshToken().isEmpty()) {
244             throw new AuthenticationException("No refresh token. Please reauthorize");
245         }
246         return BEARER + accessTokenResponse.getAccessToken();
247     }
248
249     /**
250      * Loads supported audio formats
251      *
252      * @return Set of audio formats
253      */
254     Set<String> getSupportedAudioFormats() {
255         Set<String> formats = new HashSet<>();
256         for (AudioEncoding audioEncoding : AudioEncoding.values()) {
257             if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
258                 formats.add(audioEncoding.toString());
259             }
260         }
261         return formats;
262     }
263
264     /**
265      * Supported locales.
266      *
267      * @return Set of locales
268      */
269     Set<Locale> getSupportedLocales() {
270         return voices.keySet();
271     }
272
273     /**
274      * Supported voices for locale.
275      *
276      * @param locale Locale
277      * @return Set of voices
278      */
279     Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
280         Set<GoogleTTSVoice> localeVoices = voices.get(locale);
281         return localeVoices != null ? localeVoices : Set.of();
282     }
283
284     /**
285      * Google API call to load locales and voices.
286      *
287      * @throws AuthenticationException
288      * @throws CommunicationException
289      */
290     private void initVoices() throws AuthenticationException, CommunicationException {
291         if (oAuthService != null) {
292             voices.clear();
293             for (GoogleTTSVoice voice : listVoices()) {
294                 Locale locale = voice.getLocale();
295                 Set<GoogleTTSVoice> localeVoices;
296                 if (!voices.containsKey(locale)) {
297                     localeVoices = new HashSet<>();
298                     voices.put(locale, localeVoices);
299                 } else {
300                     localeVoices = voices.get(locale);
301                 }
302                 localeVoices.add(voice);
303             }
304         } else {
305             logger.error("Google client is not initialized!");
306         }
307     }
308
309     @SuppressWarnings("null")
310     private List<GoogleTTSVoice> listVoices() throws AuthenticationException, CommunicationException {
311         HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
312                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
313
314         try {
315             ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(),
316                     ListVoicesResponse.class);
317
318             if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
319                 return List.of();
320             }
321
322             List<GoogleTTSVoice> result = new ArrayList<>();
323             for (Voice voice : listVoicesResponse.getVoices()) {
324                 for (String languageCode : voice.getLanguageCodes()) {
325                     result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
326                             voice.getSsmlGender().name()));
327                 }
328             }
329             return result;
330         } catch (JsonSyntaxException e) {
331             // do nothing
332         } catch (IOException e) {
333             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
334         }
335         return List.of();
336     }
337
338     /**
339      * Converts audio format to Google parameters.
340      *
341      * @param codec Requested codec
342      * @return String array of Google audio format and the file extension to use.
343      */
344     private String[] getFormatForCodec(String codec) {
345         switch (codec) {
346             case AudioFormat.CODEC_MP3:
347                 return new String[] { AudioEncoding.MP3.toString(), "mp3" };
348             case AudioFormat.CODEC_PCM_SIGNED:
349                 return new String[] { AudioEncoding.LINEAR16.toString(), "wav" };
350             default:
351                 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
352         }
353     }
354
355     public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
356         String[] format = getFormatForCodec(codec);
357         String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName());
358         File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]);
359         try {
360             // check if in cache
361             if (audioFileInCache.exists()) {
362                 logger.debug("Audio file {} was found in cache.", audioFileInCache.getName());
363                 return Files.readAllBytes(audioFileInCache.toPath());
364             }
365
366             // if not in cache, get audio data and put to cache
367             byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]);
368             if (audio != null) {
369                 saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName());
370             }
371             return audio;
372         } catch (AuthenticationException | CommunicationException e) {
373             logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
374             if (oAuthService != null) {
375                 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
376                 oAuthService = null;
377             }
378             voices.clear();
379         } catch (FileNotFoundException e) {
380             logger.warn("Could not write file {} to cache: {}", audioFileInCache, e.getMessage());
381         } catch (IOException e) {
382             logger.debug("An unexpected IOException occurred: {}", e.getMessage());
383         }
384         return null;
385     }
386
387     /**
388      * Create cache entry.
389      *
390      * @param text Converted text.
391      * @param cacheFile Cache entry file.
392      * @param audio Byte array of the audio.
393      * @param voiceName Used voice
394      * @throws FileNotFoundException
395      * @throws IOException in case of file handling exceptions
396      */
397     private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName)
398             throws IOException, FileNotFoundException {
399         logger.debug("Caching audio file {}", cacheFile.getName());
400         try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) {
401             audioFileOutputStream.write(audio);
402         }
403
404         // write text to file for transparency too
405         // this allows to know which contents is in which audio file
406         String textFileName = removeExtension(cacheFile.getName()) + ".txt";
407         logger.debug("Caching text file {}", textFileName);
408         try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) {
409             // @formatter:off
410             StringBuilder sb = new StringBuilder("Config: ")
411                     .append(config.toConfigString())
412                     .append(",voice=")
413                     .append(voiceName)
414                     .append(System.lineSeparator())
415                     .append("Text: ")
416                     .append(text)
417                     .append(System.lineSeparator());
418             // @formatter:on
419             textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8));
420         }
421     }
422
423     /**
424      * Removes the extension of a file name.
425      *
426      * @param fileName the file name to remove the extension of
427      * @return the filename without the extension
428      */
429     private String removeExtension(String fileName) {
430         int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
431         int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR));
432         return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos);
433     }
434
435     /**
436      * Call Google service to synthesize the required text
437      *
438      * @param text Text to synthesize
439      * @param voice Voice parameter
440      * @param audioFormat Audio encoding format
441      * @return Audio input stream or {@code null} when encoding exceptions occur
442      * @throws AuthenticationException
443      * @throws CommunicationException
444      */
445     @SuppressWarnings("null")
446     private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
447             throws AuthenticationException, CommunicationException {
448         AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
449                 config.volumeGainDb);
450         SynthesisInput synthesisInput = new SynthesisInput(text);
451         VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
452                 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
453
454         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
455                 voiceSelectionParams);
456
457         HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
458                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
459                 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
460
461         try {
462             SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
463                     SynthesizeSpeechResponse.class);
464
465             if (synthesizeSpeechResponse == null) {
466                 return null;
467             }
468
469             byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
470             return Base64.getDecoder().decode(encodedBytes);
471         } catch (JsonSyntaxException e) {
472             // do nothing
473         } catch (IOException e) {
474             throw new CommunicationException(String.format("An unexpected IOException occurred: %s", e.getMessage()));
475         }
476         return null;
477     }
478
479     /**
480      * Gets a unique filename for a give text, by creating a MD5 hash of it. It
481      * will be preceded by the locale.
482      * <p>
483      * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
484      */
485     private String getUniqueFilenameForText(String text, String voiceName) {
486         try {
487             MessageDigest md = MessageDigest.getInstance("MD5");
488             byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8);
489             String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
490             return voiceName + "_" + fileNameHash;
491         } catch (NoSuchAlgorithmException e) {
492             // should not happen
493             logger.error("Could not create MD5 hash for '{}'", text, e);
494             return null;
495         }
496     }
497
498     boolean isInitialized() {
499         return oAuthService != null;
500     }
501 }