]> git.basschouten.com Git - openhab-addons.git/blob
766b894c93724e2213e58bc49f89a115d7550f27
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2021 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.googletts.internal;
14
15 import java.io.File;
16 import java.io.FileNotFoundException;
17 import java.io.FileOutputStream;
18 import java.io.IOException;
19 import java.math.BigInteger;
20 import java.nio.charset.StandardCharsets;
21 import java.nio.file.Files;
22 import java.security.MessageDigest;
23 import java.security.NoSuchAlgorithmException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Base64;
27 import java.util.Collections;
28 import java.util.Dictionary;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.Set;
35
36 import org.eclipse.jdt.annotation.Nullable;
37 import org.eclipse.jetty.http.HttpHeader;
38 import org.eclipse.jetty.http.MimeTypes;
39 import org.openhab.core.audio.AudioFormat;
40 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
41 import org.openhab.core.auth.client.oauth2.OAuthClientService;
42 import org.openhab.core.auth.client.oauth2.OAuthException;
43 import org.openhab.core.auth.client.oauth2.OAuthFactory;
44 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
45 import org.openhab.core.io.net.http.HttpRequestBuilder;
46 import org.openhab.voice.googletts.internal.protocol.AudioConfig;
47 import org.openhab.voice.googletts.internal.protocol.AudioEncoding;
48 import org.openhab.voice.googletts.internal.protocol.ListVoicesResponse;
49 import org.openhab.voice.googletts.internal.protocol.SsmlVoiceGender;
50 import org.openhab.voice.googletts.internal.protocol.SynthesisInput;
51 import org.openhab.voice.googletts.internal.protocol.SynthesizeSpeechRequest;
52 import org.openhab.voice.googletts.internal.protocol.SynthesizeSpeechResponse;
53 import org.openhab.voice.googletts.internal.protocol.Voice;
54 import org.openhab.voice.googletts.internal.protocol.VoiceSelectionParams;
55 import org.osgi.service.cm.Configuration;
56 import org.osgi.service.cm.ConfigurationAdmin;
57 import org.slf4j.Logger;
58 import org.slf4j.LoggerFactory;
59
60 import com.google.gson.Gson;
61 import com.google.gson.GsonBuilder;
62
63 /**
64  * Google Cloud TTS API call implementation.
65  *
66  * @author Gabor Bicskei - Initial contribution and API
67  */
68 class GoogleCloudAPI {
69
70     private static final char EXTENSION_SEPARATOR = '.';
71     private static final char UNIX_SEPARATOR = '/';
72     private static final char WINDOWS_SEPARATOR = '\\';
73
74     private static final String BEARER = "Bearer ";
75
76     private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
77     private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
78     private static final String GCP_REDIRECT_URI = "urn:ietf:wg:oauth:2.0:oob";
79     /**
80      * Google Cloud Platform authorization scope
81      */
82     private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
83
84     /**
85      * URL used for retrieving the list of available voices
86      */
87     private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
88
89     /**
90      * URL used for synthesizing text to speech
91      */
92     private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
93
94     /**
95      * Logger
96      */
97     private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
98
99     /**
100      * Supported voices and locales
101      */
102     private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
103
104     /**
105      * Cache folder
106      */
107     private File cacheFolder;
108
109     /**
110      * Configuration
111      */
112     private @Nullable GoogleTTSConfig config;
113
114     /**
115      * Status flag
116      */
117     private boolean initialized;
118
119     private final Gson gson = new GsonBuilder().create();
120     private final ConfigurationAdmin configAdmin;
121     private final OAuthFactory oAuthFactory;
122
123     private @Nullable OAuthClientService oAuthService;
124
125     /**
126      * Constructor.
127      *
128      * @param cacheFolder Service cache folder
129      */
130     GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) {
131         this.configAdmin = configAdmin;
132         this.oAuthFactory = oAuthFactory;
133         this.cacheFolder = cacheFolder;
134     }
135
136     /**
137      * Configuration update.
138      *
139      * @param config New configuration.
140      */
141     void setConfig(GoogleTTSConfig config) {
142         this.config = config;
143
144         String clientId = config.clientId;
145         String clientSecret = config.clientSecret;
146         if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
147             try {
148                 final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(
149                         GoogleTTSService.SERVICE_PID, GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE,
150                         false);
151                 this.oAuthService = oAuthService;
152                 getAccessToken();
153                 initialized = true;
154                 initVoices();
155             } catch (AuthenticationException | IOException ex) {
156                 logger.warn("Error initializing Google Cloud TTS service: {}", ex.getMessage());
157                 oAuthService = null;
158                 initialized = false;
159                 voices.clear();
160             }
161         } else {
162             oAuthService = null;
163             initialized = false;
164             voices.clear();
165         }
166
167         // maintain cache
168         if (config.purgeCache) {
169             File[] files = cacheFolder.listFiles();
170             if (files != null && files.length > 0) {
171                 Arrays.stream(files).forEach(File::delete);
172             }
173             logger.debug("Cache purged.");
174         }
175     }
176
177     /**
178      * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
179      * the auth-code will be removed from the configuration.
180      */
181     private void getAccessToken() throws AuthenticationException, IOException {
182         String authcode = config.authcode;
183         if (authcode != null && !authcode.isEmpty()) {
184             logger.debug("Trying to get access and refresh tokens.");
185             try {
186                 oAuthService.getAccessTokenResponseByAuthorizationCode(authcode, GCP_REDIRECT_URI);
187             } catch (OAuthException | OAuthResponseException ex) {
188                 logger.debug("Error fetching access token: {}", ex.getMessage(), ex);
189                 throw new AuthenticationException(
190                         "Error fetching access token. Invalid authcode? Please generate a new one.");
191             }
192
193             config.authcode = null;
194
195             try {
196                 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
197                 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
198                 if (configProperties != null) {
199                     configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
200                     serviceConfig.update(configProperties);
201                 }
202             } catch (IOException e) {
203                 // should not happen
204                 logger.warn(
205                         "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
206             }
207         }
208     }
209
210     private String getAuthorizationHeader() throws AuthenticationException, IOException {
211         final AccessTokenResponse accessTokenResponse;
212         try {
213             accessTokenResponse = oAuthService.getAccessTokenResponse();
214         } catch (OAuthException | OAuthResponseException ex) {
215             logger.debug("Error fetching access token: {}", ex.getMessage(), ex);
216             throw new AuthenticationException(
217                     "Error fetching access token. Invalid authcode? Please generate a new one.");
218         }
219         if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
220                 || accessTokenResponse.getAccessToken().isEmpty()) {
221             throw new AuthenticationException("No access token. Is this thing authorized?");
222         }
223         return BEARER + accessTokenResponse.getAccessToken();
224     }
225
226     /**
227      * Loads supported audio formats
228      *
229      * @return Set of audio formats
230      */
231     Set<String> getSupportedAudioFormats() {
232         Set<String> formats = new HashSet<>();
233         for (AudioEncoding audioEncoding : AudioEncoding.values()) {
234             if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
235                 formats.add(audioEncoding.toString());
236             }
237         }
238         return formats;
239     }
240
241     /**
242      * Supported locales.
243      *
244      * @return Set of locales
245      */
246     Set<Locale> getSupportedLocales() {
247         return voices.keySet();
248     }
249
250     /**
251      * Supported voices for locale.
252      *
253      * @param locale Locale
254      * @return Set of voices
255      */
256     Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
257         Set<GoogleTTSVoice> localeVoices = voices.get(locale);
258         return localeVoices != null ? localeVoices : Collections.emptySet();
259     }
260
261     /**
262      * Google API call to load locales and voices.
263      */
264     private void initVoices() throws AuthenticationException, IOException {
265         if (oAuthService != null) {
266             voices.clear();
267             for (GoogleTTSVoice voice : listVoices()) {
268                 Locale locale = voice.getLocale();
269                 Set<GoogleTTSVoice> localeVoices;
270                 if (!voices.containsKey(locale)) {
271                     localeVoices = new HashSet<>();
272                     voices.put(locale, localeVoices);
273                 } else {
274                     localeVoices = voices.get(locale);
275                 }
276                 localeVoices.add(voice);
277             }
278         } else {
279             logger.error("Google client is not initialized!");
280         }
281     }
282
283     @SuppressWarnings("null")
284     private List<GoogleTTSVoice> listVoices() throws AuthenticationException, IOException {
285         HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
286                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
287
288         ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(), ListVoicesResponse.class);
289
290         if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
291             return Collections.emptyList();
292         }
293
294         List<GoogleTTSVoice> result = new ArrayList<>();
295         for (Voice voice : listVoicesResponse.getVoices()) {
296             for (String languageCode : voice.getLanguageCodes()) {
297                 result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
298                         voice.getSsmlGender().name()));
299             }
300         }
301
302         return result;
303     }
304
305     /**
306      * Converts audio format to Google parameters.
307      *
308      * @param codec Requested codec
309      * @return String array of Google audio format and the file extension to use.
310      */
311     private String[] getFormatForCodec(String codec) {
312         switch (codec) {
313             case AudioFormat.CODEC_MP3:
314                 return new String[] { AudioEncoding.MP3.toString(), "mp3" };
315             case AudioFormat.CODEC_PCM_SIGNED:
316                 return new String[] { AudioEncoding.LINEAR16.toString(), "wav" };
317             default:
318                 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
319         }
320     }
321
322     byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
323         String[] format = getFormatForCodec(codec);
324         String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName());
325         File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]);
326         try {
327             // check if in cache
328             if (audioFileInCache.exists()) {
329                 logger.debug("Audio file {} was found in cache.", audioFileInCache.getName());
330                 return Files.readAllBytes(audioFileInCache.toPath());
331             }
332
333             // if not in cache, get audio data and put to cache
334             byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]);
335             if (audio != null) {
336                 saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName());
337             }
338             return audio;
339         } catch (AuthenticationException ex) {
340             logger.warn("Error initializing Google Cloud TTS service: {}", ex.getMessage());
341             oAuthService = null;
342             initialized = false;
343             voices.clear();
344             return null;
345         } catch (FileNotFoundException ex) {
346             logger.warn("Could not write {} to cache", audioFileInCache, ex);
347             return null;
348         } catch (IOException ex) {
349             logger.error("Could not write {} to cache", audioFileInCache, ex);
350             return null;
351         }
352     }
353
354     /**
355      * Create cache entry.
356      *
357      * @param text Converted text.
358      * @param cacheFile Cache entry file.
359      * @param audio Byte array of the audio.
360      * @param voiceName Used voice
361      * @throws IOException in case of file handling exceptions
362      */
363     private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName)
364             throws IOException {
365         logger.debug("Caching audio file {}", cacheFile.getName());
366         try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) {
367             audioFileOutputStream.write(audio);
368         }
369
370         // write text to file for transparency too
371         // this allows to know which contents is in which audio file
372         String textFileName = removeExtension(cacheFile.getName()) + ".txt";
373         logger.debug("Caching text file {}", textFileName);
374         try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) {
375             // @formatter:off
376             StringBuilder sb = new StringBuilder("Config: ")
377                     .append(config.toConfigString())
378                     .append(",voice=")
379                     .append(voiceName)
380                     .append(System.lineSeparator())
381                     .append("Text: ")
382                     .append(text)
383                     .append(System.lineSeparator());
384             // @formatter:on
385             textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8));
386         }
387     }
388
389     /**
390      * Removes the extension of a file name.
391      *
392      * @param fileName the file name to remove the extension of
393      * @return the filename without the extension
394      */
395     private String removeExtension(String fileName) {
396         int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
397         int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR));
398         return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos);
399     }
400
401     /**
402      * Call Google service to synthesize the required text
403      *
404      * @param text Text to synthesize
405      * @param voice Voice parameter
406      * @param audioFormat Audio encoding format
407      * @return Audio input stream or {@code null} when encoding exceptions occur
408      */
409     @SuppressWarnings({ "null", "unused" })
410     private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
411             throws AuthenticationException, IOException {
412         AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
413                 config.volumeGainDb);
414         SynthesisInput synthesisInput = new SynthesisInput(text);
415         VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
416                 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
417
418         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
419                 voiceSelectionParams);
420
421         HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
422                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
423                 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
424
425         SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
426                 SynthesizeSpeechResponse.class);
427
428         if (synthesizeSpeechResponse == null) {
429             return null;
430         }
431
432         byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
433         return Base64.getDecoder().decode(encodedBytes);
434     }
435
436     /**
437      * Gets a unique filename for a give text, by creating a MD5 hash of it. It
438      * will be preceded by the locale.
439      * <p>
440      * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
441      */
442     private String getUniqueFilenameForText(String text, String voiceName) {
443         try {
444             MessageDigest md = MessageDigest.getInstance("MD5");
445             byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8);
446             String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
447             return voiceName + "_" + fileNameHash;
448         } catch (NoSuchAlgorithmException ex) {
449             // should not happen
450             logger.error("Could not create MD5 hash for '{}'", text, ex);
451             return null;
452         }
453     }
454
455     boolean isInitialized() {
456         return initialized;
457     }
458 }