git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2021 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.googletts.internal;
  14
  15 import java.io.File;
  16 import java.io.FileNotFoundException;
  17 import java.io.FileOutputStream;
  18 import java.io.IOException;
  19 import java.math.BigInteger;
  20 import java.nio.charset.StandardCharsets;
  21 import java.nio.file.Files;
  22 import java.security.MessageDigest;
  23 import java.security.NoSuchAlgorithmException;
  24 import java.util.ArrayList;
  25 import java.util.Arrays;
  26 import java.util.Base64;
  27 import java.util.Collections;
  28 import java.util.Dictionary;
  29 import java.util.HashMap;
  30 import java.util.HashSet;
  31 import java.util.List;
  32 import java.util.Locale;
  33 import java.util.Map;
  34 import java.util.Set;
  35
  36 import org.eclipse.jdt.annotation.Nullable;
  37 import org.eclipse.jetty.http.HttpHeader;
  38 import org.eclipse.jetty.http.MimeTypes;
  39 import org.openhab.core.audio.AudioFormat;
  40 import org.openhab.core.auth.client.oauth2.AccessTokenResponse;
  41 import org.openhab.core.auth.client.oauth2.OAuthClientService;
  42 import org.openhab.core.auth.client.oauth2.OAuthException;
  43 import org.openhab.core.auth.client.oauth2.OAuthFactory;
  44 import org.openhab.core.auth.client.oauth2.OAuthResponseException;
  45 import org.openhab.core.io.net.http.HttpRequestBuilder;
  46 import org.openhab.voice.googletts.internal.protocol.AudioConfig;
  47 import org.openhab.voice.googletts.internal.protocol.AudioEncoding;
  48 import org.openhab.voice.googletts.internal.protocol.ListVoicesResponse;
  49 import org.openhab.voice.googletts.internal.protocol.SsmlVoiceGender;
  50 import org.openhab.voice.googletts.internal.protocol.SynthesisInput;
  51 import org.openhab.voice.googletts.internal.protocol.SynthesizeSpeechRequest;
  52 import org.openhab.voice.googletts.internal.protocol.SynthesizeSpeechResponse;
  53 import org.openhab.voice.googletts.internal.protocol.Voice;
  54 import org.openhab.voice.googletts.internal.protocol.VoiceSelectionParams;
  55 import org.osgi.service.cm.Configuration;
  56 import org.osgi.service.cm.ConfigurationAdmin;
  57 import org.slf4j.Logger;
  58 import org.slf4j.LoggerFactory;
  59
  60 import com.google.gson.Gson;
  61 import com.google.gson.GsonBuilder;
  62
  63 /**
  64  * Google Cloud TTS API call implementation.
  65  *
  66  * @author Gabor Bicskei - Initial contribution and API
  67  */
  68 class GoogleCloudAPI {
  69
  70     private static final char EXTENSION_SEPARATOR = '.';
  71     private static final char UNIX_SEPARATOR = '/';
  72     private static final char WINDOWS_SEPARATOR = '\\';
  73
  74     private static final String BEARER = "Bearer ";
  75
  76     private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
  77     private static final String GCP_TOKEN_URI = "https://accounts.google.com/o/oauth2/token";
  78     private static final String GCP_REDIRECT_URI = "urn:ietf:wg:oauth:2.0:oob";
  79     /**
  80      * Google Cloud Platform authorization scope
  81      */
  82     private static final String GCP_SCOPE = "https://www.googleapis.com/auth/cloud-platform";
  83
  84     /**
  85      * URL used for retrieving the list of available voices
  86      */
  87     private static final String LIST_VOICES_URL = "https://texttospeech.googleapis.com/v1/voices";
  88
  89     /**
  90      * URL used for synthesizing text to speech
  91      */
  92     private static final String SYTNHESIZE_SPEECH_URL = "https://texttospeech.googleapis.com/v1/text:synthesize";
  93
  94     /**
  95      * Logger
  96      */
  97     private final Logger logger = LoggerFactory.getLogger(GoogleCloudAPI.class);
  98
  99     /**
 100      * Supported voices and locales
 101      */
 102     private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
 103
 104     /**
 105      * Cache folder
 106      */
 107     private File cacheFolder;
 108
 109     /**
 110      * Configuration
 111      */
 112     private @Nullable GoogleTTSConfig config;
 113
 114     /**
 115      * Status flag
 116      */
 117     private boolean initialized;
 118
 119     private final Gson gson = new GsonBuilder().create();
 120     private final ConfigurationAdmin configAdmin;
 121     private final OAuthFactory oAuthFactory;
 122
 123     private @Nullable OAuthClientService oAuthService;
 124
 125     /**
 126      * Constructor.
 127      *
 128      * @param cacheFolder Service cache folder
 129      */
 130     GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) {
 131         this.configAdmin = configAdmin;
 132         this.oAuthFactory = oAuthFactory;
 133         this.cacheFolder = cacheFolder;
 134     }
 135
 136     /**
 137      * Configuration update.
 138      *
 139      * @param config New configuration.
 140      */
 141     void setConfig(GoogleTTSConfig config) {
 142         this.config = config;
 143
 144         String clientId = config.clientId;
 145         String clientSecret = config.clientSecret;
 146         if (clientId != null && !clientId.isEmpty() && clientSecret != null && !clientSecret.isEmpty()) {
 147             try {
 148                 final OAuthClientService oAuthService = oAuthFactory.createOAuthClientService(
 149                         GoogleTTSService.SERVICE_PID, GCP_TOKEN_URI, GCP_AUTH_URI, clientId, clientSecret, GCP_SCOPE,
 150                         false);
 151                 this.oAuthService = oAuthService;
 152                 getAccessToken();
 153                 initialized = true;
 154                 initVoices();
 155             } catch (AuthenticationException | IOException ex) {
 156                 logger.warn("Error initializing Google Cloud TTS service: {}", ex.getMessage());
 157                 oAuthService = null;
 158                 initialized = false;
 159                 voices.clear();
 160             }
 161         } else {
 162             oAuthService = null;
 163             initialized = false;
 164             voices.clear();
 165         }
 166
 167         // maintain cache
 168         if (config.purgeCache) {
 169             File[] files = cacheFolder.listFiles();
 170             if (files != null && files.length > 0) {
 171                 Arrays.stream(files).forEach(File::delete);
 172             }
 173             logger.debug("Cache purged.");
 174         }
 175     }
 176
 177     /**
 178      * Fetches the OAuth2 tokens from Google Cloud Platform if the auth-code is set in the configuration. If successful
 179      * the auth-code will be removed from the configuration.
 180      */
 181     private void getAccessToken() throws AuthenticationException, IOException {
 182         String authcode = config.authcode;
 183         if (authcode != null && !authcode.isEmpty()) {
 184             logger.debug("Trying to get access and refresh tokens.");
 185             try {
 186                 oAuthService.getAccessTokenResponseByAuthorizationCode(authcode, GCP_REDIRECT_URI);
 187             } catch (OAuthException | OAuthResponseException ex) {
 188                 logger.debug("Error fetching access token: {}", ex.getMessage(), ex);
 189                 throw new AuthenticationException(
 190                         "Error fetching access token. Invalid authcode? Please generate a new one.");
 191             }
 192
 193             config.authcode = null;
 194
 195             try {
 196                 Configuration serviceConfig = configAdmin.getConfiguration(GoogleTTSService.SERVICE_PID);
 197                 Dictionary<String, Object> configProperties = serviceConfig.getProperties();
 198                 if (configProperties != null) {
 199                     configProperties.put(GoogleTTSService.PARAM_AUTHCODE, "");
 200                     serviceConfig.update(configProperties);
 201                 }
 202             } catch (IOException e) {
 203                 // should not happen
 204                 logger.warn(
 205                         "Failed to update configuration for Google Cloud TTS service. Please clear the 'authcode' configuration parameter manualy.");
 206             }
 207         }
 208     }
 209
 210     private String getAuthorizationHeader() throws AuthenticationException, IOException {
 211         final AccessTokenResponse accessTokenResponse;
 212         try {
 213             accessTokenResponse = oAuthService.getAccessTokenResponse();
 214         } catch (OAuthException | OAuthResponseException ex) {
 215             logger.debug("Error fetching access token: {}", ex.getMessage(), ex);
 216             throw new AuthenticationException(
 217                     "Error fetching access token. Invalid authcode? Please generate a new one.");
 218         }
 219         if (accessTokenResponse == null || accessTokenResponse.getAccessToken() == null
 220                 || accessTokenResponse.getAccessToken().isEmpty()) {
 221             throw new AuthenticationException("No access token. Is this thing authorized?");
 222         }
 223         return BEARER + accessTokenResponse.getAccessToken();
 224     }
 225
 226     /**
 227      * Loads supported audio formats
 228      *
 229      * @return Set of audio formats
 230      */
 231     Set<String> getSupportedAudioFormats() {
 232         Set<String> formats = new HashSet<>();
 233         for (AudioEncoding audioEncoding : AudioEncoding.values()) {
 234             if (audioEncoding != AudioEncoding.AUDIO_ENCODING_UNSPECIFIED) {
 235                 formats.add(audioEncoding.toString());
 236             }
 237         }
 238         return formats;
 239     }
 240
 241     /**
 242      * Supported locales.
 243      *
 244      * @return Set of locales
 245      */
 246     Set<Locale> getSupportedLocales() {
 247         return voices.keySet();
 248     }
 249
 250     /**
 251      * Supported voices for locale.
 252      *
 253      * @param locale Locale
 254      * @return Set of voices
 255      */
 256     Set<GoogleTTSVoice> getVoicesForLocale(Locale locale) {
 257         Set<GoogleTTSVoice> localeVoices = voices.get(locale);
 258         return localeVoices != null ? localeVoices : Collections.emptySet();
 259     }
 260
 261     /**
 262      * Google API call to load locales and voices.
 263      */
 264     private void initVoices() throws AuthenticationException, IOException {
 265         if (oAuthService != null) {
 266             voices.clear();
 267             for (GoogleTTSVoice voice : listVoices()) {
 268                 Locale locale = voice.getLocale();
 269                 Set<GoogleTTSVoice> localeVoices;
 270                 if (!voices.containsKey(locale)) {
 271                     localeVoices = new HashSet<>();
 272                     voices.put(locale, localeVoices);
 273                 } else {
 274                     localeVoices = voices.get(locale);
 275                 }
 276                 localeVoices.add(voice);
 277             }
 278         } else {
 279             logger.error("Google client is not initialized!");
 280         }
 281     }
 282
 283     @SuppressWarnings("null")
 284     private List<GoogleTTSVoice> listVoices() throws AuthenticationException, IOException {
 285         HttpRequestBuilder builder = HttpRequestBuilder.getFrom(LIST_VOICES_URL)
 286                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader());
 287
 288         ListVoicesResponse listVoicesResponse = gson.fromJson(builder.getContentAsString(), ListVoicesResponse.class);
 289
 290         if (listVoicesResponse == null || listVoicesResponse.getVoices() == null) {
 291             return Collections.emptyList();
 292         }
 293
 294         List<GoogleTTSVoice> result = new ArrayList<>();
 295         for (Voice voice : listVoicesResponse.getVoices()) {
 296             for (String languageCode : voice.getLanguageCodes()) {
 297                 result.add(new GoogleTTSVoice(Locale.forLanguageTag(languageCode), voice.getName(),
 298                         voice.getSsmlGender().name()));
 299             }
 300         }
 301
 302         return result;
 303     }
 304
 305     /**
 306      * Converts audio format to Google parameters.
 307      *
 308      * @param codec Requested codec
 309      * @return String array of Google audio format and the file extension to use.
 310      */
 311     private String[] getFormatForCodec(String codec) {
 312         switch (codec) {
 313             case AudioFormat.CODEC_MP3:
 314                 return new String[] { AudioEncoding.MP3.toString(), "mp3" };
 315             case AudioFormat.CODEC_PCM_SIGNED:
 316                 return new String[] { AudioEncoding.LINEAR16.toString(), "wav" };
 317             default:
 318                 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
 319         }
 320     }
 321
 322     byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
 323         String[] format = getFormatForCodec(codec);
 324         String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName());
 325         File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]);
 326         try {
 327             // check if in cache
 328             if (audioFileInCache.exists()) {
 329                 logger.debug("Audio file {} was found in cache.", audioFileInCache.getName());
 330                 return Files.readAllBytes(audioFileInCache.toPath());
 331             }
 332
 333             // if not in cache, get audio data and put to cache
 334             byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]);
 335             if (audio != null) {
 336                 saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName());
 337             }
 338             return audio;
 339         } catch (AuthenticationException ex) {
 340             logger.warn("Error initializing Google Cloud TTS service: {}", ex.getMessage());
 341             oAuthService = null;
 342             initialized = false;
 343             voices.clear();
 344             return null;
 345         } catch (FileNotFoundException ex) {
 346             logger.warn("Could not write {} to cache", audioFileInCache, ex);
 347             return null;
 348         } catch (IOException ex) {
 349             logger.error("Could not write {} to cache", audioFileInCache, ex);
 350             return null;
 351         }
 352     }
 353
 354     /**
 355      * Create cache entry.
 356      *
 357      * @param text Converted text.
 358      * @param cacheFile Cache entry file.
 359      * @param audio Byte array of the audio.
 360      * @param voiceName Used voice
 361      * @throws IOException in case of file handling exceptions
 362      */
 363     private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName)
 364             throws IOException {
 365         logger.debug("Caching audio file {}", cacheFile.getName());
 366         try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) {
 367             audioFileOutputStream.write(audio);
 368         }
 369
 370         // write text to file for transparency too
 371         // this allows to know which contents is in which audio file
 372         String textFileName = removeExtension(cacheFile.getName()) + ".txt";
 373         logger.debug("Caching text file {}", textFileName);
 374         try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) {
 375             // @formatter:off
 376             StringBuilder sb = new StringBuilder("Config: ")
 377                     .append(config.toConfigString())
 378                     .append(",voice=")
 379                     .append(voiceName)
 380                     .append(System.lineSeparator())
 381                     .append("Text: ")
 382                     .append(text)
 383                     .append(System.lineSeparator());
 384             // @formatter:on
 385             textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8));
 386         }
 387     }
 388
 389     /**
 390      * Removes the extension of a file name.
 391      *
 392      * @param fileName the file name to remove the extension of
 393      * @return the filename without the extension
 394      */
 395     private String removeExtension(String fileName) {
 396         int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
 397         int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR));
 398         return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos);
 399     }
 400
 401     /**
 402      * Call Google service to synthesize the required text
 403      *
 404      * @param text Text to synthesize
 405      * @param voice Voice parameter
 406      * @param audioFormat Audio encoding format
 407      * @return Audio input stream or {@code null} when encoding exceptions occur
 408      */
 409     @SuppressWarnings({ "null", "unused" })
 410     private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, String audioFormat)
 411             throws AuthenticationException, IOException {
 412         AudioConfig audioConfig = new AudioConfig(AudioEncoding.valueOf(audioFormat), config.pitch, config.speakingRate,
 413                 config.volumeGainDb);
 414         SynthesisInput synthesisInput = new SynthesisInput(text);
 415         VoiceSelectionParams voiceSelectionParams = new VoiceSelectionParams(voice.getLocale().getLanguage(),
 416                 voice.getLabel(), SsmlVoiceGender.valueOf(voice.getSsmlGender()));
 417
 418         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest(audioConfig, synthesisInput,
 419                 voiceSelectionParams);
 420
 421         HttpRequestBuilder builder = HttpRequestBuilder.postTo(SYTNHESIZE_SPEECH_URL)
 422                 .withHeader(HttpHeader.AUTHORIZATION.name(), getAuthorizationHeader())
 423                 .withContent(gson.toJson(request), MimeTypes.Type.APPLICATION_JSON.name());
 424
 425         SynthesizeSpeechResponse synthesizeSpeechResponse = gson.fromJson(builder.getContentAsString(),
 426                 SynthesizeSpeechResponse.class);
 427
 428         if (synthesizeSpeechResponse == null) {
 429             return null;
 430         }
 431
 432         byte[] encodedBytes = synthesizeSpeechResponse.getAudioContent().getBytes(StandardCharsets.UTF_8);
 433         return Base64.getDecoder().decode(encodedBytes);
 434     }
 435
 436     /**
 437      * Gets a unique filename for a give text, by creating a MD5 hash of it. It
 438      * will be preceded by the locale.
 439      * <p>
 440      * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
 441      */
 442     private String getUniqueFilenameForText(String text, String voiceName) {
 443         try {
 444             MessageDigest md = MessageDigest.getInstance("MD5");
 445             byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8);
 446             String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
 447             return voiceName + "_" + fileNameHash;
 448         } catch (NoSuchAlgorithmException ex) {
 449             // should not happen
 450             logger.error("Could not create MD5 hash for '{}'", text, ex);
 451             return null;
 452         }
 453     }
 454
 455     boolean isInitialized() {
 456         return initialized;
 457     }
 458 }