git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2023 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.googletts.internal;
  14
  15 import static org.openhab.voice.googletts.internal.GoogleTTSService.*;
  16
  17 import java.io.ByteArrayInputStream;
  18 import java.io.File;
  19 import java.io.IOException;
  20 import java.io.InputStream;
  21 import java.util.Collections;
  22 import java.util.HashSet;
  23 import java.util.Locale;
  24 import java.util.Map;
  25 import java.util.Set;
  26
  27 import org.eclipse.jdt.annotation.NonNullByDefault;
  28 import org.eclipse.jdt.annotation.Nullable;
  29 import org.openhab.core.OpenHAB;
  30 import org.openhab.core.audio.AudioFormat;
  31 import org.openhab.core.audio.AudioStream;
  32 import org.openhab.core.audio.ByteArrayAudioStream;
  33 import org.openhab.core.audio.utils.AudioWaveUtils;
  34 import org.openhab.core.auth.client.oauth2.OAuthFactory;
  35 import org.openhab.core.config.core.ConfigurableService;
  36 import org.openhab.core.voice.TTSException;
  37 import org.openhab.core.voice.TTSService;
  38 import org.openhab.core.voice.Voice;
  39 import org.openhab.voice.googletts.internal.dto.AudioEncoding;
  40 import org.osgi.framework.Constants;
  41 import org.osgi.service.cm.ConfigurationAdmin;
  42 import org.osgi.service.component.annotations.Activate;
  43 import org.osgi.service.component.annotations.Component;
  44 import org.osgi.service.component.annotations.Modified;
  45 import org.osgi.service.component.annotations.Reference;
  46 import org.slf4j.Logger;
  47 import org.slf4j.LoggerFactory;
  48
  49 /**
  50  * Voice service implementation.
  51  *
  52  * @author Gabor Bicskei - Initial contribution
  53  */
  54 @Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
  55 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
  56         + " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
  57 public class GoogleTTSService implements TTSService {
  58     /**
  59      * Service name
  60      */
  61     static final String SERVICE_NAME = "Google Cloud";
  62
  63     /**
  64      * Service id
  65      */
  66     static final String SERVICE_ID = "googletts";
  67
  68     /**
  69      * Service category
  70      */
  71     static final String SERVICE_CATEGORY = "voice";
  72
  73     /**
  74      * Service pid
  75      */
  76     static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
  77
  78     /**
  79      * Cache folder under $userdata
  80      */
  81     private static final String CACHE_FOLDER_NAME = "cache";
  82
  83     /**
  84      * Configuration parameters
  85      */
  86     private static final String PARAM_CLIENT_ID = "clientId";
  87     private static final String PARAM_CLIEND_SECRET = "clientSecret";
  88     static final String PARAM_AUTHCODE = "authcode";
  89     private static final String PARAM_PITCH = "pitch";
  90     private static final String PARAM_SPEAKING_RATE = "speakingRate";
  91     private static final String PARAM_VOLUME_GAIN_DB = "volumeGainDb";
  92     private static final String PARAM_PURGE_CACHE = "purgeCache";
  93
  94     /**
  95      * Logger.
  96      */
  97     private final Logger logger = LoggerFactory.getLogger(GoogleTTSService.class);
  98
  99     /**
 100      * Set of supported audio formats
 101      */
 102     private Set<AudioFormat> audioFormats = new HashSet<>();
 103
 104     /**
 105      * Google Cloud TTS API implementation
 106      */
 107     private @NonNullByDefault({}) GoogleCloudAPI apiImpl;
 108     private final ConfigurationAdmin configAdmin;
 109     private final OAuthFactory oAuthFactory;
 110
 111     /**
 112      * All voices for all supported locales
 113      */
 114     private Set<Voice> allVoices = new HashSet<>();
 115
 116     private final GoogleTTSConfig config = new GoogleTTSConfig();
 117
 118     @Activate
 119     public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin,
 120             final @Reference OAuthFactory oAuthFactory) {
 121         this.configAdmin = configAdmin;
 122         this.oAuthFactory = oAuthFactory;
 123     }
 124
 125     /**
 126      * DS activate, with access to ConfigAdmin
 127      */
 128     @Activate
 129     protected void activate(Map<String, Object> config) {
 130         // create cache folder
 131         File userData = new File(OpenHAB.getUserDataFolder());
 132         File cacheFolder = new File(new File(userData, CACHE_FOLDER_NAME), SERVICE_PID);
 133         if (!cacheFolder.exists()) {
 134             cacheFolder.mkdirs();
 135         }
 136         logger.debug("Using cache folder {}", cacheFolder.getAbsolutePath());
 137
 138         apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory, cacheFolder);
 139         updateConfig(config);
 140     }
 141
 142     /**
 143      * Initializing audio formats. Google supports 3 formats:
 144      * LINEAR16
 145      * Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16
 146      * also contains a WAV header.
 147      * MP3
 148      * MP3 audio.
 149      * OGG_OPUS
 150      * Opus encoded audio wrapped in an ogg container. This is not supported by openHAB.
 151      *
 152      * @return Set of supported AudioFormats
 153      */
 154     private Set<AudioFormat> initAudioFormats() {
 155         logger.trace("Initializing audio formats");
 156         Set<AudioFormat> result = new HashSet<>();
 157         for (String format : apiImpl.getSupportedAudioFormats()) {
 158             AudioFormat audioFormat = getAudioFormat(format);
 159             if (audioFormat != null) {
 160                 result.add(audioFormat);
 161                 logger.trace("Audio format supported: {}", format);
 162             } else {
 163                 logger.trace("Audio format not supported: {}", format);
 164             }
 165         }
 166         return Collections.unmodifiableSet(result);
 167     }
 168
 169     /**
 170      * Loads available voices from Google API
 171      *
 172      * @return Set of available voices.
 173      */
 174     private Set<Voice> initVoices() {
 175         logger.trace("Initializing voices");
 176         Set<Voice> result = new HashSet<>();
 177         for (Locale locale : apiImpl.getSupportedLocales()) {
 178             result.addAll(apiImpl.getVoicesForLocale(locale));
 179         }
 180         if (logger.isTraceEnabled()) {
 181             for (Voice voice : result) {
 182                 logger.trace("Google Cloud TTS voice: {}", voice.getLabel());
 183             }
 184         }
 185         return Collections.unmodifiableSet(result);
 186     }
 187
 188     /**
 189      * Called by the framework when the configuration was updated.
 190      *
 191      * @param newConfig Updated configuration
 192      */
 193     @Modified
 194     private void updateConfig(Map<String, Object> newConfig) {
 195         logger.debug("Updating configuration");
 196         if (newConfig != null) {
 197             // client id
 198             String param = newConfig.containsKey(PARAM_CLIENT_ID) ? newConfig.get(PARAM_CLIENT_ID).toString() : null;
 199             config.clientId = param;
 200             if (param == null) {
 201                 logger.warn("Missing client id configuration to access Google Cloud TTS API.");
 202             }
 203             // client secret
 204             param = newConfig.containsKey(PARAM_CLIEND_SECRET) ? newConfig.get(PARAM_CLIEND_SECRET).toString() : null;
 205             config.clientSecret = param;
 206             if (param == null) {
 207                 logger.warn("Missing client secret configuration to access Google Cloud TTS API.");
 208             }
 209             // authcode
 210             param = newConfig.containsKey(PARAM_AUTHCODE) ? newConfig.get(PARAM_AUTHCODE).toString() : null;
 211             config.authcode = param;
 212
 213             // pitch
 214             param = newConfig.containsKey(PARAM_PITCH) ? newConfig.get(PARAM_PITCH).toString() : null;
 215             if (param != null) {
 216                 config.pitch = Double.parseDouble(param);
 217             }
 218
 219             // speakingRate
 220             param = newConfig.containsKey(PARAM_SPEAKING_RATE) ? newConfig.get(PARAM_SPEAKING_RATE).toString() : null;
 221             if (param != null) {
 222                 config.speakingRate = Double.parseDouble(param);
 223             }
 224
 225             // volumeGainDb
 226             param = newConfig.containsKey(PARAM_VOLUME_GAIN_DB) ? newConfig.get(PARAM_VOLUME_GAIN_DB).toString() : null;
 227             if (param != null) {
 228                 config.volumeGainDb = Double.parseDouble(param);
 229             }
 230
 231             // purgeCache
 232             param = newConfig.containsKey(PARAM_PURGE_CACHE) ? newConfig.get(PARAM_PURGE_CACHE).toString() : null;
 233             if (param != null) {
 234                 config.purgeCache = Boolean.parseBoolean(param);
 235             }
 236             logger.trace("New configuration: {}", config.toString());
 237
 238             if (config.clientId != null && !config.clientId.isEmpty() && config.clientSecret != null
 239                     && !config.clientSecret.isEmpty()) {
 240                 apiImpl.setConfig(config);
 241                 if (apiImpl.isInitialized()) {
 242                     allVoices = initVoices();
 243                     audioFormats = initAudioFormats();
 244                 }
 245             }
 246         } else {
 247             logger.warn("Missing Google Cloud TTS configuration.");
 248         }
 249     }
 250
 251     @Override
 252     public String getId() {
 253         return SERVICE_ID;
 254     }
 255
 256     @Override
 257     public String getLabel(@Nullable Locale locale) {
 258         return SERVICE_NAME;
 259     }
 260
 261     @Override
 262     public Set<Voice> getAvailableVoices() {
 263         return allVoices;
 264     }
 265
 266     @Override
 267     public Set<AudioFormat> getSupportedFormats() {
 268         return audioFormats;
 269     }
 270
 271     /**
 272      * Helper to create AudioFormat objects from Google names.
 273      *
 274      * @param format Google audio format.
 275      * @return Audio format object.
 276      */
 277     private @Nullable AudioFormat getAudioFormat(String format) {
 278         Integer bitDepth = 16;
 279         Long frequency = 44100L;
 280
 281         AudioEncoding encoding = AudioEncoding.valueOf(format);
 282
 283         switch (encoding) {
 284             case MP3:
 285                 // we use by default: MP3, 44khz_16bit_mono with bitrate 64 kbps
 286                 return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, bitDepth, 64000,
 287                         frequency);
 288             case LINEAR16:
 289                 // we use by default: wav, 44khz_16bit_mono
 290                 return new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, bitDepth, null,
 291                         frequency);
 292             default:
 293                 logger.warn("Audio format {} is not yet supported.", format);
 294                 return null;
 295         }
 296     }
 297
 298     /**
 299      * Checks parameters and calls the API to synthesize voice.
 300      *
 301      * @param text Input text.
 302      * @param voice Selected voice.
 303      * @param requestedFormat Format that is supported by the target sink as well.
 304      * @return Output audio stream
 305      * @throws TTSException in case the service is unavailable or a parameter is invalid.
 306      */
 307     @Override
 308     public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 309         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 310         // Validate known api key
 311         if (!apiImpl.isInitialized()) {
 312             throw new TTSException("Missing service configuration.");
 313         }
 314         // Validate arguments
 315         // trim text
 316         String trimmedText = text.trim();
 317         if (trimmedText.isEmpty()) {
 318             throw new TTSException("The passed text is null or empty");
 319         }
 320         if (!this.allVoices.contains(voice)) {
 321             throw new TTSException("The passed voice is unsupported");
 322         }
 323         boolean isAudioFormatSupported = false;
 324         for (AudioFormat currentAudioFormat : this.audioFormats) {
 325             if (currentAudioFormat.isCompatible(requestedFormat)) {
 326                 isAudioFormatSupported = true;
 327                 break;
 328             }
 329         }
 330         if (!isAudioFormatSupported) {
 331             throw new TTSException("The passed AudioFormat is unsupported");
 332         }
 333
 334         // create the audio byte array for given text, locale, format
 335         byte[] audio = apiImpl.synthesizeSpeech(trimmedText, (GoogleTTSVoice) voice, requestedFormat.getCodec());
 336         if (audio == null) {
 337             throw new TTSException("Could not synthesize text via Google Cloud TTS Service");
 338         }
 339
 340         // compute the real format returned by google if wave file
 341         AudioFormat finalFormat = requestedFormat;
 342         if (AudioFormat.CONTAINER_WAVE.equals(requestedFormat.getContainer())) {
 343             finalFormat = parseAudioFormat(audio);
 344         }
 345
 346         return new ByteArrayAudioStream(audio, finalFormat);
 347     }
 348
 349     private AudioFormat parseAudioFormat(byte[] audio) throws TTSException {
 350         try (InputStream inputStream = new ByteArrayInputStream(audio)) {
 351             return AudioWaveUtils.parseWavFormat(inputStream);
 352         } catch (IOException e) {
 353             throw new TTSException("Cannot parse WAV format", e);
 354         }
 355     }
 356 }