git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2023 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.voicerss.internal;
  14
  15 import java.io.File;
  16 import java.io.IOException;
  17 import java.util.Collections;
  18 import java.util.HashSet;
  19 import java.util.Locale;
  20 import java.util.Map;
  21 import java.util.Set;
  22
  23 import org.eclipse.jdt.annotation.NonNullByDefault;
  24 import org.eclipse.jdt.annotation.Nullable;
  25 import org.openhab.core.OpenHAB;
  26 import org.openhab.core.audio.AudioException;
  27 import org.openhab.core.audio.AudioFormat;
  28 import org.openhab.core.audio.AudioStream;
  29 import org.openhab.core.config.core.ConfigurableService;
  30 import org.openhab.core.voice.AbstractCachedTTSService;
  31 import org.openhab.core.voice.TTSCache;
  32 import org.openhab.core.voice.TTSException;
  33 import org.openhab.core.voice.TTSService;
  34 import org.openhab.core.voice.Voice;
  35 import org.openhab.voice.voicerss.internal.cloudapi.CachedVoiceRSSCloudImpl;
  36 import org.osgi.framework.Constants;
  37 import org.osgi.service.component.annotations.Activate;
  38 import org.osgi.service.component.annotations.Component;
  39 import org.osgi.service.component.annotations.Modified;
  40 import org.osgi.service.component.annotations.Reference;
  41 import org.slf4j.Logger;
  42 import org.slf4j.LoggerFactory;
  43
  44 /**
  45  * This is a TTS service implementation for using VoiceRSS TTS service.
  46  *
  47  * @author Jochen Hiller - Initial contribution and API
  48  * @author Laurent Garnier - add support for OGG and AAC audio formats
  49  */
  50 @NonNullByDefault
  51 @Component(service = TTSService.class, configurationPid = "org.openhab.voicerss", property = Constants.SERVICE_PID
  52         + "=org.openhab.voicerss")
  53 @ConfigurableService(category = "voice", label = "VoiceRSS Text-to-Speech", description_uri = "voice:voicerss")
  54 public class VoiceRSSTTSService extends AbstractCachedTTSService {
  55
  56     /** Cache folder name is below userdata/voicerss/cache. */
  57     private static final String CACHE_FOLDER_NAME = "voicerss" + File.separator + "cache";
  58
  59     // API Key comes from ConfigAdmin
  60     private static final String CONFIG_API_KEY = "apiKey";
  61
  62     /**
  63      * Map from openHAB AudioFormat Codec to VoiceRSS API Audio Codec
  64      */
  65     private static final Map<String, String> CODEC_MAP = Map.of(AudioFormat.CODEC_PCM_SIGNED, "WAV",
  66             AudioFormat.CODEC_PCM_UNSIGNED, "WAV", AudioFormat.CODEC_PCM_ALAW, "WAV", AudioFormat.CODEC_PCM_ULAW, "WAV",
  67             AudioFormat.CODEC_MP3, "MP3", AudioFormat.CODEC_VORBIS, "OGG", AudioFormat.CODEC_AAC, "AAC");
  68
  69     /**
  70      * Map from openHAB AudioFormat Frequency to VoiceRSS API Audio Frequency
  71      */
  72     private static final Map<Long, String> FREQUENCY_MAP = Map.of(8_000L, "8khz", 11_025L, "11khz", 12_000L, "12khz",
  73             16_000L, "16khz", 22_050L, "22khz", 24_000L, "24khz", 32_000L, "32khz", 44_100L, "44khz", 48_000L, "48khz");
  74
  75     private final Logger logger = LoggerFactory.getLogger(VoiceRSSTTSService.class);
  76
  77     private @Nullable String apiKey;
  78
  79     /**
  80      * We need the cached implementation to allow for FixedLengthAudioStream.
  81      */
  82     private @Nullable CachedVoiceRSSCloudImpl voiceRssImpl;
  83
  84     /**
  85      * Set of supported voices
  86      */
  87     private @Nullable Set<Voice> voices;
  88
  89     /**
  90      * Set of supported audio formats
  91      */
  92     private @Nullable Set<AudioFormat> audioFormats;
  93
  94     @Activate
  95     public VoiceRSSTTSService(final @Reference TTSCache ttsCache) {
  96         super(ttsCache);
  97     }
  98
  99     /**
 100      * DS activate, with access to ConfigAdmin
 101      */
 102     @Activate
 103     protected void activate(@Nullable Map<String, Object> config) {
 104         try {
 105             modified(config);
 106             voiceRssImpl = initVoiceImplementation();
 107             voices = initVoices();
 108             audioFormats = initAudioFormats();
 109
 110             logger.debug("Using VoiceRSS cache folder {}", getCacheFolderName());
 111         } catch (IllegalStateException e) {
 112             logger.warn("Failed to activate VoiceRSS: {}", e.getMessage(), e);
 113         }
 114     }
 115
 116     @Modified
 117     protected void modified(@Nullable Map<String, Object> config) {
 118         if (config != null) {
 119             apiKey = config.containsKey(CONFIG_API_KEY) ? config.get(CONFIG_API_KEY).toString() : null;
 120         }
 121     }
 122
 123     @Override
 124     public Set<Voice> getAvailableVoices() {
 125         Set<Voice> localVoices = voices;
 126         return localVoices == null ? Set.of() : Collections.unmodifiableSet(localVoices);
 127     }
 128
 129     @Override
 130     public Set<AudioFormat> getSupportedFormats() {
 131         Set<AudioFormat> localFormats = audioFormats;
 132         return localFormats == null ? Set.of() : Collections.unmodifiableSet(localFormats);
 133     }
 134
 135     @Override
 136     public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 137         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 138         CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
 139         if (voiceRssCloud == null) {
 140             throw new TTSException("The service is not correctly initialized");
 141         }
 142         // trim text
 143         String trimmedText = text.trim();
 144         if (trimmedText.isEmpty()) {
 145             throw new TTSException("The passed text is empty");
 146         }
 147         Set<Voice> localVoices = voices;
 148         if (localVoices == null || !localVoices.contains(voice)) {
 149             throw new TTSException("The passed voice is unsupported");
 150         }
 151
 152         // If one predefined cache entry for given text, locale, voice, codec and format exists,
 153         // create the input from this file stream and return it.
 154         try {
 155             File cacheAudioFile = voiceRssCloud.getTextToSpeechInCache(trimmedText, voice.getLocale().toLanguageTag(),
 156                     voice.getLabel(), getApiAudioCodec(requestedFormat), getApiAudioFormat(requestedFormat));
 157             if (cacheAudioFile != null) {
 158                 logger.debug("Use cache entry '{}'", cacheAudioFile.getName());
 159                 return new VoiceRSSAudioStream(cacheAudioFile, requestedFormat);
 160             }
 161         } catch (AudioException ex) {
 162             throw new TTSException("Could not create AudioStream: " + ex.getMessage(), ex);
 163         } catch (IOException ex) {
 164             throw new TTSException("Could not read from VoiceRSS service: " + ex.getMessage(), ex);
 165         }
 166
 167         // If no predefined cache entry exists, use the common TTS cache mechanism from core framework
 168         logger.debug("Use common TTS cache mechanism");
 169         return super.synthesize(text, voice, requestedFormat);
 170     }
 171
 172     @Override
 173     public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
 174         logger.debug("synthesizeForCache '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
 175         CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
 176         if (voiceRssCloud == null) {
 177             throw new TTSException("The service is not correctly initialized");
 178         }
 179         // Validate known api key
 180         String key = apiKey;
 181         if (key == null) {
 182             throw new TTSException("Missing API key, configure it first before using");
 183         }
 184         // trim text
 185         String trimmedText = text.trim();
 186         if (trimmedText.isEmpty()) {
 187             throw new TTSException("The passed text is empty");
 188         }
 189         Set<Voice> localVoices = voices;
 190         if (localVoices == null || !localVoices.contains(voice)) {
 191             throw new TTSException("The passed voice is unsupported");
 192         }
 193
 194         try {
 195             VoiceRSSRawAudioStream audioStream = voiceRssCloud.getTextToSpeech(key, trimmedText,
 196                     voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioCodec(requestedFormat),
 197                     getApiAudioFormat(requestedFormat));
 198             return new VoiceRSSRawAudioStream(audioStream.getInputStream(), requestedFormat, audioStream.length());
 199         } catch (IOException ex) {
 200             throw new TTSException("Could not read from VoiceRSS service: " + ex.getMessage(), ex);
 201         }
 202     }
 203
 204     /**
 205      * Initializes voices.
 206      *
 207      * @return The voices of this instance
 208      * @throws IllegalStateException if voiceRssImpl is null
 209      */
 210     private Set<Voice> initVoices() throws IllegalStateException {
 211         CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
 212         if (voiceRssCloud == null) {
 213             throw new IllegalStateException("The service is not correctly initialized");
 214         }
 215         Set<Voice> voices = new HashSet<>();
 216         for (Locale locale : voiceRssCloud.getAvailableLocales()) {
 217             for (String voiceLabel : voiceRssCloud.getAvailableVoices(locale)) {
 218                 voices.add(new VoiceRSSVoice(locale, voiceLabel));
 219             }
 220         }
 221         return voices;
 222     }
 223
 224     /**
 225      * Initializes audioFormats
 226      *
 227      * @return The audio formats of this instance
 228      * @throws IllegalStateException if voiceRssImpl is null
 229      */
 230     private Set<AudioFormat> initAudioFormats() throws IllegalStateException {
 231         CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
 232         if (voiceRssCloud == null) {
 233             throw new IllegalStateException("The service is not correctly initialized");
 234         }
 235         Set<AudioFormat> audioFormats = new HashSet<>();
 236         for (String codec : voiceRssCloud.getAvailableAudioCodecs()) {
 237             switch (codec) {
 238                 case "MP3":
 239                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000,
 240                             44_100L));
 241                     break;
 242                 case "OGG":
 243                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16,
 244                             null, 44_100L));
 245                     break;
 246                 case "AAC":
 247                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null,
 248                             44_100L));
 249                     break;
 250                 case "WAV":
 251                     // Consider only mono formats
 252                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 253                             8, 64_000, 8_000L));
 254                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 255                             16, 128_000, 8_000L));
 256                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 257                             8, 88_200, 11_025L));
 258                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 259                             16, 176_400, 11_025L));
 260                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 261                             8, 96_000, 12_000L));
 262                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 263                             16, 192_000, 12_000L));
 264                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 265                             8, 128_000, 16_000L));
 266                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 267                             16, 256_000, 16_000L));
 268                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 269                             8, 176_400, 22_050L));
 270                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 271                             16, 352_800, 22_050L));
 272                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 273                             8, 192_000, 24_000L));
 274                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 275                             16, 384_000, 24_000L));
 276                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 277                             8, 256_000, 32_000L));
 278                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 279                             16, 512_000, 32_000L));
 280                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 281                             8, 352_800, 44_100L));
 282                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 283                             16, 705_600, 44_100L));
 284                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
 285                             8, 384_000, 48_000L));
 286                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
 287                             16, 768_000, 48_000L));
 288                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
 289                             64_000, 8_000L));
 290                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
 291                             88_200, 11_025L));
 292                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
 293                             176_400, 22_050L));
 294                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
 295                             352_800, 44_100L));
 296                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
 297                             64_000, 8_000L));
 298                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
 299                             88_200, 11_025L));
 300                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
 301                             176_400, 22_050L));
 302                     audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
 303                             352_800, 44_100L));
 304                     break;
 305                 default:
 306                     logger.debug("Audio codec {} not yet supported", codec);
 307                     break;
 308             }
 309         }
 310         return audioFormats;
 311     }
 312
 313     /**
 314      * Map {@link AudioFormat#getCodec() codec} to VoiceRSS API codec.
 315      *
 316      * @throws TTSException if {@code format} is not supported
 317      */
 318     private String getApiAudioCodec(AudioFormat format) throws TTSException {
 319         final String internalCodec = format.getCodec();
 320         final String apiCodec = CODEC_MAP.get(internalCodec != null ? internalCodec : AudioFormat.CODEC_PCM_SIGNED);
 321
 322         if (apiCodec == null) {
 323             throw new TTSException("Unsupported audio format: " + format);
 324         }
 325
 326         return apiCodec;
 327     }
 328
 329     /**
 330      * Map {@link AudioFormat#getBitDepth() bit depth} and {@link AudioFormat#getFrequency() frequency} to VoiceRSS API
 331      * format.
 332      *
 333      * @throws TTSException if {@code format} is not supported
 334      */
 335     private String getApiAudioFormat(AudioFormat format) throws TTSException {
 336         final Integer formatBitDepth = format.getBitDepth();
 337         final int bitDepth = formatBitDepth != null ? formatBitDepth.intValue() : 16;
 338         final Long formatFrequency = format.getFrequency();
 339         final Long frequency = formatFrequency != null ? formatFrequency.longValue() : 44_100L;
 340         final String apiFrequency = FREQUENCY_MAP.get(frequency);
 341
 342         if (apiFrequency == null || (bitDepth != 8 && bitDepth != 16)) {
 343             throw new TTSException("Unsupported audio format: " + format);
 344         }
 345
 346         String codec = format.getCodec();
 347         switch (codec != null ? codec : AudioFormat.CODEC_PCM_SIGNED) {
 348             case AudioFormat.CODEC_PCM_ALAW:
 349                 return "alaw_" + apiFrequency + "_mono";
 350             case AudioFormat.CODEC_PCM_ULAW:
 351                 return "ulaw_" + apiFrequency + "_mono";
 352             case AudioFormat.CODEC_PCM_SIGNED:
 353             case AudioFormat.CODEC_PCM_UNSIGNED:
 354             case AudioFormat.CODEC_MP3:
 355             case AudioFormat.CODEC_VORBIS:
 356             case AudioFormat.CODEC_AAC:
 357                 return apiFrequency + "_" + bitDepth + "bit_mono";
 358             default:
 359                 throw new TTSException("Unsupported audio format: " + format);
 360         }
 361     }
 362
 363     private CachedVoiceRSSCloudImpl initVoiceImplementation() throws IllegalStateException {
 364         return new CachedVoiceRSSCloudImpl(getCacheFolderName(), true);
 365     }
 366
 367     private String getCacheFolderName() {
 368         // we assume that this folder does NOT have a trailing separator
 369         return OpenHAB.getUserDataFolder() + File.separator + CACHE_FOLDER_NAME;
 370     }
 371
 372     @Override
 373     public String getId() {
 374         return "voicerss";
 375     }
 376
 377     @Override
 378     public String getLabel(@Nullable Locale locale) {
 379         return "VoiceRSS";
 380     }
 381 }