2 * Copyright (c) 2010-2023 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.voicerss.internal;
16 import java.io.IOException;
17 import java.util.Collections;
18 import java.util.HashSet;
19 import java.util.Locale;
23 import org.eclipse.jdt.annotation.NonNullByDefault;
24 import org.eclipse.jdt.annotation.Nullable;
25 import org.openhab.core.OpenHAB;
26 import org.openhab.core.audio.AudioException;
27 import org.openhab.core.audio.AudioFormat;
28 import org.openhab.core.audio.AudioStream;
29 import org.openhab.core.config.core.ConfigurableService;
30 import org.openhab.core.voice.AbstractCachedTTSService;
31 import org.openhab.core.voice.TTSCache;
32 import org.openhab.core.voice.TTSException;
33 import org.openhab.core.voice.TTSService;
34 import org.openhab.core.voice.Voice;
35 import org.openhab.voice.voicerss.internal.cloudapi.CachedVoiceRSSCloudImpl;
36 import org.osgi.framework.Constants;
37 import org.osgi.service.component.annotations.Activate;
38 import org.osgi.service.component.annotations.Component;
39 import org.osgi.service.component.annotations.Modified;
40 import org.osgi.service.component.annotations.Reference;
41 import org.slf4j.Logger;
42 import org.slf4j.LoggerFactory;
45 * This is a TTS service implementation for using VoiceRSS TTS service.
47 * @author Jochen Hiller - Initial contribution and API
48 * @author Laurent Garnier - add support for OGG and AAC audio formats
51 @Component(service = TTSService.class, configurationPid = "org.openhab.voicerss", property = Constants.SERVICE_PID
52 + "=org.openhab.voicerss")
53 @ConfigurableService(category = "voice", label = "VoiceRSS Text-to-Speech", description_uri = "voice:voicerss")
54 public class VoiceRSSTTSService extends AbstractCachedTTSService {
56 /** Cache folder name is below userdata/voicerss/cache. */
57 private static final String CACHE_FOLDER_NAME = "voicerss" + File.separator + "cache";
59 // API Key comes from ConfigAdmin
60 private static final String CONFIG_API_KEY = "apiKey";
63 * Map from openHAB AudioFormat Codec to VoiceRSS API Audio Codec
65 private static final Map<String, String> CODEC_MAP = Map.of(AudioFormat.CODEC_PCM_SIGNED, "WAV",
66 AudioFormat.CODEC_PCM_UNSIGNED, "WAV", AudioFormat.CODEC_PCM_ALAW, "WAV", AudioFormat.CODEC_PCM_ULAW, "WAV",
67 AudioFormat.CODEC_MP3, "MP3", AudioFormat.CODEC_VORBIS, "OGG", AudioFormat.CODEC_AAC, "AAC");
70 * Map from openHAB AudioFormat Frequency to VoiceRSS API Audio Frequency
72 private static final Map<Long, String> FREQUENCY_MAP = Map.of(8_000L, "8khz", 11_025L, "11khz", 12_000L, "12khz",
73 16_000L, "16khz", 22_050L, "22khz", 24_000L, "24khz", 32_000L, "32khz", 44_100L, "44khz", 48_000L, "48khz");
75 private final Logger logger = LoggerFactory.getLogger(VoiceRSSTTSService.class);
77 private @Nullable String apiKey;
80 * We need the cached implementation to allow for FixedLengthAudioStream.
82 private @Nullable CachedVoiceRSSCloudImpl voiceRssImpl;
85 * Set of supported voices
87 private @Nullable Set<Voice> voices;
90 * Set of supported audio formats
92 private @Nullable Set<AudioFormat> audioFormats;
95 public VoiceRSSTTSService(final @Reference TTSCache ttsCache) {
100 * DS activate, with access to ConfigAdmin
103 protected void activate(@Nullable Map<String, Object> config) {
106 voiceRssImpl = initVoiceImplementation();
107 voices = initVoices();
108 audioFormats = initAudioFormats();
110 logger.debug("Using VoiceRSS cache folder {}", getCacheFolderName());
111 } catch (IllegalStateException e) {
112 logger.warn("Failed to activate VoiceRSS: {}", e.getMessage(), e);
117 protected void modified(@Nullable Map<String, Object> config) {
118 if (config != null) {
119 apiKey = config.containsKey(CONFIG_API_KEY) ? config.get(CONFIG_API_KEY).toString() : null;
124 public Set<Voice> getAvailableVoices() {
125 Set<Voice> localVoices = voices;
126 return localVoices == null ? Set.of() : Collections.unmodifiableSet(localVoices);
130 public Set<AudioFormat> getSupportedFormats() {
131 Set<AudioFormat> localFormats = audioFormats;
132 return localFormats == null ? Set.of() : Collections.unmodifiableSet(localFormats);
136 public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
137 logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
138 CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
139 if (voiceRssCloud == null) {
140 throw new TTSException("The service is not correctly initialized");
143 String trimmedText = text.trim();
144 if (trimmedText.isEmpty()) {
145 throw new TTSException("The passed text is empty");
147 Set<Voice> localVoices = voices;
148 if (localVoices == null || !localVoices.contains(voice)) {
149 throw new TTSException("The passed voice is unsupported");
152 // If one predefined cache entry for given text, locale, voice, codec and format exists,
153 // create the input from this file stream and return it.
155 File cacheAudioFile = voiceRssCloud.getTextToSpeechInCache(trimmedText, voice.getLocale().toLanguageTag(),
156 voice.getLabel(), getApiAudioCodec(requestedFormat), getApiAudioFormat(requestedFormat));
157 if (cacheAudioFile != null) {
158 logger.debug("Use cache entry '{}'", cacheAudioFile.getName());
159 return new VoiceRSSAudioStream(cacheAudioFile, requestedFormat);
161 } catch (AudioException ex) {
162 throw new TTSException("Could not create AudioStream: " + ex.getMessage(), ex);
163 } catch (IOException ex) {
164 throw new TTSException("Could not read from VoiceRSS service: " + ex.getMessage(), ex);
167 // If no predefined cache entry exists, use the common TTS cache mechanism from core framework
168 logger.debug("Use common TTS cache mechanism");
169 return super.synthesize(text, voice, requestedFormat);
173 public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
174 logger.debug("synthesizeForCache '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
175 CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
176 if (voiceRssCloud == null) {
177 throw new TTSException("The service is not correctly initialized");
179 // Validate known api key
182 throw new TTSException("Missing API key, configure it first before using");
185 String trimmedText = text.trim();
186 if (trimmedText.isEmpty()) {
187 throw new TTSException("The passed text is empty");
189 Set<Voice> localVoices = voices;
190 if (localVoices == null || !localVoices.contains(voice)) {
191 throw new TTSException("The passed voice is unsupported");
195 VoiceRSSRawAudioStream audioStream = voiceRssCloud.getTextToSpeech(key, trimmedText,
196 voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioCodec(requestedFormat),
197 getApiAudioFormat(requestedFormat));
198 return new VoiceRSSRawAudioStream(audioStream.getInputStream(), requestedFormat, audioStream.length());
199 } catch (IOException ex) {
200 throw new TTSException("Could not read from VoiceRSS service: " + ex.getMessage(), ex);
205 * Initializes voices.
207 * @return The voices of this instance
208 * @throws IllegalStateException if voiceRssImpl is null
210 private Set<Voice> initVoices() throws IllegalStateException {
211 CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
212 if (voiceRssCloud == null) {
213 throw new IllegalStateException("The service is not correctly initialized");
215 Set<Voice> voices = new HashSet<>();
216 for (Locale locale : voiceRssCloud.getAvailableLocales()) {
217 for (String voiceLabel : voiceRssCloud.getAvailableVoices(locale)) {
218 voices.add(new VoiceRSSVoice(locale, voiceLabel));
225 * Initializes audioFormats
227 * @return The audio formats of this instance
228 * @throws IllegalStateException if voiceRssImpl is null
// Collects the AudioFormat values offered by this service while iterating over the codecs reported by
// voiceRssCloud.getAvailableAudioCodecs(). Raises IllegalStateException when voiceRssImpl is null.
// NOTE(review): in this view the per-codec branching, the trailing constructor arguments of several
// audioFormats.add(...) calls and the final return statement are not visible - confirm against the
// complete file before relying on the notes below.
230 private Set<AudioFormat> initAudioFormats() throws IllegalStateException {
// Work on a local copy of the field so the null check below holds for the rest of the method.
231 CachedVoiceRSSCloudImpl voiceRssCloud = voiceRssImpl;
232 if (voiceRssCloud == null) {
233 throw new IllegalStateException("The service is not correctly initialized");
235 Set<AudioFormat> audioFormats = new HashSet<>();
236 for (String codec : voiceRssCloud.getAvailableAudioCodecs()) {
// MP3 entry without container.
239 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000,
// Vorbis entry in an OGG container.
243 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16,
// AAC entry without container.
247 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null,
251 // Consider only mono formats
// PCM entries in a WAVE container: alternating unsigned and signed variants per sample rate. On every
// fully visible call below, unsigned uses 8, signed uses 16, and the fifth argument equals the
// fourth argument multiplied by the sixth (presumably bit depth x frequency = bit rate - TODO confirm).
252 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
254 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
255 16, 128_000, 8_000L));
256 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
257 8, 88_200, 11_025L));
258 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
259 16, 176_400, 11_025L));
260 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
261 8, 96_000, 12_000L));
262 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
263 16, 192_000, 12_000L));
264 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
265 8, 128_000, 16_000L));
266 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
267 16, 256_000, 16_000L));
268 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
269 8, 176_400, 22_050L));
270 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
271 16, 352_800, 22_050L));
272 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
273 8, 192_000, 24_000L));
274 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
275 16, 384_000, 24_000L));
276 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
277 8, 256_000, 32_000L));
278 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
279 16, 512_000, 32_000L));
280 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
281 8, 352_800, 44_100L));
282 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
283 16, 705_600, 44_100L));
284 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
285 8, 384_000, 48_000L));
286 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
287 16, 768_000, 48_000L));
// A-law entries in a WAVE container (four of them; their remaining arguments are not visible here).
288 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
290 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
292 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
294 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
// mu-law entries in a WAVE container (four of them; their remaining arguments are not visible here).
296 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
298 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
300 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
302 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
// Debug log for a codec without format entries (presumably the fallback branch - TODO confirm).
306 logger.debug("Audio codec {} not yet supported", codec);
314 * Map {@link AudioFormat#getCodec() codec} to VoiceRSS API codec.
316 * @throws TTSException if {@code format} is not supported
318 private String getApiAudioCodec(AudioFormat format) throws TTSException {
319 final String internalCodec = format.getCodec();
320 final String apiCodec = CODEC_MAP.get(internalCodec != null ? internalCodec : AudioFormat.CODEC_PCM_SIGNED);
322 if (apiCodec == null) {
323 throw new TTSException("Unsupported audio format: " + format);
330 * Map {@link AudioFormat#getBitDepth() bit depth} and {@link AudioFormat#getFrequency() frequency} to VoiceRSS API
333 * @throws TTSException if {@code format} is not supported
335 private String getApiAudioFormat(AudioFormat format) throws TTSException {
336 final Integer formatBitDepth = format.getBitDepth();
337 final int bitDepth = formatBitDepth != null ? formatBitDepth.intValue() : 16;
338 final Long formatFrequency = format.getFrequency();
339 final Long frequency = formatFrequency != null ? formatFrequency.longValue() : 44_100L;
340 final String apiFrequency = FREQUENCY_MAP.get(frequency);
342 if (apiFrequency == null || (bitDepth != 8 && bitDepth != 16)) {
343 throw new TTSException("Unsupported audio format: " + format);
346 String codec = format.getCodec();
347 switch (codec != null ? codec : AudioFormat.CODEC_PCM_SIGNED) {
348 case AudioFormat.CODEC_PCM_ALAW:
349 return "alaw_" + apiFrequency + "_mono";
350 case AudioFormat.CODEC_PCM_ULAW:
351 return "ulaw_" + apiFrequency + "_mono";
352 case AudioFormat.CODEC_PCM_SIGNED:
353 case AudioFormat.CODEC_PCM_UNSIGNED:
354 case AudioFormat.CODEC_MP3:
355 case AudioFormat.CODEC_VORBIS:
356 case AudioFormat.CODEC_AAC:
357 return apiFrequency + "_" + bitDepth + "bit_mono";
359 throw new TTSException("Unsupported audio format: " + format);
363 private CachedVoiceRSSCloudImpl initVoiceImplementation() throws IllegalStateException {
364 return new CachedVoiceRSSCloudImpl(getCacheFolderName(), true);
367 private String getCacheFolderName() {
368 // we assume that this folder does NOT have a trailing separator
369 return OpenHAB.getUserDataFolder() + File.separator + CACHE_FOLDER_NAME;
373 public String getId() {
378 public String getLabel(@Nullable Locale locale) {