2 * Copyright (c) 2010-2022 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.voicerss.internal;
16 import java.io.IOException;
17 import java.util.Collections;
18 import java.util.HashSet;
19 import java.util.Locale;
23 import org.openhab.core.OpenHAB;
24 import org.openhab.core.audio.AudioException;
25 import org.openhab.core.audio.AudioFormat;
26 import org.openhab.core.audio.AudioStream;
27 import org.openhab.core.config.core.ConfigurableService;
28 import org.openhab.core.voice.TTSException;
29 import org.openhab.core.voice.TTSService;
30 import org.openhab.core.voice.Voice;
31 import org.openhab.voice.voicerss.internal.cloudapi.CachedVoiceRSSCloudImpl;
32 import org.osgi.framework.Constants;
33 import org.osgi.service.component.annotations.Component;
34 import org.osgi.service.component.annotations.Modified;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
39 * This is a TTS service implementation for using VoiceRSS TTS service.
41 * @author Jochen Hiller - Initial contribution and API
42 * @author Laurent Garnier - add support for OGG and AAC audio formats
44 @Component(configurationPid = "org.openhab.voicerss", property = Constants.SERVICE_PID + "=org.openhab.voicerss")
45 @ConfigurableService(category = "voice", label = "VoiceRSS Text-to-Speech", description_uri = "voice:voicerss")
46 public class VoiceRSSTTSService implements TTSService {
48 /** Cache folder name is below userdata/voicerss/cache. */
49 private static final String CACHE_FOLDER_NAME = "voicerss" + File.separator + "cache";
51 // API Key comes from ConfigAdmin
52 private static final String CONFIG_API_KEY = "apiKey";
55 * Map from openHAB AudioFormat Codec to VoiceRSS API Audio Codec
57 private static final Map<String, String> CODEC_MAP = Map.of(AudioFormat.CODEC_PCM_SIGNED, "WAV",
58 AudioFormat.CODEC_PCM_UNSIGNED, "WAV", AudioFormat.CODEC_PCM_ALAW, "WAV", AudioFormat.CODEC_PCM_ULAW, "WAV",
59 AudioFormat.CODEC_MP3, "MP3", AudioFormat.CODEC_VORBIS, "OGG", AudioFormat.CODEC_AAC, "AAC");
62 * Map from openHAB AudioFormat Frequency to VoiceRSS API Audio Frequency
64 private static final Map<Long, String> FREQUENCY_MAP = Map.of(8_000L, "8khz", 11_025L, "11khz", 12_000L, "12khz",
65 16_000L, "16khz", 22_050L, "22khz", 24_000L, "24khz", 32_000L, "32khz", 44_100L, "44khz", 48_000L, "48khz");
67 private final Logger logger = LoggerFactory.getLogger(VoiceRSSTTSService.class);
69 private String apiKey;
72 * We need the cached implementation to allow for FixedLengthAudioStream.
74 private CachedVoiceRSSCloudImpl voiceRssImpl;
77 * Set of supported voices
79 private Set<Voice> voices;
82 * Set of supported audio formats
84 private Set<AudioFormat> audioFormats;
87 * DS activate, with access to ConfigAdmin
89 protected void activate(Map<String, Object> config) {
92 voiceRssImpl = initVoiceImplementation();
93 voices = initVoices();
94 audioFormats = initAudioFormats();
96 logger.debug("Using VoiceRSS cache folder {}", getCacheFolderName());
97 } catch (IllegalStateException e) {
98 logger.warn("Failed to activate VoiceRSS: {}", e.getMessage(), e);
103 protected void modified(Map<String, Object> config) {
104 if (config != null) {
105 apiKey = config.containsKey(CONFIG_API_KEY) ? config.get(CONFIG_API_KEY).toString() : null;
110 public Set<Voice> getAvailableVoices() {
111 return Collections.unmodifiableSet(voices);
115 public Set<AudioFormat> getSupportedFormats() {
116 return Collections.unmodifiableSet(audioFormats);
120 public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
121 logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
122 // Validate known api key
123 if (apiKey == null) {
124 throw new TTSException("Missing API key, configure it first before using");
126 // Validate arguments
128 throw new TTSException("The passed text is null");
131 String trimmedText = text.trim();
132 if (trimmedText.isEmpty()) {
133 throw new TTSException("The passed text is empty");
135 if (!voices.contains(voice)) {
136 throw new TTSException("The passed voice is unsupported");
139 // now create the input stream for given text, locale, voice, codec and format.
141 File cacheAudioFile = voiceRssImpl.getTextToSpeechAsFile(apiKey, trimmedText,
142 voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioCodec(requestedFormat),
143 getApiAudioFormat(requestedFormat));
144 return new VoiceRSSAudioStream(cacheAudioFile, requestedFormat);
145 } catch (AudioException ex) {
146 throw new TTSException("Could not create AudioStream: " + ex.getMessage(), ex);
147 } catch (IOException ex) {
148 throw new TTSException("Could not read from VoiceRSS service: " + ex.getMessage(), ex);
153 * Initializes voices.
155 * @return The voices of this instance
157 private Set<Voice> initVoices() {
158 Set<Voice> voices = new HashSet<>();
159 for (Locale locale : voiceRssImpl.getAvailableLocales()) {
160 for (String voiceLabel : voiceRssImpl.getAvailableVoices(locale)) {
161 voices.add(new VoiceRSSVoice(locale, voiceLabel));
168 * Initializes audioFormats
170 * @return The audio formats of this instance
172 private Set<AudioFormat> initAudioFormats() {
173 Set<AudioFormat> audioFormats = new HashSet<>();
174 for (String codec : voiceRssImpl.getAvailableAudioCodecs()) {
177 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000,
181 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16,
185 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null,
189 // Consider only mono formats
190 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
192 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
193 16, 128_000, 8_000L));
194 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
195 8, 88_200, 11_025L));
196 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
197 16, 176_400, 11_025L));
198 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
199 8, 96_000, 12_000L));
200 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
201 16, 192_000, 12_000L));
202 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
203 8, 128_000, 16_000L));
204 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
205 16, 256_000, 16_000L));
206 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
207 8, 176_400, 22_050L));
208 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
209 16, 352_800, 22_050L));
210 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
211 8, 192_000, 24_000L));
212 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
213 16, 384_000, 24_000L));
214 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
215 8, 256_000, 32_000L));
216 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
217 16, 512_000, 32_000L));
218 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
219 8, 352_800, 44_100L));
220 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
221 16, 705_600, 44_100L));
222 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false,
223 8, 384_000, 48_000L));
224 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false,
225 16, 768_000, 48_000L));
226 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
228 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
230 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
232 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8,
234 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
236 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
238 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
240 audioFormats.add(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8,
244 logger.debug("Audio codec {} not yet supported", codec);
252 * Map {@link AudioFormat#getCodec() codec} to VoiceRSS API codec.
254 * @throws TTSException if {@code format} is not supported
256 private String getApiAudioCodec(AudioFormat format) throws TTSException {
257 final String internalCodec = format.getCodec();
258 final String apiCodec = CODEC_MAP.get(internalCodec != null ? internalCodec : AudioFormat.CODEC_PCM_SIGNED);
260 if (apiCodec == null) {
261 throw new TTSException("Unsupported audio format: " + format);
268 * Map {@link AudioFormat#getBitDepth() bit depth} and {@link AudioFormat#getFrequency() frequency} to VoiceRSS API
271 * @throws TTSException if {@code format} is not supported
273 private String getApiAudioFormat(AudioFormat format) throws TTSException {
274 final int bitDepth = format.getBitDepth() != null ? format.getBitDepth() : 16;
275 final Long frequency = format.getFrequency() != null ? format.getFrequency() : 44_100L;
276 final String apiFrequency = FREQUENCY_MAP.get(frequency);
278 if (apiFrequency == null || (bitDepth != 8 && bitDepth != 16)) {
279 throw new TTSException("Unsupported audio format: " + format);
282 switch (format.getCodec() != null ? format.getCodec() : AudioFormat.CODEC_PCM_SIGNED) {
283 case AudioFormat.CODEC_PCM_ALAW:
284 return "alaw_" + apiFrequency + "_mono";
285 case AudioFormat.CODEC_PCM_ULAW:
286 return "ulaw_" + apiFrequency + "_mono";
287 case AudioFormat.CODEC_PCM_SIGNED:
288 case AudioFormat.CODEC_PCM_UNSIGNED:
289 case AudioFormat.CODEC_MP3:
290 case AudioFormat.CODEC_VORBIS:
291 case AudioFormat.CODEC_AAC:
292 return apiFrequency + "_" + bitDepth + "bit_mono";
294 throw new TTSException("Unsupported audio format: " + format);
298 private CachedVoiceRSSCloudImpl initVoiceImplementation() throws IllegalStateException {
299 return new CachedVoiceRSSCloudImpl(getCacheFolderName(), true);
302 private String getCacheFolderName() {
303 // we assume that this folder does NOT have a trailing separator
304 return OpenHAB.getUserDataFolder() + File.separator + CACHE_FOLDER_NAME;
308 public String getId() {
313 public String getLabel(Locale locale) {