// API Key comes from ConfigAdmin
private static final String CONFIG_API_KEY = "apiKey";
+
+ /**
+ * Map from openHAB AudioFormat Codec to VoiceRSS API Audio Codec.
+ *
+ * All four PCM codecs (signed, unsigned, A-law and u-law) map to the single API codec "WAV"; MP3, Vorbis
+ * and AAC map to "MP3", "OGG" and "AAC" respectively. Codecs missing from this map are unsupported.
+ */
+ private static final Map<String, String> CODEC_MAP = Map.of(AudioFormat.CODEC_PCM_SIGNED, "WAV",
+ AudioFormat.CODEC_PCM_UNSIGNED, "WAV", AudioFormat.CODEC_PCM_ALAW, "WAV", AudioFormat.CODEC_PCM_ULAW, "WAV",
+ AudioFormat.CODEC_MP3, "MP3", AudioFormat.CODEC_VORBIS, "OGG", AudioFormat.CODEC_AAC, "AAC");
+
+ /**
+ * Map from openHAB AudioFormat Frequency to VoiceRSS API Audio Frequency.
+ *
+ * Keys are the numeric frequencies as returned by {@link AudioFormat#getFrequency()}; values are the
+ * frequency names used by the API, truncated to whole kHz (for example 11_025 maps to "11khz").
+ * Frequencies missing from this map are unsupported.
+ */
+ private static final Map<Long, String> FREQUENCY_MAP = Map.of(8_000L, "8khz", 11_025L, "11khz", 12_000L, "12khz",
+ 16_000L, "16khz", 22_050L, "22khz", 24_000L, "24khz", 32_000L, "32khz", 44_100L, "44khz", 48_000L, "48khz");
+
private String apiKey;
private final Logger logger = LoggerFactory.getLogger(VoiceRSSTTSService.class);
if (!voices.contains(voice)) {
throw new TTSException("The passed voice is unsupported");
}
- boolean isAudioFormatSupported = false;
- for (AudioFormat currentAudioFormat : audioFormats) {
- if (currentAudioFormat.isCompatible(requestedFormat)) {
- isAudioFormatSupported = true;
- break;
- }
- }
- if (!isAudioFormatSupported) {
- throw new TTSException("The passed AudioFormat is unsupported");
- }
- // now create the input stream for given text, locale, format. There is
- // only a default voice
+ // now create the input stream for given text, locale, voice, codec and format.
try {
File cacheAudioFile = voiceRssImpl.getTextToSpeechAsFile(apiKey, trimmedText,
- voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioFormat(requestedFormat));
+ voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioCodec(requestedFormat),
+ getApiAudioFormat(requestedFormat));
if (cacheAudioFile == null) {
throw new TTSException("Could not read from VoiceRSS service");
}
* @return The audio formats of this instance
*/
private Set<AudioFormat> initAudioFormats() {
- Set<AudioFormat> audioFormats = new HashSet<>();
- for (String format : voiceRssImpl.getAvailableAudioFormats()) {
- audioFormats.add(getAudioFormat(format));
- }
- return audioFormats;
+ // The cloud API reports AudioFormat instances directly, so no conversion from API format names is needed.
+ return voiceRssImpl.getAvailableAudioFormats();
}
- private AudioFormat getAudioFormat(String apiFormat) {
- Boolean bigEndian = null;
- Integer bitDepth = 16;
- Integer bitRate = null;
- Long frequency = 44100L;
-
- if ("MP3".equals(apiFormat)) {
- // we use by default: MP3, 44khz_16bit_mono with bitrate 64 kbps
- bitRate = 64000;
- return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, bigEndian, bitDepth, bitRate,
- frequency);
- } else if ("OGG".equals(apiFormat)) {
- // we use by default: OGG, 44khz_16bit_mono
- return new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, bigEndian, bitDepth, bitRate,
- frequency);
- } else if ("AAC".equals(apiFormat)) {
- // we use by default: AAC, 44khz_16bit_mono
- return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, bigEndian, bitDepth, bitRate,
- frequency);
- } else {
- throw new IllegalArgumentException("Audio format " + apiFormat + " not yet supported");
+ /**
+ * Translate the {@link AudioFormat#getCodec() codec} of an openHAB audio format into the VoiceRSS API codec.
+ * A format that carries no codec is looked up as {@link AudioFormat#CODEC_PCM_SIGNED}.
+ *
+ * @param format the requested audio format
+ * @return the API codec registered for the codec in {@code CODEC_MAP}
+ * @throws TTSException if {@code format} is not supported
+ */
+ private String getApiAudioCodec(AudioFormat format) throws TTSException {
+ // Map.of(...) rejects null keys on lookup, so substitute the default codec before querying the map.
+ String codecKey = format.getCodec();
+ if (codecKey == null) {
+ codecKey = AudioFormat.CODEC_PCM_SIGNED;
+ }
+
+ final String voiceRssCodec = CODEC_MAP.get(codecKey);
+ if (voiceRssCodec != null) {
+ return voiceRssCodec;
}
+
+ throw new TTSException("Unsupported audio format: " + format);
}
- private String getApiAudioFormat(AudioFormat format) {
- if (format.getCodec().equals(AudioFormat.CODEC_MP3)) {
- return "MP3";
- } else if (format.getCodec().equals(AudioFormat.CODEC_VORBIS)) {
- return "OGG";
- } else if (format.getCodec().equals(AudioFormat.CODEC_AAC)) {
- return "AAC";
- } else {
- throw new IllegalArgumentException("Audio format " + format.getCodec() + " not yet supported");
+ /**
+ * Map {@link AudioFormat#getCodec() codec}, {@link AudioFormat#getBitDepth() bit depth} and
+ * {@link AudioFormat#getFrequency() frequency} to the VoiceRSS API format.
+ *
+ * Missing values default to a bit depth of 16, a frequency of 44100 and the codec
+ * {@link AudioFormat#CODEC_PCM_SIGNED}. The result has one of these shapes:
+ * <ul>
+ * <li>{@code alaw_<frequency>_mono} for A-law</li>
+ * <li>{@code ulaw_<frequency>_mono} for u-law</li>
+ * <li>{@code <frequency>_<bitDepth>bit_mono} for every other supported codec, e.g. "44khz_16bit_mono"</li>
+ * </ul>
+ *
+ * @param format the requested audio format
+ * @return the API format name
+ * @throws TTSException if {@code format} is not supported
+ */
+ private String getApiAudioFormat(AudioFormat format) throws TTSException {
+ final int bitDepth = format.getBitDepth() != null ? format.getBitDepth() : 16;
+ final Long frequency = format.getFrequency() != null ? format.getFrequency() : 44_100L;
+ final String apiFrequency = FREQUENCY_MAP.get(frequency);
+
+ // Only frequencies known to FREQUENCY_MAP and bit depths of 8 or 16 are accepted.
+ if (apiFrequency == null || (bitDepth != 8 && bitDepth != 16)) {
+ throw new TTSException("Unsupported audio format: " + format);
+ }
+
+ switch (format.getCodec() != null ? format.getCodec() : AudioFormat.CODEC_PCM_SIGNED) {
+ case AudioFormat.CODEC_PCM_ALAW:
+ // the bit depth is not part of the A-law format name
+ return "alaw_" + apiFrequency + "_mono";
+ case AudioFormat.CODEC_PCM_ULAW:
+ // the bit depth is not part of the u-law format name
+ return "ulaw_" + apiFrequency + "_mono";
+ case AudioFormat.CODEC_PCM_SIGNED:
+ case AudioFormat.CODEC_PCM_UNSIGNED:
+ case AudioFormat.CODEC_MP3:
+ case AudioFormat.CODEC_VORBIS:
+ case AudioFormat.CODEC_AAC:
+ // The bit depth needs its "bit" suffix: "44khz_16bit_mono" is the value the cache treats as
+ // default and the value that used to be hard-coded in the request URL.
+ return apiFrequency + "_" + bitDepth + "bit_mono";
+ default:
+ throw new TTSException("Unsupported audio format: " + format);
}
}
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
}
}
- public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioFormat)
- throws IOException {
- String fileNameInCache = getUniqueFilenameForText(text, locale, voice);
+ public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioCodec,
+ String audioFormat) throws IOException {
+ String fileNameInCache = getUniqueFilenameForText(text, locale, voice, audioFormat);
// check if in cache
- File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
+ File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioCodec.toLowerCase());
if (audioFileInCache.exists()) {
return audioFileInCache;
}
// if not in cache, get audio data and put to cache
- try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioFormat);
+ try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioCodec, audioFormat);
FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
copyStream(is, fos);
// write text to file for transparency too
/**
* Gets a unique filename for a give text, by creating a MD5 hash of it. It
- * will be preceded by the locale.
+ * will be preceded by the locale and suffixed by the format if it is not the
+ * default of "44khz_16bit_mono".
*
* Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
*/
- private String getUniqueFilenameForText(String text, String locale, String voice) {
+ private String getUniqueFilenameForText(String text, String locale, String voice, String format) {
try {
byte[] bytesOfMessage = text.getBytes(StandardCharsets.UTF_8);
MessageDigest md = MessageDigest.getInstance("MD5");
filename += voice + "_";
}
filename += hashtext;
+ if (!Objects.equals(format, "44khz_16bit_mono")) {
+ filename += "_" + format;
+ }
return filename;
} catch (NoSuchAlgorithmException ex) {
// should not happen
*
* @return A set of all audio formats supported
*/
- Set<String> getAvailableAudioFormats();
+ Set<AudioFormat> getAvailableAudioFormats();
/**
* Get all supported voices.
* the locale to use
* @param voice
* the voice to use, "default" for the default voice
+ * @param audioCodec
+ * the audio codec to use
* @param audioFormat
* the audio format to use
* @return an InputStream to the audio data in specified format
* will be raised if the audio data can not be retrieved from
* cloud service
*/
- InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
- throws IOException;
+ InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
+ String audioFormat) throws IOException;
}
*/
package org.openhab.voice.voicerss.internal.cloudapi;
-import static java.util.stream.Collectors.toSet;
-
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import java.util.stream.Stream;
+import org.openhab.core.audio.AudioFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
* <ul>
* <li>All API languages supported</li>
* <li>Only default voice supported with good audio quality</li>
- * <li>Only MP3, OGG and AAC audio formats supported</li>
+ * <li>MP3, OGG, AAC and WAV audio formats supported</li>
* <li>It uses HTTP and not HTTPS (for performance reasons)</li>
* </ul>
*
* @author Jochen Hiller - Initial contribution
* @author Laurent Garnier - add support for all API languages
* @author Laurent Garnier - add support for OGG and AAC audio formats
+ * @author Andreas Brenk - add support for WAV audio format
*/
public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);
- private static final Set<String> SUPPORTED_AUDIO_FORMATS = Stream.of("MP3", "OGG", "AAC").collect(toSet());
+ /**
+ * Audio formats offered by this implementation.
+ *
+ * MP3, Vorbis (OGG container) and AAC are declared with 16 bit at 44.1 kHz and without a bit rate.
+ * The WAVE entries cover unsigned 8 bit and signed 16 bit PCM for each listed frequency, plus A-law and
+ * u-law with 8 bit at 8, 11.025, 22.05 and 44.1 kHz. For every WAVE entry the declared bit rate equals
+ * frequency multiplied by bit depth.
+ *
+ * NOTE(review): the two 8 kHz PCM entries pass {@code null} as big-endian flag while all other PCM
+ * entries pass {@code false} - confirm whether this difference is intended.
+ */
+ private static final Set<AudioFormat> SUPPORTED_AUDIO_FORMATS = Set.of(
+ new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, null, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16, null, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, null, 8, 64_000, 8_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, 16, 128_000, 8_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 88_200, 11_025L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 176_400, 11_025L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 96_000, 12_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 192_000, 12_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 128_000, 16_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 256_000, 16_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 176_400, 22_050L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 352_800, 22_050L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 192_000, 24_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 384_000, 24_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 256_000, 32_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 512_000, 32_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 352_800, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 705_600, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 384_000, 48_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 768_000, 48_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 64_000, 8_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 88_200, 11_025L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 176_400, 22_050L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 352_800, 44_100L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 64_000, 8_000L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 88_200, 11_025L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 176_400, 22_050L),
+ new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 352_800, 44_100L));
private static final Set<Locale> SUPPORTED_LOCALES = new HashSet<>();
static {
}
@Override
- public Set<String> getAvailableAudioFormats() {
+ public Set<AudioFormat> getAvailableAudioFormats() {
+ // SUPPORTED_AUDIO_FORMATS is built with Set.of(...), so the shared instance handed out here is unmodifiable.
return SUPPORTED_AUDIO_FORMATS;
}
* dependencies.
*/
@Override
- public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
- throws IOException {
- String url = createURL(apiKey, text, locale, voice, audioFormat);
+ public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
+ String audioFormat) throws IOException {
+ String url = createURL(apiKey, text, locale, voice, audioCodec, audioFormat);
logger.debug("Call {}", url);
URLConnection connection = new URL(url).openConnection();
*
* It is in package scope to be accessed by tests.
*/
- private String createURL(String apiKey, String text, String locale, String voice, String audioFormat) {
+ private String createURL(String apiKey, String text, String locale, String voice, String audioCodec,
+ String audioFormat) {
String encodedMsg = URLEncoder.encode(text, StandardCharsets.UTF_8);
- String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat;
+ // A caller may pass no explicit format (null). Fall back to the format that used to be hard-coded
+ // here instead of sending the literal text "null" as the "f" parameter.
+ String effectiveFormat = audioFormat != null ? audioFormat : "44khz_16bit_mono";
+ // "c" carries the API codec, "f" the API format
+ String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioCodec + "&f="
+ + effectiveFormat;
if (!DEFAULT_VOICE.equals(voice)) {
url += "&v=" + voice;
}
- url += "&f=44khz_16bit_mono&src=" + encodedMsg;
+ // the URL-encoded text is appended as the last parameter
+ url += "&src=" + encodedMsg;
return url;
}
}
return;
}
CachedVoiceRSSCloudImpl impl = new CachedVoiceRSSCloudImpl(cacheDir);
- File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3");
+ File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3", null);
System.out.println(
"Created cached audio for locale='" + locale + "', msg='" + trimmedMsg + "' to file=" + cachedFile);
}
--- /dev/null
+/**
+ * Copyright (c) 2010-2022 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.voicerss.internal;
+
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.TypeSafeMatcher;
+import org.openhab.core.audio.AudioFormat;
+
+/**
+ * Hamcrest {@link Matcher} to assert a compatible {@link AudioFormat}.
+ *
+ * @author Andreas Brenk - Initial contribution
+ */
+public class CompatibleAudioFormatMatcher extends TypeSafeMatcher<AudioFormat> {
+
+    /** Reference format every examined item is checked against. */
+    private final AudioFormat reference;
+
+    /**
+     * Creates a matcher bound to the given reference format.
+     *
+     * @param audioFormat the audio format examined items must be compatible to
+     */
+    public CompatibleAudioFormatMatcher(AudioFormat audioFormat) {
+        reference = audioFormat;
+    }
+
+    /**
+     * Factory for use in assertions: creates a matcher that matches when the examined
+     * object is compatible to the specified <code>audioFormat</code>.
+     *
+     * @param audioFormat the audio format which must be compatible
+     * @return a new matcher for {@code audioFormat}
+     */
+    public static Matcher<AudioFormat> compatibleAudioFormat(AudioFormat audioFormat) {
+        return new CompatibleAudioFormatMatcher(audioFormat);
+    }
+
+    /**
+     * Delegates the decision to {@link AudioFormat#isCompatible(AudioFormat)} of the reference format.
+     */
+    @Override
+    protected boolean matchesSafely(AudioFormat actual) {
+        final boolean compatible = reference.isCompatible(actual);
+        return compatible;
+    }
+
+    /**
+     * Appends a fixed text followed by the reference format to the description.
+     */
+    @Override
+    public void describeTo(Description description) {
+        final Description withPrefix = description.appendText("an audio format compatible to ");
+        withPrefix.appendValue(reference);
+    }
+}
--- /dev/null
+/**
+ * Copyright (c) 2010-2022 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.voicerss.internal;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.IsIterableContaining.hasItem;
+import static org.hamcrest.core.IsNot.not;
+import static org.openhab.core.audio.AudioFormat.*;
+import static org.openhab.voice.voicerss.internal.CompatibleAudioFormatMatcher.compatibleAudioFormat;
+
+import java.util.Set;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.openhab.core.audio.AudioFormat;
+import org.openhab.core.voice.TTSService;
+
+/**
+ * Tests for {@link VoiceRSSTTSService}.
+ *
+ * @author Andreas Brenk - Initial contribution
+ */
+public class VoiceRSSTTSServiceTest {
+
+    // Specific formats used by the assertions below; arguments are
+    // container, codec, big-endian flag, bit depth, bit rate, frequency.
+    private static final AudioFormat MP3_44KHZ_16BIT = new AudioFormat(AudioFormat.CONTAINER_NONE,
+            AudioFormat.CODEC_MP3, null, 16, null, 44_100L);
+    private static final AudioFormat OGG_44KHZ_16BIT = new AudioFormat(AudioFormat.CONTAINER_OGG,
+            AudioFormat.CODEC_VORBIS, null, 16, null, 44_100L);
+    // Must use the AAC codec: with CODEC_MP3 this constant would merely repeat the MP3 check.
+    private static final AudioFormat AAC_44KHZ_16BIT = new AudioFormat(AudioFormat.CONTAINER_NONE,
+            AudioFormat.CODEC_AAC, null, 16, null, 44_100L);
+    private static final AudioFormat WAV_22KHZ_8BIT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
+            AudioFormat.CODEC_PCM_UNSIGNED, null, 8, null, 22_050L);
+    private static final AudioFormat WAV_48KHZ_16BIT = new AudioFormat(AudioFormat.CONTAINER_WAVE,
+            AudioFormat.CODEC_PCM_SIGNED, false, 16, null, 48_000L);
+
+    /**
+     * The {@link VoiceRSSTTSService} under test.
+     */
+    private TTSService ttsService;
+
+    /**
+     * Creates and activates a fresh service instance (without configuration) before each test.
+     */
+    @BeforeEach
+    public void setUp() {
+        final VoiceRSSTTSService service = new VoiceRSSTTSService();
+        service.activate(null);
+
+        this.ttsService = service;
+    }
+
+    /**
+     * Verifies which audio formats the service reports as supported.
+     */
+    @Test
+    public void testSupportedFormats() {
+        final Set<AudioFormat> supportedFormats = ttsService.getSupportedFormats();
+
+        // check generic formats without any further constraints
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(MP3)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(WAV)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(OGG)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(AAC)));
+
+        // check specific formats with common constraints
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(MP3_44KHZ_16BIT)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(OGG_44KHZ_16BIT)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(AAC_44KHZ_16BIT)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(WAV_22KHZ_8BIT)));
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(WAV_48KHZ_16BIT)));
+
+        // check specific formats with additional constraints
+        assertThat(supportedFormats, hasItem(compatibleAudioFormat(bitRate(WAV, 705_600)))); // 44.1 kHz 16-bit
+
+        // check unsupported formats
+        assertThat(supportedFormats, not(hasItem(compatibleAudioFormat(bitDepth(WAV, 24)))));
+    }
+
+    /**
+     * Returns a copy of {@code format} whose bit depth is replaced by {@code bitDepth}.
+     */
+    private AudioFormat bitDepth(AudioFormat format, Integer bitDepth) {
+        return new AudioFormat(format.getContainer(), format.getCodec(), format.isBigEndian(), bitDepth,
+                format.getBitRate(), format.getFrequency());
+    }
+
+    /**
+     * Returns a copy of {@code format} whose bit rate is replaced by {@code bitRate}.
+     */
+    private AudioFormat bitRate(AudioFormat format, Integer bitRate) {
+        return new AudioFormat(format.getContainer(), format.getCodec(), format.isBigEndian(), format.getBitDepth(),
+                bitRate, format.getFrequency());
+    }
+}