2 * Copyright (c) 2010-2021 Contributors to the openHAB project
4 * See the NOTICE file(s) distributed with this work for additional
7 * This program and the accompanying materials are made available under the
8 * terms of the Eclipse Public License 2.0 which is available at
9 * http://www.eclipse.org/legal/epl-2.0
11 * SPDX-License-Identifier: EPL-2.0
13 package org.openhab.voice.voicerss.internal.cloudapi;
15 import static java.util.stream.Collectors.toSet;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.io.UnsupportedEncodingException;
20 import java.net.HttpURLConnection;
22 import java.net.URLConnection;
23 import java.net.URLEncoder;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Locale;
29 import java.util.Map.Entry;
31 import java.util.stream.Stream;
33 import org.slf4j.Logger;
34 import org.slf4j.LoggerFactory;
37 * This class implements the Cloud service from VoiceRSS. For more information,
38 * see API documentation at http://www.voicerss.org/api .
40 * Current state of implementation:
42 * <li>All API languages supported</li>
 * <li>Default voice and the named voices listed per locale supported, with good audio quality</li>
44 * <li>Only MP3, OGG and AAC audio formats supported</li>
45 * <li>It uses HTTP and not HTTPS (for performance reasons)</li>
48 * @author Jochen Hiller - Initial contribution
49 * @author Laurent Garnier - add support for all API languages
50 * @author Laurent Garnier - add support for OGG and AAC audio formats
52 public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
54 public static final String DEFAULT_VOICE = "default";
56 private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);
58 private static final Set<String> SUPPORTED_AUDIO_FORMATS = Stream.of("MP3", "OGG", "AAC").collect(toSet());
60 private static final Set<Locale> SUPPORTED_LOCALES = new HashSet<>();
62 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-eg"));
63 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-sa"));
64 SUPPORTED_LOCALES.add(Locale.forLanguageTag("bg-bg"));
65 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ca-es"));
66 SUPPORTED_LOCALES.add(Locale.forLanguageTag("cs-cz"));
67 SUPPORTED_LOCALES.add(Locale.forLanguageTag("da-dk"));
68 SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-at"));
69 SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-de"));
70 SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
71 SUPPORTED_LOCALES.add(Locale.forLanguageTag("el-gr"));
72 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-au"));
73 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ca"));
74 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-gb"));
75 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ie"));
76 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-in"));
77 SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-us"));
78 SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-es"));
79 SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-mx"));
80 SUPPORTED_LOCALES.add(Locale.forLanguageTag("fi-fi"));
81 SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ca"));
82 SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-fr"));
83 SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
84 SUPPORTED_LOCALES.add(Locale.forLanguageTag("he-il"));
85 SUPPORTED_LOCALES.add(Locale.forLanguageTag("hi-in"));
86 SUPPORTED_LOCALES.add(Locale.forLanguageTag("hr-hr"));
87 SUPPORTED_LOCALES.add(Locale.forLanguageTag("hu-hu"));
88 SUPPORTED_LOCALES.add(Locale.forLanguageTag("id-id"));
89 SUPPORTED_LOCALES.add(Locale.forLanguageTag("it-it"));
90 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ja-jp"));
91 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ko-kr"));
92 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ms-my"));
93 SUPPORTED_LOCALES.add(Locale.forLanguageTag("nb-no"));
94 SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-be"));
95 SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-nl"));
96 SUPPORTED_LOCALES.add(Locale.forLanguageTag("pl-pl"));
97 SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-br"));
98 SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-pt"));
99 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ro-ro"));
100 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ru-ru"));
101 SUPPORTED_LOCALES.add(Locale.forLanguageTag("sk-sk"));
102 SUPPORTED_LOCALES.add(Locale.forLanguageTag("sl-si"));
103 SUPPORTED_LOCALES.add(Locale.forLanguageTag("sv-se"));
104 SUPPORTED_LOCALES.add(Locale.forLanguageTag("ta-in"));
105 SUPPORTED_LOCALES.add(Locale.forLanguageTag("th-th"));
106 SUPPORTED_LOCALES.add(Locale.forLanguageTag("tr-tr"));
107 SUPPORTED_LOCALES.add(Locale.forLanguageTag("vi-vn"));
108 SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-cn"));
109 SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-hk"));
110 SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-tw"));
/** Named voices per locale, keyed by the lowercase language tag of the locale. */
private static final Map<String, Set<String>> SUPPORTED_VOICES = new HashMap<>();

static {
    registerVoices("ar-eg", "Oda");
    registerVoices("ar-sa", "Salim");
    registerVoices("bg-bg", "Dimo");
    registerVoices("ca-es", "Rut");
    registerVoices("cs-cz", "Josef");
    registerVoices("da-dk", "Freja");
    registerVoices("de-at", "Lukas");
    registerVoices("de-de", "Hanna", "Lina", "Jonas");
    registerVoices("de-ch", "Tim");
    registerVoices("el-gr", "Neo");
    registerVoices("en-au", "Zoe", "Isla", "Evie", "Jack");
    registerVoices("en-ca", "Rose", "Clara", "Emma", "Mason");
    registerVoices("en-gb", "Alice", "Nancy", "Lily", "Harry");
    registerVoices("en-ie", "Oran");
    registerVoices("en-in", "Eka", "Jai", "Ajit");
    registerVoices("en-us", "Linda", "Amy", "Mary", "John", "Mike");
    registerVoices("es-es", "Camila", "Sofia", "Luna", "Diego");
    registerVoices("es-mx", "Juana", "Silvia", "Teresa", "Jose");
    registerVoices("fi-fi", "Aada");
    registerVoices("fr-ca", "Emile", "Olivia", "Logan", "Felix");
    registerVoices("fr-fr", "Bette", "Iva", "Zola", "Axel");
    registerVoices("fr-ch", "Theo");
    registerVoices("he-il", "Rami");
    registerVoices("hi-in", "Puja", "Kabir");
    registerVoices("hr-hr", "Nikola");
    registerVoices("hu-hu", "Mate");
    registerVoices("id-id", "Intan");
    registerVoices("it-it", "Bria", "Mia", "Pietro");
    registerVoices("ja-jp", "Hina", "Airi", "Fumi", "Akira");
    registerVoices("ko-kr", "Nari");
    registerVoices("ms-my", "Aqil");
    registerVoices("nb-no", "Marte", "Erik");
    registerVoices("nl-be", "Daan");
    registerVoices("nl-nl", "Lotte", "Bram");
    registerVoices("pl-pl", "Julia", "Jan");
    registerVoices("pt-br", "Marcia", "Ligia", "Yara", "Dinis");
    registerVoices("pt-pt", "Leonor");
    registerVoices("ro-ro", "Doru");
    registerVoices("ru-ru", "Olga", "Marina", "Peter");
    registerVoices("sk-sk", "Beda");
    registerVoices("sl-si", "Vid");
    registerVoices("sv-se", "Molly", "Hugo");
    registerVoices("ta-in", "Sai");
    registerVoices("th-th", "Ukrit");
    registerVoices("tr-tr", "Omer");
    registerVoices("vi-vn", "Chi");
    registerVoices("zh-cn", "Luli", "Shu", "Chow", "Wang");
    registerVoices("zh-hk", "Jia", "Xia", "Chen");
    registerVoices("zh-tw", "Akemi", "Lin", "Lee");
}

/**
 * Stores the voice names of one locale in {@code SUPPORTED_VOICES}.
 *
 * @param languageTag lowercase language tag used as map key
 * @param voiceNames the voice names registered for that tag
 */
private static void registerVoices(String languageTag, String... voiceNames) {
    SUPPORTED_VOICES.put(languageTag, Set.of(voiceNames));
}
167 public Set<String> getAvailableAudioFormats() {
168 return SUPPORTED_AUDIO_FORMATS;
172 public Set<Locale> getAvailableLocales() {
173 return SUPPORTED_LOCALES;
177 public Set<String> getAvailableVoices() {
178 // different locales support different voices, so let's list all here in one big set when no locale is provided
179 Set<String> allvoxes = new HashSet<>();
180 allvoxes.add(DEFAULT_VOICE);
181 for (Set<String> langvoxes : SUPPORTED_VOICES.values()) {
182 for (String langvox : langvoxes) {
183 allvoxes.add(langvox);
190 public Set<String> getAvailableVoices(Locale locale) {
191 Set<String> allvoxes = new HashSet<>();
192 allvoxes.add(DEFAULT_VOICE);
193 // all maps must be defined with key in lowercase
194 String langtag = locale.toLanguageTag().toLowerCase();
195 if (SUPPORTED_VOICES.containsKey(langtag)) {
196 for (String langvox : SUPPORTED_VOICES.get(langtag)) {
197 allvoxes.add(langvox);
204 * This method will return an input stream to an audio stream for the given
207 * It will do that using a plain URL connection to avoid any external
211 public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
213 String url = createURL(apiKey, text, locale, voice, audioFormat);
214 logger.debug("Call {}", url);
215 URLConnection connection = new URL(url).openConnection();
217 // we will check return codes. The service will ALWAYS return a HTTP
218 // 200, but for error messages, it will return a text/plain format and
219 // the error message in body
220 int status = ((HttpURLConnection) connection).getResponseCode();
221 if (HttpURLConnection.HTTP_OK != status) {
222 logger.error("Call {} returned HTTP {}", url, status);
223 throw new IOException("Could not read from service: HTTP code " + status);
225 if (logger.isTraceEnabled()) {
226 for (Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
227 logger.trace("Response.header: {}={}", header.getKey(), header.getValue());
230 String contentType = connection.getHeaderField("Content-Type");
231 InputStream is = connection.getInputStream();
232 // check if content type is text/plain, then we have an error
233 if (contentType.contains("text/plain")) {
234 byte[] bytes = new byte[256];
235 is.read(bytes, 0, 256);
236 // close before throwing an exception
239 } catch (IOException ex) {
240 logger.debug("Failed to close inputstream", ex);
242 throw new IOException(
243 "Could not read audio content, service return an error: " + new String(bytes, "UTF-8"));
252 * This method will create the URL for the cloud service. The text will be
253 * URI encoded as it is part of the URL.
255 * It is in package scope to be accessed by tests.
257 private String createURL(String apiKey, String text, String locale, String voice, String audioFormat) {
260 encodedMsg = URLEncoder.encode(text, "UTF-8");
261 } catch (UnsupportedEncodingException ex) {
262 logger.error("UnsupportedEncodingException for UTF-8 MUST NEVER HAPPEN! Check your JVM configuration!", ex);
263 // fall through and use msg un-encoded
266 String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat;
267 if (!DEFAULT_VOICE.equals(voice)) {
268 url += "&v=" + voice;
270 url += "&f=44khz_16bit_mono&src=" + encodedMsg;