]> git.basschouten.com Git - openhab-addons.git/blob
88daf7ca50b08251d7d63d6ed0d3befc9d7b4bee
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2022 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.voicerss.internal.cloudapi;
14
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.net.HttpURLConnection;
18 import java.net.URL;
19 import java.net.URLConnection;
20 import java.net.URLEncoder;
21 import java.nio.charset.StandardCharsets;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Map;
27 import java.util.Map.Entry;
28 import java.util.Set;
29
30 import org.openhab.core.audio.AudioFormat;
31 import org.slf4j.Logger;
32 import org.slf4j.LoggerFactory;
33
34 /**
35  * This class implements the Cloud service from VoiceRSS. For more information,
36  * see API documentation at http://www.voicerss.org/api .
37  *
38  * Current state of implementation:
39  * <ul>
40  * <li>All API languages supported</li>
41  * <li>Only default voice supported with good audio quality</li>
42  * <li>MP3, OGG, AAC and WAV audio formats supported</li>
43  * <li>It uses HTTP and not HTTPS (for performance reasons)</li>
44  * </ul>
45  *
46  * @author Jochen Hiller - Initial contribution
47  * @author Laurent Garnier - add support for all API languages
48  * @author Laurent Garnier - add support for OGG and AAC audio formats
49  * @author Andreas Brenk - add support for WAV audio format
50  */
51 public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
52
53     public static final String DEFAULT_VOICE = "default";
54
55     public static final String API_URL = "https://api.voicerss.org/?key=%s&hl=%s&c=%s&f=%s&src=%s";
56     public static final String API_URL_WITH_VOICE = API_URL + "&v=%s";
57
58     private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);
59
60     private static final Set<AudioFormat> SUPPORTED_AUDIO_FORMATS = Set.of(
61             new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, null, 44_100L),
62             new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16, null, 44_100L),
63             new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null, 44_100L),
64             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, null, 8, 64_000, 8_000L),
65             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, 16, 128_000, 8_000L),
66             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 88_200, 11_025L),
67             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 176_400, 11_025L),
68             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 96_000, 12_000L),
69             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 192_000, 12_000L),
70             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 128_000, 16_000L),
71             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 256_000, 16_000L),
72             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 176_400, 22_050L),
73             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 352_800, 22_050L),
74             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 192_000, 24_000L),
75             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 384_000, 24_000L),
76             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 256_000, 32_000L),
77             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 512_000, 32_000L),
78             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 352_800, 44_100L),
79             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 705_600, 44_100L),
80             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 384_000, 48_000L),
81             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 768_000, 48_000L),
82             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 64_000, 8_000L),
83             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 88_200, 11_025L),
84             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 176_400, 22_050L),
85             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 352_800, 44_100L),
86             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 64_000, 8_000L),
87             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 88_200, 11_025L),
88             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 176_400, 22_050L),
89             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 352_800, 44_100L));
90
91     private static final Set<Locale> SUPPORTED_LOCALES = new HashSet<>();
92     static {
93         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-eg"));
94         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-sa"));
95         SUPPORTED_LOCALES.add(Locale.forLanguageTag("bg-bg"));
96         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ca-es"));
97         SUPPORTED_LOCALES.add(Locale.forLanguageTag("cs-cz"));
98         SUPPORTED_LOCALES.add(Locale.forLanguageTag("da-dk"));
99         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-at"));
100         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-de"));
101         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
102         SUPPORTED_LOCALES.add(Locale.forLanguageTag("el-gr"));
103         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-au"));
104         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ca"));
105         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-gb"));
106         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ie"));
107         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-in"));
108         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-us"));
109         SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-es"));
110         SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-mx"));
111         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fi-fi"));
112         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ca"));
113         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-fr"));
114         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
115         SUPPORTED_LOCALES.add(Locale.forLanguageTag("he-il"));
116         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hi-in"));
117         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hr-hr"));
118         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hu-hu"));
119         SUPPORTED_LOCALES.add(Locale.forLanguageTag("id-id"));
120         SUPPORTED_LOCALES.add(Locale.forLanguageTag("it-it"));
121         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ja-jp"));
122         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ko-kr"));
123         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ms-my"));
124         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nb-no"));
125         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-be"));
126         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-nl"));
127         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pl-pl"));
128         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-br"));
129         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-pt"));
130         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ro-ro"));
131         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ru-ru"));
132         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sk-sk"));
133         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sl-si"));
134         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sv-se"));
135         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ta-in"));
136         SUPPORTED_LOCALES.add(Locale.forLanguageTag("th-th"));
137         SUPPORTED_LOCALES.add(Locale.forLanguageTag("tr-tr"));
138         SUPPORTED_LOCALES.add(Locale.forLanguageTag("vi-vn"));
139         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-cn"));
140         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-hk"));
141         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-tw"));
142     }
143
144     private static final Map<String, Set<String>> SUPPORTED_VOICES = new HashMap<>();
145     static {
146         SUPPORTED_VOICES.put("ar-eg", Set.of("Oda"));
147         SUPPORTED_VOICES.put("ar-sa", Set.of("Salim"));
148         SUPPORTED_VOICES.put("bg-bg", Set.of("Dimo"));
149         SUPPORTED_VOICES.put("ca-es", Set.of("Rut"));
150         SUPPORTED_VOICES.put("cs-cz", Set.of("Josef"));
151         SUPPORTED_VOICES.put("da-dk", Set.of("Freja"));
152         SUPPORTED_VOICES.put("de-at", Set.of("Lukas"));
153         SUPPORTED_VOICES.put("de-de", Set.of("Hanna", "Lina", "Jonas"));
154         SUPPORTED_VOICES.put("de-ch", Set.of("Tim"));
155         SUPPORTED_VOICES.put("el-gr", Set.of("Neo"));
156         SUPPORTED_VOICES.put("en-au", Set.of("Zoe", "Isla", "Evie", "Jack"));
157         SUPPORTED_VOICES.put("en-ca", Set.of("Rose", "Clara", "Emma", "Mason"));
158         SUPPORTED_VOICES.put("en-gb", Set.of("Alice", "Nancy", "Lily", "Harry"));
159         SUPPORTED_VOICES.put("en-ie", Set.of("Oran"));
160         SUPPORTED_VOICES.put("en-in", Set.of("Eka", "Jai", "Ajit"));
161         SUPPORTED_VOICES.put("en-us", Set.of("Linda", "Amy", "Mary", "John", "Mike"));
162         SUPPORTED_VOICES.put("es-es", Set.of("Camila", "Sofia", "Luna", "Diego"));
163         SUPPORTED_VOICES.put("es-mx", Set.of("Juana", "Silvia", "Teresa", "Jose"));
164         SUPPORTED_VOICES.put("fi-fi", Set.of("Aada"));
165         SUPPORTED_VOICES.put("fr-ca", Set.of("Emile", "Olivia", "Logan", "Felix"));
166         SUPPORTED_VOICES.put("fr-fr", Set.of("Bette", "Iva", "Zola", "Axel"));
167         SUPPORTED_VOICES.put("fr-ch", Set.of("Theo"));
168         SUPPORTED_VOICES.put("he-il", Set.of("Rami"));
169         SUPPORTED_VOICES.put("hi-in", Set.of("Puja", "Kabir"));
170         SUPPORTED_VOICES.put("hr-hr", Set.of("Nikola"));
171         SUPPORTED_VOICES.put("hu-hu", Set.of("Mate"));
172         SUPPORTED_VOICES.put("id-id", Set.of("Intan"));
173         SUPPORTED_VOICES.put("it-it", Set.of("Bria", "Mia", "Pietro"));
174         SUPPORTED_VOICES.put("ja-jp", Set.of("Hina", "Airi", "Fumi", "Akira"));
175         SUPPORTED_VOICES.put("ko-kr", Set.of("Nari"));
176         SUPPORTED_VOICES.put("ms-my", Set.of("Aqil"));
177         SUPPORTED_VOICES.put("nb-no", Set.of("Marte", "Erik"));
178         SUPPORTED_VOICES.put("nl-be", Set.of("Daan"));
179         SUPPORTED_VOICES.put("nl-nl", Set.of("Lotte", "Bram"));
180         SUPPORTED_VOICES.put("pl-pl", Set.of("Julia", "Jan"));
181         SUPPORTED_VOICES.put("pt-br", Set.of("Marcia", "Ligia", "Yara", "Dinis"));
182         SUPPORTED_VOICES.put("pt-pt", Set.of("Leonor"));
183         SUPPORTED_VOICES.put("ro-ro", Set.of("Doru"));
184         SUPPORTED_VOICES.put("ru-ru", Set.of("Olga", "Marina", "Peter"));
185         SUPPORTED_VOICES.put("sk-sk", Set.of("Beda"));
186         SUPPORTED_VOICES.put("sl-si", Set.of("Vid"));
187         SUPPORTED_VOICES.put("sv-se", Set.of("Molly", "Hugo"));
188         SUPPORTED_VOICES.put("ta-in", Set.of("Sai"));
189         SUPPORTED_VOICES.put("th-th", Set.of("Ukrit"));
190         SUPPORTED_VOICES.put("tr-tr", Set.of("Omer"));
191         SUPPORTED_VOICES.put("vi-vn", Set.of("Chi"));
192         SUPPORTED_VOICES.put("zh-cn", Set.of("Luli", "Shu", "Chow", "Wang"));
193         SUPPORTED_VOICES.put("zh-hk", Set.of("Jia", "Xia", "Chen"));
194         SUPPORTED_VOICES.put("zh-tw", Set.of("Akemi", "Lin", "Lee"));
195     }
196
197     @Override
198     public Set<AudioFormat> getAvailableAudioFormats() {
199         return SUPPORTED_AUDIO_FORMATS;
200     }
201
202     @Override
203     public Set<Locale> getAvailableLocales() {
204         return SUPPORTED_LOCALES;
205     }
206
207     @Override
208     public Set<String> getAvailableVoices() {
209         // different locales support different voices, so let's list all here in one big set when no locale is provided
210         Set<String> allvoxes = new HashSet<>();
211         allvoxes.add(DEFAULT_VOICE);
212         for (Set<String> langvoxes : SUPPORTED_VOICES.values()) {
213             for (String langvox : langvoxes) {
214                 allvoxes.add(langvox);
215             }
216         }
217         return allvoxes;
218     }
219
220     @Override
221     public Set<String> getAvailableVoices(Locale locale) {
222         Set<String> allvoxes = new HashSet<>();
223         allvoxes.add(DEFAULT_VOICE);
224         // all maps must be defined with key in lowercase
225         String langtag = locale.toLanguageTag().toLowerCase();
226         if (SUPPORTED_VOICES.containsKey(langtag)) {
227             for (String langvox : SUPPORTED_VOICES.get(langtag)) {
228                 allvoxes.add(langvox);
229             }
230         }
231         return allvoxes;
232     }
233
234     /**
235      * This method will return an input stream to an audio stream for the given
236      * parameters.
237      *
238      * It will do that using a plain URL connection to avoid any external
239      * dependencies.
240      */
241     @Override
242     public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
243             String audioFormat) throws IOException {
244         String url = createURL(apiKey, text, locale, voice, audioCodec, audioFormat);
245         logger.debug("Call {}", url);
246         URLConnection connection = new URL(url).openConnection();
247
248         // we will check return codes. The service will ALWAYS return a HTTP
249         // 200, but for error messages, it will return a text/plain format and
250         // the error message in body
251         int status = ((HttpURLConnection) connection).getResponseCode();
252         if (HttpURLConnection.HTTP_OK != status) {
253             logger.error("Call {} returned HTTP {}", url, status);
254             throw new IOException("Could not read from service: HTTP code " + status);
255         }
256         if (logger.isTraceEnabled()) {
257             for (Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
258                 logger.trace("Response.header: {}={}", header.getKey(), header.getValue());
259             }
260         }
261         String contentType = connection.getHeaderField("Content-Type");
262         InputStream is = connection.getInputStream();
263         // check if content type is text/plain, then we have an error
264         if (contentType.contains("text/plain")) {
265             byte[] bytes = new byte[256];
266             is.read(bytes, 0, 256);
267             // close before throwing an exception
268             try {
269                 is.close();
270             } catch (IOException ex) {
271                 logger.debug("Failed to close inputstream", ex);
272             }
273             throw new IOException(
274                     "Could not read audio content, service return an error: " + new String(bytes, "UTF-8"));
275         } else {
276             return is;
277         }
278     }
279
280     // internal
281
282     /**
283      * This method will create the URL for the cloud service. The text will be
284      * URI encoded as it is part of the URL.
285      *
286      * It is in package scope to be accessed by tests.
287      */
288     private String createURL(String apiKey, String text, String locale, String voice, String audioCodec,
289             String audioFormat) {
290         String encodedMsg = URLEncoder.encode(text, StandardCharsets.UTF_8);
291         String url;
292         if (!DEFAULT_VOICE.equals(voice)) {
293             url = String.format(API_URL_WITH_VOICE, apiKey, locale, audioCodec, audioFormat, encodedMsg, voice);
294         } else {
295             url = String.format(API_URL, apiKey, locale, audioCodec, audioFormat, encodedMsg);
296         }
297         return url;
298     }
299 }