git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2022 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.voicerss.internal.cloudapi;
  14
  15 import java.io.IOException;
  16 import java.io.InputStream;
  17 import java.net.HttpURLConnection;
  18 import java.net.URL;
  19 import java.net.URLConnection;
  20 import java.net.URLEncoder;
  21 import java.nio.charset.StandardCharsets;
  22 import java.util.HashMap;
  23 import java.util.HashSet;
  24 import java.util.List;
  25 import java.util.Locale;
  26 import java.util.Map;
  27 import java.util.Map.Entry;
  28 import java.util.Set;
  29
  30 import org.openhab.core.audio.AudioFormat;
  31 import org.slf4j.Logger;
  32 import org.slf4j.LoggerFactory;
  33
  34 /**
  35  * This class implements the Cloud service from VoiceRSS. For more information,
  36  * see API documentation at http://www.voicerss.org/api .
  37  *
  38  * Current state of implementation:
  39  * <ul>
  40  * <li>All API languages supported</li>
  41  * <li>Only default voice supported with good audio quality</li>
  42  * <li>MP3, OGG, AAC and WAV audio formats supported</li>
  43  * <li>It uses HTTP and not HTTPS (for performance reasons)</li>
  44  * </ul>
  45  *
  46  * @author Jochen Hiller - Initial contribution
  47  * @author Laurent Garnier - add support for all API languages
  48  * @author Laurent Garnier - add support for OGG and AAC audio formats
  49  * @author Andreas Brenk - add support for WAV audio format
  50  */
  51 public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
  52
  53     public static final String DEFAULT_VOICE = "default";
  54
  55     private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);
  56
  57     private static final Set<AudioFormat> SUPPORTED_AUDIO_FORMATS = Set.of(
  58             new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, null, 44_100L),
  59             new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16, null, 44_100L),
  60             new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null, 44_100L),
  61             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, null, 8, 64_000, 8_000L),
  62             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, 16, 128_000, 8_000L),
  63             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 88_200, 11_025L),
  64             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 176_400, 11_025L),
  65             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 96_000, 12_000L),
  66             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 192_000, 12_000L),
  67             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 128_000, 16_000L),
  68             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 256_000, 16_000L),
  69             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 176_400, 22_050L),
  70             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 352_800, 22_050L),
  71             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 192_000, 24_000L),
  72             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 384_000, 24_000L),
  73             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 256_000, 32_000L),
  74             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 512_000, 32_000L),
  75             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 352_800, 44_100L),
  76             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 705_600, 44_100L),
  77             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 384_000, 48_000L),
  78             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 768_000, 48_000L),
  79             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 64_000, 8_000L),
  80             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 88_200, 11_025L),
  81             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 176_400, 22_050L),
  82             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 352_800, 44_100L),
  83             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 64_000, 8_000L),
  84             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 88_200, 11_025L),
  85             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 176_400, 22_050L),
  86             new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 352_800, 44_100L));
  87
  88     private static final Set<Locale> SUPPORTED_LOCALES = new HashSet<>();
  89     static {
  90         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-eg"));
  91         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ar-sa"));
  92         SUPPORTED_LOCALES.add(Locale.forLanguageTag("bg-bg"));
  93         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ca-es"));
  94         SUPPORTED_LOCALES.add(Locale.forLanguageTag("cs-cz"));
  95         SUPPORTED_LOCALES.add(Locale.forLanguageTag("da-dk"));
  96         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-at"));
  97         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-de"));
  98         SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
  99         SUPPORTED_LOCALES.add(Locale.forLanguageTag("el-gr"));
 100         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-au"));
 101         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ca"));
 102         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-gb"));
 103         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ie"));
 104         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-in"));
 105         SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-us"));
 106         SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-es"));
 107         SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-mx"));
 108         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fi-fi"));
 109         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ca"));
 110         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-fr"));
 111         SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
 112         SUPPORTED_LOCALES.add(Locale.forLanguageTag("he-il"));
 113         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hi-in"));
 114         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hr-hr"));
 115         SUPPORTED_LOCALES.add(Locale.forLanguageTag("hu-hu"));
 116         SUPPORTED_LOCALES.add(Locale.forLanguageTag("id-id"));
 117         SUPPORTED_LOCALES.add(Locale.forLanguageTag("it-it"));
 118         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ja-jp"));
 119         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ko-kr"));
 120         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ms-my"));
 121         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nb-no"));
 122         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-be"));
 123         SUPPORTED_LOCALES.add(Locale.forLanguageTag("nl-nl"));
 124         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pl-pl"));
 125         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-br"));
 126         SUPPORTED_LOCALES.add(Locale.forLanguageTag("pt-pt"));
 127         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ro-ro"));
 128         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ru-ru"));
 129         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sk-sk"));
 130         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sl-si"));
 131         SUPPORTED_LOCALES.add(Locale.forLanguageTag("sv-se"));
 132         SUPPORTED_LOCALES.add(Locale.forLanguageTag("ta-in"));
 133         SUPPORTED_LOCALES.add(Locale.forLanguageTag("th-th"));
 134         SUPPORTED_LOCALES.add(Locale.forLanguageTag("tr-tr"));
 135         SUPPORTED_LOCALES.add(Locale.forLanguageTag("vi-vn"));
 136         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-cn"));
 137         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-hk"));
 138         SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-tw"));
 139     }
 140
 141     private static final Map<String, Set<String>> SUPPORTED_VOICES = new HashMap<>();
 142     static {
 143         SUPPORTED_VOICES.put("ar-eg", Set.of("Oda"));
 144         SUPPORTED_VOICES.put("ar-sa", Set.of("Salim"));
 145         SUPPORTED_VOICES.put("bg-bg", Set.of("Dimo"));
 146         SUPPORTED_VOICES.put("ca-es", Set.of("Rut"));
 147         SUPPORTED_VOICES.put("cs-cz", Set.of("Josef"));
 148         SUPPORTED_VOICES.put("da-dk", Set.of("Freja"));
 149         SUPPORTED_VOICES.put("de-at", Set.of("Lukas"));
 150         SUPPORTED_VOICES.put("de-de", Set.of("Hanna", "Lina", "Jonas"));
 151         SUPPORTED_VOICES.put("de-ch", Set.of("Tim"));
 152         SUPPORTED_VOICES.put("el-gr", Set.of("Neo"));
 153         SUPPORTED_VOICES.put("en-au", Set.of("Zoe", "Isla", "Evie", "Jack"));
 154         SUPPORTED_VOICES.put("en-ca", Set.of("Rose", "Clara", "Emma", "Mason"));
 155         SUPPORTED_VOICES.put("en-gb", Set.of("Alice", "Nancy", "Lily", "Harry"));
 156         SUPPORTED_VOICES.put("en-ie", Set.of("Oran"));
 157         SUPPORTED_VOICES.put("en-in", Set.of("Eka", "Jai", "Ajit"));
 158         SUPPORTED_VOICES.put("en-us", Set.of("Linda", "Amy", "Mary", "John", "Mike"));
 159         SUPPORTED_VOICES.put("es-es", Set.of("Camila", "Sofia", "Luna", "Diego"));
 160         SUPPORTED_VOICES.put("es-mx", Set.of("Juana", "Silvia", "Teresa", "Jose"));
 161         SUPPORTED_VOICES.put("fi-fi", Set.of("Aada"));
 162         SUPPORTED_VOICES.put("fr-ca", Set.of("Emile", "Olivia", "Logan", "Felix"));
 163         SUPPORTED_VOICES.put("fr-fr", Set.of("Bette", "Iva", "Zola", "Axel"));
 164         SUPPORTED_VOICES.put("fr-ch", Set.of("Theo"));
 165         SUPPORTED_VOICES.put("he-il", Set.of("Rami"));
 166         SUPPORTED_VOICES.put("hi-in", Set.of("Puja", "Kabir"));
 167         SUPPORTED_VOICES.put("hr-hr", Set.of("Nikola"));
 168         SUPPORTED_VOICES.put("hu-hu", Set.of("Mate"));
 169         SUPPORTED_VOICES.put("id-id", Set.of("Intan"));
 170         SUPPORTED_VOICES.put("it-it", Set.of("Bria", "Mia", "Pietro"));
 171         SUPPORTED_VOICES.put("ja-jp", Set.of("Hina", "Airi", "Fumi", "Akira"));
 172         SUPPORTED_VOICES.put("ko-kr", Set.of("Nari"));
 173         SUPPORTED_VOICES.put("ms-my", Set.of("Aqil"));
 174         SUPPORTED_VOICES.put("nb-no", Set.of("Marte", "Erik"));
 175         SUPPORTED_VOICES.put("nl-be", Set.of("Daan"));
 176         SUPPORTED_VOICES.put("nl-nl", Set.of("Lotte", "Bram"));
 177         SUPPORTED_VOICES.put("pl-pl", Set.of("Julia", "Jan"));
 178         SUPPORTED_VOICES.put("pt-br", Set.of("Marcia", "Ligia", "Yara", "Dinis"));
 179         SUPPORTED_VOICES.put("pt-pt", Set.of("Leonor"));
 180         SUPPORTED_VOICES.put("ro-ro", Set.of("Doru"));
 181         SUPPORTED_VOICES.put("ru-ru", Set.of("Olga", "Marina", "Peter"));
 182         SUPPORTED_VOICES.put("sk-sk", Set.of("Beda"));
 183         SUPPORTED_VOICES.put("sl-si", Set.of("Vid"));
 184         SUPPORTED_VOICES.put("sv-se", Set.of("Molly", "Hugo"));
 185         SUPPORTED_VOICES.put("ta-in", Set.of("Sai"));
 186         SUPPORTED_VOICES.put("th-th", Set.of("Ukrit"));
 187         SUPPORTED_VOICES.put("tr-tr", Set.of("Omer"));
 188         SUPPORTED_VOICES.put("vi-vn", Set.of("Chi"));
 189         SUPPORTED_VOICES.put("zh-cn", Set.of("Luli", "Shu", "Chow", "Wang"));
 190         SUPPORTED_VOICES.put("zh-hk", Set.of("Jia", "Xia", "Chen"));
 191         SUPPORTED_VOICES.put("zh-tw", Set.of("Akemi", "Lin", "Lee"));
 192     }
 193
 194     @Override
 195     public Set<AudioFormat> getAvailableAudioFormats() {
 196         return SUPPORTED_AUDIO_FORMATS;
 197     }
 198
 199     @Override
 200     public Set<Locale> getAvailableLocales() {
 201         return SUPPORTED_LOCALES;
 202     }
 203
 204     @Override
 205     public Set<String> getAvailableVoices() {
 206         // different locales support different voices, so let's list all here in one big set when no locale is provided
 207         Set<String> allvoxes = new HashSet<>();
 208         allvoxes.add(DEFAULT_VOICE);
 209         for (Set<String> langvoxes : SUPPORTED_VOICES.values()) {
 210             for (String langvox : langvoxes) {
 211                 allvoxes.add(langvox);
 212             }
 213         }
 214         return allvoxes;
 215     }
 216
 217     @Override
 218     public Set<String> getAvailableVoices(Locale locale) {
 219         Set<String> allvoxes = new HashSet<>();
 220         allvoxes.add(DEFAULT_VOICE);
 221         // all maps must be defined with key in lowercase
 222         String langtag = locale.toLanguageTag().toLowerCase();
 223         if (SUPPORTED_VOICES.containsKey(langtag)) {
 224             for (String langvox : SUPPORTED_VOICES.get(langtag)) {
 225                 allvoxes.add(langvox);
 226             }
 227         }
 228         return allvoxes;
 229     }
 230
 231     /**
 232      * This method will return an input stream to an audio stream for the given
 233      * parameters.
 234      *
 235      * It will do that using a plain URL connection to avoid any external
 236      * dependencies.
 237      */
 238     @Override
 239     public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
 240             String audioFormat) throws IOException {
 241         String url = createURL(apiKey, text, locale, voice, audioCodec, audioFormat);
 242         logger.debug("Call {}", url);
 243         URLConnection connection = new URL(url).openConnection();
 244
 245         // we will check return codes. The service will ALWAYS return a HTTP
 246         // 200, but for error messages, it will return a text/plain format and
 247         // the error message in body
 248         int status = ((HttpURLConnection) connection).getResponseCode();
 249         if (HttpURLConnection.HTTP_OK != status) {
 250             logger.error("Call {} returned HTTP {}", url, status);
 251             throw new IOException("Could not read from service: HTTP code " + status);
 252         }
 253         if (logger.isTraceEnabled()) {
 254             for (Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
 255                 logger.trace("Response.header: {}={}", header.getKey(), header.getValue());
 256             }
 257         }
 258         String contentType = connection.getHeaderField("Content-Type");
 259         InputStream is = connection.getInputStream();
 260         // check if content type is text/plain, then we have an error
 261         if (contentType.contains("text/plain")) {
 262             byte[] bytes = new byte[256];
 263             is.read(bytes, 0, 256);
 264             // close before throwing an exception
 265             try {
 266                 is.close();
 267             } catch (IOException ex) {
 268                 logger.debug("Failed to close inputstream", ex);
 269             }
 270             throw new IOException(
 271                     "Could not read audio content, service return an error: " + new String(bytes, "UTF-8"));
 272         } else {
 273             return is;
 274         }
 275     }
 276
 277     // internal
 278
 279     /**
 280      * This method will create the URL for the cloud service. The text will be
 281      * URI encoded as it is part of the URL.
 282      *
 283      * It is in package scope to be accessed by tests.
 284      */
 285     private String createURL(String apiKey, String text, String locale, String voice, String audioCodec,
 286             String audioFormat) {
 287         String encodedMsg = URLEncoder.encode(text, StandardCharsets.UTF_8);
 288         String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioCodec + "&f="
 289                 + audioFormat;
 290         if (!DEFAULT_VOICE.equals(voice)) {
 291             url += "&v=" + voice;
 292         }
 293         url += "&src=" + encodedMsg;
 294         return url;
 295     }
 296 }