git.basschouten.com Git - openhab-addons.git/blob

   1 /**
   2  * Copyright (c) 2010-2023 Contributors to the openHAB project
   3  *
   4  * See the NOTICE file(s) distributed with this work for additional
   5  * information.
   6  *
   7  * This program and the accompanying materials are made available under the
   8  * terms of the Eclipse Public License 2.0 which is available at
   9  * http://www.eclipse.org/legal/epl-2.0
  10  *
  11  * SPDX-License-Identifier: EPL-2.0
  12  */
  13 package org.openhab.voice.pollytts.internal.cloudapi;
  14
  15 import static java.util.stream.Collectors.*;
  16 import static org.openhab.core.audio.AudioFormat.*;
  17
  18 import java.io.IOException;
  19 import java.io.InputStream;
  20 import java.util.Collections;
  21 import java.util.List;
  22 import java.util.Locale;
  23 import java.util.Map;
  24 import java.util.Set;
  25 import java.util.stream.Stream;
  26
  27 import com.amazonaws.auth.AWSCredentials;
  28 import com.amazonaws.auth.AWSStaticCredentialsProvider;
  29 import com.amazonaws.auth.BasicAWSCredentials;
  30 import com.amazonaws.services.polly.AmazonPolly;
  31 import com.amazonaws.services.polly.AmazonPollyClientBuilder;
  32 import com.amazonaws.services.polly.model.DescribeVoicesRequest;
  33 import com.amazonaws.services.polly.model.OutputFormat;
  34 import com.amazonaws.services.polly.model.SynthesizeSpeechRequest;
  35 import com.amazonaws.services.polly.model.TextType;
  36 import com.amazonaws.services.polly.model.Voice;
  37
  38 /**
  39  * This class implements the Cloud service for PollyTTS.
  40  *
  41  * The implementation supports:
  42  * <ul>
  43  * <li>All languages</li>
  44  * <li>All voices</li>
  45  * <li>MP3 and OGG formats</li>
  46  * </ul>
  47  *
  48  * @author Robert Hillman - Initial contribution
  49  */
  50 public class PollyTTSCloudImpl {
  51
  52     private static final Set<String> SUPPORTED_AUDIO_FORMATS = Collections
  53             .unmodifiableSet(Stream.of(CODEC_MP3, CONTAINER_OGG).collect(toSet()));
  54
  55     protected final PollyTTSConfig config;
  56
  57     private final AmazonPolly client;
  58     private final Map<String, String> labelToID;
  59     private final List<Voice> voices;
  60
  61     public PollyTTSCloudImpl(PollyTTSConfig config) {
  62         this.config = config;
  63
  64         AWSCredentials credentials = new BasicAWSCredentials(config.getAccessKey(), config.getSecretKey());
  65         client = AmazonPollyClientBuilder.standard().withRegion(config.getServiceRegion())
  66                 .withCredentials(new AWSStaticCredentialsProvider(credentials)).build();
  67         voices = client.describeVoices(new DescribeVoicesRequest()).getVoices();
  68
  69         // create voice to ID translation for service invocation
  70         labelToID = voices.stream().collect(toMap(Voice::getName, Voice::getId));
  71     }
  72
  73     /**
  74      * Get all supported audio formats by the TTS service. This includes MP3,
  75      * WAV and more audio formats as used in APIs.
  76      */
  77     public Set<String> getAvailableAudioFormats() {
  78         return SUPPORTED_AUDIO_FORMATS;
  79     }
  80
  81     public Set<Locale> getAvailableLocales() {
  82         // @formatter:off
  83         return voices.stream()
  84                 .map(voice -> Locale.forLanguageTag(voice.getLanguageCode()))
  85                 .collect(toSet());
  86         // @formatter:on
  87     }
  88
  89     public Set<String> getAvailableVoices() {
  90         // @formatter:off
  91         return voices.stream()
  92                 .map(Voice::getName)
  93                 .collect(toSet());
  94         // @formatter:on
  95     }
  96
  97     public Set<String> getAvailableVoices(Locale locale) {
  98         // @formatter:off
  99         return voices.stream()
 100                 .filter(voice -> voice.getLanguageCode().equalsIgnoreCase(locale.toLanguageTag()))
 101                 .map(Voice::getName)
 102                 .collect(toSet());
 103         // @formatter:on
 104     }
 105
 106     /**
 107      * This method will return an input stream to an audio stream for the given
 108      * parameters.
 109      * Get the given text in specified locale and audio format as input stream.
 110      *
 111      * @param text
 112      *            the text to translate into speech
 113      * @param label
 114      *            the voice Label to use
 115      * @param audioFormat
 116      *            the audio format to use
 117      * @return an InputStream to the audio data in specified format
 118      * @throws IOException
 119      *             will be raised if the audio data can not be retrieved from
 120      *             cloud service
 121      */
 122     public InputStream getTextToSpeech(String text, String label, String audioFormat) {
 123         String voiceID = labelToID.get(label);
 124         String format = audioFormat.toLowerCase();
 125         if ("ogg".equals(format)) {
 126             format = "ogg_vorbis";
 127         }
 128         TextType textType = text.startsWith("<speak>") ? TextType.Ssml : TextType.Text;
 129         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest().withTextType(textType).withText(text)
 130                 .withVoiceId(voiceID).withOutputFormat(OutputFormat.fromValue(format));
 131         return client.synthesizeSpeech(request).getAudioStream();
 132     }
 133 }