]> git.basschouten.com Git - openhab-addons.git/blob
cf04c052df2d42d6a7e96415eaec085906aa8678
[openhab-addons.git] /
1 /**
2  * Copyright (c) 2010-2023 Contributors to the openHAB project
3  *
4  * See the NOTICE file(s) distributed with this work for additional
5  * information.
6  *
7  * This program and the accompanying materials are made available under the
8  * terms of the Eclipse Public License 2.0 which is available at
9  * http://www.eclipse.org/legal/epl-2.0
10  *
11  * SPDX-License-Identifier: EPL-2.0
12  */
13 package org.openhab.voice.pollytts.internal.cloudapi;
14
15 import static java.util.stream.Collectors.*;
16 import static org.openhab.core.audio.AudioFormat.*;
17
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.util.Collections;
21 import java.util.List;
22 import java.util.Locale;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.stream.Stream;
26
27 import com.amazonaws.auth.AWSCredentials;
28 import com.amazonaws.auth.AWSStaticCredentialsProvider;
29 import com.amazonaws.auth.BasicAWSCredentials;
30 import com.amazonaws.services.polly.AmazonPolly;
31 import com.amazonaws.services.polly.AmazonPollyClientBuilder;
32 import com.amazonaws.services.polly.model.DescribeVoicesRequest;
33 import com.amazonaws.services.polly.model.OutputFormat;
34 import com.amazonaws.services.polly.model.SynthesizeSpeechRequest;
35 import com.amazonaws.services.polly.model.TextType;
36 import com.amazonaws.services.polly.model.Voice;
37
38 /**
39  * This class implements the Cloud service for PollyTTS.
40  *
41  * The implementation supports:
42  * <ul>
43  * <li>All languages</li>
44  * <li>All voices</li>
45  * <li>MP3 and OGG formats</li>
46  * </ul>
47  *
48  * @author Robert Hillman - Initial contribution
49  */
50 public class PollyTTSCloudImpl {
51
52     private static final Set<String> SUPPORTED_AUDIO_FORMATS = Collections
53             .unmodifiableSet(Stream.of(CODEC_MP3, CONTAINER_OGG).collect(toSet()));
54
55     protected final PollyTTSConfig config;
56
57     private final AmazonPolly client;
58     private final Map<String, String> labelToID;
59     private final List<Voice> voices;
60
61     public PollyTTSCloudImpl(PollyTTSConfig config) {
62         this.config = config;
63
64         AWSCredentials credentials = new BasicAWSCredentials(config.getAccessKey(), config.getSecretKey());
65         client = AmazonPollyClientBuilder.standard().withRegion(config.getServiceRegion())
66                 .withCredentials(new AWSStaticCredentialsProvider(credentials)).build();
67         voices = client.describeVoices(new DescribeVoicesRequest()).getVoices();
68
69         // create voice to ID translation for service invocation
70         labelToID = voices.stream().collect(toMap(Voice::getName, Voice::getId));
71     }
72
73     /**
74      * Get all supported audio formats by the TTS service. This includes MP3,
75      * WAV and more audio formats as used in APIs.
76      */
77     public Set<String> getAvailableAudioFormats() {
78         return SUPPORTED_AUDIO_FORMATS;
79     }
80
81     public Set<Locale> getAvailableLocales() {
82         // @formatter:off
83         return voices.stream()
84                 .map(voice -> Locale.forLanguageTag(voice.getLanguageCode()))
85                 .collect(toSet());
86         // @formatter:on
87     }
88
89     public Set<String> getAvailableVoices() {
90         // @formatter:off
91         return voices.stream()
92                 .map(Voice::getName)
93                 .collect(toSet());
94         // @formatter:on
95     }
96
97     public Set<String> getAvailableVoices(Locale locale) {
98         // @formatter:off
99         return voices.stream()
100                 .filter(voice -> voice.getLanguageCode().equalsIgnoreCase(locale.toLanguageTag()))
101                 .map(Voice::getName)
102                 .collect(toSet());
103         // @formatter:on
104     }
105
106     /**
107      * This method will return an input stream to an audio stream for the given
108      * parameters.
109      * Get the given text in specified locale and audio format as input stream.
110      *
111      * @param text
112      *            the text to translate into speech
113      * @param label
114      *            the voice Label to use
115      * @param audioFormat
116      *            the audio format to use
117      * @return an InputStream to the audio data in specified format
118      * @throws IOException
119      *             will be raised if the audio data can not be retrieved from
120      *             cloud service
121      */
122     public InputStream getTextToSpeech(String text, String label, String audioFormat) {
123         String voiceID = labelToID.get(label);
124         String format = audioFormat.toLowerCase();
125         if ("ogg".equals(format)) {
126             format = "ogg_vorbis";
127         }
128         TextType textType = text.startsWith("<speak>") ? TextType.Ssml : TextType.Text;
129         SynthesizeSpeechRequest request = new SynthesizeSpeechRequest().withTextType(textType).withText(text)
130                 .withVoiceId(voiceID).withOutputFormat(OutputFormat.fromValue(format));
131         return client.synthesizeSpeech(request).getAudioStream();
132     }
133 }