/**
 * Copyright (c) 2010-2023 Contributors to the openHAB project
 *
 * See the NOTICE file(s) distributed with this work for additional
 * information.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0
 *
 * SPDX-License-Identifier: EPL-2.0
 */
13 package org.openhab.voice.pollytts.internal.cloudapi;
15 import static java.util.stream.Collectors.*;
16 import static org.openhab.core.audio.AudioFormat.*;
18 import java.io.InputStream;
19 import java.util.Collections;
20 import java.util.List;
21 import java.util.Locale;
24 import java.util.stream.Stream;
26 import com.amazonaws.auth.AWSCredentials;
27 import com.amazonaws.auth.AWSStaticCredentialsProvider;
28 import com.amazonaws.auth.BasicAWSCredentials;
29 import com.amazonaws.services.polly.AmazonPolly;
30 import com.amazonaws.services.polly.AmazonPollyClientBuilder;
31 import com.amazonaws.services.polly.model.AmazonPollyException;
32 import com.amazonaws.services.polly.model.DescribeVoicesRequest;
33 import com.amazonaws.services.polly.model.OutputFormat;
34 import com.amazonaws.services.polly.model.SynthesizeSpeechRequest;
35 import com.amazonaws.services.polly.model.TextType;
36 import com.amazonaws.services.polly.model.Voice;
39 * This class implements the Cloud service for PollyTTS.
41 * The implementation supports:
43 * <li>All languages</li>
45 * <li>MP3 and OGG formats</li>
48 * @author Robert Hillman - Initial contribution
50 public class PollyTTSCloudImpl {
52 private static final Set<String> SUPPORTED_AUDIO_FORMATS = Collections
53 .unmodifiableSet(Stream.of(CODEC_MP3, CONTAINER_OGG).collect(toSet()));
55 protected final PollyTTSConfig config;
57 private final AmazonPolly client;
58 private final Map<String, String> labelToID;
59 private final List<Voice> voices;
61 public PollyTTSCloudImpl(PollyTTSConfig config) {
64 AWSCredentials credentials = new BasicAWSCredentials(config.getAccessKey(), config.getSecretKey());
65 client = AmazonPollyClientBuilder.standard().withRegion(config.getServiceRegion())
66 .withCredentials(new AWSStaticCredentialsProvider(credentials)).build();
67 voices = client.describeVoices(new DescribeVoicesRequest()).getVoices();
69 // create voice to ID translation for service invocation
70 labelToID = voices.stream().collect(toMap(Voice::getName, Voice::getId));
74 * Get all supported audio formats by the TTS service. This includes MP3,
75 * WAV and more audio formats as used in APIs.
77 public Set<String> getAvailableAudioFormats() {
78 return SUPPORTED_AUDIO_FORMATS;
81 public Set<Locale> getAvailableLocales() {
83 return voices.stream()
84 .map(voice -> Locale.forLanguageTag(voice.getLanguageCode()))
89 public Set<String> getAvailableVoices() {
91 return voices.stream()
97 public Set<String> getAvailableVoices(Locale locale) {
99 return voices.stream()
100 .filter(voice -> voice.getLanguageCode().equalsIgnoreCase(locale.toLanguageTag()))
107 * This method will return an input stream to an audio stream for the given
109 * Get the given text in specified locale and audio format as input stream.
112 * the text to translate into speech
114 * the voice Label to use
116 * the audio format to use
117 * @return an InputStream to the audio data in specified format
118 * @throws AmazonPollyException will be raised if the audio data can not be retrieved from
121 public InputStream getTextToSpeech(String text, String label, String audioFormat) {
122 String voiceID = labelToID.get(label);
123 String format = audioFormat.toLowerCase();
124 if ("ogg".equals(format)) {
125 format = "ogg_vorbis";
127 TextType textType = text.startsWith("<speak>") ? TextType.Ssml : TextType.Text;
128 SynthesizeSpeechRequest request = new SynthesizeSpeechRequest().withTextType(textType).withText(text)
129 .withVoiceId(voiceID).withOutputFormat(OutputFormat.fromValue(format));
130 return client.synthesizeSpeech(request).getAudioStream();