# Written by retoor@molodetz.nl

# This script interfaces with Google's Text-to-Speech API to synthesize spoken audio from text.
# It also includes functionality to handle Google authentication tokens.

# External imports:
# - aiohttp: Asynchronous HTTP requests.
# - google-auth packages: For managing Google authentication tokens.
# - env, play: Local modules for playing audio and environment configurations.

# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import asyncio
import base64
import pathlib
import sys
import uuid
from functools import cache
from urllib.parse import urlencode

import aiohttp
import google.auth
import google.oauth2.credentials
from google.auth.transport import requests
from google.oauth2 import id_token

from play import play_audio

# Translation table that deletes the Markdown markers tts() strips before
# sending text to the API ("*", "#" and "`").
_MARKDOWN_MARKERS = str.maketrans("", "", "*#`")


@cache
def google_token():
    """Return a Google OAuth2 access token for the cloud-platform scope.

    The default credentials found by ``google.auth.default()`` are read for
    their ``refresh_token``, ``client_id`` and ``client_secret`` attributes,
    which are exchanged for an access token via the OAuth2 token endpoint.

    Returns:
        The access token returned by the refresh grant.

    NOTE(review): because of ``@cache`` the token is fetched once per process
    and never refreshed here; a long-running caller that outlives the token
    can call ``google_token.cache_clear()`` to force a new exchange.
    """
    # Imported lazily so these modules are only loaded when a token is needed.
    from google.oauth2 import _client as google_auth_client
    import google.auth.transport.urllib3 as google_auth_urllib3
    import urllib3

    # The project returned alongside the credentials is not used.
    gcloud_default, _ = google.auth.default()

    http = urllib3.PoolManager()
    request = google_auth_urllib3.Request(http)
    token_uri = 'https://oauth2.googleapis.com/token'
    scopes = ['https://www.googleapis.com/auth/cloud-platform']

    # refresh_grant returns a 4-tuple; only the access token is needed.
    access_token, _, _, _ = google_auth_client.refresh_grant(
        request,
        token_uri,
        gcloud_default.refresh_token,
        gcloud_default.client_id,
        gcloud_default.client_secret,
        scopes,
    )
    return access_token


async def tts(text):
    """Synthesize ``text`` with Google Text-to-Speech and play the audio.

    Markdown markers (``*``, ``#`` and backticks) are removed and the text is
    stripped; if nothing remains, the function returns without any request.
    The MP3 returned by the API is written to a uniquely named temporary file
    in the current working directory, played with ``play_audio`` and removed.

    Args:
        text: The text to speak.

    Returns:
        None.

    Raises:
        RuntimeError: If the API response contains no ``audioContent``
            (for example because the request was rejected).
    """
    url = "https://texttospeech.googleapis.com/v1/text:synthesize"

    text = text.translate(_MARKDOWN_MARKERS).strip()
    if not text:
        return

    headers = {
        "Authorization": f"Bearer {google_token()}",
        "Content-Type": "application/json",
        "X-Goog-User-Project": "lisa-448004",
    }
    data = {
        "input": {
            "text": text
        },
        "voice": {
            "languageCode": "nl-NL",
            "name": "nl-NL-Standard-D",
            "ssmlGender": "FEMALE"
        },
        "audioConfig": {
            "audioEncoding": "MP3",
            "speakingRate": 1.0,
            "pitch": 0.0
        }
    }

    async with aiohttp.ClientSession() as session:
        # Using the response as a context manager releases the connection
        # even if reading or parsing the body fails.
        async with session.post(url, headers=headers, json=data) as response:
            status = response.status
            response_json = await response.json()

    audio_content = response_json.get("audioContent")
    if not audio_content:
        # Surface the API's own error payload instead of failing later with
        # an opaque AttributeError on None.
        error = response_json.get("error", response_json)
        raise RuntimeError(
            f"Text-to-Speech request failed (HTTP {status}): {error}"
        )

    file = pathlib.Path(str(uuid.uuid4()) + ".mp3")
    try:
        # b64decode accepts the ASCII base64 string directly.
        file.write_bytes(base64.b64decode(audio_content))
        play_audio(file)
    finally:
        # Always clean up the temporary file, even when playback fails.
        file.unlink(missing_ok=True)
    return


def oud():
    """Legacy speech-recognition snippet that prints transcripts of an audio file.

    NOTE(review): ``speech`` and ``file_path`` are not defined or imported
    anywhere in the visible file, so calling this function as-is would raise
    ``NameError``. Nothing in the visible file calls it. Presumably it
    expects the Google Cloud Speech client library and a path to a 16 kHz
    LINEAR16 recording - confirm before reviving it.
    """
    client = speech.SpeechClient()

    with open(file_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        # Print the first alternative of each recognition result.
        print("Transcript:", result.alternatives[0].transcript)


async def main():
    """Smoke test: print the access token, then speak a fixed sentence."""
    # NOTE(review): this writes a live bearer token to stdout; avoid running
    # it where output is logged or shared.
    print(google_token())
    await tts("If you hear this sentence, the google part works fine. Congrats.")


if __name__ == '__main__':
    asyncio.run(main())