From a292f571cb801eb4deb606499e9b250c67ce339f Mon Sep 17 00:00:00 2001 From: retoor Date: Sat, 18 Jan 2025 10:02:13 +0100 Subject: [PATCH] Update. --- gcloud.py | 2 +- play.py | 24 +++++++++++++++++------- requirements.txt | 1 + tts.py | 27 ++++++++++++++++++++------- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/gcloud.py b/gcloud.py index 8390181..3b8ee1a 100644 --- a/gcloud.py +++ b/gcloud.py @@ -122,4 +122,4 @@ async def main(): if __name__ == '__main__': - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/play.py b/play.py index b89f4a5..1fc1096 100644 --- a/play.py +++ b/play.py @@ -29,12 +29,21 @@ import functools import os import subprocess import sys +import pygame + + + -@functools.cache -def get_py_audio(): - return pyaudio.PyAudio() def play_audio(filename): + pygame.mixer.init() + pygame.mixer.music.load(filename) + pygame.mixer.music.play() + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) + + +def play_audio2(filename): ffmpeg_cmd = [ "ffmpeg", "-i", filename, @@ -45,9 +54,9 @@ def play_audio(filename): ] process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**6) - py_audio = get_py_audio() - stream = py_audio.open( - format=py_audio.get_format_from_width(2), + p = pyaudio.PyAudio() + stream = p.open( + format=p.get_format_from_width(2), channels=2, rate=44100, output=True @@ -62,5 +71,6 @@ def play_audio(filename): finally: stream.stop_stream() stream.close() + p.terminate() process.stdout.close() - process.wait() \ No newline at end of file + process.wait() diff --git a/requirements.txt b/requirements.txt index 5967d96..f4d3b6f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ SpeechRecognition google-cloud-speech google-cloud-texttospeech google-auth +pygame diff --git a/tts.py b/tts.py index fffdaf6..f04c309 100644 --- a/tts.py +++ b/tts.py @@ -33,23 +33,22 @@ import gcloud molodetz = ServerProxy("https://api.molodetz.nl/rpc") -async def main(): +def listen(): recognizer = sr.Recognizer() - with sr.Microphone() as source: print("Adjusting to surroundings for a five seconds.") - recognizer.adjust_for_ambient_noise(source, duration=5) + + #recognizer.non_speaking_duration = 60*60 while True: print("Listening...") try: audio_data = recognizer.listen(source, timeout=10) text = recognizer.recognize_google(audio_data, language="nl-NL") #en-US - print(f"You said:\n\t{text}") - response_llm = molodetz.gpt4o_mini(text) - print(f"GPT4o mini said:\n\t{response_llm}") - await gcloud.tts(response_llm) + source = None + recognizer = None + return text except sr.WaitTimeoutError: continue except sr.UnknownValueError: @@ -57,6 +56,20 @@ async def main(): except sr.RequestError: continue + + + +async def main(): + + + #recognizer.adjust_for_ambient_noise(source, duration=5) + while True: + text = listen() + print(f"You said:\n\t{text}") + response_llm = molodetz.gpt4o_mini(text) + print(f"GPT4o mini said:\n\t{response_llm}") + await gcloud.tts(response_llm) + if __name__ == "__main__": import asyncio asyncio.run(main())