# SymptoScanMD / audio_utils.py
# dkg-2's picture
# Upload 6 files
# 3ccb758 verified
import logging
import os
import platform
import subprocess
from io import BytesIO
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
from groq import Groq
# Configure the root logger once at import time: INFO and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """
    Record audio from the microphone and save it as an MP3 file.

    Args:
        file_path: Destination path handed to pydub's ``export`` for the MP3.
        timeout: Forwarded to ``Recognizer.listen`` as ``timeout``.
        phrase_time_limit: Forwarded to ``Recognizer.listen`` as
            ``phrase_time_limit``.

    Returns:
        None. Any exception raised while capturing or encoding is logged
        (with its traceback) and not re-raised, so callers cannot rely on
        the file existing afterwards.
    """
    recognizer = sr.Recognizer()
    try:
        # Keep the microphone open only for the capture itself.
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")
            audio_data = recognizer.listen(
                source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
            logging.info("Recording complete.")

        # Re-encode the captured WAV bytes as MP3 entirely in memory.
        wav_data = audio_data.get_wav_data()
        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
        # export() hands back the open output file object; close it explicitly
        # so the data is flushed and the handle is not left to the garbage
        # collector.
        audio_segment.export(file_path, format="mp3", bitrate="128k").close()
        logging.info("Audio saved to %s", file_path)
    except Exception as e:
        # Best-effort boundary: report the failure with its traceback instead
        # of crashing the caller.
        logging.exception("An error occurred: %s", e)
def transcribe_with_groq(stt_model, audio_filepath, groq_api_key):
    """
    Send an audio file to the Groq transcription API and return its text.

    Args:
        stt_model: Model identifier passed as ``model`` to the API call.
        audio_filepath: Path of the audio file, opened in binary mode.
        groq_api_key: API key used to construct the ``Groq`` client.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    groq_client = Groq(api_key=groq_api_key)
    # Request settings that do not depend on the open file handle;
    # the language is fixed to English.
    request_options = {"model": stt_model, "language": "en"}
    with open(audio_filepath, "rb") as audio_stream:
        response = groq_client.audio.transcriptions.create(
            file=audio_stream,
            **request_options,
        )
    return response.text
def text_to_speech_with_gtts(input_text, output_filepath="gtts_output.mp3"):
    """
    Converts text to speech using gTTS, saves it as an MP3 and plays it back.

    Args:
        input_text: Text to synthesize (English, normal speed).
        output_filepath: Where gTTS saves the MP3.

    Returns:
        ``output_filepath``, whether or not playback succeeded.

    Notes:
        Errors from gTTS synthesis or saving propagate to the caller.
        Playback is best-effort: any playback failure is logged and
        swallowed.
    """
    import tempfile  # local import: only needed for the temporary WAV copy

    tts = gTTS(text=input_text, lang="en", slow=False)
    tts.save(output_filepath)

    os_name = platform.system()
    wav_copy = None
    try:
        if os_name == "Darwin":  # macOS
            # afplay decodes MP3 directly.
            subprocess.run(['afplay', output_filepath], check=True)
        elif os_name in ("Windows", "Linux"):
            # Media.SoundPlayer (Windows) and aplay (Linux) only understand
            # WAV/PCM, so play a temporary WAV copy of the MP3 instead.
            fd, wav_copy = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            # export() returns the open output handle; close it so the player
            # can read the file and it can be removed afterwards.
            AudioSegment.from_mp3(output_filepath).export(
                wav_copy, format="wav"
            ).close()
            if os_name == "Windows":
                # Single-quoted PowerShell literal (quotes doubled) so that
                # characters such as $ or ` in the path are not interpreted.
                ps_path = wav_copy.replace("'", "''")
                subprocess.run(
                    [
                        'powershell',
                        '-c',
                        f"(New-Object Media.SoundPlayer '{ps_path}').PlaySync();",
                    ],
                    check=True,
                )
            else:
                subprocess.run(['aplay', wav_copy], check=True)
        else:
            raise OSError("Unsupported OS for audio playback.")
    except Exception as e:
        # check=True makes a failing player land here instead of being
        # silently ignored.
        logging.error("[Audio Playback Error] %s", e)
    finally:
        if wav_copy is not None:
            try:
                os.remove(wav_copy)
            except OSError as cleanup_error:
                logging.warning(
                    "Could not remove temporary file %s: %s",
                    wav_copy,
                    cleanup_error,
                )
    return output_filepath