# (Hugging Face Spaces status banner — "Spaces: Sleeping" — removed; page residue, not source code)
# VoiceOfPatient.py
#
# Module setup: load environment configuration, register the ffmpeg binary
# with pydub (needed for MP3 export), and configure logging.

import logging
import os
import warnings
from io import BytesIO

import speech_recognition as sr
from dotenv import load_dotenv
from groq import Groq
from pydub import AudioSegment
from pydub.utils import which

warnings.filterwarnings("ignore")

# Read .env so FFMPEG_PATH and GROQ_API_KEY are available below.
load_dotenv()

ffmpeg_path = os.getenv("FFMPEG_PATH")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# pydub shells out to ffmpeg when exporting MP3; point it at the binary
# named in the environment, and fail fast if it is missing.
if ffmpeg_path:
    AudioSegment.converter = ffmpeg_path
else:
    raise EnvironmentError("FFMPEG_PATH is not set. Please define it in the .env file.")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def record_audio(file_path, timeout=20, phrase_time_limit=None):
    """
    Record audio from the microphone and save it as an MP3 file.

    Args:
        file_path (str): Path to save the recorded audio file.
        timeout (int): Max time to wait for speech to start (in seconds).
        phrase_time_limit (int or None): Max length of the speech (in seconds).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")
            audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
            logging.info("Recording complete.")

        # Convert the captured WAV bytes to MP3 via pydub/ffmpeg.
        wav_data = audio_data.get_wav_data()
        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
        audio_segment.export(file_path, format="mp3", bitrate="128k")
        logging.info("Audio saved to: %s", file_path)
    except Exception as e:
        # Best-effort: keep the script alive, but log the full traceback
        # (logging.error with only str(e) loses it) so failures are debuggable.
        logging.exception("An error occurred: %s", e)
# NOTE(review): these statements run at import time — merely importing this
# module starts a microphone recording; consider moving them under the
# __main__ guard.
audio_file_path = "patientvoicetest.mp3"
record_audio(file_path=audio_file_path)

# Set up the Groq client used below to transcribe the recorded voice to text.
client = Groq(api_key=GROQ_API_KEY)
def transcribe_with_whisper(audio_file_path, model_name="whisper-large-v3"):
    """
    Transcribe an audio file to English text with Groq's speech-to-text API.

    Args:
        audio_file_path (str): Path to the audio file to transcribe.
        model_name (str): Groq transcription model id. The previous default
            ("meta-llama/llama-4-scout-17b-16e-instruct") is a chat LLM and is
            not accepted by the audio-transcription endpoint, which requires a
            Whisper model such as "whisper-large-v3".

    Returns:
        str: The transcribed text.
    """
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )
    return transcription.text
if __name__ == "__main__":
    # Entry point is intentionally empty: recording and client setup already
    # happen at import time above (see NOTE near record_audio call).
    pass