Spaces:

dkg-2
/

SymptoScanMD

Sleeping

App Files Files Community

SymptoScanMD / audio_utils.py

dkg-2

Upload 6 files

3ccb758 verified 8 months ago

raw

history blame contribute delete

2.52 kB

	import logging
	import os
	import platform
	import subprocess
	from io import BytesIO

	import speech_recognition as sr
	from gtts import gTTS
	from pydub import AudioSegment
	from groq import Groq

	# Configure the root logger once at import time so the helpers below emit
	# timestamped INFO-level (and above) messages.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	def record_audio(file_path, timeout=20, phrase_time_limit=None):
	    """
	    Record audio from the microphone and write it to ``file_path`` as MP3.

	    Args:
	        file_path: Destination path for the exported MP3 file.
	        timeout: Forwarded to ``Recognizer.listen`` as ``timeout``.
	        phrase_time_limit: Forwarded to ``Recognizer.listen`` as
	            ``phrase_time_limit``.

	    Returns:
	        None. Any exception raised while opening the microphone, listening,
	        or exporting is caught and logged rather than propagated.
	    """
	    listener = sr.Recognizer()

	    try:
	        with sr.Microphone() as mic:
	            logging.info("Adjusting for ambient noise...")
	            listener.adjust_for_ambient_noise(mic, duration=1)
	            logging.info("Start speaking now...")

	            captured = listener.listen(
	                mic,
	                timeout=timeout,
	                phrase_time_limit=phrase_time_limit,
	            )
	            logging.info("Recording complete.")

	            # Wrap the captured WAV bytes in an in-memory buffer and
	            # re-encode them as a 128k-bitrate MP3 at the requested path.
	            wav_buffer = BytesIO(captured.get_wav_data())
	            AudioSegment.from_wav(wav_buffer).export(
	                file_path,
	                format="mp3",
	                bitrate="128k",
	            )

	            logging.info(f"Audio saved to {file_path}")
	    except Exception as e:
	        # Best-effort: report the failure and return normally.
	        logging.error(f"An error occurred: {e}")

	def transcribe_with_groq(stt_model, audio_filepath, groq_api_key):
	    """
	    Send an audio file to the Groq transcription endpoint and return its text.

	    Args:
	        stt_model: Model identifier passed to the API as ``model``.
	        audio_filepath: Path of the audio file; it is opened in binary mode.
	        groq_api_key: API key used to construct the Groq client.

	    Returns:
	        The ``text`` attribute of the transcription response.
	    """
	    groq_client = Groq(api_key=groq_api_key)

	    # The language is fixed to English; only the model varies per call.
	    request_options = {"model": stt_model, "language": "en"}

	    with open(audio_filepath, "rb") as audio_stream:
	        response = groq_client.audio.transcriptions.create(
	            file=audio_stream,
	            **request_options,
	        )

	    return response.text

	def text_to_speech_with_gtts(input_text, output_filepath="gtts_output.mp3"):
	    """
	    Convert text to speech with gTTS, save it as MP3, and try to play it.

	    Playback is best-effort: any failure (missing player, failed conversion,
	    non-zero player exit status, unsupported OS) is logged and the function
	    still returns normally. Errors from gTTS itself (synthesis or saving)
	    are not caught and propagate to the caller.

	    Args:
	        input_text: English text to synthesize.
	        output_filepath: Path the MP3 produced by gTTS is written to.

	    Returns:
	        ``output_filepath``, whether or not playback succeeded.
	    """
	    # Local import keeps this block self-contained; only playback needs it.
	    import tempfile

	    tts = gTTS(text=input_text, lang="en", slow=False)
	    tts.save(output_filepath)

	    os_name = platform.system()
	    try:
	        if os_name == "Darwin":  # macOS: afplay is handed the MP3 directly
	            # check=True turns a failing player into a logged error instead
	            # of a silently ignored exit status.
	            subprocess.run(['afplay', output_filepath], check=True)
	        elif os_name in ("Windows", "Linux"):
	            # Media.SoundPlayer (Windows) and aplay (Linux) only handle
	            # WAV-style input, not MP3, so play a temporary WAV copy of the
	            # file. The saved MP3 itself is left untouched.
	            with tempfile.TemporaryDirectory() as tmp_dir:
	                wav_path = os.path.join(tmp_dir, "playback.wav")
	                AudioSegment.from_file(output_filepath, format="mp3").export(
	                    wav_path, format="wav"
	                )
	                if os_name == "Windows":
	                    # Single-quoted PowerShell literal with embedded quotes
	                    # doubled, so the path is never interpreted as script.
	                    quoted_path = "'" + wav_path.replace("'", "''") + "'"
	                    command = [
	                        'powershell',
	                        '-c',
	                        f'(New-Object Media.SoundPlayer {quoted_path}).PlaySync();',
	                    ]
	                else:
	                    command = ['aplay', wav_path]
	                subprocess.run(command, check=True)
	        else:
	            raise OSError("Unsupported OS for audio playback.")
	    except Exception as e:
	        # Logged (not printed) to match record_audio's error reporting.
	        logging.error(f"[Audio Playback Error] {e}")

	    return output_filepath