# Audio-to-Audio — app.py
# (Hugging Face Space page header — SabaAnver, commit 1a3bef4 — converted
# to comments so the module parses as Python.)
import os
from pathlib import Path
import gradio as gr
from groq import Groq
# Load API key from Hugging Face Secret.
# NOTE(review): secret name is "GroqApiKey"; os.getenv returns None when the
# secret is unset, in which case the Groq client is handed api_key=None —
# presumably it then falls back to its own GROQ_API_KEY lookup or raises at
# first call; confirm against the SDK.
api_key = os.getenv("GroqApiKey")
# Initialize Groq client (module-level; shared by every request handler).
client = Groq(api_key=api_key)
def ask_ai(audio_file):
    """Answer a spoken question: transcribe it, query the LLM, speak the reply.

    Pipeline (all calls go through the module-level Groq ``client``):
      1. Speech-to-text with ``whisper-large-v3``.
      2. Chat completion with ``llama-3.1-8b-instant``.
      3. Text-to-speech with ``playai-tts``, written to ``answer.wav``.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the recorded/uploaded audio (Gradio
        ``type="filepath"``). Gradio passes ``None`` when the user submits
        without recording or uploading anything.

    Returns
    -------
    tuple[str, str, str | None]
        ``(transcribed_question, answer_text, answer_audio_path)``; on any
        failure ``("Error processing your request.", <detail>, None)`` so the
        three Gradio outputs always receive values.
    """
    # Guard: without this, open(None) raises TypeError and the user sees an
    # opaque interpreter message instead of a useful one.
    if audio_file is None:
        return "Error processing your request.", "No audio was provided.", None
    try:
        # 1. Speech-to-Text
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=("user_input.wav", file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
            )
        user_text = transcription.text

        # 2. LLM Completion
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
            temperature=1,
            max_completion_tokens=512,
            top_p=1,
        )
        answer_text = completion.choices[0].message.content

        # 3. Text-to-Speech
        speech_file_path = Path("answer.wav")
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Calum-PlayAI",
            response_format="wav",
            input=answer_text,
        )
        # Stream the binary TTS payload to disk chunk by chunk.
        with open(speech_file_path, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

        return user_text, answer_text, str(speech_file_path)
    except Exception as e:
        # Broad catch is deliberate for a Gradio handler: surface any
        # API/network failure in the UI rather than crashing the worker.
        return "Error processing your request.", str(e), None
# Gradio Interface — build the widgets first, then wire them together.
question_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Ask me a question (record or upload audio)",
)
answer_outputs = [
    gr.Textbox(label="Transcribed Question"),
    gr.Textbox(label="AI Answer"),
    gr.Audio(label="Answer Audio"),
]
ui = gr.Interface(
    fn=ask_ai,
    inputs=question_input,
    outputs=answer_outputs,
    title="🎤 Voice Q&A with Groq AI",
    description="Record or upload an audio file, get an AI-generated spoken answer.",
)

# Launch only when executed as a script.
if __name__ == "__main__":
    ui.launch()