# Audio-to-Audio / app.py
# Author: Subayyal — commit 80c64db (verified)
import os
import streamlit as st
from groq import Groq
from pathlib import Path
from pydub import AudioSegment
# ------------------------------
# Fetch API key from Secrets.
# NOTE: the secret is named "GrokAPI" but it is a Groq API key — keep the
# env var name as-is so existing deployments keep working.
api_key = os.environ.get("GrokAPI")
if not api_key:
    # Fail fast with a visible message instead of crashing later on auth.
    st.error("❌ Please set GrokAPI secret")
    st.stop()
client = Groq(api_key=api_key)
# ------------------------------
# Page header and audio input widget.
st.title("🎀 Audio β†’ AI Text β†’ Speech")
audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
def truncate_text(text, max_chars=1000):
    """Cap *text* at *max_chars* characters so it stays safe for TTS.

    Text at or under the limit is returned unchanged; longer text is cut
    at the limit and a truncation notice is appended.
    """
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n\n⚠️ Answer truncated for TTS."
if audio_file:
    try:
        # ------------------------------
        # Normalize the upload to WAV so the transcription API receives a
        # known format regardless of whether the user sent .wav or .m4a.
        audio_path = Path("input.wav")
        audio_segment = AudioSegment.from_file(audio_file)
        audio_segment.export(audio_path, format="wav")
        # ------------------------------
        # Speech -> text (Whisper). response_format="text" returns a plain
        # string rather than a structured object.
        transcription = client.audio.transcriptions.create(
            file=("input.wav", audio_path.read_bytes()),
            model="whisper-large-v3",
            response_format="text",
        )
        st.text_area("πŸ“ Question", transcription, height=150)
        # ------------------------------
        # Generate AI answer (kept short so the TTS payload stays small).
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": transcription}],
            temperature=0.7,
            max_completion_tokens=150,  # short answer
        )
        answer_text = completion.choices[0].message.content
        st.text_area("πŸ’¬ AI Answer", answer_text, height=200)
        # ------------------------------
        # Truncate answer to a safe length for TTS.
        answer_text_limited = truncate_text(answer_text, max_chars=1000)
        # ------------------------------
        # Text -> speech, written to disk and played back in the page.
        speech_path = Path("answer.wav")
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Aaliyah-PlayAI",
            response_format="wav",
            input=answer_text_limited,
        )
        # NOTE(review): stream_to_file is deprecated in newer groq SDKs in
        # favor of write_to_file — confirm installed SDK version before
        # switching.
        response.stream_to_file(speech_path)
        st.audio(str(speech_path), format="audio/wav")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead
        # of letting Streamlit show a traceback.
        st.error(f"❌ Error: {str(e)}")