# Synced from deploy tool: tutorials/09-voice-summarizer (commit 2b29497, verified)
import os
import logging
import gradio as gr
from huggingface_hub import InferenceClient
# Configure logging: timestamped, level-tagged lines on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
# Environment variables. HF_TOKEN is optional; both model ids can be
# overridden per deployment and fall back to the defaults below.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "openai/whisper-large-v3-turbo")
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-72B-Instruct")
# Log only whether a token is present — never the token value itself.
logger.info(f"HF_TOKEN configured: {bool(HF_TOKEN)}")
logger.info(f"WHISPER_MODEL: {WHISPER_MODEL}")
logger.info(f"LLM_MODEL: {LLM_MODEL}")
# Authenticated client when a token is set; otherwise an anonymous
# (more heavily rate-limited) client.
client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
logger.info("InferenceClient initialized")
def transcribe(audio) -> str:
    """Turn a recorded/uploaded audio file into text via the Whisper ASR API.

    Returns "" when no audio was supplied, the transcript on success, or an
    "❌"-prefixed message describing the failure (the UI displays it as-is).
    """
    if audio is None:
        return ""
    try:
        logger.info(f"Transcribing with {WHISPER_MODEL}...")
        asr_result = client.automatic_speech_recognition(audio, model=WHISPER_MODEL)
        logger.info(f"Transcription: {len(asr_result.text)} chars")
        return asr_result.text
    except Exception as err:
        logger.error(f"Transcription error: {err}")
        return f"❌ Transcription error: {err}"
def summarize(text: str, style: str) -> str:
    """Summarize *text* with the chat LLM using the prompt template for *style*.

    Passes empty/whitespace-only input and upstream "❌" error strings through
    unchanged. On API failure, returns an "❌"-prefixed error message rather
    than raising, matching transcribe()'s error convention.

    Fix: an unrecognized *style* previously raised KeyError inside the try
    block, which was caught and misreported as "❌ Summary error: '<style>'".
    The prompt is now resolved with a default before calling the API.
    """
    if not text.strip() or text.startswith("❌"):
        return text
    prompts = {
        "Brief Summary": f"Summarize this in 2-3 sentences:\n\n{text}",
        "Key Points": f"Extract the key points as bullet points:\n\n{text}",
        "Action Items": f"Extract any action items or tasks mentioned:\n\n{text}",
        "ELI5": f"Explain the main idea like I'm 5 years old:\n\n{text}",
    }
    # Unknown style -> fall back to the default prompt instead of a KeyError.
    prompt = prompts.get(style, prompts["Brief Summary"])
    try:
        logger.info(f"Summarizing with {LLM_MODEL} | style={style}")
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
        )
        return response.choices[0].message.content
    except Exception as e:
        logger.error(f"Summary error: {e}")
        return f"❌ Summary error: {e}"
def process_audio(audio, style: str) -> tuple[str, str]:
    """Run the two-stage pipeline: audio -> transcript -> styled summary.

    Returns (transcript, summary). With no audio, a prompt message is placed
    in the transcript slot; if transcription fails, the error string is shown
    there and summarization is skipped.
    """
    logger.info(f"process_audio() called | style={style}")
    if audio is None:
        return "🎀 Record or upload audio first!", ""
    text = transcribe(audio)
    if text.startswith("❌"):
        # Transcription failed — surface the error, skip the LLM call.
        return text, ""
    return text, summarize(text, style)
logger.info("Building Gradio interface...")
# UI layout: two equal columns — inputs (audio, summary style, run button)
# on the left, read-only transcript/summary outputs on the right.
with gr.Blocks(title="Voice Summarizer") as demo:
    gr.Markdown("""# πŸŽ™οΈ Voice Summarizer
Record audio β†’ Get transcript β†’ Get AI summary!
*Pipeline: Whisper (transcription) β†’ Qwen (summarization)*
""")
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            # type="filepath" hands process_audio a path on disk, which the
            # InferenceClient ASR call accepts directly.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎀 Record or Upload Audio"
            )
            # Choices must match the prompt keys in summarize().
            style = gr.Radio(
                choices=["Brief Summary", "Key Points", "Action Items", "ELI5"],
                value="Brief Summary",
                label="Summary Style"
            )
            btn = gr.Button("πŸš€ Process!", variant="primary", size="lg")
        with gr.Column(scale=1):
            # Outputs are display-only; users can't edit them.
            transcript = gr.Textbox(label="πŸ“ Transcript", lines=6, interactive=False)
            summary = gr.Textbox(label="✨ Summary", lines=6, interactive=False)
    # Wire the button to the full pipeline; outputs fill both textboxes.
    btn.click(process_audio, inputs=[audio_input, style], outputs=[transcript, summary])
    gr.Markdown("""
### How it works
1. **Whisper** transcribes your audio to text
2. **Qwen 2.5** summarizes based on your selected style
3. All serverless - no downloads needed!
""")
# queue() serializes/queues concurrent requests before launching the server.
demo.queue()
logger.info("Starting Gradio server...")
demo.launch()