Sync from deploy tool: tutorials/09-voice-summarizer
- .env.example +4 -0
- README.md +24 -5
- app.py +123 -0
- requirements.txt +3 -0
.env.example
ADDED
@@ -0,0 +1,4 @@
+HF_TOKEN=
+# Transcription + summarization models
+WHISPER_MODEL=openai/whisper-large-v3-turbo
+LLM_MODEL=Qwen/Qwen2.5-72B-Instruct
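For local runs, the app reads these values from the process environment, so one option is to copy `.env.example` to `.env` and load it before starting the app. A minimal sketch, assuming the `python-dotenv` package (which is not part of this commit's requirements):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # copies key=value pairs from .env into os.environ
    print(os.environ.get("WHISPER_MODEL"))  # openai/whisper-large-v3-turbo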
README.md
CHANGED
@@ -1,12 +1,31 @@
---
title: Voice Summarizer
-emoji:
-colorFrom:
-colorTo:
+emoji: 🎙️
+colorFrom: green
+colorTo: blue
sdk: gradio
-sdk_version: 6.0.2
+sdk_version: "6.0.2"
app_file: app.py
pinned: false
+license: mit
---

-
+## 🎙️ Voice Summarizer
+
+Record audio → Get transcript → Get AI summary!
+
+## Features
+
+- Whisper transcription (large-v3-turbo)
+- Multiple summary styles
+- Chained AI pipeline
+- No model downloads - uses API
+
+## Setup
+
+Add your `HF_TOKEN` as a Secret in Space Settings.
+
+## Environment Variables
+
+- `WHISPER_MODEL`: Transcription model (default: openai/whisper-large-v3-turbo)
+- `LLM_MODEL`: Summary model (default: Qwen/Qwen2.5-72B-Instruct)
app.py
ADDED
@@ -0,0 +1,123 @@
+import os
+import logging
+import gradio as gr
+from huggingface_hub import InferenceClient
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger(__name__)
+
+# Environment variables
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
+WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "openai/whisper-large-v3-turbo")
+LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-72B-Instruct")
+
+logger.info(f"HF_TOKEN configured: {bool(HF_TOKEN)}")
+logger.info(f"WHISPER_MODEL: {WHISPER_MODEL}")
+logger.info(f"LLM_MODEL: {LLM_MODEL}")
+
+client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
+logger.info("InferenceClient initialized")
+
+
+def transcribe(audio) -> str:
+    """Transcribe audio to text."""
+    if audio is None:
+        return ""
+
+    try:
+        logger.info(f"Transcribing with {WHISPER_MODEL}...")
+        result = client.automatic_speech_recognition(audio, model=WHISPER_MODEL)
+        logger.info(f"Transcription: {len(result.text)} chars")
+        return result.text
+    except Exception as e:
+        logger.error(f"Transcription error: {e}")
+        return f"❌ Transcription error: {e}"
+
+
+def summarize(text: str, style: str) -> str:
+    """Summarize text with LLM."""
+    if not text.strip() or text.startswith("❌"):
+        return text
+
+    prompts = {
+        "Brief Summary": f"Summarize this in 2-3 sentences:\n\n{text}",
+        "Key Points": f"Extract the key points as bullet points:\n\n{text}",
+        "Action Items": f"Extract any action items or tasks mentioned:\n\n{text}",
+        "ELI5": f"Explain the main idea like I'm 5 years old:\n\n{text}",
+    }
+
+    try:
+        logger.info(f"Summarizing with {LLM_MODEL} | style={style}")
+        response = client.chat.completions.create(
+            model=LLM_MODEL,
+            messages=[{"role": "user", "content": prompts[style]}],
+            max_tokens=300,
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        logger.error(f"Summary error: {e}")
+        return f"❌ Summary error: {e}"
+
+
+def process_audio(audio, style: str) -> tuple[str, str]:
+    """Full pipeline: transcribe then summarize."""
+    logger.info(f"process_audio() called | style={style}")
+
+    if audio is None:
+        return "🎤 Record or upload audio first!", ""
+
+    transcript = transcribe(audio)
+    if transcript.startswith("❌"):
+        return transcript, ""
+
+    summary = summarize(transcript, style)
+    return transcript, summary
+
+
+logger.info("Building Gradio interface...")
+
+with gr.Blocks(title="Voice Summarizer") as demo:
+    gr.Markdown("""# 🎙️ Voice Summarizer
+    Record audio → Get transcript → Get AI summary!
+
+    *Pipeline: Whisper (transcription) → Qwen (summarization)*
+    """)
+
+    with gr.Row(equal_height=True):
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                sources=["microphone", "upload"],
+                type="filepath",
+                label="🎤 Record or Upload Audio"
+            )
+
+            style = gr.Radio(
+                choices=["Brief Summary", "Key Points", "Action Items", "ELI5"],
+                value="Brief Summary",
+                label="Summary Style"
+            )
+
+            btn = gr.Button("🚀 Process!", variant="primary", size="lg")
+
+        with gr.Column(scale=1):
+            transcript = gr.Textbox(label="📝 Transcript", lines=6, interactive=False)
+            summary = gr.Textbox(label="✨ Summary", lines=6, interactive=False)
+
+    btn.click(process_audio, inputs=[audio_input, style], outputs=[transcript, summary])
+
+    gr.Markdown("""
+    ### How it works
+    1. **Whisper** transcribes your audio to text
+    2. **Qwen 2.5** summarizes based on your selected style
+    3. All serverless - no downloads needed!
+    """)
+
+demo.queue()
+logger.info("Starting Gradio server...")
+demo.launch()
+
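The two API calls in `app.py` can also be exercised without the Gradio UI, which helps when debugging the chained pipeline. A rough sketch under the same assumptions the app makes (`HF_TOKEN` exported, `huggingface_hub` installed); `sample.wav` is a hypothetical local file:

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.environ["HF_TOKEN"])

    # Step 1: speech -> text (same endpoint transcribe() uses)
    text = client.automatic_speech_recognition(
        "sample.wav", model="openai/whisper-large-v3-turbo"
    ).text

    # Step 2: text -> summary (same OpenAI-compatible call summarize() uses)
    resp = client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        messages=[{"role": "user", "content": f"Summarize this in 2-3 sentences:\n\n{text}"}],
        max_tokens=300,
    )
    print(resp.choices[0].message.content)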
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+gradio>=6.0.0
+huggingface_hub>=0.23.0
+
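To run the app outside Spaces (not covered by this commit): `pip install -r requirements.txt`, export `HF_TOKEN`, then `python app.py`.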