jonathanagustin committed on
Commit
2b29497
Β·
verified Β·
1 Parent(s): 7270627

Sync from deploy tool: tutorials/09-voice-summarizer

Browse files
Files changed (4) hide show
  1. .env.example +4 -0
  2. README.md +24 -5
  3. app.py +123 -0
  4. requirements.txt +3 -0
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ HF_TOKEN=
2
+ # Transcription + summarization models
3
+ WHISPER_MODEL=openai/whisper-large-v3-turbo
4
+ LLM_MODEL=Qwen/Qwen2.5-72B-Instruct
README.md CHANGED
@@ -1,12 +1,31 @@
1
  ---
2
  title: Voice Summarizer
3
- emoji: πŸš€
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 6.0.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Voice Summarizer
3
+ emoji: πŸŽ™οΈ
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: "6.0.2"
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ ## πŸŽ™οΈ Voice Summarizer
14
+
15
+ Record audio β†’ Get transcript β†’ Get AI summary!
16
+
17
+ ## Features
18
+
19
+ - Whisper transcription (large-v3-turbo)
20
+ - Multiple summary styles
21
+ - Chained AI pipeline
22
+ - No model downloads - uses API
23
+
24
+ ## Setup
25
+
26
+ Add your `HF_TOKEN` as a Secret in Space Settings.
27
+
28
+ ## Environment Variables
29
+
30
+ - `WHISPER_MODEL`: Transcription model (default: openai/whisper-large-v3-turbo)
31
+ - `LLM_MODEL`: Summary model (default: Qwen/Qwen2.5-72B-Instruct)
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import logging
import gradio as gr
from huggingface_hub import InferenceClient

# Configure logging: timestamped, level-tagged lines on the root logger so
# Space logs show the pipeline's progress.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Environment variables (see .env.example): auth token plus the model IDs
# used for transcription and summarization. All have safe defaults except
# HF_TOKEN, which defaults to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "openai/whisper-large-v3-turbo")
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-72B-Instruct")

# Log only whether the token is present — never its value.
logger.info(f"HF_TOKEN configured: {bool(HF_TOKEN)}")
logger.info(f"WHISPER_MODEL: {WHISPER_MODEL}")
logger.info(f"LLM_MODEL: {LLM_MODEL}")

# Single shared Inference API client used by both pipeline stages; an
# unauthenticated client is created when no token is set (rate-limited).
client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
logger.info("InferenceClient initialized")
+
26
+
27
def transcribe(audio) -> str:
    """Transcribe an audio recording to text with the Whisper API.

    Args:
        audio: Filepath of the recorded/uploaded audio, or None when the
            user has not provided any audio yet.

    Returns:
        The transcribed text; an empty string for missing audio; or an
        error message prefixed with "❌" when the API call fails.
    """
    if audio is None:
        return ""

    try:
        logger.info(f"Transcribing with {WHISPER_MODEL}...")
        asr_result = client.automatic_speech_recognition(audio, model=WHISPER_MODEL)
    except Exception as e:
        logger.error(f"Transcription error: {e}")
        return f"❌ Transcription error: {e}"

    text = asr_result.text
    logger.info(f"Transcription: {len(text)} chars")
    return text
40
+
41
+
42
def summarize(text: str, style: str) -> str:
    """Summarize a transcript with the configured chat LLM.

    Args:
        text: The transcript to summarize. Blank text or an upstream error
            message (prefixed with "❌") is passed through unchanged.
        style: One of "Brief Summary", "Key Points", "Action Items",
            "ELI5". Unknown styles fall back to "Brief Summary".

    Returns:
        The model's summary, or an error message prefixed with "❌" when
        the API call fails.
    """
    if not text.strip() or text.startswith("❌"):
        return text

    prompts = {
        "Brief Summary": f"Summarize this in 2-3 sentences:\n\n{text}",
        "Key Points": f"Extract the key points as bullet points:\n\n{text}",
        "Action Items": f"Extract any action items or tasks mentioned:\n\n{text}",
        "ELI5": f"Explain the main idea like I'm 5 years old:\n\n{text}",
    }
    # Fall back to a brief summary instead of raising KeyError if the UI's
    # choice list and this dict ever drift apart.
    prompt = prompts.get(style, prompts["Brief Summary"])

    try:
        logger.info(f"Summarizing with {LLM_MODEL} | style={style}")
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
        )
        # message.content can be None in some API responses; coerce to ""
        # to honor the declared str return (callers use startswith on it).
        return response.choices[0].message.content or ""
    except Exception as e:
        logger.error(f"Summary error: {e}")
        return f"❌ Summary error: {e}"
65
+
66
+
67
def process_audio(audio, style: str) -> tuple[str, str]:
    """Run the chained pipeline: transcribe the audio, then summarize it.

    Args:
        audio: Filepath of the recording, or None when nothing was provided.
        style: Summary style forwarded to summarize().

    Returns:
        A (transcript, summary) pair. The summary is empty when audio is
        missing or transcription failed.
    """
    logger.info(f"process_audio() called | style={style}")

    if audio is None:
        return "🎤 Record or upload audio first!", ""

    transcript = transcribe(audio)
    # A "❌"-prefixed transcript signals a transcription failure; skip the
    # summarization stage in that case.
    if transcript.startswith("❌"):
        return transcript, ""

    return transcript, summarize(transcript, style)
80
+
81
+
82
# ---- Gradio UI (module-level script; Spaces imports app.py and serves `demo`) ----
logger.info("Building Gradio interface...")

with gr.Blocks(title="Voice Summarizer") as demo:
    # Header / intro copy.
    gr.Markdown("""# 🎙️ Voice Summarizer
    Record audio → Get transcript → Get AI summary!

    *Pipeline: Whisper (transcription) → Qwen (summarization)*
    """)

    with gr.Row(equal_height=True):
        # Left column: inputs (audio source, summary style, trigger button).
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                # "filepath" hands transcribe() a path on disk, matching
                # what client.automatic_speech_recognition() accepts.
                type="filepath",
                label="🎤 Record or Upload Audio"
            )

            style = gr.Radio(
                # Must stay in sync with the prompt styles in summarize().
                choices=["Brief Summary", "Key Points", "Action Items", "ELI5"],
                value="Brief Summary",
                label="Summary Style"
            )

            btn = gr.Button("🚀 Process!", variant="primary", size="lg")

        # Right column: read-only outputs.
        with gr.Column(scale=1):
            transcript = gr.Textbox(label="📝 Transcript", lines=6, interactive=False)
            summary = gr.Textbox(label="✨ Summary", lines=6, interactive=False)

    # Wire the button to the full pipeline; outputs map 1:1 to the textboxes.
    btn.click(process_audio, inputs=[audio_input, style], outputs=[transcript, summary])

    gr.Markdown("""
    ### How it works
    1. **Whisper** transcribes your audio to text
    2. **Qwen 2.5** summarizes based on your selected style
    3. All serverless - no downloads needed!
    """)

# Enable request queuing so concurrent users are serialized gracefully.
demo.queue()
logger.info("Starting Gradio server...")
demo.launch()
123
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=6.0.0
2
+ huggingface_hub>=0.23.0
3
+