ArchCoder committed on
Commit
8dc383f
·
verified ·
1 Parent(s): 201d71d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -39
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from duckduckgo_search import DDGS
5
  import time
6
  import torch
7
  import base64
8
  import tempfile
9
  import os
 
10
 
11
  # Initialize models
12
  print("Loading Whisper model...")
@@ -70,8 +71,59 @@ def transcribe_audio_base64(audio_base64):
70
  except Exception as e:
71
  return {"error": f"Transcription failed: {str(e)}"}
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def generate_answer(text_input):
74
- """Generate answer from text input (for Pluely AI endpoint)"""
75
  try:
76
  if not text_input or text_input.strip() == "":
77
  return "No input provided"
@@ -109,8 +161,8 @@ def generate_answer(text_input):
109
  except Exception as e:
110
  return f"Error: {str(e)}"
111
 
112
- def process_audio(audio_path, question_text=None):
113
- """Main pipeline for Gradio UI"""
114
  start_time = time.time()
115
 
116
  # Step 1: Transcribe audio if provided
@@ -119,12 +171,14 @@ def process_audio(audio_path, question_text=None):
119
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
120
  question = " ".join([seg.text for seg in segments])
121
  except Exception as e:
122
- return f"❌ Transcription error: {str(e)}", 0.0
 
123
  else:
124
  question = question_text
125
 
126
  if not question or question.strip() == "":
127
- return "❌ No input provided", 0.0
 
128
 
129
  transcription_time = time.time() - start_time
130
 
@@ -133,24 +187,21 @@ def process_audio(audio_path, question_text=None):
133
  search_results = search_web(question, max_results=2)
134
  search_time = time.time() - search_start
135
 
136
- # Step 3: Generate answer
137
  llm_start = time.time()
138
- answer = generate_answer(question)
139
- llm_time = time.time() - llm_start
140
-
141
- total_time = time.time() - start_time
142
- time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 3.5 else "🔴"
143
- timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
144
-
145
- return answer + timing_info, total_time
146
 
147
- # Create Gradio interface with API endpoints
148
- with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as demo:
149
  gr.Markdown("""
150
  # ⚡ Ultra-Fast Political Q&A System
151
- **Pluely Compatible** - Direct STT and AI endpoints available!
152
 
153
- **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (FREE unlimited search)
154
  """)
155
 
156
  with gr.Tab("🎙️ Audio Input"):
@@ -164,14 +215,14 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
164
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
165
 
166
  with gr.Column():
167
- audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
168
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
169
 
170
  audio_submit.click(
171
- fn=lambda x: process_audio(x, None),
172
  inputs=[audio_input],
173
  outputs=[audio_output, audio_time],
174
- api_name="audio_query"
175
  )
176
 
177
  with gr.Tab("✍️ Text Input"):
@@ -185,14 +236,14 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
185
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
186
 
187
  with gr.Column():
188
- text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
189
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
190
 
191
  text_submit.click(
192
- fn=lambda x: process_audio(None, x),
193
  inputs=[text_input],
194
  outputs=[text_output, text_time],
195
- api_name="text_query"
196
  )
197
 
198
  gr.Examples(
@@ -204,12 +255,12 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
204
  inputs=text_input
205
  )
206
 
207
- # Hidden API endpoints for Pluely
208
  with gr.Tab("🔌 Pluely Integration"):
209
  gr.Markdown("""
210
  ## Dedicated Endpoints for Pluely
211
 
212
- ### 1. STT Endpoint (Audio Transcription)
213
  ```
214
  curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
215
  -H "Content-Type: application/json" \\
@@ -217,13 +268,13 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
217
  ```
218
  **Returns:** `{"data": [{"text": "transcribed text"}]}`
219
 
220
- ### 2. AI Endpoint (Text to Answer)
221
  ```
222
- curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
223
  -H "Content-Type: application/json" \\
224
  -d '{"data": ["Your question here"]}'
225
  ```
226
- **Returns:** `{"data": ["Answer text"]}`
227
 
228
  ---
229
 
@@ -237,39 +288,51 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
237
  --data '{"data": ["{{AUDIO_BASE64}}"]}'
238
  ```
239
  **Response Content Path:** `data[0].text`
 
240
 
241
- ### Custom AI Provider:
242
  **Curl Command:**
243
  ```
244
- curl --location 'https://archcoder-basic-app.hf.space/call/answer_ai' \\
245
  --header 'Content-Type: application/json' \\
246
  --data '{"data": ["{{TEXT}}"]}'
247
  ```
248
- **Response Content Path:** `data[0]`
 
 
 
 
 
 
 
249
  """)
250
 
251
  gr.Markdown("""
252
  ---
253
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
 
 
254
  """)
255
 
256
  # Register API endpoints
257
- demo.api_name = "pluely_integration"
258
-
259
- # STT endpoint for Pluely
260
  @demo.api(api_name="transcribe_stt")
261
  def api_transcribe(audio_base64: str):
262
- """API endpoint for audio transcription (Pluely STT)"""
263
  result = transcribe_audio_base64(audio_base64)
264
  return result
265
 
266
- # AI endpoint for Pluely
267
  @demo.api(api_name="answer_ai")
268
  def api_answer(text: str):
269
- """API endpoint for text-to-answer (Pluely AI)"""
270
  answer = generate_answer(text)
271
  return answer
272
 
 
 
 
 
 
 
273
  if __name__ == "__main__":
274
  demo.queue(max_size=5)
275
  demo.launch()
 
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
  from duckduckgo_search import DDGS
5
  import time
6
  import torch
7
  import base64
8
  import tempfile
9
  import os
10
+ from threading import Thread
11
 
12
  # Initialize models
13
  print("Loading Whisper model...")
 
71
  except Exception as e:
72
  return {"error": f"Transcription failed: {str(e)}"}
73
 
74
def generate_answer_stream(text_input):
    """Stream an answer for ``text_input``, yielding the accumulated text so far.

    Runs a web search for context, builds a chat prompt, and starts
    ``model.generate`` on a background thread feeding a
    ``TextIteratorStreamer``. Each yielded value is the whole answer
    generated up to that point, not only the newest fragment.

    Args:
        text_input: The user's question.

    Yields:
        str: The growing answer text. ``"No input provided"`` for empty or
        whitespace-only input, or ``"Error: <message>"`` if any step fails
        (including a failure inside the generation thread).
    """
    try:
        if not text_input or not text_input.strip():
            yield "No input provided"
            return

        # Web search (non-streaming part)
        search_results = search_web(text_input, max_results=2)

        # Prepare messages
        messages = [
            {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
            {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"},
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = tokenizer([text], return_tensors="pt").to("cpu")

        # Setup streaming
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            inputs=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=80,
            temperature=0.2,
            do_sample=True,
            top_p=0.85,
            pad_token_id=tokenizer.eos_token_id,
            streamer=streamer,
        )

        worker_errors = []

        def _run_generation():
            # An exception raised on the worker thread would otherwise be
            # lost and leave the consumer loop below waiting forever for a
            # stop signal, so record it and close the stream explicitly.
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                worker_errors.append(exc)
                streamer.end()

        # Start generation in separate thread
        thread = Thread(target=_run_generation)
        thread.start()

        # Stream tokens as they're generated
        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text

        # The stream is exhausted, so the worker is finishing; reap it.
        thread.join()

        if worker_errors:
            # Surface the worker failure through the handler below.
            raise worker_errors[0]

    except Exception as e:
        yield f"Error: {str(e)}"
124
+
125
  def generate_answer(text_input):
126
+ """Generate complete answer (non-streaming)"""
127
  try:
128
  if not text_input or text_input.strip() == "":
129
  return "No input provided"
 
161
  except Exception as e:
162
  return f"Error: {str(e)}"
163
 
164
+ def process_audio_stream(audio_path, question_text=None):
165
+ """Streaming pipeline for Gradio UI"""
166
  start_time = time.time()
167
 
168
  # Step 1: Transcribe audio if provided
 
171
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
172
  question = " ".join([seg.text for seg in segments])
173
  except Exception as e:
174
+ yield f"❌ Transcription error: {str(e)}", 0.0
175
+ return
176
  else:
177
  question = question_text
178
 
179
  if not question or question.strip() == "":
180
+ yield "❌ No input provided", 0.0
181
+ return
182
 
183
  transcription_time = time.time() - start_time
184
 
 
187
  search_results = search_web(question, max_results=2)
188
  search_time = time.time() - search_start
189
 
190
+ # Step 3: Stream answer generation
191
  llm_start = time.time()
192
+ for partial_answer in generate_answer_stream(question):
193
+ current_time = time.time() - start_time
194
+ time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
195
+ timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
196
+ yield partial_answer + timing_info, current_time
 
 
 
197
 
198
+ # Create Gradio interface
199
+ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
200
  gr.Markdown("""
201
  # ⚡ Ultra-Fast Political Q&A System
202
+ **Streaming enabled** for instant feedback! Pluely compatible endpoints available.
203
 
204
+ **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo + Real-time streaming
205
  """)
206
 
207
  with gr.Tab("🎙️ Audio Input"):
 
215
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
216
 
217
  with gr.Column():
218
+ audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
219
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
220
 
221
  audio_submit.click(
222
+ fn=lambda x: process_audio_stream(x, None),
223
  inputs=[audio_input],
224
  outputs=[audio_output, audio_time],
225
+ api_name="audio_query_stream"
226
  )
227
 
228
  with gr.Tab("✍️ Text Input"):
 
236
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
237
 
238
  with gr.Column():
239
+ text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
240
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
241
 
242
  text_submit.click(
243
+ fn=lambda x: process_audio_stream(None, x),
244
  inputs=[text_input],
245
  outputs=[text_output, text_time],
246
+ api_name="text_query_stream"
247
  )
248
 
249
  gr.Examples(
 
255
  inputs=text_input
256
  )
257
 
258
+ # API endpoints for Pluely
259
  with gr.Tab("🔌 Pluely Integration"):
260
  gr.Markdown("""
261
  ## Dedicated Endpoints for Pluely
262
 
263
+ ### 1. STT Endpoint (Audio Transcription) - Non-streaming
264
  ```
265
  curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
266
  -H "Content-Type: application/json" \\
 
268
  ```
269
  **Returns:** `{"data": [{"text": "transcribed text"}]}`
270
 
271
+ ### 2. AI Endpoint (Text to Answer) - **WITH STREAMING**
272
  ```
273
+ curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai_stream \\
274
  -H "Content-Type: application/json" \\
275
  -d '{"data": ["Your question here"]}'
276
  ```
277
+ **Returns:** Server-Sent Events (SSE) stream of text chunks
278
 
279
  ---
280
 
 
288
  --data '{"data": ["{{AUDIO_BASE64}}"]}'
289
  ```
290
  **Response Content Path:** `data[0].text`
291
+ **Streaming:** OFF (STT doesn't need streaming)
292
 
293
+ ### Custom AI Provider (Streaming):
294
  **Curl Command:**
295
  ```
296
+ curl --location 'https://archcoder-basic-app.hf.space/call/answer_ai_stream' \\
297
  --header 'Content-Type: application/json' \\
298
  --data '{"data": ["{{TEXT}}"]}'
299
  ```
300
+ **Response Content Path:** Leave empty for streaming text
301
+ **Streaming:** **ON** ✅
302
+
303
+ ### Benefits:
304
+ - ⚡ Instant feedback as answer generates
305
+ - 🎯 Better user experience - see words appear in real-time
306
+ - ⏱️ Perceived latency reduced by 50%+
307
+ - 🔄 No actual performance penalty
308
  """)
309
 
310
  gr.Markdown("""
311
  ---
312
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
313
+
314
+ **Streaming Mode:** Words appear as they're generated - much faster perceived response!
315
  """)
316
 
317
  # Register API endpoints
 
 
 
318
  @demo.api(api_name="transcribe_stt")
319
  def api_transcribe(audio_base64: str):
320
+ """API endpoint for audio transcription (Pluely STT) - Non-streaming"""
321
  result = transcribe_audio_base64(audio_base64)
322
  return result
323
 
 
324
  @demo.api(api_name="answer_ai")
325
  def api_answer(text: str):
326
+ """API endpoint for text-to-answer (Pluely AI) - Non-streaming fallback"""
327
  answer = generate_answer(text)
328
  return answer
329
 
330
@demo.api(api_name="answer_ai_stream")
def api_answer_stream(text: str):
    """Streaming text-to-answer endpoint for Pluely.

    Re-yields every chunk produced by ``generate_answer_stream(text)``.
    """
    yield from generate_answer_stream(text)
335
+
336
  if __name__ == "__main__":
337
  demo.queue(max_size=5)
338
  demo.launch()