ArchCoder committed on
Commit
f81cf03
·
verified ·
1 Parent(s): 82c886b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -60
app.py CHANGED
@@ -51,19 +51,15 @@ def search_web(query, max_results=2):
51
  def transcribe_audio_base64(audio_base64):
52
  """Transcribe audio from base64 string (for Pluely STT endpoint)"""
53
  try:
54
- # Decode base64 audio
55
  audio_bytes = base64.b64decode(audio_base64)
56
 
57
- # Save to temporary file
58
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
59
  temp_audio.write(audio_bytes)
60
  temp_path = temp_audio.name
61
 
62
- # Transcribe
63
  segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
64
  transcription = " ".join([seg.text for seg in segments])
65
 
66
- # Cleanup
67
  os.unlink(temp_path)
68
 
69
  return {"text": transcription.strip()}
@@ -78,10 +74,8 @@ def generate_answer_stream(text_input):
78
  yield "No input provided"
79
  return
80
 
81
- # Web search (non-streaming part)
82
  search_results = search_web(text_input, max_results=2)
83
 
84
- # Prepare messages
85
  messages = [
86
  {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
87
  {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
@@ -94,8 +88,6 @@ def generate_answer_stream(text_input):
94
  )
95
 
96
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
97
-
98
- # Setup streaming
99
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
100
 
101
  generation_kwargs = dict(
@@ -109,11 +101,9 @@ def generate_answer_stream(text_input):
109
  streamer=streamer
110
  )
111
 
112
- # Start generation in separate thread
113
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
114
  thread.start()
115
 
116
- # Stream tokens as they're generated
117
  generated_text = ""
118
  for new_text in streamer:
119
  generated_text += new_text
@@ -122,27 +112,11 @@ def generate_answer_stream(text_input):
122
  except Exception as e:
123
  yield f"Error: {str(e)}"
124
 
125
- def generate_answer(text_input):
126
- """Generate complete answer (non-streaming)"""
127
- try:
128
- if not text_input or text_input.strip() == "":
129
- return "No input provided"
130
-
131
- # Get the last chunk from streaming
132
- final_answer = ""
133
- for chunk in generate_answer_stream(text_input):
134
- final_answer = chunk
135
-
136
- return final_answer
137
-
138
- except Exception as e:
139
- return f"Error: {str(e)}"
140
-
141
- def process_audio_stream(audio_path, question_text=None):
142
- """Streaming pipeline for Gradio UI - Returns tuple generator"""
143
  start_time = time.time()
144
 
145
- # Step 1: Transcribe audio if provided
146
  if audio_path:
147
  try:
148
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
@@ -159,20 +133,30 @@ def process_audio_stream(audio_path, question_text=None):
159
 
160
  transcription_time = time.time() - start_time
161
 
162
- # Step 2: Web search
163
  search_start = time.time()
164
  search_results = search_web(question, max_results=2)
165
  search_time = time.time() - search_start
166
 
167
- # Step 3: Stream answer generation
168
  llm_start = time.time()
169
  for partial_answer in generate_answer_stream(question):
170
  current_time = time.time() - start_time
171
  time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
172
  timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
173
- # IMPORTANT: Must yield tuple (text, number) to match output components
174
  yield partial_answer + timing_info, current_time
175
 
 
 
 
 
 
 
 
 
 
 
 
176
  # Create Gradio interface
177
  with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
178
  gr.Markdown("""
@@ -196,10 +180,9 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
196
  audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
197
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
198
 
199
- # Fixed: Lambda wrapper ensures proper tuple unpacking
200
  audio_submit.click(
201
- fn=process_audio_stream,
202
- inputs=[audio_input, gr.Textbox(value=None, visible=False)],
203
  outputs=[audio_output, audio_time],
204
  api_name="audio_query_stream"
205
  )
@@ -218,9 +201,8 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
218
  text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
219
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
220
 
221
- # Fixed: Proper function call with audio=None
222
  text_submit.click(
223
- fn=lambda text: process_audio_stream(None, text),
224
  inputs=[text_input],
225
  outputs=[text_output, text_time],
226
  api_name="text_query_stream"
@@ -246,7 +228,6 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
246
  -H "Content-Type: application/json" \\
247
  -d '{"data": ["BASE64_AUDIO_DATA"]}'
248
  ```
249
- **Response Format:** `{"data": [{"text": "transcribed text"}]}`
250
 
251
  ### 2. AI Endpoint - Streaming
252
  ```
@@ -254,58 +235,48 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
254
  -H "Content-Type: application/json" \\
255
  -d '{"data": ["Your question here"]}'
256
  ```
257
- **Response Format:** Streaming text chunks
258
-
259
- ---
260
 
261
  ## Pluely Configuration
262
 
263
  ### Custom STT Provider:
264
- **Curl Command:**
265
  ```
266
  curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
267
  ```
268
- **Response Content Path:** `data[0].text`
269
- **Streaming:** OFF
270
 
271
- ### Custom AI Provider (Streaming):
272
- **Curl Command:**
273
  ```
274
  curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
275
  ```
276
- **Response Content Path:** `data`
277
- **Streaming:** ON ✅
278
  """)
279
 
280
- # Hidden interface components that create API endpoints
281
  with gr.Row(visible=False):
282
  stt_input = gr.Textbox()
283
  stt_output = gr.JSON()
284
- ai_stream_input = gr.Textbox()
285
- ai_stream_output = gr.Textbox()
286
 
287
- # These create the /call/transcribe_stt and /call/answer_ai_stream endpoints
288
- stt_button = gr.Button("STT", visible=False)
289
- stt_button.click(
290
  fn=transcribe_audio_base64,
291
  inputs=[stt_input],
292
  outputs=[stt_output],
293
  api_name="transcribe_stt"
294
  )
295
 
296
- ai_stream_button = gr.Button("AI Stream", visible=False)
297
- ai_stream_button.click(
298
  fn=generate_answer_stream,
299
- inputs=[ai_stream_input],
300
- outputs=[ai_stream_output],
301
  api_name="answer_ai_stream"
302
  )
303
 
304
  gr.Markdown("""
305
  ---
306
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
307
-
308
- **Streaming Mode:** Words appear as they're generated - much faster perceived response!
309
  """)
310
 
311
  if __name__ == "__main__":
 
51
  def transcribe_audio_base64(audio_base64):
52
  """Transcribe audio from base64 string (for Pluely STT endpoint)"""
53
  try:
 
54
  audio_bytes = base64.b64decode(audio_base64)
55
 
 
56
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
57
  temp_audio.write(audio_bytes)
58
  temp_path = temp_audio.name
59
 
 
60
  segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
61
  transcription = " ".join([seg.text for seg in segments])
62
 
 
63
  os.unlink(temp_path)
64
 
65
  return {"text": transcription.strip()}
 
74
  yield "No input provided"
75
  return
76
 
 
77
  search_results = search_web(text_input, max_results=2)
78
 
 
79
  messages = [
80
  {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
81
  {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
 
88
  )
89
 
90
  inputs = tokenizer([text], return_tensors="pt").to("cpu")
 
 
91
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
92
 
93
  generation_kwargs = dict(
 
101
  streamer=streamer
102
  )
103
 
 
104
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
105
  thread.start()
106
 
 
107
  generated_text = ""
108
  for new_text in streamer:
109
  generated_text += new_text
 
112
  except Exception as e:
113
  yield f"Error: {str(e)}"
114
 
115
+ def process_audio_stream(audio_path, question_text):
116
+ """Streaming pipeline that yields tuples"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  start_time = time.time()
118
 
119
+ # Transcribe if audio provided
120
  if audio_path:
121
  try:
122
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
 
133
 
134
  transcription_time = time.time() - start_time
135
 
136
+ # Web search
137
  search_start = time.time()
138
  search_results = search_web(question, max_results=2)
139
  search_time = time.time() - search_start
140
 
141
+ # Stream answer
142
  llm_start = time.time()
143
  for partial_answer in generate_answer_stream(question):
144
  current_time = time.time() - start_time
145
  time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
146
  timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
 
147
  yield partial_answer + timing_info, current_time
148
 
149
+ # Wrapper functions for proper API handling
150
+ def audio_handler(audio_path):
151
+ """Wrapper for audio input"""
152
+ for result in process_audio_stream(audio_path, None):
153
+ yield result
154
+
155
+ def text_handler(text_input):
156
+ """Wrapper for text input"""
157
+ for result in process_audio_stream(None, text_input):
158
+ yield result
159
+
160
  # Create Gradio interface
161
  with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
162
  gr.Markdown("""
 
180
  audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
181
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
182
 
 
183
  audio_submit.click(
184
+ fn=audio_handler,
185
+ inputs=[audio_input],
186
  outputs=[audio_output, audio_time],
187
  api_name="audio_query_stream"
188
  )
 
201
  text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
202
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
203
 
 
204
  text_submit.click(
205
+ fn=text_handler,
206
  inputs=[text_input],
207
  outputs=[text_output, text_time],
208
  api_name="text_query_stream"
 
228
  -H "Content-Type: application/json" \\
229
  -d '{"data": ["BASE64_AUDIO_DATA"]}'
230
  ```
 
231
 
232
  ### 2. AI Endpoint - Streaming
233
  ```
 
235
  -H "Content-Type: application/json" \\
236
  -d '{"data": ["Your question here"]}'
237
  ```
 
 
 
238
 
239
  ## Pluely Configuration
240
 
241
  ### Custom STT Provider:
 
242
  ```
243
  curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
244
  ```
245
+ **Response Path:** `data[0].text` | **Streaming:** OFF
 
246
 
247
+ ### Custom AI Provider:
 
248
  ```
249
  curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
250
  ```
251
+ **Response Path:** `data` | **Streaming:** ON ✅
 
252
  """)
253
 
254
+ # Hidden components for API endpoints
255
  with gr.Row(visible=False):
256
  stt_input = gr.Textbox()
257
  stt_output = gr.JSON()
258
+ ai_input = gr.Textbox()
259
+ ai_output = gr.Textbox()
260
 
261
+ stt_btn = gr.Button("STT", visible=False)
262
+ stt_btn.click(
 
263
  fn=transcribe_audio_base64,
264
  inputs=[stt_input],
265
  outputs=[stt_output],
266
  api_name="transcribe_stt"
267
  )
268
 
269
+ ai_btn = gr.Button("AI", visible=False)
270
+ ai_btn.click(
271
  fn=generate_answer_stream,
272
+ inputs=[ai_input],
273
+ outputs=[ai_output],
274
  api_name="answer_ai_stream"
275
  )
276
 
277
  gr.Markdown("""
278
  ---
279
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
 
 
280
  """)
281
 
282
  if __name__ == "__main__":