ArchCoder committed (verified)
Commit c2c3825 · 1 Parent(s): 02d77c2

Update app.py

Files changed (1):
  1. app.py  +44 -113
app.py CHANGED
@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
 logger.info("Loading Whisper-tiny...")
 whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
 
-logger.info("Loading SmolLM2-360M-Instruct (FASTEST)...")
+logger.info("Loading SmolLM2-360M-Instruct...")
 model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
@@ -32,7 +32,6 @@ model = AutoModelForCausalLM.from_pretrained(
 
 logger.info("All models loaded!")
 
-# API keys
 TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
 BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
 
@@ -170,10 +169,17 @@ def transcribe_audio_base64(audio_base64):
         return {"error": str(e)}
 
 def generate_answer(text_input):
-    logger.info(f"[AI] Question: {text_input[:60]}...")
+    """Main answer generation - with debug logging"""
+    logger.info("="*60)
+    logger.info(f"[AI] Raw input: '{text_input}'")
+    logger.info(f"[AI] Input type: {type(text_input)}, Length: {len(text_input) if text_input else 0}")
+
     try:
-        if not text_input or not text_input.strip():
-            return "No input provided"
+        # Handle literal {{TEXT}} from Pluely
+        if not text_input or text_input.strip() in ["", "{{TEXT}}", "{{text}}", "$TEXT"]:
+            error_msg = "❌ ERROR: No question received. Pluely sent empty/template variable.\n\nPluely Config Issue:\n- Check your curl command uses correct format\n- Make sure variable substitution is enabled"
+            logger.error(f"[AI] {error_msg}")
+            return error_msg
 
         current_date = datetime.now().strftime("%B %d, %Y")
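The new guard matters because Pluely can forward the literal, unsubstituted template variable (for example `{{TEXT}}`) instead of the user's question; without the check the app would search for the placeholder itself. A minimal, self-contained sketch of the same check (the helper name here is illustrative, not part of the commit):

```python
# Illustrative sketch of the placeholder guard added in this hunk.
PLACEHOLDER_INPUTS = {"", "{{TEXT}}", "{{text}}", "$TEXT"}

def is_placeholder(text):
    """True when the caller sent nothing or an unsubstituted template variable."""
    return not text or text.strip() in PLACEHOLDER_INPUTS

assert is_placeholder(None)
assert is_placeholder("  {{TEXT}}  ")
assert not is_placeholder("Who won the 2024 US presidential election?")
```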
 
@@ -183,7 +189,7 @@ def generate_answer(text_input):
         search_time = time.time() - search_start
         logger.info(f"[AI] Search completed in {search_time:.2f}s")
 
-        # Generate answer with SmolLM2-360M
+        # Generate
         messages = [
             {
                 "role": "system",
@@ -195,7 +201,6 @@ def generate_answer(text_input):
             }
         ]
 
-        # SmolLM2 uses simple chat template
         prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
 
         gen_start = time.time()
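The hand-built `<|im_start|>` prompt above follows the ChatML layout SmolLM2-Instruct expects. Assuming the tokenizer ships a chat template (instruct checkpoints on the Hub usually do), the same string could be produced without manual formatting:

```python
# Sketch only: build the same ChatML-style prompt from `messages` via the
# tokenizer's own chat template instead of hand-writing <|im_start|> markers.
prompt = tokenizer.apply_chat_template(
    messages,                    # the same system/user messages defined above
    tokenize=False,              # return a string rather than token IDs
    add_generation_prompt=True,  # append the assistant header so generation continues from it
)
```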
@@ -205,7 +210,7 @@ def generate_answer(text_input):
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=80,  # 60-80 words
+                max_new_tokens=80,
                 temperature=0.7,
                 do_sample=True,
                 top_p=0.9,
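This hunk references `**inputs` without showing where it is built; that happens just above the displayed lines. A hedged sketch of the usual pattern (the exact tokenization call in app.py is not visible here):

```python
# Assumed tokenization step preceding model.generate (not shown in this hunk).
inputs = tokenizer(prompt, return_tensors="pt")  # BatchEncoding with input_ids / attention_mask
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,   # the deleted comment notes this targets roughly 60-80 words
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )
```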
@@ -219,11 +224,10 @@ def generate_answer(text_input):
         logger.info(f"[AI] Generation completed in {gen_time:.2f}s")
 
         answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
-
-        # Add source attribution
         full_answer = f"{answer}\n\n**Source:** {search_engine}"
 
         logger.info("[AI] ✓ Complete")
+        logger.info("="*60)
         return full_answer
 
     except Exception as e:
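For the unchanged decode line: `model.generate` returns the prompt tokens followed by the completion, so the slice skips the prompt length before decoding. Restated as a minimal sketch:

```python
# Sketch: decode only the newly generated tokens, not the echoed prompt.
prompt_len = inputs["input_ids"].shape[1]  # number of prompt tokens
new_tokens = outputs[0][prompt_len:]       # completion tokens only
answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
```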
@@ -235,7 +239,6 @@ def process_audio(audio_path, question_text):
     logger.info("="*50)
     logger.info("[MAIN] New request received")
 
-    # Transcribe audio if provided
    if audio_path:
        logger.info(f"[MAIN] Processing audio: {audio_path}")
        try:
@@ -255,14 +258,11 @@ def process_audio(audio_path, question_text):
 
    transcription_time = time.time() - start_time
 
-    # Generate answer (includes search)
    gen_start = time.time()
    answer = generate_answer(question)
    gen_time = time.time() - gen_start
 
    total_time = time.time() - start_time
-
-    # Time indicator
    time_emoji = "🟢" if total_time < 2.0 else "🟡" if total_time < 3.0 else "🔴"
 
    timing = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
@@ -282,140 +282,71 @@ def text_handler(text_input):
 with gr.Blocks(title="Ultra-Fast Q&A - SmolLM2-360M", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # ⚡ Ultra-Fast Political Q&A System
-    **SmolLM2-360M** (250-400 tok/s) + **Parallel Search** (Optimized for <2s)
-
-    **Features:**
-    - Whisper-tiny for speech-to-text
-    - SmolLM2-360M-Instruct (20x faster than Qwen 0.5B)
-    - Multi-engine parallel search (Tavily → Brave → Searx → DDG)
-    - Search-grounded answers only
+    **SmolLM2-360M** (250-400 tok/s) + **Parallel Search**
     """)
 
     with gr.Tab("🎙️ Audio Input"):
         with gr.Row():
             with gr.Column():
-                audio_input = gr.Audio(
-                    sources=["microphone", "upload"],
-                    type="filepath",
-                    label="Record or Upload Audio"
-                )
-                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
-
+                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
+                audio_submit = gr.Button("🚀 Submit", variant="primary")
             with gr.Column():
                 audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
-                audio_time = gr.Number(label="Response Time (seconds)", precision=2)
+                audio_time = gr.Number(label="Time (s)", precision=2)
 
-        audio_submit.click(
-            fn=audio_handler,
-            inputs=[audio_input],
-            outputs=[audio_output, audio_time],
-            api_name="audio_query"
-        )
+        audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
 
     with gr.Tab("✍️ Text Input"):
         with gr.Row():
             with gr.Column():
-                text_input = gr.Textbox(
-                    label="Ask Your Question",
-                    placeholder="Is internet shut down in Bareilly today?",
-                    lines=3
-                )
-                text_submit = gr.Button("🚀 Submit Question", variant="primary", size="lg")
-
+                text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
+                text_submit = gr.Button("🚀 Submit", variant="primary")
             with gr.Column():
                 text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
-                text_time = gr.Number(label="Response Time (seconds)", precision=2)
+                text_time = gr.Number(label="Time (s)", precision=2)
 
-        text_submit.click(
-            fn=text_handler,
-            inputs=[text_input],
-            outputs=[text_output, text_time],
-            api_name="text_query"
-        )
+        text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
 
-        gr.Examples(
-            examples=[
-                ["Is internet shut down in Bareilly today?"],
-                ["Who won the 2024 US presidential election?"],
-                ["What is current India inflation rate?"],
-                ["What are the top 3 news stories today?"]
-            ],
-            inputs=text_input
-        )
+        gr.Examples(examples=[["Who is the US president?"]], inputs=text_input)
 
     with gr.Tab("🔌 Pluely API"):
         gr.Markdown("""
-        ## API Endpoints for Pluely Integration
+        ## ⚠️ IMPORTANT: Pluely Configuration
 
-        ### STT Endpoint (Audio Transcription)
+        ### If you see "{{TEXT}}" in logs, try these formats:
+
+        **Format 1 (Windows CMD - Use This First):**
         ```
-        curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
-          -H "Content-Type: application/json" \\
-          -d '{"data": ["BASE64_AUDIO_DATA"]}'
+        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d "{\\"data\\": [\\"TEXT_PLACEHOLDER\\"]}"
         ```
-        **Response Format:** `{"data": [{"text": "transcribed text"}]}`
+        Then in Pluely, replace `TEXT_PLACEHOLDER` with `{{TEXT}}`
 
-        ### AI Endpoint (Text to Answer)
+        **Format 2 (Alternative):**
         ```
-        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
-          -H "Content-Type: application/json" \\
-          -d '{"data": ["Your question here"]}'
+        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" --data-binary "{\\"data\\": [\\"{{TEXT}}\\"]}"
         ```
-        **Response Format:** `{"data": ["Answer with source attribution"]}`
 
-        ---
+        **Response Path:** `data[0]`
 
-        ## Pluely Configuration
-
-        ### Custom STT Provider:
-        **Curl Command:**
-        ```
-        curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
-        ```
-        **Response Content Path:** `data[0].text`
-        **Streaming:** OFF
+        ---
 
-        ### Custom AI Provider:
-        **Curl Command:**
+        ### STT Endpoint:
         ```
-        curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
+        curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d "{\\"data\\": [\\"{{AUDIO_BASE64}}\\"]}"
         ```
-        **Response Content Path:** `data[0]`
-        **Streaming:** OFF
+        **Response Path:** `data[0].text`
         """)
 
-    # Hidden API endpoint components
     with gr.Row(visible=False):
-        stt_input = gr.Textbox()
-        stt_output = gr.JSON()
-        ai_input = gr.Textbox()
-        ai_output = gr.Textbox()
-
-        stt_btn = gr.Button("STT", visible=False)
-        stt_btn.click(
-            fn=transcribe_audio_base64,
-            inputs=[stt_input],
-            outputs=[stt_output],
-            api_name="transcribe_stt"
-        )
+        stt_in = gr.Textbox()
+        stt_out = gr.JSON()
+        ai_in = gr.Textbox()
+        ai_out = gr.Textbox()
 
-        ai_btn = gr.Button("AI", visible=False)
-        ai_btn.click(
-            fn=generate_answer,
-            inputs=[ai_input],
-            outputs=[ai_output],
-            api_name="answer_ai"
-        )
+    gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
+    gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
 
-    gr.Markdown("""
-    ---
-    **Model:** SmolLM2-360M-Instruct (250-400 tokens/second on CPU)
-    **Search:** Parallel multi-engine (Tavily → Brave → Searx → DDG)
-    **Expected Speed:** 1.5-2.5 seconds total
-    **All requests logged** - Check Logs tab in HF Space
-
-    🟢 < 2s | 🟡 2-3s | 🔴 > 3s
-    """)
+    gr.Markdown("🟢 < 2s | 🟡 2-3s | 🔴 > 3s")
 
 if __name__ == "__main__":
    demo.queue(max_size=5)
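Because the hidden buttons register `api_name="transcribe_stt"` and `api_name="answer_ai"`, the endpoints documented in the Pluely tab can also be exercised with `gradio_client` instead of raw curl. A hedged sketch (the Space URL comes from the docs above; `sample.wav` is just an example path):

```python
# Sketch: calling the two named endpoints with gradio_client rather than curl.
import base64
from gradio_client import Client

client = Client("https://archcoder-basic-app.hf.space")

# AI endpoint: plain question in, answer string (with source attribution) out.
answer = client.predict("Who won the 2024 US presidential election?", api_name="/answer_ai")
print(answer)

# STT endpoint: base64-encoded audio in, JSON out ({"text": ...} per the docs above).
with open("sample.wav", "rb") as f:  # example path, not part of the repo
    audio_b64 = base64.b64encode(f.read()).decode()
print(client.predict(audio_b64, api_name="/transcribe_stt"))
```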
 