ArchCoder committed on
Commit
02d77c2
·
verified ·
1 Parent(s): 4b20d59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -126
app.py CHANGED
@@ -13,15 +13,15 @@ from concurrent.futures import ThreadPoolExecutor
13
  from html.parser import HTMLParser
14
 
15
  # Setup logging
16
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
  logger = logging.getLogger(__name__)
18
 
19
  # Initialize models
20
- logger.info("Loading Whisper model...")
21
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
22
 
23
- logger.info("Loading Qwen 2.5 0.5B-Instruct (FASTEST)...")
24
- model_name = "Qwen/Qwen2.5-0.5B-Instruct" # SWITCHED BACK to 0.5B for speed
25
  tokenizer = AutoTokenizer.from_pretrained(model_name)
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_name,
@@ -32,91 +32,69 @@ model = AutoModelForCausalLM.from_pretrained(
32
 
33
  logger.info("All models loaded!")
34
 
 
35
  TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
36
  BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
37
 
38
  def search_tavily(query):
39
- logger.info("[TAVILY] Starting...")
40
  if not TAVILY_API_KEY:
41
  return None
42
-
43
  try:
44
  response = requests.post(
45
  'https://api.tavily.com/search',
46
- json={'api_key': TAVILY_API_KEY, 'query': query, 'max_results': 3},
47
- timeout=2 # REDUCED timeout
48
  )
49
-
50
  if response.status_code == 200:
51
  data = response.json()
52
  results = data.get('results', [])
53
- context = ""
54
- for i, result in enumerate(results[:3], 1):
55
- context += f"\n[{i}] {result.get('title', '')}\n{result.get('content', '')}\n"
56
- logger.info(f"[TAVILY] ✓")
57
- return context
58
  except:
59
  pass
60
  return None
61
 
62
  def search_brave(query):
63
- logger.info("[BRAVE] Starting...")
64
  if not BRAVE_API_KEY:
65
  return None
66
-
67
  try:
68
  response = requests.get(
69
  'https://api.search.brave.com/res/v1/web/search',
70
- params={'q': query, 'count': 3},
71
  headers={'X-Subscription-Token': BRAVE_API_KEY},
72
- timeout=2
73
  )
74
-
75
  if response.status_code == 200:
76
  data = response.json()
77
  results = data.get('web', {}).get('results', [])
78
- context = ""
79
- for i, result in enumerate(results[:3], 1):
80
- context += f"\n[{i}] {result.get('title', '')}\n{result.get('description', '')}\n"
81
- logger.info(f"[BRAVE] ✓")
82
- return context
83
  except:
84
  pass
85
  return None
86
 
87
  def search_searx(query):
88
- logger.info("[SEARX] Starting...")
89
-
90
  for instance in ['https://searx.be/search', 'https://searx.work/search']:
91
  try:
92
  response = requests.get(
93
  instance,
94
- params={'q': query, 'format': 'json', 'categories': 'general'},
95
- timeout=2
96
  )
97
-
98
  if response.status_code == 200:
99
  data = response.json()
100
  results = data.get('results', [])
101
- context = ""
102
- for i, result in enumerate(results[:3], 1):
103
- context += f"\n[{i}] {result.get('title', '')}\n{result.get('content', '')}\n"
104
- logger.info(f"[SEARX] ✓")
105
- return context
106
  except:
107
  continue
108
  return None
109
 
110
- def search_duckduckgo_html(query):
111
- logger.info("[DDG] Starting...")
112
  try:
113
  response = requests.get(
114
  'https://html.duckduckgo.com/html/',
115
  params={'q': query},
116
  headers={'User-Agent': 'Mozilla/5.0'},
117
- timeout=2
118
  )
119
-
120
  if response.status_code == 200:
121
  class DDGParser(HTMLParser):
122
  def __init__(self):
@@ -130,59 +108,49 @@ def search_duckduckgo_html(query):
130
  self.in_result = True
131
 
132
  def handle_data(self, data):
133
- if self.in_result:
134
- self.current_text += data.strip()
135
 
136
  def handle_endtag(self, tag):
137
  if tag == 'a' and self.in_result:
138
- self.results.append(self.current_text)
 
139
  self.current_text = ""
140
  self.in_result = False
141
 
142
  parser = DDGParser()
143
  parser.feed(response.text)
144
-
145
- context = ""
146
- for i, result in enumerate(parser.results[:3], 1):
147
- context += f"\n[{i}] {result}\n"
148
-
149
- if context:
150
- logger.info(f"[DDG] ✓")
151
- return context
152
  except:
153
  pass
154
  return None
155
 
156
  def search_parallel(query):
157
- logger.info("[SEARCH] Parallel start")
158
 
159
  with ThreadPoolExecutor(max_workers=4) as executor:
160
  futures = {
161
  executor.submit(search_tavily, query): "Tavily",
162
  executor.submit(search_brave, query): "Brave",
163
  executor.submit(search_searx, query): "Searx",
164
- executor.submit(search_duckduckgo_html, query): "DDG"
165
  }
166
 
167
- results = {}
168
  for future in futures:
169
  engine = futures[future]
170
  try:
171
- result = future.result(timeout=3)
172
  if result:
173
- results[engine] = result
 
174
  except:
175
  pass
176
 
177
- for engine in ["Tavily", "Brave", "Searx", "DDG"]:
178
- if engine in results:
179
- logger.info(f"[SEARCH] Using {engine}")
180
- return results[engine], engine
181
-
182
  return "No search results available.", "None"
183
 
184
  def transcribe_audio_base64(audio_base64):
185
- logger.info("[STT] Request")
186
  try:
187
  audio_bytes = base64.b64decode(audio_base64)
188
 
@@ -194,72 +162,69 @@ def transcribe_audio_base64(audio_base64):
194
  transcription = " ".join([seg.text for seg in segments])
195
  os.unlink(temp_path)
196
 
197
- logger.info(f"[STT] ✓")
198
  return {"text": transcription.strip()}
 
199
  except Exception as e:
 
200
  return {"error": str(e)}
201
 
202
  def generate_answer(text_input):
203
- logger.info(f"[AI] Q: {text_input}")
204
  try:
205
  if not text_input or not text_input.strip():
206
  return "No input provided"
207
 
208
  current_date = datetime.now().strftime("%B %d, %Y")
209
 
 
210
  search_start = time.time()
211
  search_results, search_engine = search_parallel(text_input)
212
  search_time = time.time() - search_start
213
- logger.info(f"[AI] Search: {search_time:.2f}s")
214
 
215
- # IMPROVED PROMPT - Structured multi-point answers
216
  messages = [
217
  {
218
  "role": "system",
219
- "content": f"""Today is {current_date}. You are a concise assistant.
220
-
221
- When answering:
222
- - If question asks about multiple things, list each with a one-line description
223
- - Use bullet points for multiple items
224
- - Keep total answer to 80-100 words
225
- - Answer ONLY from search results"""
226
  },
227
  {
228
  "role": "user",
229
- "content": f"""Search Results:
230
- {search_results}
231
-
232
- Question: {text_input}
233
-
234
- Answer (80-100 words, use bullets if multiple topics):"""
235
  }
236
  ]
237
 
238
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
239
 
240
  gen_start = time.time()
241
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1200)
242
 
 
243
  with torch.no_grad():
244
  outputs = model.generate(
245
  **inputs,
246
- max_new_tokens=100, # REDUCED from 150
247
- temperature=0.7, # INCREASED for faster sampling
248
  do_sample=True,
249
  top_p=0.9,
250
- top_k=50, # ADDED for speed
251
- repetition_penalty=1.1,
252
- pad_token_id=tokenizer.eos_token_id
 
253
  )
254
 
255
  gen_time = time.time() - gen_start
256
- logger.info(f"[AI] Gen: {gen_time:.2f}s")
257
 
258
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
259
- answer_with_source = f"{answer}\n\n**Source:** {search_engine}"
260
 
261
- logger.info(f"[AI] ✓")
262
- return answer_with_source
 
 
 
263
 
264
  except Exception as e:
265
  logger.error(f"[AI] Error: {str(e)}")
@@ -267,28 +232,43 @@ Answer (80-100 words, use bullets if multiple topics):"""
267
 
268
  def process_audio(audio_path, question_text):
269
  start_time = time.time()
270
- logger.info("="*40)
 
271
 
 
272
  if audio_path:
 
273
  try:
274
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
275
  question = " ".join([seg.text for seg in segments])
 
276
  except Exception as e:
277
- return f" Error: {str(e)}", 0.0
 
278
  else:
279
  question = question_text
 
280
 
281
  if not question or not question.strip():
282
- return " No input", 0.0
 
283
 
 
 
 
 
284
  answer = generate_answer(question)
 
 
285
  total_time = time.time() - start_time
286
 
287
- time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 5.0 else "🔴"
288
- timing = f"\n\n{time_emoji} **Time:** {total_time:.2f}s"
289
 
290
- logger.info(f"[TOTAL] {total_time:.2f}s")
291
- logger.info("="*40)
 
 
292
 
293
  return answer + timing, total_time
294
 
@@ -298,63 +278,143 @@ def audio_handler(audio_path):
298
  def text_handler(text_input):
299
  return process_audio(None, text_input)
300
 
301
- # Gradio UI
302
- with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
303
  gr.Markdown("""
304
- # ⚡ Ultra-Fast Q&A System
305
- **Qwen 0.5B + Parallel Search** (Optimized for <3s response)
 
 
 
 
 
 
306
  """)
307
 
308
- with gr.Tab("🎙️ Audio"):
309
  with gr.Row():
310
  with gr.Column():
311
- audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
312
- audio_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
 
 
 
 
 
313
  with gr.Column():
314
- audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
315
- audio_time = gr.Number(label="Time (s)", precision=2)
316
 
317
- audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
 
 
 
 
 
318
 
319
- with gr.Tab("✍️ Text"):
320
  with gr.Row():
321
  with gr.Column():
322
- text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
323
- text_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
 
 
 
 
 
324
  with gr.Column():
325
- text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
326
- text_time = gr.Number(label="Time (s)", precision=2)
327
 
328
- text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
 
 
 
 
 
329
 
330
  gr.Examples(
331
  examples=[
332
- ["What are the top 3 news stories today?"],
333
- ["Is internet shut down in Bareilly?"],
334
- ["Who won 2024 US election?"]
 
335
  ],
336
  inputs=text_input
337
  )
338
 
339
- with gr.Tab("🔌 API"):
340
  gr.Markdown("""
341
- **Endpoints:**
342
- - STT: `/call/transcribe_stt` → Path: `data[0].text`
343
- - AI: `/call/answer_ai` Path: `data[0]`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  """)
345
 
 
346
  with gr.Row(visible=False):
347
- stt_in = gr.Textbox()
348
- stt_out = gr.JSON()
349
- ai_in = gr.Textbox()
350
- ai_out = gr.Textbox()
351
 
352
- gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
353
- gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
  gr.Markdown("""
356
- **Speed:** Qwen 0.5B (1-2s) + Parallel search (1s) = **2-3s total**
357
- 🟢 < 3s | 🟡 3-5s | 🔴 > 5s
 
 
 
 
 
358
  """)
359
 
360
  if __name__ == "__main__":
 
13
  from html.parser import HTMLParser
14
 
15
  # Setup logging
16
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
17
  logger = logging.getLogger(__name__)
18
 
19
  # Initialize models
20
+ logger.info("Loading Whisper-tiny...")
21
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
22
 
23
+ logger.info("Loading SmolLM2-360M-Instruct (FASTEST)...")
24
+ model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
25
  tokenizer = AutoTokenizer.from_pretrained(model_name)
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_name,
 
32
 
33
  logger.info("All models loaded!")
34
 
35
+ # API keys
36
  TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
37
  BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
38
 
39
  def search_tavily(query):
 
40
  if not TAVILY_API_KEY:
41
  return None
 
42
  try:
43
  response = requests.post(
44
  'https://api.tavily.com/search',
45
+ json={'api_key': TAVILY_API_KEY, 'query': query, 'max_results': 2},
46
+ timeout=1.5
47
  )
 
48
  if response.status_code == 200:
49
  data = response.json()
50
  results = data.get('results', [])
51
+ return "\n".join([f"• {r.get('title', '')}: {r.get('content', '')[:120]}" for r in results[:2]])
 
 
 
 
52
  except:
53
  pass
54
  return None
55
 
56
  def search_brave(query):
 
57
  if not BRAVE_API_KEY:
58
  return None
 
59
  try:
60
  response = requests.get(
61
  'https://api.search.brave.com/res/v1/web/search',
62
+ params={'q': query, 'count': 2},
63
  headers={'X-Subscription-Token': BRAVE_API_KEY},
64
+ timeout=1.5
65
  )
 
66
  if response.status_code == 200:
67
  data = response.json()
68
  results = data.get('web', {}).get('results', [])
69
+ return "\n".join([f"• {r.get('title', '')}: {r.get('description', '')[:120]}" for r in results[:2]])
 
 
 
 
70
  except:
71
  pass
72
  return None
73
 
74
  def search_searx(query):
 
 
75
  for instance in ['https://searx.be/search', 'https://searx.work/search']:
76
  try:
77
  response = requests.get(
78
  instance,
79
+ params={'q': query, 'format': 'json', 'categories': 'general', 'language': 'en'},
80
+ timeout=1.5
81
  )
 
82
  if response.status_code == 200:
83
  data = response.json()
84
  results = data.get('results', [])
85
+ return "\n".join([f"• {r.get('title', '')}: {r.get('content', '')[:120]}" for r in results[:2]])
 
 
 
 
86
  except:
87
  continue
88
  return None
89
 
90
+ def search_duckduckgo(query):
 
91
  try:
92
  response = requests.get(
93
  'https://html.duckduckgo.com/html/',
94
  params={'q': query},
95
  headers={'User-Agent': 'Mozilla/5.0'},
96
+ timeout=1.5
97
  )
 
98
  if response.status_code == 200:
99
  class DDGParser(HTMLParser):
100
  def __init__(self):
 
108
  self.in_result = True
109
 
110
  def handle_data(self, data):
111
+ if self.in_result and data.strip():
112
+ self.current_text += data.strip() + " "
113
 
114
  def handle_endtag(self, tag):
115
  if tag == 'a' and self.in_result:
116
+ if self.current_text:
117
+ self.results.append(self.current_text.strip()[:120])
118
  self.current_text = ""
119
  self.in_result = False
120
 
121
  parser = DDGParser()
122
  parser.feed(response.text)
123
+ return "\n".join([f"• {r}" for r in parser.results[:2]]) if parser.results else None
 
 
 
 
 
 
 
124
  except:
125
  pass
126
  return None
127
 
128
  def search_parallel(query):
129
+ logger.info("[SEARCH] Starting parallel search...")
130
 
131
  with ThreadPoolExecutor(max_workers=4) as executor:
132
  futures = {
133
  executor.submit(search_tavily, query): "Tavily",
134
  executor.submit(search_brave, query): "Brave",
135
  executor.submit(search_searx, query): "Searx",
136
+ executor.submit(search_duckduckgo, query): "DuckDuckGo"
137
  }
138
 
 
139
  for future in futures:
140
  engine = futures[future]
141
  try:
142
+ result = future.result(timeout=2)
143
  if result:
144
+ logger.info(f"[SEARCH] {engine}")
145
+ return result, engine
146
  except:
147
  pass
148
 
149
+ logger.warning("[SEARCH] All engines failed")
 
 
 
 
150
  return "No search results available.", "None"
151
 
152
  def transcribe_audio_base64(audio_base64):
153
+ logger.info("[STT] Processing audio...")
154
  try:
155
  audio_bytes = base64.b64decode(audio_base64)
156
 
 
162
  transcription = " ".join([seg.text for seg in segments])
163
  os.unlink(temp_path)
164
 
165
+ logger.info("[STT] ✓ Transcribed")
166
  return {"text": transcription.strip()}
167
+
168
  except Exception as e:
169
+ logger.error(f"[STT] Error: {str(e)}")
170
  return {"error": str(e)}
171
 
172
  def generate_answer(text_input):
173
+ logger.info(f"[AI] Question: {text_input[:60]}...")
174
  try:
175
  if not text_input or not text_input.strip():
176
  return "No input provided"
177
 
178
  current_date = datetime.now().strftime("%B %d, %Y")
179
 
180
+ # Search
181
  search_start = time.time()
182
  search_results, search_engine = search_parallel(text_input)
183
  search_time = time.time() - search_start
184
+ logger.info(f"[AI] Search completed in {search_time:.2f}s")
185
 
186
+ # Generate answer with SmolLM2-360M
187
  messages = [
188
  {
189
  "role": "system",
190
+ "content": f"You are a helpful assistant. Today is {current_date}. Answer questions using the provided search results. Be concise (60-80 words). Use bullet points for multiple items."
 
 
 
 
 
 
191
  },
192
  {
193
  "role": "user",
194
+ "content": f"Search Results:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer based strictly on search results (60-80 words):"
 
 
 
 
 
195
  }
196
  ]
197
 
198
+ # SmolLM2 uses simple chat template
199
+ prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
200
 
201
  gen_start = time.time()
202
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=800)
203
 
204
+ logger.info("[AI] Generating answer...")
205
  with torch.no_grad():
206
  outputs = model.generate(
207
  **inputs,
208
+ max_new_tokens=80, # 60-80 words
209
+ temperature=0.7,
210
  do_sample=True,
211
  top_p=0.9,
212
+ top_k=40,
213
+ repetition_penalty=1.15,
214
+ pad_token_id=tokenizer.eos_token_id,
215
+ eos_token_id=tokenizer.eos_token_id
216
  )
217
 
218
  gen_time = time.time() - gen_start
219
+ logger.info(f"[AI] Generation completed in {gen_time:.2f}s")
220
 
221
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
 
222
 
223
+ # Add source attribution
224
+ full_answer = f"{answer}\n\n**Source:** {search_engine}"
225
+
226
+ logger.info("[AI] ✓ Complete")
227
+ return full_answer
228
 
229
  except Exception as e:
230
  logger.error(f"[AI] Error: {str(e)}")
 
232
 
233
  def process_audio(audio_path, question_text):
234
  start_time = time.time()
235
+ logger.info("="*50)
236
+ logger.info("[MAIN] New request received")
237
 
238
+ # Transcribe audio if provided
239
  if audio_path:
240
+ logger.info(f"[MAIN] Processing audio: {audio_path}")
241
  try:
242
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
243
  question = " ".join([seg.text for seg in segments])
244
+ logger.info(f"[MAIN] Transcribed: {question}")
245
  except Exception as e:
246
+ logger.error(f"[MAIN] Transcription failed: {str(e)}")
247
+ return f"❌ Transcription error: {str(e)}", 0.0
248
  else:
249
  question = question_text
250
+ logger.info(f"[MAIN] Text input: {question}")
251
 
252
  if not question or not question.strip():
253
+ logger.warning("[MAIN] No input provided")
254
+ return "❌ No input provided", 0.0
255
 
256
+ transcription_time = time.time() - start_time
257
+
258
+ # Generate answer (includes search)
259
+ gen_start = time.time()
260
  answer = generate_answer(question)
261
+ gen_time = time.time() - gen_start
262
+
263
  total_time = time.time() - start_time
264
 
265
+ # Time indicator
266
+ time_emoji = "🟢" if total_time < 2.0 else "🟡" if total_time < 3.0 else "🔴"
267
 
268
+ timing = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
269
+
270
+ logger.info(f"[MAIN] Total time: {total_time:.2f}s")
271
+ logger.info("="*50)
272
 
273
  return answer + timing, total_time
274
 
 
278
  def text_handler(text_input):
279
  return process_audio(None, text_input)
280
 
281
+ # Gradio Interface
282
+ with gr.Blocks(title="Ultra-Fast Q&A - SmolLM2-360M", theme=gr.themes.Soft()) as demo:
283
  gr.Markdown("""
284
+ # ⚡ Ultra-Fast Political Q&A System
285
+ **SmolLM2-360M** (250-400 tok/s) + **Parallel Search** (Optimized for <2s)
286
+
287
+ **Features:**
288
+ - Whisper-tiny for speech-to-text
289
+ - SmolLM2-360M-Instruct (20x faster than Qwen 0.5B)
290
+ - Multi-engine parallel search (Tavily → Brave → Searx → DDG)
291
+ - Search-grounded answers only
292
  """)
293
 
294
+ with gr.Tab("🎙️ Audio Input"):
295
  with gr.Row():
296
  with gr.Column():
297
+ audio_input = gr.Audio(
298
+ sources=["microphone", "upload"],
299
+ type="filepath",
300
+ label="Record or Upload Audio"
301
+ )
302
+ audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
303
+
304
  with gr.Column():
305
+ audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
306
+ audio_time = gr.Number(label="Response Time (seconds)", precision=2)
307
 
308
+ audio_submit.click(
309
+ fn=audio_handler,
310
+ inputs=[audio_input],
311
+ outputs=[audio_output, audio_time],
312
+ api_name="audio_query"
313
+ )
314
 
315
+ with gr.Tab("✍️ Text Input"):
316
  with gr.Row():
317
  with gr.Column():
318
+ text_input = gr.Textbox(
319
+ label="Ask Your Question",
320
+ placeholder="Is internet shut down in Bareilly today?",
321
+ lines=3
322
+ )
323
+ text_submit = gr.Button("🚀 Submit Question", variant="primary", size="lg")
324
+
325
  with gr.Column():
326
+ text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
327
+ text_time = gr.Number(label="Response Time (seconds)", precision=2)
328
 
329
+ text_submit.click(
330
+ fn=text_handler,
331
+ inputs=[text_input],
332
+ outputs=[text_output, text_time],
333
+ api_name="text_query"
334
+ )
335
 
336
  gr.Examples(
337
  examples=[
338
+ ["Is internet shut down in Bareilly today?"],
339
+ ["Who won the 2024 US presidential election?"],
340
+ ["What is current India inflation rate?"],
341
+ ["What are the top 3 news stories today?"]
342
  ],
343
  inputs=text_input
344
  )
345
 
346
+ with gr.Tab("🔌 Pluely API"):
347
  gr.Markdown("""
348
+ ## API Endpoints for Pluely Integration
349
+
350
+ ### STT Endpoint (Audio Transcription)
351
+ ```
352
+ curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
353
+ -H "Content-Type: application/json" \\
354
+ -d '{"data": ["BASE64_AUDIO_DATA"]}'
355
+ ```
356
+ **Response Format:** `{"data": [{"text": "transcribed text"}]}`
357
+
358
+ ### AI Endpoint (Text to Answer)
359
+ ```
360
+ curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
361
+ -H "Content-Type: application/json" \\
362
+ -d '{"data": ["Your question here"]}'
363
+ ```
364
+ **Response Format:** `{"data": ["Answer with source attribution"]}`
365
+
366
+ ---
367
+
368
+ ## Pluely Configuration
369
+
370
+ ### Custom STT Provider:
371
+ **Curl Command:**
372
+ ```
373
+ curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
374
+ ```
375
+ **Response Content Path:** `data[0].text`
376
+ **Streaming:** OFF
377
+
378
+ ### Custom AI Provider:
379
+ **Curl Command:**
380
+ ```
381
+ curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
382
+ ```
383
+ **Response Content Path:** `data[0]`
384
+ **Streaming:** OFF
385
  """)
386
 
387
+ # Hidden API endpoint components
388
  with gr.Row(visible=False):
389
+ stt_input = gr.Textbox()
390
+ stt_output = gr.JSON()
391
+ ai_input = gr.Textbox()
392
+ ai_output = gr.Textbox()
393
 
394
+ stt_btn = gr.Button("STT", visible=False)
395
+ stt_btn.click(
396
+ fn=transcribe_audio_base64,
397
+ inputs=[stt_input],
398
+ outputs=[stt_output],
399
+ api_name="transcribe_stt"
400
+ )
401
+
402
+ ai_btn = gr.Button("AI", visible=False)
403
+ ai_btn.click(
404
+ fn=generate_answer,
405
+ inputs=[ai_input],
406
+ outputs=[ai_output],
407
+ api_name="answer_ai"
408
+ )
409
 
410
  gr.Markdown("""
411
+ ---
412
+ **Model:** SmolLM2-360M-Instruct (250-400 tokens/second on CPU)
413
+ **Search:** Parallel multi-engine (Tavily → Brave → Searx → DDG)
414
+ **Expected Speed:** 1.5-2.5 seconds total
415
+ **All requests logged** - Check Logs tab in HF Space
416
+
417
+ 🟢 < 2s | 🟡 2-3s | 🔴 > 3s
418
  """)
419
 
420
  if __name__ == "__main__":