ArchCoder committed on
Commit c51f8c4 · verified · 1 Parent(s): 107d19b

Update app.py

Files changed (1):
  1. app.py +282 -85
app.py CHANGED
@@ -1,13 +1,17 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
  import requests
5
- import time
6
  import base64
7
  import tempfile
8
  import os
9
  import logging
 
 
10
  from datetime import datetime
 
 
11
 
12
  # Setup logging
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -17,8 +21,8 @@ logger = logging.getLogger(__name__)
17
  logger.info("Loading Whisper model...")
18
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
19
 
20
- logger.info("Loading Qwen 0.5B (fastest model)...")
21
- model_name = "Qwen/Qwen2.5-0.5B-Instruct"
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
  model = AutoModelForCausalLM.from_pretrained(
24
  model_name,
@@ -29,53 +33,188 @@ model = AutoModelForCausalLM.from_pretrained(
29
 
30
  logger.info("All models loaded!")
31
 
32
- def search_web_google(query, max_results=3):
33
- """Use Google Custom Search API (free tier: 100 queries/day)"""
34
- logger.info(f"[SEARCH] Query: {query}")
35
 
36
- # Free Google Custom Search - No API key needed for basic search
37
  try:
38
- # Alternative: SerpAPI free tier or direct Google scraping
39
- url = "https://www.googleapis.com/customsearch/v1"
40
- params = {
41
- 'q': query,
42
- 'num': max_results,
43
- 'key': os.getenv('GOOGLE_API_KEY', ''), # Optional
44
- 'cx': os.getenv('GOOGLE_CX', '') # Optional
45
- }
46
-
47
- # Fallback to Searx (public instance - no API key)
48
- searx_url = "https://searx.be/search"
49
- searx_params = {
50
- 'q': query,
51
- 'format': 'json',
52
- 'categories': 'general',
53
- 'language': 'en'
54
- }
55
-
56
- response = requests.get(searx_url, params=searx_params, timeout=5)
57
 
58
  if response.status_code == 200:
59
  data = response.json()
60
  results = data.get('results', [])
61
 
62
  context = ""
63
- for i, result in enumerate(results[:max_results], 1):
64
- title = result.get('title', '')
65
- content = result.get('content', '')
66
- context += f"\n[Source {i}] {title}\n{content}\n"
67
- logger.info(f"[SEARCH] Result {i}: {title[:50]}...")
68
 
69
  if context:
70
- logger.info(f"[SEARCH] Success - {len(results)} results")
71
- return context.strip()
72
 
73
- logger.warning("[SEARCH] No results from Searx")
74
- return "Unable to fetch current information. Please try a different question."
 
75
 
76
- except Exception as e:
77
- logger.error(f"[SEARCH] Error: {str(e)}")
78
- return f"Search unavailable: {str(e)}"
79
 
80
  def transcribe_audio_base64(audio_base64):
81
  """Transcribe audio from base64"""
@@ -100,7 +239,7 @@ def transcribe_audio_base64(audio_base64):
100
  return {"error": str(e)}
101
 
102
  def generate_answer(text_input):
103
- """Generate fast answer using search results"""
104
  logger.info(f"[PLUELY AI] Question: {text_input}")
105
  try:
106
  if not text_input or not text_input.strip():
@@ -108,36 +247,62 @@ def generate_answer(text_input):
108
 
109
  current_date = datetime.now().strftime("%B %d, %Y")
110
 
111
- # Search
112
- logger.info("[PLUELY AI] Searching...")
113
- search_results = search_web_google(text_input, max_results=3)
114
- logger.info(f"[PLUELY AI] Search done ({len(search_results)} chars)")
115
 
116
- # Simple prompt for speed
117
- prompt = f"""Today is {current_date}. Answer based on these search results:
118
-
119
  {search_results}
120
 
121
  Question: {text_input}
122
- Answer (80-100 words):"""
123
 
124
- logger.info("[PLUELY AI] Generating...")
125
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1000)
126
 
127
  with torch.no_grad():
128
  outputs = model.generate(
129
  **inputs,
130
- max_new_tokens=120,
131
- temperature=0.3,
132
  do_sample=True,
133
  top_p=0.9,
 
134
  pad_token_id=tokenizer.eos_token_id
135
  )
136
 
137
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
138
 
139
- logger.info(f"[PLUELY AI] Done ({len(answer)} chars)")
140
- return answer
141
 
142
  except Exception as e:
143
  logger.error(f"[PLUELY AI] Error: {str(e)}")
@@ -156,7 +321,7 @@ def process_audio(audio_path, question_text):
156
  question = " ".join([seg.text for seg in segments])
157
  logger.info(f"[MAIN] Transcribed: {question}")
158
  except Exception as e:
159
- logger.error(f"[MAIN] Transcription failed: {str(e)}")
160
  return f"❌ Error: {str(e)}", 0.0
161
  else:
162
  question = question_text
@@ -167,23 +332,18 @@ def process_audio(audio_path, question_text):
167
 
168
  transcription_time = time.time() - start_time
169
 
170
- # Search
171
- search_start = time.time()
172
- search_web_google(question, max_results=3)
173
- search_time = time.time() - search_start
174
-
175
- # Generate
176
- llm_start = time.time()
177
  answer = generate_answer(question)
178
- llm_time = time.time() - llm_start
179
 
180
  total_time = time.time() - start_time
181
- time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 5.0 else "🔴"
182
 
183
  logger.info(f"[MAIN] Total: {total_time:.2f}s")
184
  logger.info("="*50)
185
 
186
- timing = f"\n\n{time_emoji} **Time:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
187
 
188
  return answer + timing, total_time
189
 
@@ -194,53 +354,83 @@ def text_handler(text_input):
194
  return process_audio(None, text_input)
195
 
196
  # Gradio UI
197
- with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
198
  gr.Markdown("""
199
- # ⚡ Ultra-Fast Political Q&A
200
- **Search-grounded answers** - Qwen 0.5B + Searx
201
  """)
202
 
203
  with gr.Tab("🎙️ Audio"):
204
  with gr.Row():
205
  with gr.Column():
206
- audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
207
- audio_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
208
  with gr.Column():
209
- audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
210
- audio_time = gr.Number(label="Time (s)", precision=2)
211
 
212
  audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
213
 
214
  with gr.Tab("✍️ Text"):
215
  with gr.Row():
216
  with gr.Column():
217
- text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
218
- text_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
219
  with gr.Column():
220
- text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
221
- text_time = gr.Number(label="Time (s)", precision=2)
222
 
223
  text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
224
 
225
  gr.Examples(
226
  examples=[
227
  ["Is internet shut down in Bareilly today?"],
228
- ["Who won 2024 US election?"],
229
- ["Current India inflation rate?"]
 
230
  ],
231
  inputs=text_input
232
  )
233
 
234
- with gr.Tab("🔌 API"):
235
  gr.Markdown("""
236
- ### Pluely Endpoints
237
 
238
- **STT:** `https://archcoder-basic-app.hf.space/call/transcribe_stt`
239
- **AI:** `https://archcoder-basic-app.hf.space/call/answer_ai`
240
 
241
- **Response Paths:**
242
- STT: `data[0].text`
243
- AI: `data[0]`
244
  """)
245
 
246
  with gr.Row(visible=False):
@@ -252,7 +442,14 @@ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
252
  gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
253
  gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
254
 
255
- gr.Markdown("🟢 < 3s | 🟡 3-5s | 🔴 > 5s")
256
 
257
  if __name__ == "__main__":
258
  demo.queue(max_size=5)
 
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
+ import time  # still needed: process_audio() below uses time.time() for timing
5
  import requests
 
6
  import base64
7
  import tempfile
8
  import os
9
  import logging
10
+ import asyncio
11
+ import aiohttp
12
  from datetime import datetime
13
+ from concurrent.futures import ThreadPoolExecutor
14
+ from functools import partial
15
 
16
  # Setup logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
21
  logger.info("Loading Whisper model...")
22
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
23
 
24
+ logger.info("Loading Qwen 2.5 1.5B-Instruct (fastest quality model)...")
25
+ model_name = "Qwen/Qwen2.5-1.5B-Instruct"
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
27
  model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
 
33
 
34
  logger.info("All models loaded!")
35
 
36
+ # Search APIs configuration (priority order)
37
+ TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '') # Get from environment
38
+ BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
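+ # Both keys are optional (e.g. set as Space secrets); when a key is absent that
+ # engine is skipped and the search falls back to Searx / DuckDuckGo below.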
39
+
40
+ def search_tavily(query):
41
+ """Priority 1: Tavily AI search (best for AI agents)"""
42
+ logger.info("[TAVILY] Starting search...")
43
+ if not TAVILY_API_KEY:
44
+ logger.warning("[TAVILY] No API key, skipping")
45
+ return None
46
 
 
47
  try:
48
+ response = requests.post(
49
+ 'https://api.tavily.com/search',
50
+ json={
51
+ 'api_key': TAVILY_API_KEY,
52
+ 'query': query,
53
+ 'max_results': 3,
54
+ 'include_answer': True
55
+ },
56
+ timeout=3
57
+ )
58
 
59
  if response.status_code == 200:
60
  data = response.json()
61
  results = data.get('results', [])
62
+ context = ""
63
+ for i, result in enumerate(results[:3], 1):
64
+ context += f"\n[Tavily {i}] {result.get('title', '')}\n{result.get('content', '')}\n"
65
+ logger.info(f"[TAVILY] Success - {len(results)} results")
66
+ return context
67
+ except Exception as e:
68
+ logger.error(f"[TAVILY] Error: {str(e)}")
69
+ return None
70
+
71
+ def search_brave(query):
72
+ """Priority 2: Brave Search API"""
73
+ logger.info("[BRAVE] Starting search...")
74
+ if not BRAVE_API_KEY:
75
+ logger.warning("[BRAVE] No API key, skipping")
76
+ return None
77
+
78
+ try:
79
+ response = requests.get(
80
+ 'https://api.search.brave.com/res/v1/web/search',
81
+ params={'q': query, 'count': 3},
82
+ headers={'X-Subscription-Token': BRAVE_API_KEY},
83
+ timeout=3
84
+ )
85
+
86
+ if response.status_code == 200:
87
+ data = response.json()
88
+ results = data.get('web', {}).get('results', [])
89
+ context = ""
90
+ for i, result in enumerate(results[:3], 1):
91
+ context += f"\n[Brave {i}] {result.get('title', '')}\n{result.get('description', '')}\n"
92
+ logger.info(f"[BRAVE] Success - {len(results)} results")
93
+ return context
94
+ except Exception as e:
95
+ logger.error(f"[BRAVE] Error: {str(e)}")
96
+ return None
97
+
98
+ def search_searx(query):
99
+ """Priority 3: Searx (free, unlimited)"""
100
+ logger.info("[SEARX] Starting search...")
101
+
102
+ # Try multiple public Searx instances
103
+ searx_instances = [
104
+ 'https://searx.be/search',
105
+ 'https://searx.work/search',
106
+ 'https://search.sapti.me/search'
107
+ ]
108
+
109
+ for instance in searx_instances:
110
+ try:
111
+ response = requests.get(
112
+ instance,
113
+ params={'q': query, 'format': 'json', 'categories': 'general', 'language': 'en'},
114
+ timeout=3
115
+ )
116
+
117
+ if response.status_code == 200:
118
+ data = response.json()
119
+ results = data.get('results', [])
120
+ context = ""
121
+ for i, result in enumerate(results[:3], 1):
122
+ context += f"\n[Searx {i}] {result.get('title', '')}\n{result.get('content', '')}\n"
123
+ logger.info(f"[SEARX] Success - {len(results)} results from {instance}")
124
+ return context
125
+ except Exception as e:
126
+ logger.warning(f"[SEARX] Failed {instance}: {str(e)}")
127
+ continue
128
+
129
+ logger.error("[SEARX] All instances failed")
130
+ return None
131
+
132
+ def search_duckduckgo_html(query):
133
+ """Priority 4: DuckDuckGo HTML scraping (fallback)"""
134
+ logger.info("[DDG] Starting search...")
135
+ try:
136
+ response = requests.get(
137
+ 'https://html.duckduckgo.com/html/',
138
+ params={'q': query},
139
+ headers={'User-Agent': 'Mozilla/5.0'},
140
+ timeout=3
141
+ )
142
+
143
+ if response.status_code == 200:
144
+ # Simple HTML parsing (basic extraction)
145
+ from html.parser import HTMLParser
146
+
147
+ class DDGParser(HTMLParser):
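+ # Minimal parser: collects only the text of result title links
+ # (anchors with class "result__a"); snippets and URLs are not captured.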
148
+ def __init__(self):
149
+ super().__init__()
150
+ self.results = []
151
+ self.in_result = False
152
+ self.current_text = ""
153
+
154
+ def handle_starttag(self, tag, attrs):
155
+ if tag == 'a' and any(k == 'class' and 'result__a' in v for k, v in attrs):
156
+ self.in_result = True
157
+
158
+ def handle_data(self, data):
159
+ if self.in_result:
160
+ self.current_text += data.strip()
161
+
162
+ def handle_endtag(self, tag):
163
+ if tag == 'a' and self.in_result:
164
+ self.results.append(self.current_text)
165
+ self.current_text = ""
166
+ self.in_result = False
167
+
168
+ parser = DDGParser()
169
+ parser.feed(response.text)
170
 
171
  context = ""
172
+ for i, result in enumerate(parser.results[:3], 1):
173
+ context += f"\n[DDG {i}] {result}\n"
174
 
175
  if context:
176
+ logger.info(f"[DDG] Success - {len(parser.results)} results")
177
+ return context
178
+ except Exception as e:
179
+ logger.error(f"[DDG] Error: {str(e)}")
180
+ return None
181
+
182
+ def search_parallel(query):
183
+ """Execute all searches in parallel, return first successful result"""
184
+ logger.info("[PARALLEL SEARCH] Starting all search engines...")
185
+
186
+ with ThreadPoolExecutor(max_workers=4) as executor:
187
+ # Submit all searches simultaneously
188
+ futures = {
189
+ executor.submit(search_tavily, query): "Tavily",
190
+ executor.submit(search_brave, query): "Brave",
191
+ executor.submit(search_searx, query): "Searx",
192
+ executor.submit(search_duckduckgo_html, query): "DuckDuckGo"
193
+ }
194
 
195
+ # Priority order: Tavily > Brave > Searx > DDG
196
+ priority_order = ["Tavily", "Brave", "Searx", "DuckDuckGo"]
197
+ results = {}
198
 
199
+ # Collect all results
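+ # Every future is awaited (up to 4s each); the winner is then chosen by the
+ # priority order above, not by which engine finished first.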
200
+ for future in futures:
201
+ engine = futures[future]
202
+ try:
203
+ result = future.result(timeout=4)
204
+ if result:
205
+ results[engine] = result
206
+ logger.info(f"[PARALLEL SEARCH] {engine} completed successfully")
207
+ except Exception as e:
208
+ logger.error(f"[PARALLEL SEARCH] {engine} failed: {str(e)}")
209
+
210
+ # Return results by priority
211
+ for engine in priority_order:
212
+ if engine in results and results[engine]:
213
+ logger.info(f"[PARALLEL SEARCH] Using {engine} results (highest priority available)")
214
+ return results[engine], engine
215
+
216
+ logger.error("[PARALLEL SEARCH] All search engines failed")
217
+ return "Unable to fetch search results. All search engines are unavailable.", "None"
218
 
219
  def transcribe_audio_base64(audio_base64):
220
  """Transcribe audio from base64"""
 
239
  return {"error": str(e)}
240
 
241
  def generate_answer(text_input):
242
+ """Generate answer using Qwen 2.5 1.5B"""
243
  logger.info(f"[PLUELY AI] Question: {text_input}")
244
  try:
245
  if not text_input or not text_input.strip():
 
247
 
248
  current_date = datetime.now().strftime("%B %d, %Y")
249
 
250
+ # Parallel search
251
+ logger.info("[PLUELY AI] Starting parallel search...")
252
+ search_results, search_engine = search_parallel(text_input)
253
+ logger.info(f"[PLUELY AI] Using {search_engine} results ({len(search_results)} chars)")
254
 
255
+ # Enhanced prompt for Qwen 2.5
256
+ messages = [
257
+ {
258
+ "role": "system",
259
+ "content": f"You are a factual assistant. Today is {current_date}. Answer questions using ONLY the provided search results. Be concise (100-120 words)."
260
+ },
261
+ {
262
+ "role": "user",
263
+ "content": f"""Search Results:
264
  {search_results}
265
 
266
  Question: {text_input}
 
267
 
268
+ Instructions:
269
+ 1. Answer based STRICTLY on the search results above
270
+ 2. Include relevant dates and facts from search results
271
+ 3. If search results are insufficient, say so
272
+ 4. Keep answer to 100-120 words
273
+
274
+ Answer:"""
275
+ }
276
+ ]
277
+
278
+ # Apply chat template
279
+ text = tokenizer.apply_chat_template(
280
+ messages,
281
+ tokenize=False,
282
+ add_generation_prompt=True
283
+ )
284
+
285
+ logger.info("[PLUELY AI] Generating answer...")
286
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1500)
287
 
288
  with torch.no_grad():
289
  outputs = model.generate(
290
  **inputs,
291
+ max_new_tokens=150,
292
+ temperature=0.4,
293
  do_sample=True,
294
  top_p=0.9,
295
+ repetition_penalty=1.1,
296
  pad_token_id=tokenizer.eos_token_id
297
  )
298
 
299
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
300
 
301
+ # Add source attribution
302
+ answer_with_source = f"{answer}\n\n**Source:** {search_engine}"
303
+
304
+ logger.info(f"[PLUELY AI] Answer generated ({len(answer)} chars)")
305
+ return answer_with_source
306
 
307
  except Exception as e:
308
  logger.error(f"[PLUELY AI] Error: {str(e)}")
 
321
  question = " ".join([seg.text for seg in segments])
322
  logger.info(f"[MAIN] Transcribed: {question}")
323
  except Exception as e:
324
+ logger.error(f"[MAIN] Error: {str(e)}")
325
  return f"❌ Error: {str(e)}", 0.0
326
  else:
327
  question = question_text
 
332
 
333
  transcription_time = time.time() - start_time
334
 
335
+ # Generate (includes parallel search)
336
+ gen_start = time.time()
337
  answer = generate_answer(question)
338
+ gen_time = time.time() - gen_start
339
 
340
  total_time = time.time() - start_time
341
+ time_emoji = "🟢" if total_time < 4.0 else "🟡" if total_time < 6.0 else "🔴"
342
 
343
  logger.info(f"[MAIN] Total: {total_time:.2f}s")
344
  logger.info("="*50)
345
 
346
+ timing = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
347
 
348
  return answer + timing, total_time
349
 
 
354
  return process_audio(None, text_input)
355
 
356
  # Gradio UI
357
+ with gr.Blocks(title="Fast Q&A - Qwen 1.5B + Multi-Search", theme=gr.themes.Soft()) as demo:
358
  gr.Markdown("""
359
+ # ⚡ Ultra-Fast Political Q&A System
360
+ **Parallel multi-search** (Tavily → Brave → Searx → DDG) + **Qwen 2.5 1.5B**
361
+
362
+ **Features:**
363
+ - Whisper-tiny transcription
364
+ - 4 search engines queried in parallel (the highest-priority engine that returns results is used)
365
+ - Qwen 2.5 1.5B-Instruct (2-3s CPU inference)
366
+ - Search-grounded answers only
367
  """)
368
 
369
  with gr.Tab("🎙️ Audio"):
370
  with gr.Row():
371
  with gr.Column():
372
+ audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record/Upload Audio")
373
+ audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
374
  with gr.Column():
375
+ audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
376
+ audio_time = gr.Number(label="Time (seconds)", precision=2)
377
 
378
  audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
379
 
380
  with gr.Tab("✍️ Text"):
381
  with gr.Row():
382
  with gr.Column():
383
+ text_input = gr.Textbox(label="Ask anything...", placeholder="Is internet shut down in Bareilly today?", lines=3)
384
+ text_submit = gr.Button("🚀 Submit Question", variant="primary", size="lg")
385
  with gr.Column():
386
+ text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
387
+ text_time = gr.Number(label="Time (seconds)", precision=2)
388
 
389
  text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
390
 
391
  gr.Examples(
392
  examples=[
393
  ["Is internet shut down in Bareilly today?"],
394
+ ["Who won the 2024 US presidential election?"],
395
+ ["What is current India inflation rate?"],
396
+ ["Latest Israel Palestine conflict news?"]
397
  ],
398
  inputs=text_input
399
  )
400
 
401
+ with gr.Tab("🔌 Pluely API"):
402
  gr.Markdown("""
403
+ ### API Endpoints
404
+
405
+ **STT (Audio → Text):**
406
+ ```
407
+ curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
408
+ -H "Content-Type: application/json" \\
409
+ -d '{"data": ["BASE64_AUDIO"]}'
410
+ ```
411
+ **Response Path:** `data[0].text`
412
 
413
+ **AI (Text → Answer):**
414
+ ```
415
+ curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
416
+ -H "Content-Type: application/json" \\
417
+ -d '{"data": ["Your question"]}'
418
+ ```
419
+ **Response Path:** `data[0]`
420
 
421
+ ---
422
+
423
+ ### Pluely Configuration
424
+
425
+ **Custom STT Provider:**
426
+ ```
427
+ curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
428
+ ```
429
+
430
+ **Custom AI Provider:**
431
+ ```
432
+ curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
433
+ ```
434
  """)
435
 
436
  with gr.Row(visible=False):
 
442
  gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
443
  gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
444
 
445
+ gr.Markdown("""
446
+ ---
447
+ **Model:** Qwen 2.5 1.5B-Instruct (fastest quality model for CPU)
448
+ **Search Strategy:** Parallel execution (Tavily → Brave → Searx → DDG by priority)
449
+ **All requests logged** - Check Logs tab
450
+
451
+ 🟢 < 4s | 🟡 4-6s | 🔴 > 6s
452
+ """)
453
 
454
  if __name__ == "__main__":
455
  demo.queue(max_size=5)
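
The API tab above documents the two Pluely endpoints (`transcribe_stt`, `answer_ai`) and their response paths. For illustration only (not part of this commit), a minimal Python client sketch using `gradio_client`, assuming the public Space URL shown in that markdown and a hypothetical local `question.wav`:

```
# Illustrative client sketch -- not part of app.py.
import base64
from gradio_client import Client

# Assumption: the public Space URL documented in the API tab.
client = Client("https://archcoder-basic-app.hf.space")

# Text -> answer: reaches generate_answer() via api_name="answer_ai".
answer = client.predict("Is internet shut down in Bareilly today?", api_name="/answer_ai")
print(answer)

# Audio -> text: reaches transcribe_audio_base64() via api_name="transcribe_stt".
with open("question.wav", "rb") as f:  # hypothetical audio file
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")
stt = client.predict(audio_b64, api_name="/transcribe_stt")
print(stt)  # per the docs above, the transcription text is at data[0].text
```

`gradio_client` wraps Gradio's `/call/` protocol; when hitting the REST endpoints directly as in the curl examples, the POST normally returns an event id and the result is then fetched with a follow-up GET to `/call/<api_name>/<event_id>`.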