ArchCoder committed on
Commit 4b20d59 · verified · 1 Parent(s): 7b37201

Update app.py

Files changed (1): app.py +87 -89
app.py CHANGED
@@ -7,7 +7,7 @@ import base64
 import tempfile
 import os
 import logging
-import time # ADDED - was missing!
+import time
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
 from html.parser import HTMLParser
@@ -20,8 +20,8 @@ logger = logging.getLogger(__name__)
 logger.info("Loading Whisper model...")
 whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
 
-logger.info("Loading Qwen 2.5 1.5B-Instruct...")
-model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+logger.info("Loading Qwen 2.5 0.5B-Instruct (FASTEST)...")
+model_name = "Qwen/Qwen2.5-0.5B-Instruct" # SWITCHED BACK to 0.5B for speed
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
@@ -30,24 +30,21 @@ model = AutoModelForCausalLM.from_pretrained(
     low_cpu_mem_usage=True
 )
 
-logger.info("All models loaded successfully!")
+logger.info("All models loaded!")
 
-# Search APIs configuration
 TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
 BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
 
 def search_tavily(query):
-    """Priority 1: Tavily AI search"""
     logger.info("[TAVILY] Starting...")
     if not TAVILY_API_KEY:
-        logger.warning("[TAVILY] No API key")
         return None
 
     try:
         response = requests.post(
             'https://api.tavily.com/search',
             json={'api_key': TAVILY_API_KEY, 'query': query, 'max_results': 3},
-            timeout=3
+            timeout=2 # REDUCED timeout
         )
 
         if response.status_code == 200:
@@ -55,18 +52,16 @@ def search_tavily(query):
             results = data.get('results', [])
             context = ""
             for i, result in enumerate(results[:3], 1):
-                context += f"\n[Tavily {i}] {result.get('title', '')}\n{result.get('content', '')}\n"
-            logger.info(f"[TAVILY] Success - {len(results)} results")
+                context += f"\n[{i}] {result.get('title', '')}\n{result.get('content', '')}\n"
+            logger.info(f"[TAVILY] ")
             return context
-    except Exception as e:
-        logger.error(f"[TAVILY] Error: {str(e)}")
+    except:
+        pass
     return None
 
 def search_brave(query):
-    """Priority 2: Brave Search"""
     logger.info("[BRAVE] Starting...")
     if not BRAVE_API_KEY:
-        logger.warning("[BRAVE] No API key")
         return None
 
     try:
@@ -74,7 +69,7 @@ def search_brave(query):
             'https://api.search.brave.com/res/v1/web/search',
             params={'q': query, 'count': 3},
             headers={'X-Subscription-Token': BRAVE_API_KEY},
-            timeout=3
+            timeout=2
         )
 
         if response.status_code == 200:
@@ -82,29 +77,22 @@ def search_brave(query):
             results = data.get('web', {}).get('results', [])
             context = ""
             for i, result in enumerate(results[:3], 1):
-                context += f"\n[Brave {i}] {result.get('title', '')}\n{result.get('description', '')}\n"
-            logger.info(f"[BRAVE] Success - {len(results)} results")
+                context += f"\n[{i}] {result.get('title', '')}\n{result.get('description', '')}\n"
+            logger.info(f"[BRAVE] ")
             return context
-    except Exception as e:
-        logger.error(f"[BRAVE] Error: {str(e)}")
+    except:
+        pass
     return None
 
 def search_searx(query):
-    """Priority 3: Searx"""
     logger.info("[SEARX] Starting...")
 
-    searx_instances = [
-        'https://searx.be/search',
-        'https://searx.work/search',
-        'https://search.sapti.me/search'
-    ]
-
-    for instance in searx_instances:
+    for instance in ['https://searx.be/search', 'https://searx.work/search']:
         try:
             response = requests.get(
                 instance,
                 params={'q': query, 'format': 'json', 'categories': 'general'},
-                timeout=3
+                timeout=2
             )
 
             if response.status_code == 200:
@@ -112,23 +100,21 @@ def search_searx(query):
                 results = data.get('results', [])
                 context = ""
                 for i, result in enumerate(results[:3], 1):
-                    context += f"\n[Searx {i}] {result.get('title', '')}\n{result.get('content', '')}\n"
-                logger.info(f"[SEARX] Success from {instance}")
+                    context += f"\n[{i}] {result.get('title', '')}\n{result.get('content', '')}\n"
+                logger.info(f"[SEARX] ")
                 return context
-        except Exception as e:
-            logger.warning(f"[SEARX] Failed {instance}: {str(e)}")
-
+        except:
+            continue
     return None
 
 def search_duckduckgo_html(query):
-    """Priority 4: DuckDuckGo HTML"""
     logger.info("[DDG] Starting...")
     try:
         response = requests.get(
             'https://html.duckduckgo.com/html/',
             params={'q': query},
             headers={'User-Agent': 'Mozilla/5.0'},
-            timeout=3
+            timeout=2
         )
 
         if response.status_code == 200:
@@ -158,51 +144,45 @@ def search_duckduckgo_html(query):
 
             context = ""
             for i, result in enumerate(parser.results[:3], 1):
-                context += f"\n[DDG {i}] {result}\n"
+                context += f"\n[{i}] {result}\n"
 
             if context:
-                logger.info(f"[DDG] Success")
+                logger.info(f"[DDG] ")
                 return context
-    except Exception as e:
-        logger.error(f"[DDG] Error: {str(e)}")
+    except:
+        pass
     return None
 
 def search_parallel(query):
-    """Execute all searches in parallel"""
-    logger.info("[PARALLEL] Starting all engines...")
+    logger.info("[SEARCH] Parallel start")
 
     with ThreadPoolExecutor(max_workers=4) as executor:
         futures = {
             executor.submit(search_tavily, query): "Tavily",
             executor.submit(search_brave, query): "Brave",
             executor.submit(search_searx, query): "Searx",
-            executor.submit(search_duckduckgo_html, query): "DuckDuckGo"
+            executor.submit(search_duckduckgo_html, query): "DDG"
         }
 
-        priority_order = ["Tavily", "Brave", "Searx", "DuckDuckGo"]
        results = {}
-
        for future in futures:
            engine = futures[future]
            try:
-                result = future.result(timeout=4)
+                result = future.result(timeout=3)
                if result:
                    results[engine] = result
-                logger.info(f"[PARALLEL] {engine} completed")
-            except Exception as e:
-                logger.error(f"[PARALLEL] {engine} failed: {str(e)}")
+            except:
+                pass
 
-    for engine in priority_order:
-        if engine in results and results[engine]:
-            logger.info(f"[PARALLEL] Using {engine}")
+    for engine in ["Tavily", "Brave", "Searx", "DDG"]:
+        if engine in results:
+            logger.info(f"[SEARCH] Using {engine}")
            return results[engine], engine
 
-    logger.error("[PARALLEL] All failed")
-    return "Unable to fetch search results.", "None"
+    return "No search results available.", "None"
 
 def transcribe_audio_base64(audio_base64):
-    """Transcribe audio"""
-    logger.info("[PLUELY STT] Request")
+    logger.info("[STT] Request")
    try:
        audio_bytes = base64.b64decode(audio_base64)
 
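Note on the result-collection loop above: calling `future.result(timeout=3)` once per engine in dict order can block up to 3 s for each engine, so the worst case is roughly 12 s, not 3 s. A minimal alternative sketch using `concurrent.futures.as_completed` with one shared budget; the four search function names match the diff, everything else here is illustrative and not part of the commit:

```python
# Illustrative only - not part of this commit.
from concurrent.futures import ThreadPoolExecutor, as_completed
from concurrent.futures import TimeoutError as FuturesTimeout

def search_parallel_bounded(query):
    engines = {  # insertion order doubles as priority order
        "Tavily": search_tavily,
        "Brave": search_brave,
        "Searx": search_searx,
        "DDG": search_duckduckgo_html,
    }
    results = {}
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = {executor.submit(fn, query): name for name, fn in engines.items()}
        try:
            # One overall 3 s budget for the whole collection step,
            # instead of up to 3 s per future.
            for future in as_completed(futures, timeout=3):
                output = future.result()
                if output:
                    results[futures[future]] = output
        except FuturesTimeout:
            pass  # keep whatever finished inside the shared budget
        # Note: leaving the "with" block still waits for stragglers;
        # the per-request timeout=2 in each engine keeps that short.
    for name in engines:  # first hit in priority order wins
        if name in results:
            return results[name], name
    return "No search results available.", "None"
```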
@@ -214,61 +194,80 @@ def transcribe_audio_base64(audio_base64):
         transcription = " ".join([seg.text for seg in segments])
         os.unlink(temp_path)
 
-        logger.info(f"[PLUELY STT] Success")
+        logger.info(f"[STT] ")
         return {"text": transcription.strip()}
-
     except Exception as e:
-        logger.error(f"[PLUELY STT] Error: {str(e)}")
         return {"error": str(e)}
 
 def generate_answer(text_input):
-    """Generate answer"""
-    logger.info(f"[PLUELY AI] Question: {text_input}")
+    logger.info(f"[AI] Q: {text_input}")
     try:
         if not text_input or not text_input.strip():
             return "No input provided"
 
         current_date = datetime.now().strftime("%B %d, %Y")
 
-        logger.info("[PLUELY AI] Searching...")
+        search_start = time.time()
         search_results, search_engine = search_parallel(text_input)
-        logger.info(f"[PLUELY AI] Using {search_engine}")
+        search_time = time.time() - search_start
+        logger.info(f"[AI] Search: {search_time:.2f}s")
 
+        # IMPROVED PROMPT - Structured multi-point answers
         messages = [
-            {"role": "system", "content": f"Today is {current_date}. Answer using ONLY the search results. Be concise (100-120 words)."},
-            {"role": "user", "content": f"Search Results:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer based strictly on search results:"}
+            {
+                "role": "system",
+                "content": f"""Today is {current_date}. You are a concise assistant.
+
+When answering:
+- If question asks about multiple things, list each with a one-line description
+- Use bullet points for multiple items
+- Keep total answer to 80-100 words
+- Answer ONLY from search results"""
+            },
+            {
+                "role": "user",
+                "content": f"""Search Results:
+{search_results}
+
+Question: {text_input}
+
+Answer (80-100 words, use bullets if multiple topics):"""
+            }
         ]
 
         text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
-        logger.info("[PLUELY AI] Generating...")
-        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1500)
+        gen_start = time.time()
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1200)
 
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=150,
-                temperature=0.4,
+                max_new_tokens=100, # REDUCED from 150
+                temperature=0.7, # INCREASED for faster sampling
                 do_sample=True,
                 top_p=0.9,
+                top_k=50, # ADDED for speed
                 repetition_penalty=1.1,
                 pad_token_id=tokenizer.eos_token_id
             )
 
+        gen_time = time.time() - gen_start
+        logger.info(f"[AI] Gen: {gen_time:.2f}s")
+
         answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
         answer_with_source = f"{answer}\n\n**Source:** {search_engine}"
 
-        logger.info(f"[PLUELY AI] Done")
+        logger.info(f"[AI] ")
        return answer_with_source
 
    except Exception as e:
-        logger.error(f"[PLUELY AI] Error: {str(e)}")
+        logger.error(f"[AI] Error: {str(e)}")
        return f"Error: {str(e)}"
 
 def process_audio(audio_path, question_text):
-    """Main pipeline"""
    start_time = time.time()
-    logger.info("="*50)
+    logger.info("="*40)
 
    if audio_path:
        try:
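Since this commit is chasing wall-clock latency, it is worth noting that perceived latency can also drop by streaming tokens as they decode, rather than waiting for all `max_new_tokens`. A sketch using transformers' `TextIteratorStreamer`, reusing the `model`/`tokenizer` names and generation kwargs from the hunk above; nothing here is in the commit itself, and wiring a generator into the Gradio handlers is left as an exercise:

```python
# Illustrative only - not part of this commit.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_answer_streaming(prompt_text):
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=1200)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() runs in a background thread; the streamer yields text pieces
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    partial = ""
    for piece in streamer:
        partial += piece
        yield partial  # a Gradio click handler can render each partial string
```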
@@ -285,11 +284,11 @@ def process_audio(audio_path, question_text):
     answer = generate_answer(question)
     total_time = time.time() - start_time
 
-    time_emoji = "🟢" if total_time < 4.0 else "🟡" if total_time < 6.0 else "🔴"
+    time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 5.0 else "🔴"
     timing = f"\n\n{time_emoji} **Time:** {total_time:.2f}s"
 
-    logger.info(f"[MAIN] Total: {total_time:.2f}s")
-    logger.info("="*50)
+    logger.info(f"[TOTAL] {total_time:.2f}s")
+    logger.info("="*40)
 
     return answer + timing, total_time
@@ -302,8 +301,8 @@ def text_handler(text_input):
 # Gradio UI
 with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # ⚡ Fast Political Q&A
-    **Parallel multi-search + Qwen 2.5 1.5B**
+    # ⚡ Ultra-Fast Q&A System
+    **Qwen 0.5B + Parallel Search** (Optimized for <3s response)
     """)
 
     with gr.Tab("🎙️ Audio"):
@@ -312,7 +311,7 @@ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
             audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
             audio_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
         with gr.Column():
-            audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
+            audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
             audio_time = gr.Number(label="Time (s)", precision=2)
 
         audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
@@ -323,14 +322,15 @@ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
             text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
             text_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
         with gr.Column():
-            text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
+            text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
             text_time = gr.Number(label="Time (s)", precision=2)
 
         text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
 
         gr.Examples(
             examples=[
-                ["Is internet shut down in Bareilly today?"],
+                ["What are the top 3 news stories today?"],
+                ["Is internet shut down in Bareilly?"],
                 ["Who won 2024 US election?"]
             ],
             inputs=text_input
@@ -338,14 +338,9 @@ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
 
     with gr.Tab("🔌 API"):
         gr.Markdown("""
-        **Pluely Endpoints:**
-
-        STT: `https://archcoder-basic-app.hf.space/call/transcribe_stt`
-        AI: `https://archcoder-basic-app.hf.space/call/answer_ai`
-
-        **Response Paths:**
-        STT: `data[0].text`
-        AI: `data[0]`
+        **Endpoints:**
+        - STT: `/call/transcribe_stt` → Path: `data[0].text`
+        - AI: `/call/answer_ai` → Path: `data[0]`
         """)
 
     with gr.Row(visible=False):
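The endpoints documented in the hunk above follow Gradio's two-step `/call` protocol: POST the inputs to receive an `event_id`, then GET the server-sent-event stream for the result. A hedged client sketch; the base URL is taken from the removed endpoint docs above, and the field layout follows Gradio 4's documented curl API:

```python
# Illustrative client - not part of this commit.
import json
import requests

BASE = "https://archcoder-basic-app.hf.space"  # from the removed endpoint docs above

def ask(question: str) -> str:
    # Step 1: submit the job, receive an event id
    r = requests.post(f"{BASE}/call/answer_ai", json={"data": [question]}, timeout=30)
    event_id = r.json()["event_id"]
    # Step 2: read the SSE stream; the last "data:" line holds the outputs list
    payload = None
    with requests.get(f"{BASE}/call/answer_ai/{event_id}", stream=True, timeout=60) as s:
        for line in s.iter_lines(decode_unicode=True):
            if line and line.startswith("data:"):
                payload = json.loads(line[len("data:"):])
    return payload[0] if payload else ""  # AI response path: data[0]

if __name__ == "__main__":
    print(ask("Who won 2024 US election?"))
```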
@@ -357,7 +352,10 @@ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
     gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
     gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
 
-    gr.Markdown("🟢 < 4s | 🟡 4-6s | 🔴 > 6s")
+    gr.Markdown("""
+    **Speed:** Qwen 0.5B (1-2s) + Parallel search (1s) = **2-3s total**
+    🟢 < 3s | 🟡 3-5s | 🔴 > 5s
+    """)
 
 if __name__ == "__main__":
     demo.queue(max_size=5)