ArchCoder committed on
Commit
e28d45d
·
verified ·
1 Parent(s): c2c3825

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -231
app.py CHANGED
@@ -11,6 +11,9 @@ import time
11
  from datetime import datetime
12
  from concurrent.futures import ThreadPoolExecutor
13
  from html.parser import HTMLParser
 
 
 
14
 
15
  # Setup logging
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
@@ -35,58 +38,9 @@ logger.info("All models loaded!")
35
  TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
36
  BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
37
 
38
def search_tavily(query):
    """Query the Tavily search API and format up to two hits as bullets.

    Args:
        query: Search string sent as the ``query`` field of the request.

    Returns:
        A newline-joined string of ``• title: content`` bullets (content
        cut to 120 characters), or ``None`` when no API key is configured,
        the request fails, or the response status is not 200. An empty
        result list yields an empty string, which callers treat as falsy.
    """
    if not TAVILY_API_KEY:
        return None
    try:
        response = requests.post(
            'https://api.tavily.com/search',
            json={'api_key': TAVILY_API_KEY, 'query': query, 'max_results': 2},
            timeout=1.5,
        )
        if response.status_code == 200:
            hits = response.json().get('results', [])
            # `or ''` guards against an explicit null field, which would
            # otherwise raise on slicing and discard the whole response.
            return "\n".join(
                f"• {hit.get('title', '')}: {(hit.get('content') or '')[:120]}"
                for hit in hits[:2]
            )
    except Exception as exc:
        # Best-effort engine: report the failure instead of hiding it, and
        # let KeyboardInterrupt/SystemExit propagate (no bare except).
        logger.warning(f"[SEARCH] Tavily failed: {exc}")
    return None
54
-
55
def search_brave(query):
    """Query the Brave web-search API and format up to two hits as bullets.

    Args:
        query: Search string sent as the ``q`` request parameter.

    Returns:
        A newline-joined string of ``• title: description`` bullets
        (description cut to 120 characters), or ``None`` when no API key
        is configured, the request fails, or the status is not 200. An
        empty result list yields an empty string.
    """
    if not BRAVE_API_KEY:
        return None
    try:
        response = requests.get(
            'https://api.search.brave.com/res/v1/web/search',
            params={'q': query, 'count': 2},
            headers={'X-Subscription-Token': BRAVE_API_KEY},
            timeout=1.5,
        )
        if response.status_code == 200:
            hits = response.json().get('web', {}).get('results', [])
            # `or ''` guards against an explicit null description.
            return "\n".join(
                f"• {hit.get('title', '')}: {(hit.get('description') or '')[:120]}"
                for hit in hits[:2]
            )
    except Exception as exc:
        # Best-effort engine: log rather than silently swallow, and do not
        # trap KeyboardInterrupt/SystemExit the way a bare except would.
        logger.warning(f"[SEARCH] Brave failed: {exc}")
    return None
72
-
73
def search_searx(query):
    """Try public Searx instances in order and format the first 200 reply.

    Args:
        query: Search string sent as the ``q`` request parameter.

    Returns:
        A newline-joined string of ``• title: content`` bullets (content
        cut to 120 characters) from the first instance that answers with
        status 200, or ``None`` when every instance fails or answers with
        another status.
    """
    for instance in ['https://searx.be/search', 'https://searx.work/search']:
        try:
            response = requests.get(
                instance,
                params={'q': query, 'format': 'json', 'categories': 'general', 'language': 'en'},
                timeout=1.5,
            )
            if response.status_code == 200:
                hits = response.json().get('results', [])
                # `or ''` guards against an explicit null content field.
                return "\n".join(
                    f"• {hit.get('title', '')}: {(hit.get('content') or '')[:120]}"
                    for hit in hits[:2]
                )
        except Exception as exc:
            # Fall through to the next instance, but leave a trace of why
            # this one was skipped (and never trap KeyboardInterrupt).
            logger.warning(f"[SEARCH] Searx instance {instance} failed: {exc}")
            continue
    return None
88
-
89
- def search_duckduckgo(query):
90
  try:
91
  response = requests.get(
92
  'https://html.duckduckgo.com/html/',
@@ -119,86 +73,32 @@ def search_duckduckgo(query):
119
 
120
  parser = DDGParser()
121
  parser.feed(response.text)
122
- return "\n".join([f"• {r}" for r in parser.results[:2]]) if parser.results else None
 
 
123
  except:
124
  pass
125
- return None
126
-
127
def search_parallel(query):
    """Run every search engine concurrently and return the first usable hit.

    All four engine functions are submitted to a thread pool at once; the
    futures are then inspected in submission order (Tavily, Brave, Searx,
    DuckDuckGo), each waited on for at most two seconds.

    Args:
        query: Search string forwarded unchanged to each engine.

    Returns:
        A ``(result_text, engine_name)`` tuple for the first engine whose
        result is truthy, or ``("No search results available.", "None")``
        when none produced one.
    """
    logger.info("[SEARCH] Starting parallel search...")

    # NOTE: leaving the `with` block (including via `return`) waits for the
    # remaining workers to finish, per ThreadPoolExecutor shutdown semantics.
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = {
            executor.submit(search_tavily, query): "Tavily",
            executor.submit(search_brave, query): "Brave",
            executor.submit(search_searx, query): "Searx",
            executor.submit(search_duckduckgo, query): "DuckDuckGo",
        }

        # Dicts keep insertion order, so this preserves engine priority.
        for future, engine in futures.items():
            try:
                result = future.result(timeout=2)
            except Exception as exc:
                # Covers both engine errors and the 2 s timeout; record it
                # instead of discarding silently, then try the next engine.
                logger.warning(f"[SEARCH] ✗ {engine}: {exc!r}")
                continue
            if result:
                logger.info(f"[SEARCH] ✓ {engine}")
                return result, engine

    logger.warning("[SEARCH] All engines failed")
    return "No search results available.", "None"
150
-
151
def transcribe_audio_base64(audio_base64):
    """Decode base64 audio, transcribe it with the Whisper model, and clean up.

    Args:
        audio_base64: Base64-encoded audio data; it is written to a
            temporary ``.wav`` file that is handed to ``whisper_model``.

    Returns:
        ``{"text": <stripped transcription>}`` on success, or
        ``{"error": <message>}`` if decoding or transcription raised.
    """
    logger.info("[STT] Processing audio...")
    temp_path = None
    try:
        audio_bytes = base64.b64decode(audio_base64)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = temp_audio.name
            temp_audio.write(audio_bytes)

        segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
        # Consume the segments before the file is removed — presumably they
        # are produced lazily by the model (TODO confirm for this backend).
        transcription = " ".join(seg.text for seg in segments)

        logger.info("[STT] ✓ Transcribed")
        return {"text": transcription.strip()}

    except Exception as e:
        logger.error(f"[STT] Error: {str(e)}")
        return {"error": str(e)}

    finally:
        # Always remove the temp file; previously it leaked on any failure.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_err:
                logger.warning(f"[STT] Could not remove temp file: {cleanup_err}")
170
 
171
  def generate_answer(text_input):
172
- """Main answer generation - with debug logging"""
173
- logger.info("="*60)
174
- logger.info(f"[AI] Raw input: '{text_input}'")
175
- logger.info(f"[AI] Input type: {type(text_input)}, Length: {len(text_input) if text_input else 0}")
176
 
177
  try:
178
- # Handle literal {{TEXT}} from Pluely
179
- if not text_input or text_input.strip() in ["", "{{TEXT}}", "{{text}}", "$TEXT"]:
180
- error_msg = "❌ ERROR: No question received. Pluely sent empty/template variable.\n\nPluely Config Issue:\n- Check your curl command uses correct format\n- Make sure variable substitution is enabled"
181
- logger.error(f"[AI] {error_msg}")
182
- return error_msg
183
 
184
  current_date = datetime.now().strftime("%B %d, %Y")
185
 
186
  # Search
187
  search_start = time.time()
188
  search_results, search_engine = search_parallel(text_input)
189
- search_time = time.time() - search_start
190
- logger.info(f"[AI] Search completed in {search_time:.2f}s")
191
 
192
  # Generate
193
  messages = [
194
- {
195
- "role": "system",
196
- "content": f"You are a helpful assistant. Today is {current_date}. Answer questions using the provided search results. Be concise (60-80 words). Use bullet points for multiple items."
197
- },
198
- {
199
- "role": "user",
200
- "content": f"Search Results:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer based strictly on search results (60-80 words):"
201
- }
202
  ]
203
 
204
  prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
@@ -206,7 +106,6 @@ def generate_answer(text_input):
206
  gen_start = time.time()
207
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=800)
208
 
209
- logger.info("[AI] Generating answer...")
210
  with torch.no_grad():
211
  outputs = model.generate(
212
  **inputs,
@@ -216,138 +115,114 @@ def generate_answer(text_input):
216
  top_p=0.9,
217
  top_k=40,
218
  repetition_penalty=1.15,
219
- pad_token_id=tokenizer.eos_token_id,
220
- eos_token_id=tokenizer.eos_token_id
221
  )
222
 
223
- gen_time = time.time() - gen_start
224
- logger.info(f"[AI] Generation completed in {gen_time:.2f}s")
225
-
226
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
227
- full_answer = f"{answer}\n\n**Source:** {search_engine}"
228
 
229
- logger.info("[AI] ✓ Complete")
230
- logger.info("="*60)
231
- return full_answer
232
 
233
  except Exception as e:
234
  logger.error(f"[AI] Error: {str(e)}")
235
  return f"Error: {str(e)}"
236
 
237
def process_audio(audio_path, question_text):
    """Answer a question supplied either as an audio file or as plain text.

    When ``audio_path`` is truthy the audio is transcribed with
    ``whisper_model`` and the transcript becomes the question; otherwise
    ``question_text`` is used directly. The question is then passed to
    ``generate_answer`` and a timing footer is appended to the answer.

    Args:
        audio_path: Path to an audio file, or a falsy value for text input.
        question_text: Question string used when no audio path is given.

    Returns:
        A ``(message, seconds)`` tuple: the answer plus timing footer and
        the total elapsed time, or an error message with ``0.0`` when
        transcription fails or the question is empty.
    """
    started = time.time()
    logger.info("=" * 50)
    logger.info("[MAIN] New request received")

    if not audio_path:
        # Text path: take the caller's question as-is.
        question = question_text
        logger.info(f"[MAIN] Text input: {question}")
    else:
        logger.info(f"[MAIN] Processing audio: {audio_path}")
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join(piece.text for piece in segments)
            logger.info(f"[MAIN] Transcribed: {question}")
        except Exception as e:
            logger.error(f"[MAIN] Transcription failed: {str(e)}")
            return f"❌ Transcription error: {str(e)}", 0.0

    if not (question and question.strip()):
        logger.warning("[MAIN] No input provided")
        return "❌ No input provided", 0.0

    transcription_time = time.time() - started

    answer_started = time.time()
    answer = generate_answer(question)
    gen_time = time.time() - answer_started

    total_time = time.time() - started

    # Traffic-light indicator for overall latency.
    if total_time < 2.0:
        time_emoji = "🟢"
    elif total_time < 3.0:
        time_emoji = "🟡"
    else:
        time_emoji = "🔴"

    timing = (
        f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s"
        f" | Search+Gen={gen_time:.2f}s | **Total={total_time:.2f}s**"
    )

    logger.info(f"[MAIN] Total time: {total_time:.2f}s")
    logger.info("=" * 50)

    return answer + timing, total_time
274
-
275
def audio_handler(audio_path):
    """Run ``process_audio`` for an audio file, with no text question."""
    outcome = process_audio(audio_path, None)
    return outcome
277
-
278
def text_handler(text_input):
    """Run ``process_audio`` for a typed question, with no audio file."""
    outcome = process_audio(None, text_input)
    return outcome
280
-
281
- # Gradio Interface
282
- with gr.Blocks(title="Ultra-Fast Q&A - SmolLM2-360M", theme=gr.themes.Soft()) as demo:
283
- gr.Markdown("""
284
- # ⚡ Ultra-Fast Political Q&A System
285
- **SmolLM2-360M** (250-400 tok/s) + **Parallel Search**
286
- """)
287
-
288
- with gr.Tab("🎙️ Audio Input"):
289
- with gr.Row():
290
- with gr.Column():
291
- audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
292
- audio_submit = gr.Button("🚀 Submit", variant="primary")
293
- with gr.Column():
294
- audio_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
295
- audio_time = gr.Number(label="Time (s)", precision=2)
296
-
297
- audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
298
-
299
- with gr.Tab("✍️ Text Input"):
300
- with gr.Row():
301
- with gr.Column():
302
- text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
303
- text_submit = gr.Button("🚀 Submit", variant="primary")
304
- with gr.Column():
305
- text_output = gr.Textbox(label="Answer", lines=10, show_copy_button=True)
306
- text_time = gr.Number(label="Time (s)", precision=2)
307
 
308
- text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
 
 
309
 
310
- gr.Examples(examples=[["Who is the US president?"]], inputs=text_input)
311
-
312
- with gr.Tab("🔌 Pluely API"):
313
- gr.Markdown("""
314
- ## ⚠️ IMPORTANT: Pluely Configuration
315
 
316
- ### If you see "{{TEXT}}" in logs, try these formats:
 
317
 
318
- **Format 1 (Windows CMD - Use This First):**
319
- ```
320
- curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d "{\\"data\\": [\\"TEXT_PLACEHOLDER\\"]}"
321
- ```
322
- Then in Pluely, replace `TEXT_PLACEHOLDER` with `{{TEXT}}`
 
 
 
 
 
 
 
 
323
 
324
- **Format 2 (Alternative):**
325
- ```
326
- curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" --data-binary "{\\"data\\": [\\"{{TEXT}}\\"]}"
327
- ```
328
 
329
- **Response Path:** `data[0]`
 
330
 
331
- ---
 
 
 
 
 
 
 
 
 
332
 
333
- ### STT Endpoint:
334
- ```
335
- curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d "{\\"data\\": [\\"{{AUDIO_BASE64}}\\"]}"
336
- ```
337
- **Response Path:** `data[0].text`
338
- """)
339
 
340
- with gr.Row(visible=False):
341
- stt_in = gr.Textbox()
342
- stt_out = gr.JSON()
343
- ai_in = gr.Textbox()
344
- ai_out = gr.Textbox()
345
 
346
- gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
347
- gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
- gr.Markdown("🟢 < 2s | 🟡 2-3s | 🔴 > 3s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
  if __name__ == "__main__":
352
- demo.queue(max_size=5)
353
- demo.launch()
 
11
  from datetime import datetime
12
  from concurrent.futures import ThreadPoolExecutor
13
  from html.parser import HTMLParser
14
+ from fastapi import FastAPI, Request
15
+ from fastapi.responses import JSONResponse
16
+ import uvicorn
17
 
18
  # Setup logging
19
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
 
38
  TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '')
39
  BRAVE_API_KEY = os.getenv('BRAVE_API_KEY', '')
40
 
41
+ def search_parallel(query):
42
+ """Simplified search - just DuckDuckGo for speed"""
43
+ logger.info("[SEARCH] Starting...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  try:
45
  response = requests.get(
46
  'https://html.duckduckgo.com/html/',
 
73
 
74
  parser = DDGParser()
75
  parser.feed(response.text)
76
+ result = "\n".join([f"• {r}" for r in parser.results[:2]]) if parser.results else "No results"
77
+ logger.info("[SEARCH] ✓")
78
+ return result, "DuckDuckGo"
79
  except:
80
  pass
81
+ return "No search results", "None"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def generate_answer(text_input):
84
+ """Main answer generation"""
85
+ logger.info(f"[AI] Question: {text_input[:60]}...")
 
 
86
 
87
  try:
88
+ if not text_input or not text_input.strip():
89
+ return "No input provided"
 
 
 
90
 
91
  current_date = datetime.now().strftime("%B %d, %Y")
92
 
93
  # Search
94
  search_start = time.time()
95
  search_results, search_engine = search_parallel(text_input)
96
+ logger.info(f"[AI] Search: {time.time()-search_start:.2f}s")
 
97
 
98
  # Generate
99
  messages = [
100
+ {"role": "system", "content": f"Today is {current_date}. Answer briefly using search results (60-80 words)."},
101
+ {"role": "user", "content": f"Search:\n{search_results}\n\nQ: {text_input}\nA:"}
 
 
 
 
 
 
102
  ]
103
 
104
  prompt = f"<|im_start|>system\n{messages[0]['content']}<|im_end|>\n<|im_start|>user\n{messages[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
 
106
  gen_start = time.time()
107
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=800)
108
 
 
109
  with torch.no_grad():
110
  outputs = model.generate(
111
  **inputs,
 
115
  top_p=0.9,
116
  top_k=40,
117
  repetition_penalty=1.15,
118
+ pad_token_id=tokenizer.eos_token_id
 
119
  )
120
 
 
 
 
121
  answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
122
+ logger.info(f"[AI] Gen: {time.time()-gen_start:.2f}s | ✓")
123
 
124
+ return f"{answer}\n\n**Source:** {search_engine}"
 
 
125
 
126
  except Exception as e:
127
  logger.error(f"[AI] Error: {str(e)}")
128
  return f"Error: {str(e)}"
129
 
130
def transcribe_audio_base64(audio_base64):
    """Decode base64 audio and return its transcription as plain text.

    Args:
        audio_base64: Base64-encoded audio data; it is written to a
            temporary ``.wav`` file that is handed to ``whisper_model``.

    Returns:
        The stripped transcription string, or ``""`` if decoding or
        transcription raised (the error is logged).
    """
    logger.info("[STT] Start")
    temp_path = None
    try:
        audio_bytes = base64.b64decode(audio_base64)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            # Record the path before writing so a failed write is cleaned up too.
            temp_path = temp_audio.name
            temp_audio.write(audio_bytes)

        segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
        # Consume the segments before the file is removed — presumably they
        # are produced lazily by the model (TODO confirm for this backend).
        transcription = " ".join(seg.text for seg in segments)

        logger.info("[STT] ")
        return transcription.strip()

    except Exception as e:
        logger.error(f"[STT] Error: {str(e)}")
        return ""

    finally:
        # Always remove the temp file; previously it leaked on any failure.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_err:
                logger.warning(f"[STT] Could not remove temp file: {cleanup_err}")
150
+
151
+ # Create FastAPI app for Pluely endpoints
152
+ app = FastAPI()
153
+
154
@app.post("/api/stt")
async def api_stt(request: Request):
    """Direct STT endpoint for Pluely.

    Expects a JSON object with an ``audio`` field holding base64 audio.

    Returns:
        ``{"text": ...}`` with the transcription on success; a 400
        ``{"error": ...}`` response when the body is not a JSON object or
        carries no usable ``audio`` string; a 500 ``{"error": ...}``
        response on any other failure (including unparsable JSON).
    """
    try:
        body = await request.json()

        # Valid JSON can still be a list/number/string; reject it as a
        # client error instead of failing on `.get` and answering 500.
        if not isinstance(body, dict):
            logger.info("[API STT] Rejected: body is not a JSON object")
            return JSONResponse({"error": "Expected a JSON object"}, status_code=400)

        audio_base64 = body.get("audio", "")
        if not audio_base64 or not isinstance(audio_base64, str):
            return JSONResponse({"error": "No audio data"}, status_code=400)

        # Log only the payload size: dumping the full base64 audio bloats
        # the logs and leaks user recordings into them.
        logger.info(f"[API STT] Received audio payload ({len(audio_base64)} base64 chars)")

        text = transcribe_audio_base64(audio_base64)
        return JSONResponse({"text": text})

    except Exception as e:
        logger.error(f"[API STT] Error: {str(e)}")
        return JSONResponse({"error": str(e)}, status_code=500)
171
+
172
@app.post("/api/ai")
async def api_ai(request: Request):
    """Direct AI endpoint for Pluely.

    Expects a JSON object with a ``text`` field holding the question.

    Returns:
        ``{"answer": ...}`` with the output of ``generate_answer`` on
        success; a 400 ``{"error": ...}`` response when the body is not a
        JSON object or carries no usable ``text`` string; a 500
        ``{"error": ...}`` response on any other failure (including
        unparsable JSON).
    """
    try:
        body = await request.json()

        # Valid JSON can still be a list/number/string; reject it as a
        # client error instead of failing on `.get` and answering 500.
        if not isinstance(body, dict):
            logger.info("[API AI] Rejected: body is not a JSON object")
            return JSONResponse({"error": "Expected a JSON object"}, status_code=400)

        question = body.get("text", "")
        # A non-string value would break string handling downstream and
        # surface as a 500; it is a malformed request, so answer 400.
        if not question or not isinstance(question, str):
            return JSONResponse({"error": "No text provided"}, status_code=400)

        # Log a bounded preview rather than the whole request body.
        logger.info(f"[API AI] Received: {question[:60]}...")

        answer = generate_answer(question)
        return JSONResponse({"answer": answer})

    except Exception as e:
        logger.error(f"[API AI] Error: {str(e)}")
        return JSONResponse({"error": str(e)}, status_code=500)
189
+
190
@app.get("/health")
async def health():
    """Liveness probe: report a fixed status and the model name."""
    payload = {"status": "ok", "model": "SmolLM2-360M"}
    return payload
194
+
195
+ # Gradio UI (optional, for testing)
196
+ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
197
+ gr.Markdown("""
198
+ # ⚡ Ultra-Fast Q&A System
199
+ **SmolLM2-360M** + **Direct REST API** for Pluely
200
+
201
+ ## Pluely Configuration:
202
+
203
+ ### STT Endpoint:
204
+ ```
205
+ curl -X POST https://archcoder-basic-app.hf.space/api/stt -H "Content-Type: application/json" -d '{"audio": "{{AUDIO_BASE64}}"}'
206
+ ```
207
+ **Response Path:** `text`
208
 
209
+ ### AI Endpoint:
210
+ ```
211
+ curl -X POST https://archcoder-basic-app.hf.space/api/ai -H "Content-Type: application/json" -d '{"text": "{{TEXT}}"}'
212
+ ```
213
+ **Response Path:** `answer`
214
+ """)
215
+
216
+ with gr.Tab("Test"):
217
+ with gr.Row():
218
+ test_input = gr.Textbox(label="Question", placeholder="Ask anything...")
219
+ test_btn = gr.Button("🚀 Test")
220
+ test_output = gr.Textbox(label="Answer", lines=8)
221
+
222
+ test_btn.click(fn=generate_answer, inputs=[test_input], outputs=[test_output])
223
+
224
+ # Mount Gradio to FastAPI
225
+ app = gr.mount_gradio_app(app, demo, path="/")
226
 
227
  if __name__ == "__main__":
228
+ uvicorn.run(app, host="0.0.0.0", port=7860)