ArchCoder committed on
Commit
e4a835d
·
verified ·
1 Parent(s): a46776b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -193
app.py CHANGED
@@ -1,147 +1,142 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
- from duckduckgo_search import DDGS
5
  import time
6
- import torch
7
  import base64
8
  import tempfile
9
  import os
10
  import logging
11
  from datetime import datetime
12
 
13
- # Setup comprehensive logging
14
- logging.basicConfig(
15
- level=logging.INFO,
16
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
17
- )
18
  logger = logging.getLogger(__name__)
19
 
20
  # Initialize models
21
  logger.info("Loading Whisper model...")
22
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
23
 
24
- logger.info("Loading Phi-2 model (faster inference)...")
25
- model_name = "microsoft/phi-2" # 2.7B - Faster CPU inference than Qwen
26
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
27
  model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
29
  torch_dtype=torch.float32,
30
  device_map="cpu",
31
- low_cpu_mem_usage=True,
32
- trust_remote_code=True
33
  )
34
- tokenizer.pad_token = tokenizer.eos_token
35
 
36
- # Initialize DuckDuckGo Search
37
- ddgs = DDGS(timeout=3)
38
- logger.info("All models loaded successfully!")
39
 
40
- def search_web(query, max_results=3):
41
- """Perform web search using DuckDuckGo"""
42
  logger.info(f"[SEARCH] Query: {query}")
 
 
43
  try:
44
- results = ddgs.text(
45
- keywords=query,
46
- region='wt-wt',
47
- safesearch='moderate',
48
- timelimit='m',
49
- max_results=max_results
50
- )
 
 
 
 
 
 
 
 
 
 
51
 
52
- context = ""
53
- for i, result in enumerate(results[:max_results], 1):
54
- title = result.get('title', '')
55
- body = result.get('body', '')
56
- context += f"\n[Source {i}] {title}\n{body}\n"
57
- logger.info(f"[SEARCH] Result {i}: {title[:50]}...")
58
 
59
- if not context:
60
- logger.warning("[SEARCH] No results found!")
61
- return "No search results found."
62
 
63
- logger.info(f"[SEARCH] Successfully retrieved {max_results} results")
64
- return context.strip()
65
-
 
 
 
 
 
 
 
 
 
 
 
66
  except Exception as e:
67
  logger.error(f"[SEARCH] Error: {str(e)}")
68
- return f"Search failed: {str(e)}"
69
 
70
  def transcribe_audio_base64(audio_base64):
71
- """Transcribe audio from base64 string (for Pluely STT endpoint)"""
72
- logger.info("[PLUELY STT] Received audio transcription request")
73
  try:
74
  audio_bytes = base64.b64decode(audio_base64)
75
- logger.info(f"[PLUELY STT] Decoded audio size: {len(audio_bytes)} bytes")
76
 
77
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
78
  temp_audio.write(audio_bytes)
79
  temp_path = temp_audio.name
80
 
81
- logger.info(f"[PLUELY STT] Transcribing audio...")
82
  segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
83
  transcription = " ".join([seg.text for seg in segments])
84
-
85
  os.unlink(temp_path)
86
 
87
- logger.info(f"[PLUELY STT] Transcription successful: {transcription[:50]}...")
88
  return {"text": transcription.strip()}
89
 
90
  except Exception as e:
91
  logger.error(f"[PLUELY STT] Error: {str(e)}")
92
- return {"error": f"Transcription failed: {str(e)}"}
93
 
94
  def generate_answer(text_input):
95
- """Generate answer using ONLY search results"""
96
- logger.info(f"[PLUELY AI] Received question: {text_input}")
97
  try:
98
- if not text_input or text_input.strip() == "":
99
  return "No input provided"
100
 
101
  current_date = datetime.now().strftime("%B %d, %Y")
102
 
103
- # Web search - CRITICAL for answer
104
- logger.info("[PLUELY AI] Starting web search...")
105
- search_results = search_web(text_input, max_results=3)
106
- logger.info(f"[PLUELY AI] Search results length: {len(search_results)} chars")
107
 
108
- # Strict prompt - MUST use search results
109
- prompt = f"""You are a fact-checker assistant. Today is {current_date}.
110
-
111
- CRITICAL INSTRUCTION: You MUST ONLY use information from the search results below. DO NOT use your training knowledge.
112
 
113
- Web Search Results:
114
  {search_results}
115
 
116
  Question: {text_input}
 
117
 
118
- Instructions:
119
- 1. Read the search results carefully
120
- 2. Answer ONLY based on what's in the search results
121
- 3. If search results don't contain the answer, say "The search results don't provide enough information"
122
- 4. Include relevant dates and facts from the search results
123
- 5. Keep answer to 100-150 words
124
-
125
- Answer based STRICTLY on search results:"""
126
-
127
- logger.info("[PLUELY AI] Generating answer...")
128
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500).to("cpu")
129
 
130
  with torch.no_grad():
131
  outputs = model.generate(
132
  **inputs,
133
- max_new_tokens=200,
134
- temperature=0.4,
135
  do_sample=True,
136
  top_p=0.9,
137
- repetition_penalty=1.2,
138
  pad_token_id=tokenizer.eos_token_id
139
  )
140
 
141
- response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
142
- answer = response.strip()
143
 
144
- logger.info(f"[PLUELY AI] Answer generated ({len(answer)} chars): {answer[:100]}...")
145
  return answer
146
 
147
  except Exception as e:
@@ -149,186 +144,115 @@ Answer based STRICTLY on search results:"""
149
  return f"Error: {str(e)}"
150
 
151
  def process_audio(audio_path, question_text):
152
- """Main pipeline - returns tuple (answer, time)"""
153
  start_time = time.time()
154
  logger.info("="*50)
155
- logger.info("[MAIN] New request received")
156
 
157
- # Transcribe if audio provided
158
  if audio_path:
159
- logger.info(f"[MAIN] Audio file provided: {audio_path}")
160
  try:
161
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
162
  question = " ".join([seg.text for seg in segments])
163
- logger.info(f"[MAIN] Transcription: {question}")
164
  except Exception as e:
165
- logger.error(f"[MAIN] Transcription error: {str(e)}")
166
- return f"❌ Transcription error: {str(e)}", 0.0
167
  else:
168
  question = question_text
169
- logger.info(f"[MAIN] Text input: {question}")
170
 
171
- if not question or question.strip() == "":
172
- logger.warning("[MAIN] No input provided")
173
- return "❌ No input provided", 0.0
174
 
175
  transcription_time = time.time() - start_time
176
 
177
- # Web search
178
  search_start = time.time()
179
- search_results = search_web(question, max_results=3)
180
  search_time = time.time() - search_start
181
 
182
- # Generate answer
183
  llm_start = time.time()
184
  answer = generate_answer(question)
185
  llm_time = time.time() - llm_start
186
 
187
  total_time = time.time() - start_time
188
- time_emoji = "🟢" if total_time < 4.0 else "🟡" if total_time < 6.0 else "🔴"
189
 
190
- logger.info(f"[MAIN] Total time: {total_time:.2f}s (Trans={transcription_time:.2f}s, Search={search_time:.2f}s, LLM={llm_time:.2f}s)")
191
  logger.info("="*50)
192
 
193
- timing_info = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
194
 
195
- return answer + timing_info, total_time
196
 
197
- # Wrapper functions
198
  def audio_handler(audio_path):
199
- """Wrapper for audio input"""
200
  return process_audio(audio_path, None)
201
 
202
  def text_handler(text_input):
203
- """Wrapper for text input"""
204
  return process_audio(None, text_input)
205
 
206
- # Gradio interface
207
- with gr.Blocks(title="Fast Political Q&A - Phi-2", theme=gr.themes.Soft()) as demo:
208
  gr.Markdown("""
209
- # ⚡ Fast Political Q&A System
210
- **Search-grounded answers** - Powered by Phi-2 (2.7B)
211
-
212
- **Features:** Whisper-tiny + Phi-2 (fast CPU inference) + DuckDuckGo + Search-only responses
213
  """)
214
 
215
- with gr.Tab("🎙️ Audio Input"):
216
  with gr.Row():
217
  with gr.Column():
218
- audio_input = gr.Audio(
219
- sources=["microphone", "upload"],
220
- type="filepath",
221
- label="Record or upload audio"
222
- )
223
- audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
224
-
225
  with gr.Column():
226
- audio_output = gr.Textbox(label="Search-Grounded Answer", lines=10, show_copy_button=True)
227
- audio_time = gr.Number(label="Response Time (seconds)", precision=2)
228
 
229
- audio_submit.click(
230
- fn=audio_handler,
231
- inputs=[audio_input],
232
- outputs=[audio_output, audio_time],
233
- api_name="audio_query"
234
- )
235
 
236
- with gr.Tab("✍️ Text Input"):
237
  with gr.Row():
238
  with gr.Column():
239
- text_input = gr.Textbox(
240
- label="Type your question",
241
- placeholder="Is internet shut down in Bareilly today?",
242
- lines=3
243
- )
244
- text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
245
-
246
  with gr.Column():
247
- text_output = gr.Textbox(label="Search-Grounded Answer", lines=10, show_copy_button=True)
248
- text_time = gr.Number(label="Response Time (seconds)", precision=2)
249
 
250
- text_submit.click(
251
- fn=text_handler,
252
- inputs=[text_input],
253
- outputs=[text_output, text_time],
254
- api_name="text_query"
255
- )
256
 
257
  gr.Examples(
258
  examples=[
259
  ["Is internet shut down in Bareilly today?"],
260
- ["Who won the 2024 US presidential election?"],
261
- ["What is the current inflation rate in India?"],
262
- ["What happened in Israel Palestine conflict today?"]
263
  ],
264
  inputs=text_input
265
  )
266
 
267
- # API endpoints for Pluely
268
- with gr.Tab("🔌 Pluely Integration"):
269
  gr.Markdown("""
270
- ## API Endpoints (All requests logged in console)
271
-
272
- ### STT Endpoint
273
- ```
274
- curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
275
- -H "Content-Type: application/json" \\
276
- -d '{"data": ["BASE64_AUDIO_DATA"]}'
277
- ```
278
-
279
- ### AI Endpoint
280
- ```
281
- curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
282
- -H "Content-Type: application/json" \\
283
- -d '{"data": ["Your question here"]}'
284
- ```
285
-
286
- ## Pluely Configuration
287
 
288
- **STT Provider:**
289
- ```
290
- curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
291
- ```
292
- **Response Path:** `data[0].text`
293
 
294
- **AI Provider:**
295
- ```
296
- curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
297
- ```
298
- **Response Path:** `data[0]`
299
  """)
300
 
301
- # Hidden components for API endpoints
302
  with gr.Row(visible=False):
303
- stt_input = gr.Textbox()
304
- stt_output = gr.JSON()
305
- ai_input = gr.Textbox()
306
- ai_output = gr.Textbox()
307
 
308
- stt_btn = gr.Button("STT", visible=False)
309
- stt_btn.click(
310
- fn=transcribe_audio_base64,
311
- inputs=[stt_input],
312
- outputs=[stt_output],
313
- api_name="transcribe_stt"
314
- )
315
-
316
- ai_btn = gr.Button("AI", visible=False)
317
- ai_btn.click(
318
- fn=generate_answer,
319
- inputs=[ai_input],
320
- outputs=[ai_output],
321
- api_name="answer_ai"
322
- )
323
 
324
- gr.Markdown("""
325
- ---
326
- **Model:** Phi-2 (2.7B) - Fast CPU inference, excellent reasoning
327
- **Output:** 100-150 words based STRICTLY on web search results
328
- **Logging:** All Pluely requests logged in console (check Logs tab)
329
-
330
- 🟢 = Under 4s | 🟡 = 4-6s | 🔴 = Over 6s
331
- """)
332
 
333
  if __name__ == "__main__":
334
  demo.queue(max_size=5)
 
import base64
import logging
import os
import tempfile
import time
from datetime import datetime

import gradio as gr
import requests
import torch
from faster_whisper import WhisperModel
from transformers import AutoTokenizer, AutoModelForCausalLM
11
 
# Configure logging once at import time; all messages below go through `logger`.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Speech-to-text model: Whisper "tiny" on CPU with int8 compute.
logger.info("Loading Whisper model...")
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")

# Text-generation model and its tokenizer, loaded on CPU in float32.
logger.info("Loading Qwen 0.5B (fastest model)...")
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

logger.info("All models loaded!")
 
 
31
 
def search_web_google(query, max_results=3):
    """Fetch web search snippets for *query* from a public Searx instance.

    The name is kept for existing callers, but no Google API is contacted:
    the only request made is a GET to ``https://searx.be/search`` with
    ``format=json`` and a 5 second timeout.

    Args:
        query: Search text sent as the ``q`` parameter.
        max_results: Maximum number of results included in the returned text.

    Returns:
        On success, a string of ``[Source N] title`` / snippet blocks
        separated by blank lines. If the request returns a non-200 status
        or no results, a fixed fallback sentence. If anything raises,
        ``"Search unavailable: <error>"``. The function never raises.
    """
    logger.info(f"[SEARCH] Query: {query}")

    searx_url = "https://searx.be/search"
    searx_params = {
        'q': query,
        'format': 'json',
        'categories': 'general',
        'language': 'en',
    }

    try:
        response = requests.get(searx_url, params=searx_params, timeout=5)

        if response.status_code == 200:
            # Only the first `max_results` entries are used for the context.
            results = response.json().get('results', [])[:max_results]

            blocks = []
            for i, result in enumerate(results, 1):
                title = result.get('title', '')
                content = result.get('content', '')
                blocks.append(f"\n[Source {i}] {title}\n{content}\n")
                logger.info(f"[SEARCH] Result {i}: {title[:50]}...")

            context = "".join(blocks).strip()
            if context:
                logger.info(f"[SEARCH] Success - {len(results)} results")
                return context
        else:
            # Make a failing instance visible instead of reporting it as "no results".
            logger.warning(f"[SEARCH] Searx returned HTTP {response.status_code}")

        logger.warning("[SEARCH] No results from Searx")
        return "Unable to fetch current information. Please try a different question."

    except Exception as e:
        # Best-effort by design: callers embed the returned text in a prompt.
        logger.error(f"[SEARCH] Error: {str(e)}")
        return f"Search unavailable: {str(e)}"
79
 
def transcribe_audio_base64(audio_base64):
    """Transcribe base64-encoded audio with the module-level Whisper model.

    The decoded bytes are written to a temporary ``.wav`` file, transcribed
    as English with ``beam_size=1``, and the file is removed afterwards even
    when transcription fails.

    Args:
        audio_base64: Audio payload encoded as a base64 string.

    Returns:
        ``{"text": <transcription>}`` on success, or ``{"error": <message>}``
        if decoding, writing or transcription raises. The function never
        raises.
    """
    logger.info("[PLUELY STT] Request received")
    try:
        audio_bytes = base64.b64decode(audio_base64)
        logger.info(f"[PLUELY STT] Audio size: {len(audio_bytes)} bytes")

        # delete=False: the file must outlive the `with` so it can be reopened by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_bytes)
            temp_path = temp_audio.name

        try:
            segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
            # Consume the segments before the file is removed.
            # NOTE(review): faster-whisper appears to yield segments lazily - confirm.
            transcription = " ".join(seg.text for seg in segments)
        finally:
            # Previously skipped when transcription raised, leaking the temp file.
            os.unlink(temp_path)

        logger.info(f"[PLUELY STT] Success: {transcription[:50]}...")
        return {"text": transcription.strip()}

    except Exception as e:
        logger.error(f"[PLUELY STT] Error: {str(e)}")
        return {"error": str(e)}
101
 
102
  def generate_answer(text_input):
103
+ """Generate fast answer using search results"""
104
+ logger.info(f"[PLUELY AI] Question: {text_input}")
105
  try:
106
+ if not text_input or not text_input.strip():
107
  return "No input provided"
108
 
109
  current_date = datetime.now().strftime("%B %d, %Y")
110
 
111
+ # Search
112
+ logger.info("[PLUELY AI] Searching...")
113
+ search_results = search_web_google(text_input, max_results=3)
114
+ logger.info(f"[PLUELY AI] Search done ({len(search_results)} chars)")
115
 
116
+ # Simple prompt for speed
117
+ prompt = f"""Today is {current_date}. Answer based on these search results:
 
 
118
 
 
119
  {search_results}
120
 
121
  Question: {text_input}
122
+ Answer (80-100 words):"""
123
 
124
+ logger.info("[PLUELY AI] Generating...")
125
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1000)
 
 
 
 
 
 
 
 
 
126
 
127
  with torch.no_grad():
128
  outputs = model.generate(
129
  **inputs,
130
+ max_new_tokens=120,
131
+ temperature=0.3,
132
  do_sample=True,
133
  top_p=0.9,
 
134
  pad_token_id=tokenizer.eos_token_id
135
  )
136
 
137
+ answer = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
 
138
 
139
+ logger.info(f"[PLUELY AI] Done ({len(answer)} chars)")
140
  return answer
141
 
142
  except Exception as e:
 
144
  return f"Error: {str(e)}"
145
 
def process_audio(audio_path, question_text):
    """Answer a spoken or typed question and report per-stage timings.

    Args:
        audio_path: Path to an audio file to transcribe; when falsy,
            ``question_text`` is used as the question instead.
        question_text: The question as text (used only without audio).

    Returns:
        A ``(message, seconds)`` tuple. On success ``message`` is the answer
        followed by a timing line and ``seconds`` is the total elapsed time.
        On a transcription error or empty input, ``seconds`` is ``0.0``.
    """
    started = time.time()
    logger.info("=" * 50)
    logger.info("[MAIN] New request")

    if not audio_path:
        question = question_text
        logger.info(f"[MAIN] Text: {question}")
    else:
        logger.info(f"[MAIN] Audio: {audio_path}")
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join([seg.text for seg in segments])
            logger.info(f"[MAIN] Transcribed: {question}")
        except Exception as e:
            logger.error(f"[MAIN] Transcription failed: {str(e)}")
            return f"❌ Error: {str(e)}", 0.0

    if not (question and question.strip()):
        return " No input", 0.0

    transcription_time = time.time() - started

    # Search stage: the call is timed but its result is discarded.
    # NOTE(review): generate_answer looks like it runs its own search too,
    # so the query may be sent twice per request - confirm.
    search_began = time.time()
    search_web_google(question, max_results=3)
    search_time = time.time() - search_began

    # Generation stage.
    llm_began = time.time()
    answer = generate_answer(question)
    llm_time = time.time() - llm_began

    total_time = time.time() - started
    if total_time < 3.0:
        time_emoji = "🟢"
    elif total_time < 5.0:
        time_emoji = "🟡"
    else:
        time_emoji = "🔴"

    logger.info(f"[MAIN] Total: {total_time:.2f}s")
    logger.info("=" * 50)

    timing = (
        f"\n\n{time_emoji} **Time:** Trans={transcription_time:.2f}s | "
        f"Search={search_time:.2f}s | LLM={llm_time:.2f}s | "
        f"**Total={total_time:.2f}s**"
    )

    return answer + timing, total_time
189
 
 
def audio_handler(audio_path):
    """Run the pipeline for an audio file; returns process_audio's result."""
    return process_audio(audio_path=audio_path, question_text=None)
192
 
def text_handler(text_input):
    """Run the pipeline for a typed question; returns process_audio's result."""
    return process_audio(audio_path=None, question_text=text_input)
195
 
196
+ # Gradio UI
197
+ with gr.Blocks(title="Fast Q&A", theme=gr.themes.Soft()) as demo:
198
  gr.Markdown("""
199
+ # ⚡ Ultra-Fast Political Q&A
200
+ **Search-grounded answers** - Qwen 0.5B + Searx
 
 
201
  """)
202
 
203
+ with gr.Tab("🎙️ Audio"):
204
  with gr.Row():
205
  with gr.Column():
206
+ audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
207
+ audio_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
 
 
 
 
 
208
  with gr.Column():
209
+ audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
210
+ audio_time = gr.Number(label="Time (s)", precision=2)
211
 
212
+ audio_submit.click(fn=audio_handler, inputs=[audio_input], outputs=[audio_output, audio_time], api_name="audio_query")
 
 
 
 
 
213
 
214
+ with gr.Tab("✍️ Text"):
215
  with gr.Row():
216
  with gr.Column():
217
+ text_input = gr.Textbox(label="Question", placeholder="Ask anything...", lines=3)
218
+ text_submit = gr.Button("🚀 Submit", variant="primary", size="lg")
 
 
 
 
 
219
  with gr.Column():
220
+ text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
221
+ text_time = gr.Number(label="Time (s)", precision=2)
222
 
223
+ text_submit.click(fn=text_handler, inputs=[text_input], outputs=[text_output, text_time], api_name="text_query")
 
 
 
 
 
224
 
225
  gr.Examples(
226
  examples=[
227
  ["Is internet shut down in Bareilly today?"],
228
+ ["Who won 2024 US election?"],
229
+ ["Current India inflation rate?"]
 
230
  ],
231
  inputs=text_input
232
  )
233
 
234
+ with gr.Tab("🔌 API"):
 
235
  gr.Markdown("""
236
+ ### Pluely Endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ **STT:** `https://archcoder-basic-app.hf.space/call/transcribe_stt`
239
+ **AI:** `https://archcoder-basic-app.hf.space/call/answer_ai`
 
 
 
240
 
241
+ **Response Paths:**
242
+ STT: `data[0].text`
243
+ AI: `data[0]`
 
 
244
  """)
245
 
 
246
  with gr.Row(visible=False):
247
+ stt_in = gr.Textbox()
248
+ stt_out = gr.JSON()
249
+ ai_in = gr.Textbox()
250
+ ai_out = gr.Textbox()
251
 
252
+ gr.Button("STT", visible=False).click(fn=transcribe_audio_base64, inputs=[stt_in], outputs=[stt_out], api_name="transcribe_stt")
253
+ gr.Button("AI", visible=False).click(fn=generate_answer, inputs=[ai_in], outputs=[ai_out], api_name="answer_ai")
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
+ gr.Markdown("🟢 < 3s | 🟡 3-5s | 🔴 > 5s")
 
 
 
 
 
 
 
256
 
257
  if __name__ == "__main__":
258
  demo.queue(max_size=5)