ArchCoder committed on
Commit
f817cfc
·
verified ·
1 Parent(s): a971d1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -62
app.py CHANGED
@@ -7,27 +7,39 @@ import torch
7
  import base64
8
  import tempfile
9
  import os
 
10
  from datetime import datetime
11
 
 
 
 
 
 
 
 
12
  # Initialize models
13
- print("Loading Whisper model...")
14
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
15
 
16
- print("Loading LLM...")
17
- model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Upgraded to 1.5B for better quality
18
- tokenizer = AutoTokenizer.from_pretrained(model_name)
19
  model = AutoModelForCausalLM.from_pretrained(
20
  model_name,
21
  torch_dtype=torch.float32,
22
  device_map="cpu",
23
- low_cpu_mem_usage=True
 
24
  )
 
25
 
26
  # Initialize DuckDuckGo Search
27
  ddgs = DDGS(timeout=3)
 
28
 
29
  def search_web(query, max_results=3):
30
  """Perform web search using DuckDuckGo"""
 
31
  try:
32
  results = ddgs.text(
33
  keywords=query,
@@ -42,108 +54,122 @@ def search_web(query, max_results=3):
42
  title = result.get('title', '')
43
  body = result.get('body', '')
44
  context += f"\n[Source {i}] {title}\n{body}\n"
 
45
 
46
- return context.strip() if context else "No search results found."
 
 
 
 
 
47
 
48
  except Exception as e:
 
49
  return f"Search failed: {str(e)}"
50
 
51
  def transcribe_audio_base64(audio_base64):
52
  """Transcribe audio from base64 string (for Pluely STT endpoint)"""
 
53
  try:
54
  audio_bytes = base64.b64decode(audio_base64)
 
55
 
56
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
57
  temp_audio.write(audio_bytes)
58
  temp_path = temp_audio.name
59
 
 
60
  segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
61
  transcription = " ".join([seg.text for seg in segments])
62
 
63
  os.unlink(temp_path)
64
 
 
65
  return {"text": transcription.strip()}
66
 
67
  except Exception as e:
 
68
  return {"error": f"Transcription failed: {str(e)}"}
69
 
70
  def generate_answer(text_input):
71
- """Generate complete answer with context"""
 
72
  try:
73
  if not text_input or text_input.strip() == "":
74
  return "No input provided"
75
 
76
- # Get current date for context
77
  current_date = datetime.now().strftime("%B %d, %Y")
78
 
79
- # Web search for current information
 
80
  search_results = search_web(text_input, max_results=3)
 
81
 
82
- # Enhanced prompt for comprehensive responses
83
- messages = [
84
- {"role": "system", "content": f"""You are a knowledgeable assistant providing comprehensive, well-researched answers. Today's date is {current_date}.
85
 
86
- When answering:
87
- 1. Provide the direct answer first
88
- 2. Add relevant context and background information
89
- 3. Include recent developments or current status when applicable
90
- 4. Be informative but concise (150-200 words)
91
- 5. Use the web search results to ensure accuracy and currency"""},
92
- {"role": "user", "content": f"""Based on these current web search results:
93
 
 
94
  {search_results}
95
 
96
  Question: {text_input}
97
 
98
- Provide a comprehensive answer that includes:
99
- - Direct answer to the question
100
- - Relevant context and background
101
- - Recent developments (as of {current_date})
102
- - Key points the user should know
 
103
 
104
- Answer:"""}
105
- ]
106
-
107
- text = tokenizer.apply_chat_template(
108
- messages,
109
- tokenize=False,
110
- add_generation_prompt=True
111
- )
112
-
113
- inputs = tokenizer([text], return_tensors="pt").to("cpu")
114
 
115
  with torch.no_grad():
116
  outputs = model.generate(
117
  **inputs,
118
- max_new_tokens=250, # Increased from 80 to 250
119
- temperature=0.3, # Slightly higher for more natural responses
120
  do_sample=True,
121
  top_p=0.9,
122
- repetition_penalty=1.1,
123
  pad_token_id=tokenizer.eos_token_id
124
  )
125
 
126
  response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
127
- return response.strip()
 
 
 
128
 
129
  except Exception as e:
 
130
  return f"Error: {str(e)}"
131
 
132
  def process_audio(audio_path, question_text):
133
  """Main pipeline - returns tuple (answer, time)"""
134
  start_time = time.time()
 
 
135
 
136
  # Transcribe if audio provided
137
  if audio_path:
 
138
  try:
139
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
140
  question = " ".join([seg.text for seg in segments])
 
141
  except Exception as e:
 
142
  return f"❌ Transcription error: {str(e)}", 0.0
143
  else:
144
  question = question_text
 
145
 
146
  if not question or question.strip() == "":
 
147
  return "❌ No input provided", 0.0
148
 
149
  transcription_time = time.time() - start_time
@@ -159,7 +185,10 @@ def process_audio(audio_path, question_text):
159
  llm_time = time.time() - llm_start
160
 
161
  total_time = time.time() - start_time
162
- time_emoji = "🟢" if total_time < 5.0 else "🟡" if total_time < 7.0 else "🔴"
 
 
 
163
 
164
  timing_info = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
165
 
@@ -175,12 +204,12 @@ def text_handler(text_input):
175
  return process_audio(None, text_input)
176
 
177
  # Gradio interface
178
- with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
179
  gr.Markdown("""
180
- # 🎯 Enhanced Political Q&A System
181
- **Comprehensive answers with context** - Powered by Qwen2.5-1.5B
182
 
183
- **Features:** Whisper-tiny + Qwen2.5-1.5B + DuckDuckGo + Rich contextual responses
184
  """)
185
 
186
  with gr.Tab("🎙️ Audio Input"):
@@ -194,7 +223,7 @@ with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
194
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
195
 
196
  with gr.Column():
197
- audio_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
198
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
199
 
200
  audio_submit.click(
@@ -209,13 +238,13 @@ with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
209
  with gr.Column():
210
  text_input = gr.Textbox(
211
  label="Type your question",
212
- placeholder="Who is the current US president?",
213
  lines=3
214
  )
215
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
216
 
217
  with gr.Column():
218
- text_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
219
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
220
 
221
  text_submit.click(
@@ -227,10 +256,10 @@ with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
227
 
228
  gr.Examples(
229
  examples=[
 
230
  ["Who won the 2024 US presidential election?"],
231
  ["What is the current inflation rate in India?"],
232
- ["Who is the prime minister of UK and what are their key policies?"],
233
- ["Explain the latest developments in AI regulation"]
234
  ],
235
  inputs=text_input
236
  )
@@ -238,37 +267,35 @@ with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
238
  # API endpoints for Pluely
239
  with gr.Tab("🔌 Pluely Integration"):
240
  gr.Markdown("""
241
- ## API Endpoints for Pluely
242
 
243
- ### STT Endpoint (Audio Transcription)
244
  ```
245
  curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
246
  -H "Content-Type: application/json" \\
247
  -d '{"data": ["BASE64_AUDIO_DATA"]}'
248
  ```
249
- **Response:** `{"data": [{"text": "transcribed text"}]}`
250
 
251
- ### AI Endpoint (Enhanced Responses)
252
  ```
253
  curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
254
  -H "Content-Type: application/json" \\
255
  -d '{"data": ["Your question here"]}'
256
  ```
257
- **Response:** `{"data": ["Comprehensive answer with context"]}`
258
 
259
  ## Pluely Configuration
260
 
261
- ### Custom STT Provider:
262
  ```
263
  curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
264
  ```
265
- **Response Path:** `data[0].text` | **Streaming:** OFF
266
 
267
- ### Custom AI Provider:
268
  ```
269
  curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
270
  ```
271
- **Response Path:** `data[0]` | **Streaming:** OFF
272
  """)
273
 
274
  # Hidden components for API endpoints
@@ -296,12 +323,12 @@ with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
296
 
297
  gr.Markdown("""
298
  ---
299
- **Model:** Qwen2.5-1.5B-Instruct (3x larger for better answers)
300
- **Output:** 150-200 words with context and background
301
- **Date-aware:** Responses reference current date ({})
302
 
303
- 🟢 = Under 5s | 🟡 = 5-7s | 🔴 = Over 7s
304
- """.format(datetime.now().strftime("%B %d, %Y")))
305
 
306
  if __name__ == "__main__":
307
  demo.queue(max_size=5)
 
7
  import base64
8
  import tempfile
9
  import os
10
+ import logging
11
  from datetime import datetime
12
 
13
+ # Set up comprehensive logging
14
+ logging.basicConfig(
15
+ level=logging.INFO,
16
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
17
+ )
18
+ logger = logging.getLogger(__name__)
19
+
20
  # Initialize models
21
+ logger.info("Loading Whisper model...")
22
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
23
 
24
+ logger.info("Loading Phi-2 model (faster inference)...")
25
+ model_name = "microsoft/phi-2" # 2.7B - Faster CPU inference than Qwen
26
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
27
  model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
29
  torch_dtype=torch.float32,
30
  device_map="cpu",
31
+ low_cpu_mem_usage=True,
32
+ trust_remote_code=True
33
  )
34
+ tokenizer.pad_token = tokenizer.eos_token
35
 
36
  # Initialize DuckDuckGo Search
37
  ddgs = DDGS(timeout=3)
38
+ logger.info("All models loaded successfully!")
39
 
40
  def search_web(query, max_results=3):
41
  """Perform web search using DuckDuckGo"""
42
+ logger.info(f"[SEARCH] Query: {query}")
43
  try:
44
  results = ddgs.text(
45
  keywords=query,
 
54
  title = result.get('title', '')
55
  body = result.get('body', '')
56
  context += f"\n[Source {i}] {title}\n{body}\n"
57
+ logger.info(f"[SEARCH] Result {i}: {title[:50]}...")
58
 
59
+ if not context:
60
+ logger.warning("[SEARCH] No results found!")
61
+ return "No search results found."
62
+
63
+ logger.info(f"[SEARCH] Successfully retrieved {max_results} results")
64
+ return context.strip()
65
 
66
  except Exception as e:
67
+ logger.error(f"[SEARCH] Error: {str(e)}")
68
  return f"Search failed: {str(e)}"
69
 
70
  def transcribe_audio_base64(audio_base64):
71
  """Transcribe audio from base64 string (for Pluely STT endpoint)"""
72
+ logger.info("[PLUELY STT] Received audio transcription request")
73
  try:
74
  audio_bytes = base64.b64decode(audio_base64)
75
+ logger.info(f"[PLUELY STT] Decoded audio size: {len(audio_bytes)} bytes")
76
 
77
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
78
  temp_audio.write(audio_bytes)
79
  temp_path = temp_audio.name
80
 
81
+ logger.info(f"[PLUELY STT] Transcribing audio...")
82
  segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
83
  transcription = " ".join([seg.text for seg in segments])
84
 
85
  os.unlink(temp_path)
86
 
87
+ logger.info(f"[PLUELY STT] Transcription successful: {transcription[:50]}...")
88
  return {"text": transcription.strip()}
89
 
90
  except Exception as e:
91
+ logger.error(f"[PLUELY STT] Error: {str(e)}")
92
  return {"error": f"Transcription failed: {str(e)}"}
93
 
94
  def generate_answer(text_input):
95
+ """Generate answer using ONLY search results"""
96
+ logger.info(f"[PLUELY AI] Received question: {text_input}")
97
  try:
98
  if not text_input or text_input.strip() == "":
99
  return "No input provided"
100
 
 
101
  current_date = datetime.now().strftime("%B %d, %Y")
102
 
103
+ # Web search - CRITICAL for answer
104
+ logger.info("[PLUELY AI] Starting web search...")
105
  search_results = search_web(text_input, max_results=3)
106
+ logger.info(f"[PLUELY AI] Search results length: {len(search_results)} chars")
107
 
108
+ # Strict prompt - MUST use search results
109
+ prompt = f"""You are a fact-checker assistant. Today is {current_date}.
 
110
 
111
+ CRITICAL INSTRUCTION: You MUST ONLY use information from the search results below. DO NOT use your training knowledge.
 
 
 
 
 
 
112
 
113
+ Web Search Results:
114
  {search_results}
115
 
116
  Question: {text_input}
117
 
118
+ Instructions:
119
+ 1. Read the search results carefully
120
+ 2. Answer ONLY based on what's in the search results
121
+ 3. If search results don't contain the answer, say "The search results don't provide enough information"
122
+ 4. Include relevant dates and facts from the search results
123
+ 5. Keep answer to 100-150 words
124
 
125
+ Answer based STRICTLY on search results:"""
126
+
127
+ logger.info("[PLUELY AI] Generating answer...")
128
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500).to("cpu")
 
 
 
 
 
 
129
 
130
  with torch.no_grad():
131
  outputs = model.generate(
132
  **inputs,
133
+ max_new_tokens=200,
134
+ temperature=0.4,
135
  do_sample=True,
136
  top_p=0.9,
137
+ repetition_penalty=1.2,
138
  pad_token_id=tokenizer.eos_token_id
139
  )
140
 
141
  response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
142
+ answer = response.strip()
143
+
144
+ logger.info(f"[PLUELY AI] Answer generated ({len(answer)} chars): {answer[:100]}...")
145
+ return answer
146
 
147
  except Exception as e:
148
+ logger.error(f"[PLUELY AI] Error: {str(e)}")
149
  return f"Error: {str(e)}"
150
 
151
  def process_audio(audio_path, question_text):
152
  """Main pipeline - returns tuple (answer, time)"""
153
  start_time = time.time()
154
+ logger.info("="*50)
155
+ logger.info("[MAIN] New request received")
156
 
157
  # Transcribe if audio provided
158
  if audio_path:
159
+ logger.info(f"[MAIN] Audio file provided: {audio_path}")
160
  try:
161
  segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
162
  question = " ".join([seg.text for seg in segments])
163
+ logger.info(f"[MAIN] Transcription: {question}")
164
  except Exception as e:
165
+ logger.error(f"[MAIN] Transcription error: {str(e)}")
166
  return f"❌ Transcription error: {str(e)}", 0.0
167
  else:
168
  question = question_text
169
+ logger.info(f"[MAIN] Text input: {question}")
170
 
171
  if not question or question.strip() == "":
172
+ logger.warning("[MAIN] No input provided")
173
  return "❌ No input provided", 0.0
174
 
175
  transcription_time = time.time() - start_time
 
185
  llm_time = time.time() - llm_start
186
 
187
  total_time = time.time() - start_time
188
+ time_emoji = "🟢" if total_time < 4.0 else "🟡" if total_time < 6.0 else "🔴"
189
+
190
+ logger.info(f"[MAIN] Total time: {total_time:.2f}s (Trans={transcription_time:.2f}s, Search={search_time:.2f}s, LLM={llm_time:.2f}s)")
191
+ logger.info("="*50)
192
 
193
  timing_info = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
194
 
 
204
  return process_audio(None, text_input)
205
 
206
  # Gradio interface
207
+ with gr.Blocks(title="Fast Political Q&A - Phi-2", theme=gr.themes.Soft()) as demo:
208
  gr.Markdown("""
209
+ # Fast Political Q&A System
210
+ **Search-grounded answers** - Powered by Phi-2 (2.7B)
211
 
212
+ **Features:** Whisper-tiny + Phi-2 (fast CPU inference) + DuckDuckGo + Search-only responses
213
  """)
214
 
215
  with gr.Tab("🎙️ Audio Input"):
 
223
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
224
 
225
  with gr.Column():
226
+ audio_output = gr.Textbox(label="Search-Grounded Answer", lines=10, show_copy_button=True)
227
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
228
 
229
  audio_submit.click(
 
238
  with gr.Column():
239
  text_input = gr.Textbox(
240
  label="Type your question",
241
+ placeholder="Is internet shut down in Bareilly today?",
242
  lines=3
243
  )
244
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
245
 
246
  with gr.Column():
247
+ text_output = gr.Textbox(label="Search-Grounded Answer", lines=10, show_copy_button=True)
248
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
249
 
250
  text_submit.click(
 
256
 
257
  gr.Examples(
258
  examples=[
259
+ ["Is internet shut down in Bareilly today?"],
260
  ["Who won the 2024 US presidential election?"],
261
  ["What is the current inflation rate in India?"],
262
+ ["What happened in Israel Palestine conflict today?"]
 
263
  ],
264
  inputs=text_input
265
  )
 
267
  # API endpoints for Pluely
268
  with gr.Tab("🔌 Pluely Integration"):
269
  gr.Markdown("""
270
+ ## API Endpoints (All requests logged in console)
271
 
272
+ ### STT Endpoint
273
  ```
274
  curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
275
  -H "Content-Type: application/json" \\
276
  -d '{"data": ["BASE64_AUDIO_DATA"]}'
277
  ```
 
278
 
279
+ ### AI Endpoint
280
  ```
281
  curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
282
  -H "Content-Type: application/json" \\
283
  -d '{"data": ["Your question here"]}'
284
  ```
 
285
 
286
  ## Pluely Configuration
287
 
288
+ **STT Provider:**
289
  ```
290
  curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
291
  ```
292
+ **Response Path:** `data[0].text`
293
 
294
+ **AI Provider:**
295
  ```
296
  curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
297
  ```
298
+ **Response Path:** `data[0]`
299
  """)
300
 
301
  # Hidden components for API endpoints
 
323
 
324
  gr.Markdown("""
325
  ---
326
+ **Model:** Phi-2 (2.7B) - Fast CPU inference, excellent reasoning
327
+ **Output:** 100-150 words based STRICTLY on web search results
328
+ **Logging:** All Pluely requests logged in console (check Logs tab)
329
 
330
+ 🟢 = Under 4s | 🟡 = 4-6s | 🔴 = Over 6s
331
+ """)
332
 
333
  if __name__ == "__main__":
334
  demo.queue(max_size=5)