ArchCoder committed on
Commit
990db9b
·
verified ·
1 Parent(s): b53f1b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -76
app.py CHANGED
@@ -1,34 +1,35 @@
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
- from llama_cpp import Llama
4
  from duckduckgo_search import DDGS
5
  import time
 
6
 
7
  # Initialize models
8
  print("Loading Whisper model...")
9
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
10
 
11
  print("Loading LLM...")
12
- llm = Llama.from_pretrained(
13
- repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
14
- filename="qwen2.5-0.5b-instruct-q4_k_m.gguf",
15
- n_ctx=2048,
16
- n_threads=4,
17
- verbose=False
 
18
  )
19
 
20
- # Initialize DuckDuckGo Search (no API key needed!)
21
  ddgs = DDGS(timeout=3)
22
 
23
- def search_web(query, max_results=3):
24
  """Perform web search using DuckDuckGo (FREE & UNLIMITED)"""
25
  try:
26
- # Use text search for fast results
27
  results = ddgs.text(
28
  keywords=query,
29
- region='wt-wt', # Worldwide results
30
  safesearch='moderate',
31
- timelimit='m', # Last month for freshness
32
  max_results=max_results
33
  )
34
 
@@ -62,53 +63,59 @@ def process_audio(audio_path, question_text=None):
62
 
63
  transcription_time = time.time() - start_time
64
 
65
- # Step 2: Web search for current info
66
  search_start = time.time()
67
- search_results = search_web(question, max_results=2) # Reduced to 2 for speed
68
  search_time = time.time() - search_start
69
 
70
  # Step 3: Generate answer with LLM
71
  llm_start = time.time()
72
- prompt = f"""Answer the question briefly using the context below.
73
-
74
- Context:
75
- {search_results}
76
-
77
- Question: {question}
78
-
79
- Answer:"""
80
 
81
  try:
82
- response = llm(
83
- prompt,
84
- max_tokens=120, # Reduced for faster generation
85
- temperature=0.2, # Lower for faster, more focused responses
86
- top_p=0.85,
87
- stop=["Question:", "\n\n\n"],
88
- echo=False
89
  )
90
 
91
- answer = response['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
  answer = f"❌ LLM error: {str(e)}"
94
 
95
  llm_time = time.time() - llm_start
96
  total_time = time.time() - start_time
97
 
98
- # Color code timing (green if under 3s, yellow if close, red if over)
99
  time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 3.5 else "🔴"
100
-
101
  timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
102
 
103
  return answer + timing_info, total_time
104
 
105
- # Create Gradio interface
106
- with gr.Blocks(title="Fast Q&A - FREE Unlimited Search", theme=gr.themes.Soft()) as demo:
107
  gr.Markdown("""
108
  # ⚡ Ultra-Fast Political Q&A System
109
- Ask questions via audio or text. **FREE unlimited web search** with DuckDuckGo!
110
 
111
- **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (No API Key!)
112
  """)
113
 
114
  with gr.Tab("🎙️ Audio Input"):
@@ -122,11 +129,7 @@ with gr.Blocks(title="Fast Q&A - FREE Unlimited Search", theme=gr.themes.Soft())
122
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
123
 
124
  with gr.Column():
125
- audio_output = gr.Textbox(
126
- label="Answer",
127
- lines=8,
128
- show_copy_button=True
129
- )
130
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
131
 
132
  audio_submit.click(
@@ -147,11 +150,7 @@ with gr.Blocks(title="Fast Q&A - FREE Unlimited Search", theme=gr.themes.Soft())
147
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
148
 
149
  with gr.Column():
150
- text_output = gr.Textbox(
151
- label="Answer",
152
- lines=8,
153
- show_copy_button=True
154
- )
155
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
156
 
157
  text_submit.click(
@@ -165,44 +164,16 @@ with gr.Blocks(title="Fast Q&A - FREE Unlimited Search", theme=gr.themes.Soft())
165
  examples=[
166
  ["Who won the 2024 US presidential election?"],
167
  ["What is the current inflation rate in India?"],
168
- ["Who is the prime minister of UK?"],
169
- ["What is the latest news about AI?"]
170
  ],
171
  inputs=text_input
172
  )
173
 
174
- with gr.Accordion("📡 API Usage via curl", open=False):
175
- gr.Markdown("""
176
- ### Text Query (Simplest):
177
- ```
178
- curl -X POST https://archcoder-basic-app.hf.space/call/text_query \\
179
- -H "Content-Type: application/json" \\
180
- -d '{"data": ["Who is the current US president?"]}'
181
- ```
182
-
183
- ### Audio Query:
184
- ```
185
- # Upload audio
186
- curl -F "files=@audio.mp3" https://archcoder-basic-app.hf.space/upload
187
-
188
- # Query (replace path from upload response)
189
- curl -X POST https://archcoder-basic-app.hf.space/call/audio_query \\
190
- -H "Content-Type: application/json" \\
191
- -d '{"data": [{"path": "/tmp/gradio/YOUR_FILE.mp3"}]}'
192
- ```
193
- """)
194
-
195
  gr.Markdown("""
196
  ---
197
- ### 🎯 System Specs
198
- - **Search:** DuckDuckGo (FREE, unlimited, no API key!)
199
- - **Transcription:** Whisper-tiny (optimized for speed)
200
- - **LLM:** Qwen2.5-0.5B Q4 (fast factual answers)
201
- - **Target:** Sub-3s total response time
202
-
203
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
204
  """)
205
 
206
  if __name__ == "__main__":
207
- demo.queue(max_size=5) # Limit queue for consistent performance
208
  demo.launch()
 
1
  import gradio as gr
2
  from faster_whisper import WhisperModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from duckduckgo_search import DDGS
5
  import time
6
+ import torch
7
 
8
  # Initialize models
9
  print("Loading Whisper model...")
10
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
11
 
12
  print("Loading LLM...")
13
+ model_name = "Qwen/Qwen2.5-0.5B-Instruct"
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+ model = AutoModelForCausalLM.from_pretrained(
16
+ model_name,
17
+ torch_dtype=torch.float32,
18
+ device_map="cpu",
19
+ low_cpu_mem_usage=True
20
  )
21
 
22
+ # Initialize DuckDuckGo Search
23
  ddgs = DDGS(timeout=3)
24
 
25
+ def search_web(query, max_results=2):
26
  """Perform web search using DuckDuckGo (FREE & UNLIMITED)"""
27
  try:
 
28
  results = ddgs.text(
29
  keywords=query,
30
+ region='wt-wt',
31
  safesearch='moderate',
32
+ timelimit='m',
33
  max_results=max_results
34
  )
35
 
 
63
 
64
  transcription_time = time.time() - start_time
65
 
66
+ # Step 2: Web search
67
  search_start = time.time()
68
+ search_results = search_web(question, max_results=2)
69
  search_time = time.time() - search_start
70
 
71
  # Step 3: Generate answer with LLM
72
  llm_start = time.time()
73
+
74
+ messages = [
75
+ {"role": "system", "content": "You are a helpful assistant. Answer questions briefly using the provided context."},
76
+ {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {question}\n\nAnswer:"}
77
+ ]
 
 
 
78
 
79
  try:
80
+ text = tokenizer.apply_chat_template(
81
+ messages,
82
+ tokenize=False,
83
+ add_generation_prompt=True
 
 
 
84
  )
85
 
86
+ inputs = tokenizer([text], return_tensors="pt").to("cpu")
87
+
88
+ with torch.no_grad():
89
+ outputs = model.generate(
90
+ **inputs,
91
+ max_new_tokens=120,
92
+ temperature=0.2,
93
+ do_sample=True,
94
+ top_p=0.85,
95
+ pad_token_id=tokenizer.eos_token_id
96
+ )
97
+
98
+ response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
99
+ answer = response.strip()
100
+
101
  except Exception as e:
102
  answer = f"❌ LLM error: {str(e)}"
103
 
104
  llm_time = time.time() - llm_start
105
  total_time = time.time() - start_time
106
 
 
107
  time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 3.5 else "🔴"
 
108
  timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
109
 
110
  return answer + timing_info, total_time
111
 
112
+ # Create Gradio interface (same as before)
113
+ with gr.Blocks(title="Fast Q&A - No Building Required!", theme=gr.themes.Soft()) as demo:
114
  gr.Markdown("""
115
  # ⚡ Ultra-Fast Political Q&A System
116
+ **No wheel building** - Fast deployment with transformers!
117
 
118
+ **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (FREE unlimited search)
119
  """)
120
 
121
  with gr.Tab("🎙️ Audio Input"):
 
129
  audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
130
 
131
  with gr.Column():
132
+ audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
 
 
 
 
133
  audio_time = gr.Number(label="Response Time (seconds)", precision=2)
134
 
135
  audio_submit.click(
 
150
  text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
151
 
152
  with gr.Column():
153
+ text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
 
 
 
 
154
  text_time = gr.Number(label="Response Time (seconds)", precision=2)
155
 
156
  text_submit.click(
 
164
  examples=[
165
  ["Who won the 2024 US presidential election?"],
166
  ["What is the current inflation rate in India?"],
167
+ ["Who is the prime minister of UK?"]
 
168
  ],
169
  inputs=text_input
170
  )
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  gr.Markdown("""
173
  ---
 
 
 
 
 
 
174
  🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
175
  """)
176
 
177
  if __name__ == "__main__":
178
+ demo.queue(max_size=5)
179
  demo.launch()