# basic_app/app.py — Hugging Face Space by ArchCoder (commit d6e4129, "Update app.py", ~7.01 kB)
import gradio as gr
from faster_whisper import WhisperModel
from llama_cpp import Llama
from duckduckgo_search import DDGS
import time
# Initialize models once at import time so the Space is warm before the UI starts.
print("Loading Whisper model...")
# Smallest Whisper checkpoint, CPU-only, int8-quantized — trades accuracy for speed.
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
print("Loading LLM...")
# 0.5B-parameter instruct model, 4-bit (Q4_K_M) GGUF, fetched from the HF Hub.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="qwen2.5-0.5b-instruct-q4_k_m.gguf",
    n_ctx=2048,   # context window shared by prompt + completion
    n_threads=4,
    verbose=False
)
# Initialize DuckDuckGo Search (no API key needed!)
# 3-second timeout keeps slow searches from blowing the sub-3s response target.
ddgs = DDGS(timeout=3)
def search_web(query, max_results=3):
    """Perform web search using DuckDuckGo (FREE & UNLIMITED).

    Args:
        query: Search query string.
        max_results: Maximum number of results folded into the context.

    Returns:
        A numbered plain-text context block ("[i] title\\nbody" entries),
        "No search results found." when the search yields nothing, or a
        "Search failed: ..." message if anything raises.
    """
    try:
        # Use text search for fast results. Newer duckduckgo_search
        # releases return a generator (or None) instead of a list, which
        # the old slicing code crashed on — normalize to a list first.
        results = list(ddgs.text(
            keywords=query,
            region='wt-wt',  # Worldwide results
            safesearch='moderate',
            timelimit='m',  # Last month for freshness
            max_results=max_results
        ) or [])
        # join() is linear; repeated += on a string is quadratic.
        parts = []
        for i, result in enumerate(results[:max_results], 1):
            title = result.get('title', '')
            body = result.get('body', '')
            parts.append(f"\n[{i}] {title}\n{body}\n")
        context = "".join(parts)
        return context.strip() if context else "No search results found."
    except Exception as e:
        # Best-effort: the caller shows this string to the user instead of crashing.
        return f"Search failed: {str(e)}"
def process_audio(audio_path, question_text=None):
    """Full pipeline: (optional) transcription -> web search -> LLM answer.

    Args:
        audio_path: Path to an audio file to transcribe, or None.
        question_text: Question as plain text; used when audio_path is None.

    Returns:
        (answer_markdown_with_timing, total_seconds). On transcription
        failure or missing input the first element is an error message
        and the second element is 0.0.
    """
    t0 = time.time()

    # Step 1: obtain the question — from audio if given, else from text.
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join(seg.text for seg in segments)
        except Exception as e:
            return f"❌ Transcription error: {str(e)}", 0.0
    else:
        question = question_text

    # Guard: reject missing or whitespace-only input.
    if not question or question.strip() == "":
        return "❌ No input provided", 0.0
    transcription_time = time.time() - t0

    # Step 2: fetch fresh context from the web (only 2 results, for speed).
    search_t0 = time.time()
    search_results = search_web(question, max_results=2)
    search_time = time.time() - search_t0

    # Step 3: ask the LLM, grounded on the search snippets.
    llm_t0 = time.time()
    prompt = f"""Answer the question briefly using the context below.
Context:
{search_results}
Question: {question}
Answer:"""
    try:
        response = llm(
            prompt,
            max_tokens=120,    # Reduced for faster generation
            temperature=0.2,   # Lower for faster, more focused responses
            top_p=0.85,
            stop=["Question:", "\n\n\n"],
            echo=False
        )
        answer = response['choices'][0]['text'].strip()
    except Exception as e:
        answer = f"❌ LLM error: {str(e)}"
    llm_time = time.time() - llm_t0
    total_time = time.time() - t0

    # Traffic-light badge: green under 3s, yellow under 3.5s, red beyond.
    if total_time < 3.0:
        time_emoji = "🟢"
    elif total_time < 3.5:
        time_emoji = "🟡"
    else:
        time_emoji = "🔴"
    timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
    return answer + timing_info, total_time
# Create Gradio interface: two input tabs (audio / text) feed the same
# process_audio pipeline; each click handler is exported as a named API
# endpoint so the Space can also be driven via curl (see Accordion below).
with gr.Blocks(title="Fast Q&A - FREE Unlimited Search", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# ⚡ Ultra-Fast Political Q&A System
Ask questions via audio or text. **FREE unlimited web search** with DuckDuckGo!
**Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (No API Key!)
""")
    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                # Accepts a live microphone recording or an uploaded file;
                # type="filepath" hands process_audio a path on disk.
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio"
                )
                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
            with gr.Column():
                audio_output = gr.Textbox(
                    label="Answer",
                    lines=8,
                    show_copy_button=True
                )
                audio_time = gr.Number(label="Response Time (seconds)", precision=2)
        # api_name exposes this handler at /call/audio_query.
        audio_submit.click(
            fn=lambda x: process_audio(x, None),
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query"
        )
    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3
                )
                text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
            with gr.Column():
                text_output = gr.Textbox(
                    label="Answer",
                    lines=8,
                    show_copy_button=True
                )
                text_time = gr.Number(label="Response Time (seconds)", precision=2)
        # api_name exposes this handler at /call/text_query.
        text_submit.click(
            fn=lambda x: process_audio(None, x),
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query"
        )
        # Clickable example questions that fill text_input.
        # NOTE(review): source indentation was lost — assuming Examples sits
        # inside the Text tab since it targets text_input; confirm placement.
        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of UK?"],
                ["What is the latest news about AI?"]
            ],
            inputs=text_input
        )
    # Collapsed help panel documenting the two HTTP endpoints above.
    with gr.Accordion("📡 API Usage via curl", open=False):
        gr.Markdown("""
### Text Query (Simplest):
```
curl -X POST https://archcoder-basic-app.hf.space/call/text_query \\
-H "Content-Type: application/json" \\
-d '{"data": ["Who is the current US president?"]}'
```
### Audio Query:
```
# Upload audio
curl -F "files=@audio.mp3" https://archcoder-basic-app.hf.space/upload
# Query (replace path from upload response)
curl -X POST https://archcoder-basic-app.hf.space/call/audio_query \\
-H "Content-Type: application/json" \\
-d '{"data": [{"path": "/tmp/gradio/YOUR_FILE.mp3"}]}'
```
""")
    gr.Markdown("""
---
### 🎯 System Specs
- **Search:** DuckDuckGo (FREE, unlimited, no API key!)
- **Transcription:** Whisper-tiny (optimized for speed)
- **LLM:** Qwen2.5-0.5B Q4 (fast factual answers)
- **Target:** Sub-3s total response time
🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
""")
if __name__ == "__main__":
    demo.queue(max_size=5)  # Limit queue for consistent performance
    demo.launch()