# basic_app/app.py
# Uploaded by ArchCoder; commit 7ac8cfa ("Update app.py"), verified; 6.94 kB.
import gradio as gr
from faster_whisper import WhisperModel
from llama_cpp import Llama
import requests
import os
import time
# Load both models once at import time so every request reuses them.

# Speech-to-text model.
print("Loading Whisper model...")
whisper_model = WhisperModel(
    "tiny",
    device="cpu",
    compute_type="int8",
)

# Text-generation model, fetched by repository id and GGUF file name.
print("Loading LLM...")
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="qwen2.5-0.5b-instruct-q4_k_m.gguf",
    n_ctx=2048,
    n_threads=4,
    verbose=False,
)

# Brave Search credential; an empty string means "not configured".
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
def search_web(query, max_results=3, timeout=2):
    """Query the Brave Search API and format the top hits as plain text.

    Args:
        query: Search string sent as the ``q`` request parameter.
        max_results: Maximum number of results to request and to format.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string of numbered ``[i] title`` / description entries. When the
        API key is missing, the request fails, or nothing is found, a
        human-readable message is returned instead; errors are reported in
        the returned text rather than raised.
    """
    if not BRAVE_API_KEY:
        return "⚠️ Brave API key not configured. Add it in Space Settings."

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_API_KEY,
    }
    params = {
        "q": query,
        "count": max_results,
    }

    try:
        response = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers=headers,
            params=params,
            timeout=timeout,
        )
        if response.status_code != 200:
            return f"Search error: {response.status_code}"

        data = response.json()
        # "web" / "results" may be absent or null; treat both as "no results"
        # instead of failing on None.get(...).
        results = (data.get("web") or {}).get("results") or []

        # `or ""` keeps a null title/description from rendering as "None".
        entries = [
            f"\n[{i}] {result.get('title') or ''}\n{result.get('description') or ''}\n"
            for i, result in enumerate(results[:max_results], 1)
        ]
        context = "".join(entries)
        return context.strip() if context else "No search results found."
    except Exception as e:
        # Deliberate best-effort boundary: a failed search must not crash
        # the request, so the failure is reported as text.
        return f"Search failed: {str(e)}"
def process_audio(audio_path, question_text=None):
    """Answer a spoken or typed question: audio -> text -> search -> LLM.

    Args:
        audio_path: Path of an audio file to transcribe. When falsy,
            ``question_text`` is used as the question instead.
        question_text: The question as plain text; ignored when
            ``audio_path`` is given.

    Returns:
        A ``(message, seconds)`` tuple. On success ``message`` is the LLM
        answer followed by a timing footer and ``seconds`` is the total
        elapsed time. On a transcription error or empty input, ``message``
        describes the problem and ``seconds`` is ``0.0``. An LLM failure is
        reported inside ``message`` and still carries the timing footer.
    """
    # perf_counter is monotonic, so the measured durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    # Step 1: Transcribe audio if provided
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en")
            # Strip each segment so the join does not yield doubled or
            # leading spaces (segment texts may carry their own padding).
            # Kept inside the try because consuming `segments` may itself
            # raise.
            question = " ".join(seg.text.strip() for seg in segments)
        except Exception as e:
            return f"Transcription error: {str(e)}", 0.0
    else:
        question = question_text

    # Normalise once so the search query and the prompt never receive
    # surrounding whitespace; None and blank input are treated alike.
    question = (question or "").strip()
    if not question:
        return "❌ No input provided", 0.0
    transcription_time = time.perf_counter() - start_time

    # Step 2: Web search for current info
    search_start = time.perf_counter()
    search_results = search_web(question)
    search_time = time.perf_counter() - search_start

    # Step 3: Generate answer with LLM
    llm_start = time.perf_counter()
    prompt = f"""You are a helpful assistant. Answer the question briefly based on the context below.
Context from web search:
{search_results}
Question: {question}
Answer (be concise and accurate):"""
    try:
        response = llm(
            prompt,
            max_tokens=150,
            temperature=0.3,
            top_p=0.9,
            # Stop before the model starts inventing a follow-up question.
            stop=["Question:", "\n\n\n"],
            echo=False,
        )
        answer = response['choices'][0]['text'].strip()
    except Exception as e:
        # Best-effort: surface the failure as the answer text so the UI
        # still shows something along with the timings.
        answer = f"LLM error: {str(e)}"
    llm_time = time.perf_counter() - llm_start

    total_time = time.perf_counter() - start_time
    timing_info = f"\n\n⏱️ **Timing:** Transcription={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
    return answer + timing_info, total_time
# Create Gradio interface
# Two tabs (audio and text) feed the same process_audio pipeline; each tab
# shows the answer text and the total response time.
# NOTE(review): the nesting below is reconstructed from a listing that lost
# its indentation — confirm the layout (especially where gr.Examples sits)
# against the deployed app.
with gr.Blocks(title="Fast Q&A with Web Search", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
# 🎤 Fast Political Q&A System
Ask questions via audio or text. Get web-grounded answers in ~3 seconds!
**Features:** Whisper-tiny + Qwen2.5-0.5B + Brave Search API
""")
    # --- Audio tab: record or upload, then transcribe + answer ---
    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                # type="filepath" so the handler receives a path that can be
                # passed straight to process_audio as audio_path.
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio"
                )
                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
            with gr.Column():
                audio_output = gr.Textbox(
                    label="Answer",
                    lines=8,
                    show_copy_button=True
                )
                audio_time = gr.Number(label="Response Time (seconds)", precision=2)
        # The two outputs match process_audio's (message, seconds) return.
        audio_submit.click(
            fn=lambda x: process_audio(x, None),
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query"
        )
    # --- Text tab: typed question, no transcription step ---
    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3
                )
                text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
            with gr.Column():
                text_output = gr.Textbox(
                    label="Answer",
                    lines=8,
                    show_copy_button=True
                )
                text_time = gr.Number(label="Response Time (seconds)", precision=2)
        # audio_path is None here, so process_audio uses the typed text.
        text_submit.click(
            fn=lambda x: process_audio(None, x),
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query"
        )
        # Sample questions that populate the text box.
        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of UK?"]
            ],
            inputs=text_input
        )
    # Collapsed-by-default curl instructions for the two named endpoints.
    # NOTE(review): these examples only show the initial POST; confirm
    # against the installed Gradio version whether a follow-up request is
    # needed to read the result, and whether the URL prefix is still valid.
    with gr.Accordion("📡 API Usage", open=False):
        gr.Markdown("""
### Using curl to query this endpoint:
**Text Query:**
```
curl -X POST https://archcoder-basic-app.hf.space/call/text_query \\
-H "Content-Type: application/json" \\
-d '{"data": ["Who is the current US president?"]}'
```
**Audio Query:**
```
# 1. Upload audio file
curl -F "files=@audio.mp3" https://archcoder-basic-app.hf.space/upload
# 2. Query with returned path
curl -X POST https://archcoder-basic-app.hf.space/call/audio_query \\
-H "Content-Type: application/json" \\
-d '{"data": [{"path": "/tmp/gradio/audio.mp3"}]}'
```
""")
    # Footer note.
    gr.Markdown("""
---
**Note:** This Space uses free-tier resources. For production use, consider upgrading to a persistent Space.
""")
if __name__ == "__main__":
    # Turn on request queuing, then start serving the interface.
    demo.queue().launch()