"""Gradio demo: speech or text question -> web search -> short LLM answer.

Pipeline: faster-whisper ("tiny") transcribes audio, DuckDuckGo supplies
context snippets, and Qwen2.5-0.5B-Instruct generates the answer on CPU.
The models are loaded at import time.
"""

import time

import gradio as gr
import torch
from duckduckgo_search import DDGS
from faster_whisper import WhisperModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Latency thresholds (seconds) for the coloured badge in the timing line.
FAST_THRESHOLD_S = 3.0
SLOW_THRESHOLD_S = 3.5

# Initialize models
print("Loading Whisper model...")
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")

print("Loading LLM...")
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

# Initialize DuckDuckGo Search
ddgs = DDGS(timeout=3)


def _format_results(results, max_results):
    """Render at most ``max_results`` search hits as a numbered context string."""
    entries = []
    # Iterate instead of slicing: ``results`` may be a list, a generator or
    # None depending on the installed duckduckgo_search version.
    for i, result in enumerate(results or [], 1):
        if i > max_results:
            break
        title = result.get("title", "")
        body = result.get("body", "")
        entries.append(f"\n[{i}] {title}\n{body}\n")
    context = "".join(entries).strip()
    return context if context else "No search results found."


def search_web(query, max_results=2):
    """Perform web search using DuckDuckGo (FREE & UNLIMITED).

    Args:
        query: Search keywords.
        max_results: Maximum number of hits to include in the context.

    Returns:
        A string of numbered ``[i] title`` / body entries,
        ``"No search results found."`` when there are no hits, or
        ``"Search failed: <error>"`` when the search raised. This function
        never raises; the search is best-effort.
    """
    try:
        results = ddgs.text(
            keywords=query,
            region="wt-wt",
            safesearch="moderate",
            timelimit="m",
            max_results=max_results,
        )
        # Formatting stays inside the try block: a lazily evaluated result
        # set can still raise while it is being consumed.
        return _format_results(results, max_results)
    except Exception as e:
        return f"Search failed: {e}"


def _format_timing(transcription_time, search_time, llm_time, total_time):
    """Build the markdown timing line, prefixed with a latency badge."""
    if total_time < FAST_THRESHOLD_S:
        time_emoji = "🟢"
    elif total_time < SLOW_THRESHOLD_S:
        time_emoji = "🟡"
    else:
        time_emoji = "🔴"
    return (
        f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | "
        f"Search={search_time:.2f}s | LLM={llm_time:.2f}s | "
        f"**Total={total_time:.2f}s**"
    )


def process_audio(audio_path, question_text=None):
    """Main pipeline: audio -> text -> search -> answer.

    Args:
        audio_path: Path to an audio file to transcribe. When falsy,
            ``question_text`` is used as the question instead.
        question_text: Question typed by the user (used only when no audio
            is given).

    Returns:
        A ``(message, total_seconds)`` tuple. ``message`` is the answer
        followed by a timing line. On a transcription error or empty input
        it is an error message and ``total_seconds`` is ``0.0``. An LLM
        error is reported inside ``message`` with the timing still appended.
    """
    # perf_counter is monotonic, so durations are immune to wall-clock jumps.
    start_time = time.perf_counter()

    # Step 1: Transcribe audio if provided
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(
                audio_path, language="en", beam_size=1
            )
            # ``segments`` is consumed here, so decoding errors surface inside
            # this try block. Strip each piece to avoid doubled spaces.
            question = " ".join(seg.text.strip() for seg in segments)
        except Exception as e:
            return f"❌ Transcription error: {e}", 0.0
    else:
        question = question_text

    # Normalise once so search and the prompt never see stray whitespace.
    question = (question or "").strip()
    if not question:
        return "❌ No input provided", 0.0

    transcription_time = time.perf_counter() - start_time

    # Step 2: Web search
    search_start = time.perf_counter()
    search_results = search_web(question, max_results=2)
    search_time = time.perf_counter() - search_start

    # Step 3: Generate answer with LLM
    llm_start = time.perf_counter()
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant. "
                "Answer questions briefly using the provided context."
            ),
        },
        {
            "role": "user",
            "content": (
                f"Context:\n{search_results}\n\nQuestion: {question}\n\nAnswer:"
            ),
        },
    ]

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer([text], return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=120,
                temperature=0.2,
                do_sample=True,
                top_p=0.85,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Drop the prompt tokens; decode only the newly generated tail.
        prompt_length = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        answer = response.strip()
    except Exception as e:
        answer = f"❌ LLM error: {e}"

    llm_time = time.perf_counter() - llm_start
    total_time = time.perf_counter() - start_time

    timing_info = _format_timing(
        transcription_time, search_time, llm_time, total_time
    )
    return answer + timing_info, total_time


# Create Gradio interface
with gr.Blocks(
    title="Fast Q&A - No Building Required!", theme=gr.themes.Soft()
) as demo:
    gr.Markdown("""
    # ⚡ Ultra-Fast Political Q&A System
    **No wheel building** - Fast deployment with transformers!

    **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (FREE unlimited search)
    """)

    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio",
                )
                audio_submit = gr.Button(
                    "🚀 Submit Audio", variant="primary", size="lg"
                )
            with gr.Column():
                audio_output = gr.Textbox(
                    label="Answer", lines=8, show_copy_button=True
                )
                audio_time = gr.Number(
                    label="Response Time (seconds)", precision=2
                )

        audio_submit.click(
            fn=lambda x: process_audio(x, None),
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query",
        )

    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3,
                )
                text_submit = gr.Button(
                    "🚀 Submit Text", variant="primary", size="lg"
                )
            with gr.Column():
                text_output = gr.Textbox(
                    label="Answer", lines=8, show_copy_button=True
                )
                text_time = gr.Number(
                    label="Response Time (seconds)", precision=2
                )

        text_submit.click(
            fn=lambda x: process_audio(None, x),
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query",
        )

        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of UK?"],
            ],
            inputs=text_input,
        )

    gr.Markdown("""
    ---
    🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
    """)

if __name__ == "__main__":
    demo.queue(max_size=5)
    demo.launch()