"""Gradio app: speech/text question answering with web-search grounding.

Pipeline: (optional) Whisper transcription -> DuckDuckGo search -> Qwen2.5
instruct model. Also exposes two hidden API endpoints (``transcribe_stt`` and
``answer_ai``) intended for the Pluely client.
"""

import base64
import os
import tempfile
import time
from datetime import datetime

import gradio as gr
import torch
from duckduckgo_search import DDGS
from faster_whisper import WhisperModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Configuration -----------------------------------------------------------
WHISPER_MODEL_SIZE = "tiny"
LLM_MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
MAX_SEARCH_RESULTS = 3
MAX_NEW_TOKENS = 250  # enough room for the 150-200 word answers the prompt asks for
FAST_THRESHOLD_S = 5.0  # below this the response is flagged green
SLOW_THRESHOLD_S = 7.0  # at or above this the response is flagged red
DATE_FORMAT = "%B %d, %Y"

# --- Model initialisation (runs once at import time) ------------------------
print("Loading Whisper model...")
whisper_model = WhisperModel(WHISPER_MODEL_SIZE, device="cpu", compute_type="int8")

print("Loading LLM...")
model_name = LLM_MODEL_NAME
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

# Shared DuckDuckGo client; short timeout keeps the UI responsive.
ddgs = DDGS(timeout=3)


def search_web(query, max_results=3):
    """Search DuckDuckGo and return the hits formatted as prompt context.

    Args:
        query: Search keywords.
        max_results: Maximum number of hits to include.

    Returns:
        A string of ``[Source i] title / body`` entries,
        ``"No search results found."`` when there are no hits, or
        ``"Search failed: <reason>"`` if the search raised. Never raises.
    """
    try:
        results = ddgs.text(
            keywords=query,
            region='wt-wt',
            safesearch='moderate',
            timelimit='m',
            max_results=max_results,
        )
        # Materialise first so slicing works whether the client returned a
        # list, a generator, or None.
        hits = list(results or [])[:max_results]
        context = "".join(
            f"\n[Source {i}] {hit.get('title', '')}\n{hit.get('body', '')}\n"
            for i, hit in enumerate(hits, 1)
        )
        return context.strip() if context else "No search results found."
    except Exception as e:
        return f"Search failed: {str(e)}"


def transcribe_audio_base64(audio_base64):
    """Transcribe base64-encoded audio (Pluely STT endpoint).

    Args:
        audio_base64: Base64 string of the audio file contents.

    Returns:
        ``{"text": <transcription>}`` on success, otherwise
        ``{"error": "Transcription failed: <reason>"}``. Never raises.
    """
    temp_path = None
    try:
        audio_bytes = base64.b64decode(audio_base64)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            # Record the path before writing so cleanup runs even if the write fails.
            temp_path = temp_audio.name
            temp_audio.write(audio_bytes)

        segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
        # Joining consumes the segments while the temp file still exists.
        transcription = " ".join(seg.text for seg in segments)
        return {"text": transcription.strip()}
    except Exception as e:
        return {"error": f"Transcription failed: {str(e)}"}
    finally:
        # Always remove the temp file, including on failure paths.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone


def _build_messages(question, search_results, current_date):
    """Build the chat messages (system + user) for the LLM prompt."""
    system_prompt = f"""You are a knowledgeable assistant providing comprehensive, well-researched answers.
Today's date is {current_date}.

When answering:
1. Provide the direct answer first
2. Add relevant context and background information
3. Include recent developments or current status when applicable
4. Be informative but concise (150-200 words)
5. Use the web search results to ensure accuracy and currency"""

    user_prompt = f"""Based on these current web search results:
{search_results}

Question: {question}

Provide a comprehensive answer that includes:
- Direct answer to the question
- Relevant context and background
- Recent developments (as of {current_date})
- Key points the user should know

Answer:"""

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]


def _answer_with_context(question, search_results):
    """Run the LLM on ``question`` using already-fetched ``search_results``.

    Returns the generated answer, or ``"Error: <reason>"`` if generation
    fails. Never raises.
    """
    try:
        current_date = datetime.now().strftime(DATE_FORMAT)
        messages = _build_messages(question, search_results, current_date)

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer([text], return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=0.3,  # low temperature: mostly factual, slightly natural
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Drop the prompt tokens; decode only what the model generated.
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        return f"Error: {str(e)}"


def generate_answer(text_input):
    """Search the web for ``text_input`` and generate a grounded answer.

    Args:
        text_input: The user's question.

    Returns:
        The answer text, ``"No input provided"`` for empty input, or
        ``"Error: <reason>"`` on failure. Never raises.
    """
    try:
        if not text_input or text_input.strip() == "":
            return "No input provided"
        search_results = search_web(text_input, max_results=MAX_SEARCH_RESULTS)
    except Exception as e:
        return f"Error: {str(e)}"
    return _answer_with_context(text_input, search_results)


def _speed_indicator(total_time):
    """Map a total response time in seconds to a traffic-light emoji."""
    if total_time < FAST_THRESHOLD_S:
        return "🟢"
    if total_time < SLOW_THRESHOLD_S:
        return "🟡"
    return "🔴"


def process_audio(audio_path, question_text):
    """Main UI pipeline: transcribe (if audio), search, answer, and time it.

    Args:
        audio_path: Path to an audio file, or a falsy value to use text input.
        question_text: Typed question, used when no audio is provided.

    Returns:
        Tuple ``(answer_with_timing_footer, total_seconds)``. On transcription
        failure or empty input returns ``(error_message, 0.0)``.
    """
    start_time = time.time()

    # Transcribe if audio provided
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join(seg.text for seg in segments)
        except Exception as e:
            return f"❌ Transcription error: {str(e)}", 0.0
    else:
        question = question_text

    if not question or question.strip() == "":
        return "❌ No input provided", 0.0

    transcription_time = time.time() - start_time

    # Web search: done once here and reused for generation, so the search is
    # not repeated and the LLM timing below measures generation only.
    search_start = time.time()
    search_results = search_web(question, max_results=MAX_SEARCH_RESULTS)
    search_time = time.time() - search_start

    # Generate answer from the search results fetched above
    llm_start = time.time()
    answer = _answer_with_context(question, search_results)
    llm_time = time.time() - llm_start

    total_time = time.time() - start_time
    time_emoji = _speed_indicator(total_time)
    timing_info = (
        f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | "
        f"Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
    )

    return answer + timing_info, total_time


# Wrapper functions
def audio_handler(audio_path):
    """Gradio handler for the audio tab: run the pipeline on an audio file."""
    return process_audio(audio_path, None)


def text_handler(text_input):
    """Gradio handler for the text tab: run the pipeline on typed text."""
    return process_audio(None, text_input)


# Gradio interface
with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 Enhanced Political Q&A System
    **Comprehensive answers with context** - Powered by Qwen2.5-1.5B

    **Features:** Whisper-tiny + Qwen2.5-1.5B + DuckDuckGo + Rich contextual responses
    """)

    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio",
                )
                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
            with gr.Column():
                audio_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                audio_time = gr.Number(label="Response Time (seconds)", precision=2)

        audio_submit.click(
            fn=audio_handler,
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query",
        )

    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3,
                )
                text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
            with gr.Column():
                text_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                text_time = gr.Number(label="Response Time (seconds)", precision=2)

        text_submit.click(
            fn=text_handler,
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query",
        )

        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of UK and what are their key policies?"],
                ["Explain the latest developments in AI regulation"],
            ],
            inputs=text_input,
        )

    # API endpoints for Pluely
    with gr.Tab("🔌 Pluely Integration"):
        gr.Markdown("""
        ## API Endpoints for Pluely

        ### STT Endpoint (Audio Transcription)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["BASE64_AUDIO_DATA"]}'
        ```
        **Response:** `{"data": [{"text": "transcribed text"}]}`

        ### AI Endpoint (Enhanced Responses)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["Your question here"]}'
        ```
        **Response:** `{"data": ["Comprehensive answer with context"]}`

        ## Pluely Configuration

        ### Custom STT Provider:
        ```
        curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
        ```
        **Response Path:** `data[0].text` | **Streaming:** OFF

        ### Custom AI Provider:
        ```
        curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
        ```
        **Response Path:** `data[0]` | **Streaming:** OFF
        """)

    # Hidden components: they exist only so the click handlers below register
    # named API endpoints (transcribe_stt / answer_ai).
    with gr.Row(visible=False):
        stt_input = gr.Textbox()
        stt_output = gr.JSON()
        ai_input = gr.Textbox()
        ai_output = gr.Textbox()

    stt_btn = gr.Button("STT", visible=False)
    stt_btn.click(
        fn=transcribe_audio_base64,
        inputs=[stt_input],
        outputs=[stt_output],
        api_name="transcribe_stt",
    )

    ai_btn = gr.Button("AI", visible=False)
    ai_btn.click(
        fn=generate_answer,
        inputs=[ai_input],
        outputs=[ai_output],
        api_name="answer_ai",
    )

    # The date in this footer is fixed when the interface is built.
    gr.Markdown("""
    ---
    **Model:** Qwen2.5-1.5B-Instruct (3x larger for better answers)

    **Output:** 150-200 words with context and background

    **Date-aware:** Responses reference current date ({})

    🟢 = Under 5s | 🟡 = 5-7s | 🔴 = Over 7s
    """.format(datetime.now().strftime(DATE_FORMAT)))


if __name__ == "__main__":
    demo.queue(max_size=5)
    demo.launch()