import gradio as gr
from faster_whisper import WhisperModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from duckduckgo_search import DDGS
import time
import torch
import base64
import tempfile
import os

# Initialize models
print("Loading Whisper model...")
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")

print("Loading LLM...")
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True
)

# Initialize DuckDuckGo Search
ddgs = DDGS(timeout=3)


def search_web(query, max_results=2):
    """Perform a web search using DuckDuckGo."""
    try:
        # list() guards against duckduckgo_search versions that return a
        # generator, which would not support the slicing below
        results = list(ddgs.text(
            keywords=query,
            region='wt-wt',
            safesearch='moderate',
            timelimit='m',  # restrict to the past month for fresher answers
            max_results=max_results
        ))
        context = ""
        for i, result in enumerate(results[:max_results], 1):
            title = result.get('title', '')
            body = result.get('body', '')
            context += f"\n[{i}] {title}\n{body}\n"
        return context.strip() if context else "No search results found."
    except Exception as e:
        return f"Search failed: {str(e)}"


def transcribe_audio_base64(audio_base64):
    """Transcribe audio from a base64 string (for the Pluely STT endpoint)."""
    try:
        # Decode base64 audio
        audio_bytes = base64.b64decode(audio_base64)

        # Save to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_bytes)
            temp_path = temp_audio.name

        # Transcribe
        segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
        transcription = " ".join(seg.text for seg in segments)

        # Cleanup
        os.unlink(temp_path)

        return {"text": transcription.strip()}
    except Exception as e:
        return {"error": f"Transcription failed: {str(e)}"}
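
# A minimal, optional sanity check for the STT helper above. This is a
# sketch that is never called by the app; "sample.wav" is a hypothetical
# local file (a short mono WAV) you would supply yourself.
def _example_stt_roundtrip(wav_path="sample.wav"):
    with open(wav_path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    print(transcribe_audio_base64(audio_b64))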

def generate_answer(text_input, context=None):
    """Generate an answer from text input (for the Pluely AI endpoint).

    If `context` is None, a web search is performed here; callers that have
    already searched (e.g. process_audio) can pass their results in to avoid
    running the same search twice.
    """
    try:
        if not text_input or text_input.strip() == "":
            return "No input provided"

        # Web search (skipped when the caller supplies context)
        if context is None:
            context = search_web(text_input, max_results=2)

        # Generate answer
        messages = [
            {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {text_input}\n\nAnswer:"}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = tokenizer([text], return_tensors="pt").to("cpu")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=80,
                temperature=0.2,
                do_sample=True,
                top_p=0.85,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens, not the prompt
        response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        return f"Error: {str(e)}"


def process_audio(audio_path, question_text=None):
    """Main pipeline for the Gradio UI."""
    start_time = time.time()

    # Step 1: Transcribe audio if provided
    if audio_path:
        try:
            segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
            question = " ".join(seg.text for seg in segments)
        except Exception as e:
            return f"❌ Transcription error: {str(e)}", 0.0
    else:
        question = question_text

    if not question or question.strip() == "":
        return "❌ No input provided", 0.0

    transcription_time = time.time() - start_time

    # Step 2: Web search
    search_start = time.time()
    search_results = search_web(question, max_results=2)
    search_time = time.time() - search_start

    # Step 3: Generate answer (reusing the search results from step 2)
    llm_start = time.time()
    answer = generate_answer(question, context=search_results)
    llm_time = time.time() - llm_start

    total_time = time.time() - start_time
    time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 3.5 else "🔴"
    timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"

    return answer + timing_info, total_time
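
# A quick usage sketch for the pipeline above. It is never called by the
# app; the question string is only an illustration.
def _example_text_query():
    answer, seconds = process_audio(None, "Who is the prime minister of the UK?")
    print(f"Answered in {seconds:.2f}s:\n{answer}")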

# Create Gradio interface with API endpoints
with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # ⚡ Ultra-Fast Political Q&A System

    **Pluely Compatible** - Direct STT and AI endpoints available!

    **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (FREE unlimited search)
    """)

    with gr.Tab("🎙️ Audio Input"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Record or upload audio"
                )
                audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
            with gr.Column():
                audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
                audio_time = gr.Number(label="Response Time (seconds)", precision=2)

        audio_submit.click(
            fn=lambda x: process_audio(x, None),
            inputs=[audio_input],
            outputs=[audio_output, audio_time],
            api_name="audio_query"
        )

    with gr.Tab("✍️ Text Input"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Type your question",
                    placeholder="Who is the current US president?",
                    lines=3
                )
                text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
            with gr.Column():
                text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
                text_time = gr.Number(label="Response Time (seconds)", precision=2)

        text_submit.click(
            fn=lambda x: process_audio(None, x),
            inputs=[text_input],
            outputs=[text_output, text_time],
            api_name="text_query"
        )

        gr.Examples(
            examples=[
                ["Who won the 2024 US presidential election?"],
                ["What is the current inflation rate in India?"],
                ["Who is the prime minister of the UK?"]
            ],
            inputs=text_input
        )

    # Documentation for the dedicated Pluely endpoints
    with gr.Tab("🔌 Pluely Integration"):
        gr.Markdown("""
        ## Dedicated Endpoints for Pluely

        ### 1. STT Endpoint (Audio Transcription)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["BASE64_AUDIO_DATA"]}'
        ```
        **Returns:** `{"data": [{"text": "transcribed text"}]}`

        ### 2. AI Endpoint (Text to Answer)
        ```
        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
          -H "Content-Type: application/json" \\
          -d '{"data": ["Your question here"]}'
        ```
        **Returns:** `{"data": ["Answer text"]}`

        ---

        ## Pluely Configuration

        ### Custom STT Provider:
        **Curl Command:**
        ```
        curl --location 'https://archcoder-basic-app.hf.space/call/transcribe_stt' \\
          --header 'Content-Type: application/json' \\
          --data '{"data": ["{{AUDIO_BASE64}}"]}'
        ```
        **Response Content Path:** `data[0].text`

        ### Custom AI Provider:
        **Curl Command:**
        ```
        curl --location 'https://archcoder-basic-app.hf.space/call/answer_ai' \\
          --header 'Content-Type: application/json' \\
          --data '{"data": ["{{TEXT}}"]}'
        ```
        **Response Content Path:** `data[0]`
        """)

    gr.Markdown("""
    ---
    🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
    """)

    # Register dedicated, UI-less API endpoints for Pluely. gr.api is
    # available in Gradio 5+ and requires full type hints on the function
    # signature to build the API schema.

    # STT endpoint for Pluely
    def api_transcribe(audio_base64: str) -> dict:
        """API endpoint for audio transcription (Pluely STT)."""
        return transcribe_audio_base64(audio_base64)

    # AI endpoint for Pluely
    def api_answer(text: str) -> str:
        """API endpoint for text-to-answer (Pluely AI)."""
        return generate_answer(text)

    gr.api(api_transcribe, api_name="transcribe_stt")
    gr.api(api_answer, api_name="answer_ai")


if __name__ == "__main__":
    demo.queue(max_size=5)
    demo.launch()
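
# A sketch of calling the deployed endpoints from Python instead of curl,
# using gradio_client (pip install gradio_client). The Space URL is the one
# documented in the Pluely tab; adjust it to wherever the app is hosted.
# Never called by this script.
def _example_remote_client():
    from gradio_client import Client

    client = Client("https://archcoder-basic-app.hf.space")

    # AI endpoint: question string in, answer string out
    print(client.predict("Who won the 2024 US presidential election?",
                         api_name="/answer_ai"))

    # STT endpoint: base64-encoded WAV in, {"text": ...} dict out
    with open("question.wav", "rb") as f:  # hypothetical local recording
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    print(client.predict(audio_b64, api_name="/transcribe_stt"))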