Spaces:

ArchCoder
/

basic_app

Sleeping

App Files Community

ArchCoder committed on Oct 11, 2025

Commit

a971d1c

verified ·

1 Parent(s): f81cf03

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -69

app.py CHANGED Viewed

@@ -1,20 +1,20 @@
 import gradio as gr
 from faster_whisper import WhisperModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from duckduckgo_search import DDGS
 import time
 import torch
 import base64
 import tempfile
 import os
-from threading import Thread
 # Initialize models
 print("Loading Whisper model...")
 whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
 print("Loading LLM...")
-model_name = "Qwen/Qwen2.5-0.5B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
@@ -26,7 +26,7 @@ model = AutoModelForCausalLM.from_pretrained(
 # Initialize DuckDuckGo Search
 ddgs = DDGS(timeout=3)
-def search_web(query, max_results=2):
     """Perform web search using DuckDuckGo"""
     try:
         results = ddgs.text(
@@ -41,7 +41,7 @@ def search_web(query, max_results=2):
         for i, result in enumerate(results[:max_results], 1):
             title = result.get('title', '')
             body = result.get('body', '')
-            context += f"\n[{i}] {title}\n{body}\n"
         return context.strip() if context else "No search results found."
@@ -67,18 +67,41 @@ def transcribe_audio_base64(audio_base64):
     except Exception as e:
         return {"error": f"Transcription failed: {str(e)}"}
-def generate_answer_stream(text_input):
-    """Generate streaming answer from text input"""
     try:
         if not text_input or text_input.strip() == "":
-            yield "No input provided"
-            return
-        search_results = search_web(text_input, max_results=2)
         messages = [
-            {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
-            {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
         ]
         text = tokenizer.apply_chat_template(
@@ -88,32 +111,26 @@ def generate_answer_stream(text_input):
         )
         inputs = tokenizer([text], return_tensors="pt").to("cpu")
-        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-        generation_kwargs = dict(
-            inputs=inputs['input_ids'],
-            attention_mask=inputs['attention_mask'],
-            max_new_tokens=80,
-            temperature=0.2,
-            do_sample=True,
-            top_p=0.85,
-            pad_token_id=tokenizer.eos_token_id,
-            streamer=streamer
-        )
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
-        generated_text = ""
-        for new_text in streamer:
-            generated_text += new_text
-            yield generated_text
     except Exception as e:
-        yield f"Error: {str(e)}"
-def process_audio_stream(audio_path, question_text):
-    """Streaming pipeline that yields tuples"""
     start_time = time.time()
     # Transcribe if audio provided
@@ -122,48 +139,48 @@ def process_audio_stream(audio_path, question_text):
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
             question = " ".join([seg.text for seg in segments])
         except Exception as e:
-            yield f"❌ Transcription error: {str(e)}", 0.0
-            return
     else:
         question = question_text
     if not question or question.strip() == "":
-        yield "❌ No input provided", 0.0
-        return
     transcription_time = time.time() - start_time
     # Web search
     search_start = time.time()
-    search_results = search_web(question, max_results=2)
     search_time = time.time() - search_start
-    # Stream answer
     llm_start = time.time()
-    for partial_answer in generate_answer_stream(question):
-        current_time = time.time() - start_time
-        time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
-        timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
-        yield partial_answer + timing_info, current_time
-# Wrapper functions for proper API handling
 def audio_handler(audio_path):
     """Wrapper for audio input"""
-    for result in process_audio_stream(audio_path, None):
-        yield result
 def text_handler(text_input):
     """Wrapper for text input"""
-    for result in process_audio_stream(None, text_input):
-        yield result
-# Create Gradio interface
-with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # ⚡ Ultra-Fast Political Q&A System
-    **Streaming enabled** for instant feedback! Pluely compatible endpoints available.
-    **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo + Real-time streaming
     """)
     with gr.Tab("🎙️ Audio Input"):
@@ -177,14 +194,14 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
                 audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
             with gr.Column():
-                audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
         audio_submit.click(
             fn=audio_handler,
             inputs=[audio_input],
             outputs=[audio_output, audio_time],
-            api_name="audio_query_stream"
         )
     with gr.Tab("✍️ Text Input"):
@@ -198,21 +215,22 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
                 text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
             with gr.Column():
-                text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
         text_submit.click(
             fn=text_handler,
             inputs=[text_input],
             outputs=[text_output, text_time],
-            api_name="text_query_stream"
         )
         gr.Examples(
             examples=[
                 ["Who won the 2024 US presidential election?"],
                 ["What is the current inflation rate in India?"],
-                ["Who is the prime minister of UK?"]
             ],
             inputs=text_input
         )
@@ -220,21 +238,23 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
     # API endpoints for Pluely
     with gr.Tab("🔌 Pluely Integration"):
         gr.Markdown("""
-        ## Dedicated Endpoints for Pluely
-        ### 1. STT Endpoint (Audio Transcription)
         ```
         curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["BASE64_AUDIO_DATA"]}'
         ```
-        ### 2. AI Endpoint - Streaming
         ```
-        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai_stream \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
         ## Pluely Configuration
@@ -246,9 +266,9 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
         ### Custom AI Provider:
         ```
-        curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
         ```
-        **Response Path:** `data` | **Streaming:** ON ✅
         """)
         # Hidden components for API endpoints
@@ -268,16 +288,20 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
         ai_btn = gr.Button("AI", visible=False)
         ai_btn.click(
-            fn=generate_answer_stream,
             inputs=[ai_input],
             outputs=[ai_output],
-            api_name="answer_ai_stream"
         )
     gr.Markdown("""
     ---
-    🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
-    """)
 if __name__ == "__main__":
     demo.queue(max_size=5)

 import gradio as gr
 from faster_whisper import WhisperModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from duckduckgo_search import DDGS
 import time
 import torch
 import base64
 import tempfile
 import os
+from datetime import datetime
 # Initialize models
 print("Loading Whisper model...")
 whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
 print("Loading LLM...")
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # Upgraded to 1.5B for better quality
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
 # Initialize DuckDuckGo Search
 ddgs = DDGS(timeout=3)
+def search_web(query, max_results=3):
     """Perform web search using DuckDuckGo"""
     try:
         results = ddgs.text(
         for i, result in enumerate(results[:max_results], 1):
             title = result.get('title', '')
             body = result.get('body', '')
+            context += f"\n[Source {i}] {title}\n{body}\n"
         return context.strip() if context else "No search results found."
     except Exception as e:
         return {"error": f"Transcription failed: {str(e)}"}
+def generate_answer(text_input):
+    """Generate complete answer with context"""
     try:
         if not text_input or text_input.strip() == "":
+            return "No input provided"
+        # Get current date for context
+        current_date = datetime.now().strftime("%B %d, %Y")
+        # Web search for current information
+        search_results = search_web(text_input, max_results=3)
+        # Enhanced prompt for comprehensive responses
         messages = [
+            {"role": "system", "content": f"""You are a knowledgeable assistant providing comprehensive, well-researched answers. Today's date is {current_date}.
+When answering:
+1. Provide the direct answer first
+2. Add relevant context and background information
+3. Include recent developments or current status when applicable
+4. Be informative but concise (150-200 words)
+5. Use the web search results to ensure accuracy and currency"""},
+            {"role": "user", "content": f"""Based on these current web search results:
+{search_results}
+Question: {text_input}
+Provide a comprehensive answer that includes:
+- Direct answer to the question
+- Relevant context and background
+- Recent developments (as of {current_date})
+- Key points the user should know
+Answer:"""}
         ]
         text = tokenizer.apply_chat_template(
         )
         inputs = tokenizer([text], return_tensors="pt").to("cpu")
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=250,  # Increased from 80 to 250
+                temperature=0.3,     # Slightly higher for more natural responses
+                do_sample=True,
+                top_p=0.9,
+                repetition_penalty=1.1,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+        return response.strip()
     except Exception as e:
+        return f"Error: {str(e)}"
+def process_audio(audio_path, question_text):
+    """Main pipeline - returns tuple (answer, time)"""
     start_time = time.time()
     # Transcribe if audio provided
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
             question = " ".join([seg.text for seg in segments])
         except Exception as e:
+            return f"❌ Transcription error: {str(e)}", 0.0
     else:
         question = question_text
     if not question or question.strip() == "":
+        return "❌ No input provided", 0.0
     transcription_time = time.time() - start_time
     # Web search
     search_start = time.time()
+    search_results = search_web(question, max_results=3)
     search_time = time.time() - search_start
+    # Generate answer
     llm_start = time.time()
+    answer = generate_answer(question)
+    llm_time = time.time() - llm_start
+    total_time = time.time() - start_time
+    time_emoji = "🟢" if total_time < 5.0 else "🟡" if total_time < 7.0 else "🔴"
+    timing_info = f"\n\n{time_emoji} **Performance:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
+    return answer + timing_info, total_time
+# Wrapper functions
 def audio_handler(audio_path):
     """Wrapper for audio input"""
+    return process_audio(audio_path, None)
 def text_handler(text_input):
     """Wrapper for text input"""
+    return process_audio(None, text_input)
+# Gradio interface
+with gr.Blocks(title="Enhanced Political Q&A", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🎯 Enhanced Political Q&A System
+    **Comprehensive answers with context** - Powered by Qwen2.5-1.5B
+    **Features:** Whisper-tiny + Qwen2.5-1.5B + DuckDuckGo + Rich contextual responses
     """)
     with gr.Tab("🎙️ Audio Input"):
                 audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
             with gr.Column():
+                audio_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
         audio_submit.click(
             fn=audio_handler,
             inputs=[audio_input],
             outputs=[audio_output, audio_time],
+            api_name="audio_query"
         )
     with gr.Tab("✍️ Text Input"):
                 text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
             with gr.Column():
+                text_output = gr.Textbox(label="Comprehensive Answer", lines=12, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
         text_submit.click(
             fn=text_handler,
             inputs=[text_input],
             outputs=[text_output, text_time],
+            api_name="text_query"
         )
         gr.Examples(
             examples=[
                 ["Who won the 2024 US presidential election?"],
                 ["What is the current inflation rate in India?"],
+                ["Who is the prime minister of UK and what are their key policies?"],
+                ["Explain the latest developments in AI regulation"]
             ],
             inputs=text_input
         )
     # API endpoints for Pluely
     with gr.Tab("🔌 Pluely Integration"):
         gr.Markdown("""
+        ## API Endpoints for Pluely
+        ### STT Endpoint (Audio Transcription)
         ```
         curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["BASE64_AUDIO_DATA"]}'
         ```
+        **Response:** `{"data": [{"text": "transcribed text"}]}`
+        ### AI Endpoint (Enhanced Responses)
         ```
+        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
+        **Response:** `{"data": ["Comprehensive answer with context"]}`
         ## Pluely Configuration
         ### Custom AI Provider:
         ```
+        curl https://archcoder-basic-app.hf.space/call/answer_ai -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
         ```
+        **Response Path:** `data[0]` | **Streaming:** OFF
         """)
         # Hidden components for API endpoints
         ai_btn = gr.Button("AI", visible=False)
         ai_btn.click(
+            fn=generate_answer,
             inputs=[ai_input],
             outputs=[ai_output],
+            api_name="answer_ai"
         )
     gr.Markdown("""
     ---
+    **Model:** Qwen2.5-1.5B-Instruct (3x larger for better answers)
+    **Output:** 150-200 words with context and background
+    **Date-aware:** Responses reference current date ({})
+    🟢 = Under 5s | 🟡 = 5-7s | 🔴 = Over 7s
+    """.format(datetime.now().strftime("%B %d, %Y")))
 if __name__ == "__main__":
     demo.queue(max_size=5)