Spaces:

ArchCoder
/

basic_app

Sleeping

App Files Files Community

ArchCoder commited on Oct 10, 2025

Commit

8dc383f

verified ·

1 Parent(s): 201d71d

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -39

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import gradio as gr
 from faster_whisper import WhisperModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
 from duckduckgo_search import DDGS
 import time
 import torch
 import base64
 import tempfile
 import os
 # Initialize models
 print("Loading Whisper model...")
@@ -70,8 +71,59 @@ def transcribe_audio_base64(audio_base64):
     except Exception as e:
         return {"error": f"Transcription failed: {str(e)}"}
 def generate_answer(text_input):
-    """Generate answer from text input (for Pluely AI endpoint)"""
     try:
         if not text_input or text_input.strip() == "":
             return "No input provided"
@@ -109,8 +161,8 @@ def generate_answer(text_input):
     except Exception as e:
         return f"Error: {str(e)}"
-def process_audio(audio_path, question_text=None):
-    """Main pipeline for Gradio UI"""
     start_time = time.time()
     # Step 1: Transcribe audio if provided
@@ -119,12 +171,14 @@ def process_audio(audio_path, question_text=None):
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
             question = " ".join([seg.text for seg in segments])
         except Exception as e:
-            return f"❌ Transcription error: {str(e)}", 0.0
     else:
         question = question_text
     if not question or question.strip() == "":
-        return "❌ No input provided", 0.0
     transcription_time = time.time() - start_time
@@ -133,24 +187,21 @@ def process_audio(audio_path, question_text=None):
     search_results = search_web(question, max_results=2)
     search_time = time.time() - search_start
-    # Step 3: Generate answer
     llm_start = time.time()
-    answer = generate_answer(question)
-    llm_time = time.time() - llm_start
-    total_time = time.time() - start_time
-    time_emoji = "🟢" if total_time < 3.0 else "🟡" if total_time < 3.5 else "🔴"
-    timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={llm_time:.2f}s | **Total={total_time:.2f}s**"
-    return answer + timing_info, total_time
-# Create Gradio interface with API endpoints
-with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # ⚡ Ultra-Fast Political Q&A System
-    **Pluely Compatible** - Direct STT and AI endpoints available!
-    **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo (FREE unlimited search)
     """)
     with gr.Tab("🎙️ Audio Input"):
@@ -164,14 +215,14 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
                 audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
             with gr.Column():
-                audio_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
         audio_submit.click(
-            fn=lambda x: process_audio(x, None),
             inputs=[audio_input],
             outputs=[audio_output, audio_time],
-            api_name="audio_query"
         )
     with gr.Tab("✍️ Text Input"):
@@ -185,14 +236,14 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
                 text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
             with gr.Column():
-                text_output = gr.Textbox(label="Answer", lines=8, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
         text_submit.click(
-            fn=lambda x: process_audio(None, x),
             inputs=[text_input],
             outputs=[text_output, text_time],
-            api_name="text_query"
         )
         gr.Examples(
@@ -204,12 +255,12 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
             inputs=text_input
         )
-    # Hidden API endpoints for Pluely
     with gr.Tab("🔌 Pluely Integration"):
         gr.Markdown("""
         ## Dedicated Endpoints for Pluely
-        ### 1. STT Endpoint (Audio Transcription)
         ```
         curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
           -H "Content-Type: application/json" \\
@@ -217,13 +268,13 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
         ```
         **Returns:** `{"data": [{"text": "transcribed text"}]}`
-        ### 2. AI Endpoint (Text to Answer)
         ```
-        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
-        **Returns:** `{"data": ["Answer text"]}`
         ---
@@ -237,39 +288,51 @@ with gr.Blocks(title="Fast Q&A - Pluely Compatible", theme=gr.themes.Soft()) as
           --data '{"data": ["{{AUDIO_BASE64}}"]}'
         ```
         **Response Content Path:** `data[0].text`
-        ### Custom AI Provider:
         **Curl Command:**
         ```
-        curl --location 'https://archcoder-basic-app.hf.space/call/answer_ai' \\
           --header 'Content-Type: application/json' \\
           --data '{"data": ["{{TEXT}}"]}'
         ```
-        **Response Content Path:** `data[0]`
         """)
     gr.Markdown("""
     ---
     🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
     """)
 # Register API endpoints
-demo.api_name = "pluely_integration"
-# STT endpoint for Pluely
 @demo.api(api_name="transcribe_stt")
 def api_transcribe(audio_base64: str):
-    """API endpoint for audio transcription (Pluely STT)"""
     result = transcribe_audio_base64(audio_base64)
     return result
-# AI endpoint for Pluely
 @demo.api(api_name="answer_ai")
 def api_answer(text: str):
-    """API endpoint for text-to-answer (Pluely AI)"""
     answer = generate_answer(text)
     return answer
 if __name__ == "__main__":
     demo.queue(max_size=5)
     demo.launch()

 import gradio as gr
 from faster_whisper import WhisperModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from duckduckgo_search import DDGS
 import time
 import torch
 import base64
 import tempfile
 import os
+from threading import Thread
 # Initialize models
 print("Loading Whisper model...")
     except Exception as e:
         return {"error": f"Transcription failed: {str(e)}"}
+def generate_answer_stream(text_input):
+    """Generate streaming answer from text input"""
+    try:
+        if not text_input or text_input.strip() == "":
+            yield "No input provided"
+            return
+        # Web search (non-streaming part)
+        search_results = search_web(text_input, max_results=2)
+        # Prepare messages
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
+            {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
+        ]
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        inputs = tokenizer([text], return_tensors="pt").to("cpu")
+        # Setup streaming
+        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+        generation_kwargs = dict(
+            inputs=inputs['input_ids'],
+            attention_mask=inputs['attention_mask'],
+            max_new_tokens=80,
+            temperature=0.2,
+            do_sample=True,
+            top_p=0.85,
+            pad_token_id=tokenizer.eos_token_id,
+            streamer=streamer
+        )
+        # Start generation in separate thread
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Stream tokens as they're generated
+        generated_text = ""
+        for new_text in streamer:
+            generated_text += new_text
+            yield generated_text
+    except Exception as e:
+        yield f"Error: {str(e)}"
 def generate_answer(text_input):
+    """Generate complete answer (non-streaming)"""
     try:
         if not text_input or text_input.strip() == "":
             return "No input provided"
     except Exception as e:
         return f"Error: {str(e)}"
+def process_audio_stream(audio_path, question_text=None):
+    """Streaming pipeline for Gradio UI"""
     start_time = time.time()
     # Step 1: Transcribe audio if provided
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
             question = " ".join([seg.text for seg in segments])
         except Exception as e:
+            yield f"❌ Transcription error: {str(e)}", 0.0
+            return
     else:
         question = question_text
     if not question or question.strip() == "":
+        yield "❌ No input provided", 0.0
+        return
     transcription_time = time.time() - start_time
     search_results = search_web(question, max_results=2)
     search_time = time.time() - search_start
+    # Step 3: Stream answer generation
     llm_start = time.time()
+    for partial_answer in generate_answer_stream(question):
+        current_time = time.time() - start_time
+        time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
+        timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
+        yield partial_answer + timing_info, current_time
+# Create Gradio interface
+with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # ⚡ Ultra-Fast Political Q&A System
+    **Streaming enabled** for instant feedback! Pluely compatible endpoints available.
+    **Features:** Whisper-tiny + Qwen2.5-0.5B + DuckDuckGo + Real-time streaming
     """)
     with gr.Tab("🎙️ Audio Input"):
                 audio_submit = gr.Button("🚀 Submit Audio", variant="primary", size="lg")
             with gr.Column():
+                audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
         audio_submit.click(
+            fn=lambda x: process_audio_stream(x, None),
             inputs=[audio_input],
             outputs=[audio_output, audio_time],
+            api_name="audio_query_stream"
         )
     with gr.Tab("✍️ Text Input"):
                 text_submit = gr.Button("🚀 Submit Text", variant="primary", size="lg")
             with gr.Column():
+                text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
         text_submit.click(
+            fn=lambda x: process_audio_stream(None, x),
             inputs=[text_input],
             outputs=[text_output, text_time],
+            api_name="text_query_stream"
         )
         gr.Examples(
             inputs=text_input
         )
+    # API endpoints for Pluely
     with gr.Tab("🔌 Pluely Integration"):
         gr.Markdown("""
         ## Dedicated Endpoints for Pluely
+        ### 1. STT Endpoint (Audio Transcription) - Non-streaming
         ```
         curl -X POST https://archcoder-basic-app.hf.space/call/transcribe_stt \\
           -H "Content-Type: application/json" \\
         ```
         **Returns:** `{"data": [{"text": "transcribed text"}]}`
+        ### 2. AI Endpoint (Text to Answer) - **WITH STREAMING**
         ```
+        curl -X POST https://archcoder-basic-app.hf.space/call/answer_ai_stream \\
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
+        **Returns:** Server-Sent Events (SSE) stream of text chunks
         ---
           --data '{"data": ["{{AUDIO_BASE64}}"]}'
         ```
         **Response Content Path:** `data[0].text`
+        **Streaming:** OFF (STT doesn't need streaming)
+        ### Custom AI Provider (Streaming):
         **Curl Command:**
         ```
+        curl --location 'https://archcoder-basic-app.hf.space/call/answer_ai_stream' \\
           --header 'Content-Type: application/json' \\
           --data '{"data": ["{{TEXT}}"]}'
         ```
+        **Response Content Path:** Leave empty for streaming text
+        **Streaming:** **ON** ✅
+        ### Benefits:
+        - ⚡ Instant feedback as answer generates
+        - 🎯 Better user experience - see words appear in real-time
+        - ⏱️ Perceived latency reduced by 50%+
+        - 🔄 No actual performance penalty
         """)
     gr.Markdown("""
     ---
     🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
+    **Streaming Mode:** Words appear as they're generated - much faster perceived response!
     """)
 # Register API endpoints
 @demo.api(api_name="transcribe_stt")
 def api_transcribe(audio_base64: str):
+    """API endpoint for audio transcription (Pluely STT) - Non-streaming"""
     result = transcribe_audio_base64(audio_base64)
     return result
 @demo.api(api_name="answer_ai")
 def api_answer(text: str):
+    """API endpoint for text-to-answer (Pluely AI) - Non-streaming fallback"""
     answer = generate_answer(text)
     return answer
+@demo.api(api_name="answer_ai_stream")
+def api_answer_stream(text: str):
+    """API endpoint for streaming text-to-answer (Pluely AI) - Streaming enabled"""
+    for chunk in generate_answer_stream(text):
+        yield chunk
 if __name__ == "__main__":
     demo.queue(max_size=5)
     demo.launch()