Spaces:

ArchCoder
/

basic_app

Sleeping

App Files Files Community

ArchCoder committed on Oct 10, 2025

Commit

f81cf03

verified ·

1 Parent(s): 82c886b

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -60

app.py CHANGED Viewed

@@ -51,19 +51,15 @@ def search_web(query, max_results=2):
 def transcribe_audio_base64(audio_base64):
     """Transcribe audio from base64 string (for Pluely STT endpoint)"""
     try:
-        # Decode base64 audio
         audio_bytes = base64.b64decode(audio_base64)
-        # Save to temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
             temp_audio.write(audio_bytes)
             temp_path = temp_audio.name
-        # Transcribe
         segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
         transcription = " ".join([seg.text for seg in segments])
-        # Cleanup
         os.unlink(temp_path)
         return {"text": transcription.strip()}
@@ -78,10 +74,8 @@ def generate_answer_stream(text_input):
             yield "No input provided"
             return
-        # Web search (non-streaming part)
         search_results = search_web(text_input, max_results=2)
-        # Prepare messages
         messages = [
             {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
             {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
@@ -94,8 +88,6 @@ def generate_answer_stream(text_input):
         )
         inputs = tokenizer([text], return_tensors="pt").to("cpu")
-        # Setup streaming
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         generation_kwargs = dict(
@@ -109,11 +101,9 @@ def generate_answer_stream(text_input):
             streamer=streamer
         )
-        # Start generation in separate thread
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
-        # Stream tokens as they're generated
         generated_text = ""
         for new_text in streamer:
             generated_text += new_text
@@ -122,27 +112,11 @@ def generate_answer_stream(text_input):
     except Exception as e:
         yield f"Error: {str(e)}"
-def generate_answer(text_input):
-    """Generate complete answer (non-streaming)"""
-    try:
-        if not text_input or text_input.strip() == "":
-            return "No input provided"
-        # Get the last chunk from streaming
-        final_answer = ""
-        for chunk in generate_answer_stream(text_input):
-            final_answer = chunk
-        return final_answer
-    except Exception as e:
-        return f"Error: {str(e)}"
-def process_audio_stream(audio_path, question_text=None):
-    """Streaming pipeline for Gradio UI - Returns tuple generator"""
     start_time = time.time()
-    # Step 1: Transcribe audio if provided
     if audio_path:
         try:
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
@@ -159,20 +133,30 @@ def process_audio_stream(audio_path, question_text=None):
     transcription_time = time.time() - start_time
-    # Step 2: Web search
     search_start = time.time()
     search_results = search_web(question, max_results=2)
     search_time = time.time() - search_start
-    # Step 3: Stream answer generation
     llm_start = time.time()
     for partial_answer in generate_answer_stream(question):
         current_time = time.time() - start_time
         time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
         timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
-        # IMPORTANT: Must yield tuple (text, number) to match output components
         yield partial_answer + timing_info, current_time
 # Create Gradio interface
 with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
@@ -196,10 +180,9 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
                 audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
-        # Fixed: Lambda wrapper ensures proper tuple unpacking
         audio_submit.click(
-            fn=process_audio_stream,
-            inputs=[audio_input, gr.Textbox(value=None, visible=False)],
             outputs=[audio_output, audio_time],
             api_name="audio_query_stream"
         )
@@ -218,9 +201,8 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
                 text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
-        # Fixed: Proper function call with audio=None
         text_submit.click(
-            fn=lambda text: process_audio_stream(None, text),
             inputs=[text_input],
             outputs=[text_output, text_time],
             api_name="text_query_stream"
@@ -246,7 +228,6 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
           -H "Content-Type: application/json" \\
           -d '{"data": ["BASE64_AUDIO_DATA"]}'
         ```
-        **Response Format:** `{"data": [{"text": "transcribed text"}]}`
         ### 2. AI Endpoint - Streaming
         ```
@@ -254,58 +235,48 @@ with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
-        **Response Format:** Streaming text chunks
-        ---
         ## Pluely Configuration
         ### Custom STT Provider:
-        **Curl Command:**
         ```
         curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
         ```
-        **Response Content Path:** `data[0].text`
-        **Streaming:** OFF
-        ### Custom AI Provider (Streaming):
-        **Curl Command:**
         ```
         curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
         ```
-        **Response Content Path:** `data`
-        **Streaming:** ON ✅
         """)
-        # Hidden interface components that create API endpoints
         with gr.Row(visible=False):
             stt_input = gr.Textbox()
             stt_output = gr.JSON()
-            ai_stream_input = gr.Textbox()
-            ai_stream_output = gr.Textbox()
-        # These create the /call/transcribe_stt and /call/answer_ai_stream endpoints
-        stt_button = gr.Button("STT", visible=False)
-        stt_button.click(
             fn=transcribe_audio_base64,
             inputs=[stt_input],
             outputs=[stt_output],
             api_name="transcribe_stt"
         )
-        ai_stream_button = gr.Button("AI Stream", visible=False)
-        ai_stream_button.click(
             fn=generate_answer_stream,
-            inputs=[ai_stream_input],
-            outputs=[ai_stream_output],
             api_name="answer_ai_stream"
         )
     gr.Markdown("""
     ---
     🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
-    **Streaming Mode:** Words appear as they're generated - much faster perceived response!
     """)
 if __name__ == "__main__":

 def transcribe_audio_base64(audio_base64):
     """Transcribe audio from base64 string (for Pluely STT endpoint)"""
     try:
         audio_bytes = base64.b64decode(audio_base64)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
             temp_audio.write(audio_bytes)
             temp_path = temp_audio.name
         segments, _ = whisper_model.transcribe(temp_path, language="en", beam_size=1)
         transcription = " ".join([seg.text for seg in segments])
         os.unlink(temp_path)
         return {"text": transcription.strip()}
             yield "No input provided"
             return
         search_results = search_web(text_input, max_results=2)
         messages = [
             {"role": "system", "content": "You are a helpful assistant. Answer briefly using provided context. Keep responses under 40 words."},
             {"role": "user", "content": f"Context:\n{search_results}\n\nQuestion: {text_input}\n\nAnswer:"}
         )
         inputs = tokenizer([text], return_tensors="pt").to("cpu")
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         generation_kwargs = dict(
             streamer=streamer
         )
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
         generated_text = ""
         for new_text in streamer:
             generated_text += new_text
     except Exception as e:
         yield f"Error: {str(e)}"
+def process_audio_stream(audio_path, question_text):
+    """Streaming pipeline that yields tuples"""
     start_time = time.time()
+    # Transcribe if audio provided
     if audio_path:
         try:
             segments, _ = whisper_model.transcribe(audio_path, language="en", beam_size=1)
     transcription_time = time.time() - start_time
+    # Web search
     search_start = time.time()
     search_results = search_web(question, max_results=2)
     search_time = time.time() - search_start
+    # Stream answer
     llm_start = time.time()
     for partial_answer in generate_answer_stream(question):
         current_time = time.time() - start_time
         time_emoji = "🟢" if current_time < 3.0 else "🟡" if current_time < 3.5 else "🔴"
         timing_info = f"\n\n{time_emoji} **Timing:** Trans={transcription_time:.2f}s | Search={search_time:.2f}s | LLM={(time.time()-llm_start):.2f}s | **Total={current_time:.2f}s**"
         yield partial_answer + timing_info, current_time
+# Wrapper functions for proper API handling
+def audio_handler(audio_path):
+    """Yield each (text, elapsed_seconds) tuple from process_audio_stream for an audio-only query (question_text is None)."""
+    for result in process_audio_stream(audio_path, None):
+        yield result
+def text_handler(text_input):
+    """Yield each (text, elapsed_seconds) tuple from process_audio_stream for a text-only query (audio_path is None)."""
+    for result in process_audio_stream(None, text_input):
+        yield result
 # Create Gradio interface
 with gr.Blocks(title="Fast Q&A - Streaming Enabled", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
                 audio_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 audio_time = gr.Number(label="Response Time (seconds)", precision=2)
         audio_submit.click(
+            fn=audio_handler,
+            inputs=[audio_input],
             outputs=[audio_output, audio_time],
             api_name="audio_query_stream"
         )
                 text_output = gr.Textbox(label="Answer (Streaming)", lines=8, show_copy_button=True)
                 text_time = gr.Number(label="Response Time (seconds)", precision=2)
         text_submit.click(
+            fn=text_handler,
             inputs=[text_input],
             outputs=[text_output, text_time],
             api_name="text_query_stream"
           -H "Content-Type: application/json" \\
           -d '{"data": ["BASE64_AUDIO_DATA"]}'
         ```
         ### 2. AI Endpoint - Streaming
         ```
           -H "Content-Type: application/json" \\
           -d '{"data": ["Your question here"]}'
         ```
         ## Pluely Configuration
         ### Custom STT Provider:
         ```
         curl https://archcoder-basic-app.hf.space/call/transcribe_stt -H "Content-Type: application/json" -d '{"data": ["{{AUDIO_BASE64}}"]}'
         ```
+        **Response Path:** `data[0].text` | **Streaming:** OFF
+        ### Custom AI Provider:
         ```
         curl https://archcoder-basic-app.hf.space/call/answer_ai_stream -H "Content-Type: application/json" -d '{"data": ["{{TEXT}}"]}'
         ```
+        **Response Path:** `data` | **Streaming:** ON ✅
         """)
+        # Hidden components for API endpoints
         with gr.Row(visible=False):
             stt_input = gr.Textbox()
             stt_output = gr.JSON()
+            ai_input = gr.Textbox()
+            ai_output = gr.Textbox()
+        stt_btn = gr.Button("STT", visible=False)
+        stt_btn.click(
             fn=transcribe_audio_base64,
             inputs=[stt_input],
             outputs=[stt_output],
             api_name="transcribe_stt"
         )
+        ai_btn = gr.Button("AI", visible=False)
+        ai_btn.click(
             fn=generate_answer_stream,
+            inputs=[ai_input],
+            outputs=[ai_output],
             api_name="answer_ai_stream"
         )
     gr.Markdown("""
     ---
     🟢 = Under 3s | 🟡 = 3-3.5s | 🔴 = Over 3.5s
     """)
 if __name__ == "__main__":