Spaces:

AptlyDigital
/

simpleai

Sleeping

App Files Community

AptlyDigital committed on Jan 14

Commit

c8759b1

verified ·

1 Parent(s): 3030ea6

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -18

app.py CHANGED Viewed

@@ -1,27 +1,214 @@
 import gradio as gr
 from transformers import pipeline
-chatbot = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct", trust_remote_code=True)
-def respond(message, history):
-    prompt = f"Human: {message}\nAssistant:"
-    response = chatbot(prompt, max_new_tokens=100)[0]['generated_text']
-    return response.split("Assistant:")[-1].strip()
-# Simple interface with voice
-with gr.Blocks() as demo:
-    gr.Markdown("# Voice AI Assistant")
-    with gr.Row():
-        audio = gr.Audio(sources=["microphone"], type="filepath")
-        chatbot_display = gr.Chatbot(height=400)
-    def process_audio(audio_path):
-        if audio_path:
-            # In production, add Whisper here
-            return "Voice detected! Add Whisper for transcription."
-        return "No audio"
-    audio.change(process_audio, audio, chatbot_display)
-demo.launch()

 import gradio as gr
 from transformers import pipeline
+import whisper
+# Load Qwen model
+chatbot = pipeline(
+    "text-generation",
+    model="Qwen/Qwen2.5-0.5B-Instruct",
+    trust_remote_code=True
+)
+# Load Whisper model
+whisper_model = whisper.load_model("base")
+def transcribe_audio(audio_file):
+    """Convert speech to text using Whisper"""
+    if audio_file is None:
+        return ""
+    try:
+        result = whisper_model.transcribe(audio_file)
+        return result["text"].strip()
+    except Exception as e:
+        print(f"Transcription error: {e}")
+        return ""
+def get_ai_response(message):
+    """Get response from Qwen model"""
+    try:
+        prompt = f"Human: {message}\nAssistant:"
+        response = chatbot(
+            prompt,
+            max_new_tokens=150,
+            temperature=0.7,
+            do_sample=True
+        )
+        full_text = response[0]['generated_text']
+        assistant_response = full_text.split("Assistant:")[-1].strip()
+        return assistant_response if assistant_response else "I'm thinking..."
+    except Exception as e:
+        return f"Error: {str(e)[:100]}"
+def process_voice_input(audio, chat_history):
+    """Process voice input - FIXED message format"""
+    if audio is None:
+        return chat_history, "", None
+    # Transcribe audio to text
+    text = transcribe_audio(audio)
+    if not text:
+        # CORRECT FORMAT: list of dicts
+        chat_history.append({"role": "user", "content": "Voice input"})
+        chat_history.append({"role": "assistant", "content": "Sorry, I couldn't understand the audio."})
+        return chat_history, "", None
+    # Get AI response
+    response = get_ai_response(text)
+    # CORRECT FORMAT: list of dicts
+    chat_history.append({"role": "user", "content": f"🎤 {text}"})
+    chat_history.append({"role": "assistant", "content": response})
+    return chat_history, "", None
+def process_text_input(text, chat_history):
+    """Process text input - FIXED message format"""
+    if not text.strip():
+        return chat_history, ""
+    # Get AI response
+    response = get_ai_response(text)
+    # CORRECT FORMAT: list of dicts
+    chat_history.append({"role": "user", "content": text})
+    chat_history.append({"role": "assistant", "content": response})
+    return chat_history, ""
+def clear_chat():
+    """Clear chat history"""
+    return [], []
+def set_voice_mode():
+    """Set voice mode status"""
+    return "**Status:** Voice mode active - Click microphone to speak"
+def set_text_mode():
+    """Set text mode status"""
+    return "**Status:** Text mode active - Type your message"
+# Create the interface
+with gr.Blocks(fill_height=True) as demo:
+    # Simple background animation
+    gr.HTML("""
+    <div style="position:fixed; top:0; left:0; width:100vw; height:100vh;
+                background: linear-gradient(45deg, #0a0a0f, #1a1a2e);
+                z-index:-1;">
+    </div>
+    """)
+    # Main content
+    with gr.Column(elem_id="main-content"):
+        gr.Markdown("# 🎤 Voice-Enabled AI Assistant")
+        gr.Markdown("Talk to the AI using your voice or type your message")
+        # Chat display
+        chatbot_display = gr.Chatbot(
+            label="Conversation",
+            height=400
+        )
+        # Voice input section
+        with gr.Row():
+            with gr.Column(scale=1):
+                audio_input = gr.Audio(
+                    sources=["microphone"],
+                    type="filepath",
+                    label="🎤 Speak your message",
+                    interactive=True
+                )
+            with gr.Column(scale=1):
+                gr.Markdown("### Or type your message:")
+                text_input = gr.Textbox(
+                    label="Type here",
+                    placeholder="Type your message...",
+                    lines=2
+                )
+                text_submit = gr.Button("Send Text", variant="primary")
+        # Control buttons
+        with gr.Row():
+            clear_btn = gr.Button("Clear Chat", variant="secondary")
+            voice_mode_btn = gr.Button("🎤 Voice Mode", variant="primary")
+            text_mode_btn = gr.Button("📝 Text Mode", variant="secondary")
+        # Status indicator
+        status = gr.Markdown("**Status:** Ready - Click microphone or type to chat")
+        # State - initialize with welcome message
+        chat_history = gr.State([
+            {"role": "assistant", "content": "Hello! I'm your AI assistant. Speak into the microphone or type your message."}
+        ])
+    # Event handlers
+    # Voice input
+    audio_input.stop_recording(
+        fn=process_voice_input,
+        inputs=[audio_input, chat_history],
+        outputs=[chatbot_display, text_input, audio_input]
+    )
+    # Text input via button
+    text_submit.click(
+        fn=process_text_input,
+        inputs=[text_input, chat_history],
+        outputs=[chatbot_display, text_input]
+    )
+    # Text input via Enter key
+    text_input.submit(
+        fn=process_text_input,
+        inputs=[text_input, chat_history],
+        outputs=[chatbot_display, text_input]
+    )
+    # Clear chat
+    clear_btn.click(
+        fn=clear_chat,
+        outputs=[chatbot_display, chat_history]
+    )
+    # Mode buttons
+    voice_mode_btn.click(
+        fn=set_voice_mode,
+        outputs=[status]
+    )
+    text_mode_btn.click(
+        fn=set_text_mode,
+        outputs=[status]
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        theme=gr.themes.Soft(),
+        css="""
+        #main-content {
+            background: rgba(15, 15, 25, 0.85);
+            backdrop-filter: blur(10px);
+            padding: 30px;
+            border-radius: 20px;
+            border: 1px solid rgba(255, 255, 255, 0.1);
+            max-width: 800px;
+            margin: 50px auto;
+            box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
+        }
+        .gradio-container {
+            background: transparent !important;
+            min-height: 100vh !important;
+        }
+        """
+    )