Peter Michael Gits Claude committed on
Commit
3cb7ae9
Β·
1 Parent(s): 35326c5

feat: Complete fresh restart with clean Streamlit configuration

Browse files

- Replaced complex Gradio app with minimal Streamlit interface
- Updated README.md to use app.py instead of hello_world.py
- Clean configuration to resolve persistent 500 errors
- Testing basic Streamlit functionality in existing space

πŸ€– Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +12 -426
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: πŸŽ€πŸ“…
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: streamlit
7
- app_file: hello_world.py
8
  pinned: false
9
  license: mit
10
  short_description: Voice-enabled AI assistant with WebRTC integration
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: streamlit
7
+ app_file: app.py
8
  pinned: false
9
  license: mit
10
  short_description: Voice-enabled AI assistant with WebRTC integration
app.py CHANGED
@@ -1,430 +1,16 @@
1
- #!/usr/bin/env python3
2
- """
3
- ChatCal Voice-Enabled AI Assistant - Hugging Face Gradio Implementation
4
 
5
- A voice-enabled calendar booking assistant with real-time speech-to-text,
6
- text-to-speech responses, and Google Calendar integration.
7
- """
 
 
8
 
9
- import gradio as gr
10
- import os
11
- import asyncio
12
- import json
13
- from typing import Dict, List, Tuple, Optional
14
- from datetime import datetime
15
 
16
- # Hugging Face Spaces GPU support
17
- import spaces
 
18
 
19
- # Core functionality imports
20
- from core.chat_agent import ChatCalAgent
21
- from core.session_manager import SessionManager
22
- from core.mcp_audio_handler import MCPAudioHandler
23
- from core.config import config
24
- from version import get_version_info
25
-
26
- # WebRTC imports - re-enabled for WebRTC-first approach
27
- from webrtc.server.fastapi_integration import create_fastapi_app
28
-
29
- # Required GPU function for Hugging Face Spaces
30
- @spaces.GPU
31
- def initialize_gpu_resources():
32
- """Initialize GPU resources for VoiceCal AI processing."""
33
- # This function satisfies the HF Spaces requirement for @spaces.GPU
34
- # GPU resources are actually used by external STT/TTS services
35
- return "GPU initialized for VoiceCal v0.5.7"
36
-
37
- class ChatCalVoiceApp:
38
- """Main application class for voice-enabled ChatCal."""
39
-
40
- def __init__(self):
41
- self.session_manager = SessionManager()
42
- self.chat_agent = ChatCalAgent()
43
- self.audio_handler = MCPAudioHandler()
44
-
45
- async def process_message(
46
- self,
47
- message: str,
48
- history: List[Tuple[str, str]],
49
- session_id: str
50
- ) -> Tuple[List[Tuple[str, str]], str]:
51
- """Process a chat message and return updated history."""
52
- try:
53
- # Get or create session
54
- session = await self.session_manager.get_session(session_id)
55
-
56
- # Process message through ChatCal agent
57
- response = await self.chat_agent.process_message(message, session)
58
-
59
- # Update conversation history
60
- history.append((message, response))
61
-
62
- return history, ""
63
-
64
- except Exception as e:
65
- error_msg = f"Sorry, I encountered an error: {str(e)}"
66
- history.append((message, error_msg))
67
- return history, ""
68
-
69
- async def process_audio(
70
- self,
71
- audio_data: bytes,
72
- history: List[Tuple[str, str]],
73
- session_id: str
74
- ) -> Tuple[List[Tuple[str, str]], str, bytes]:
75
- """Process audio input and return transcription + response audio."""
76
- try:
77
- # Convert audio to text via STT service
78
- transcription = await self.audio_handler.speech_to_text(audio_data)
79
-
80
- # Process the transcribed message
81
- history, _ = await self.process_message(transcription, history, session_id)
82
-
83
- # Get the latest response for TTS
84
- if history:
85
- latest_response = history[-1][1]
86
- # Convert response to speech
87
- response_audio = await self.audio_handler.text_to_speech(latest_response)
88
- return history, transcription, response_audio
89
-
90
- return history, transcription, None
91
-
92
- except Exception as e:
93
- error_msg = f"Audio processing error: {str(e)}"
94
- history.append(("(Audio input)", error_msg))
95
- return history, "", None
96
-
97
- def create_interface(self) -> gr.Interface:
98
- """Create the main Gradio interface."""
99
-
100
- with gr.Blocks(
101
- theme=gr.themes.Soft(),
102
- title="ChatCal Voice Assistant",
103
- # Note: max_file_size removed for HF Spaces compatibility
104
- css="""
105
- .chat-container {
106
- max-height: 500px;
107
- overflow-y: auto;
108
- }
109
- .voice-controls {
110
- background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
111
- padding: 10px;
112
- border-radius: 10px;
113
- margin: 10px 0;
114
- }
115
- .status-indicator {
116
- display: inline-block;
117
- width: 12px;
118
- height: 12px;
119
- border-radius: 50%;
120
- margin-right: 8px;
121
- }
122
- .recording { background-color: #ff4444; }
123
- .idle { background-color: #44ff44; }
124
- """
125
- ) as demo:
126
-
127
- # Title and description
128
- gr.Markdown("""
129
- # πŸŽ€πŸ“… ChatCal Voice Assistant
130
-
131
- **Book your Google Calendar appointments with voice or text!**
132
-
133
- - ����️ **Voice Input**: Click record, speak naturally
134
- - πŸ’¬ **Text Input**: Type your message
135
- - πŸ“… **Smart Booking**: AI understands dates, times, and preferences
136
- - πŸŽ₯ **Google Meet**: Automatic video conference setup
137
- """)
138
-
139
- # Session state
140
- session_id = gr.State(value=lambda: f"session_{datetime.now().timestamp()}")
141
-
142
- with gr.Row():
143
- with gr.Column(scale=3):
144
- # Chat history display
145
- chatbot = gr.Chatbot(
146
- label="Chat History",
147
- height=400,
148
- elem_classes=["chat-container"],
149
- type="messages"
150
- )
151
-
152
- with gr.Row(elem_classes=["voice-controls"]):
153
- # Traditional Voice input section
154
- with gr.Column(scale=2):
155
- audio_input = gr.Audio(
156
- type="numpy",
157
- label="🎀 Voice Input (Gradio)",
158
- interactive=True
159
- )
160
- voice_status = gr.HTML(
161
- value='<span class="status-indicator idle"></span>Ready for voice input'
162
- )
163
-
164
- with gr.Column(scale=1):
165
- # Audio output
166
- audio_output = gr.Audio(
167
- label="πŸ”Š AI Response",
168
- type="numpy",
169
- interactive=False
170
- )
171
-
172
- # WebRTC Real-time Voice Section
173
- with gr.Row():
174
- gr.HTML("""
175
- <div style="background: linear-gradient(45deg, #28a745 0%, #20c997 100%);
176
- padding: 15px; border-radius: 10px; margin: 10px 0;">
177
- <h3 style="color: white; margin: 0;">πŸš€ WebRTC Real-time Voice (Beta)</h3>
178
- <p style="color: white; margin: 5px 0;">
179
- Enhanced real-time voice interaction with streaming transcription
180
- </p>
181
- <p style="color: white; margin: 5px 0; font-size: 0.9em;">
182
- πŸ“‘ <strong>WebSocket endpoints:</strong> /ws/webrtc/{client_id} |
183
- πŸ§ͺ <strong>Test page:</strong> <a href="/webrtc/demo" style="color: #fff; text-decoration: underline;">WebRTC Demo</a> |
184
- ⚑ <strong>API Status:</strong> <a href="/webrtc/test" style="color: #fff; text-decoration: underline;">Test Endpoint</a>
185
- </p>
186
- </div>
187
- """)
188
-
189
- # Text input section
190
- with gr.Row():
191
- text_input = gr.Textbox(
192
- label="πŸ’¬ Type your message or see voice transcription",
193
- placeholder="Hi! I'm [Your Name]. Book a 30-minute meeting tomorrow at 2 PM...",
194
- lines=2,
195
- scale=4
196
- )
197
- send_btn = gr.Button("Send", variant="primary", scale=1)
198
-
199
- with gr.Column(scale=1):
200
- # Quick action buttons
201
- gr.Markdown("### πŸš€ Quick Actions")
202
-
203
- quick_meet = gr.Button(
204
- "πŸŽ₯ Google Meet (30m)",
205
- variant="secondary"
206
- )
207
- quick_availability = gr.Button(
208
- "πŸ“… Check Availability",
209
- variant="secondary"
210
- )
211
- quick_cancel = gr.Button(
212
- "❌ Cancel Meeting",
213
- variant="secondary"
214
- )
215
-
216
- # Version info
217
- version_btn = gr.Button(
218
- "ℹ️ Version Info",
219
- variant="secondary"
220
- )
221
- version_display = gr.Textbox(
222
- label="Version Information",
223
- interactive=False,
224
- visible=False
225
- )
226
-
227
- # Voice settings
228
- gr.Markdown("### 🎭 Voice Settings")
229
- voice_enabled = gr.Checkbox(
230
- label="Enable voice responses",
231
- value=True
232
- )
233
- voice_selection = gr.Dropdown(
234
- choices=[
235
- "v2/en_speaker_0",
236
- "v2/en_speaker_1",
237
- "v2/en_speaker_2",
238
- "v2/en_speaker_6",
239
- "v2/en_speaker_9"
240
- ],
241
- value="v2/en_speaker_6",
242
- label="AI Voice"
243
- )
244
-
245
- # Event handlers
246
- def handle_text_submit(message, history, session):
247
- if message.strip():
248
- # Use asyncio to handle the async function
249
- loop = asyncio.new_event_loop()
250
- asyncio.set_event_loop(loop)
251
- try:
252
- result = loop.run_until_complete(
253
- app.process_message(message, history, session)
254
- )
255
- return result
256
- finally:
257
- loop.close()
258
- return history, message
259
-
260
- def handle_audio_submit(audio, history, session):
261
- print(f"🎀 AUDIO DEBUG: Received audio input: {type(audio)}")
262
- print(f"🎀 AUDIO DEBUG: Audio data: {audio}")
263
-
264
- if audio is not None:
265
- print(f"🎀 AUDIO DEBUG: Processing audio...")
266
- # Convert audio data and process
267
- loop = asyncio.new_event_loop()
268
- asyncio.set_event_loop(loop)
269
- try:
270
- # Debug audio format
271
- if isinstance(audio, tuple) and len(audio) >= 2:
272
- sample_rate, audio_array = audio
273
- print(f"🎀 AUDIO DEBUG: Sample rate: {sample_rate}")
274
- print(f"🎀 AUDIO DEBUG: Audio array type: {type(audio_array)}")
275
- print(f"🎀 AUDIO DEBUG: Audio array shape: {audio_array.shape if hasattr(audio_array, 'shape') else 'No shape'}")
276
-
277
- # Use the audio handler's process method instead
278
- transcription = app.audio_handler.process_audio_input(audio)
279
- print(f"🎀 AUDIO DEBUG: Transcription result: {transcription}")
280
-
281
- if transcription and transcription != "No audio received":
282
- # Process the transcription as a message
283
- result = loop.run_until_complete(
284
- app.process_message(transcription, history, session)
285
- )
286
- # Return updated history, transcription in text box, and no audio output for now
287
- return result[0], transcription, None
288
- else:
289
- print(f"🎀 AUDIO DEBUG: No valid transcription received")
290
- return history, "No audio transcription available", None
291
- else:
292
- print(f"🎀 AUDIO DEBUG: Invalid audio format")
293
- return history, "Invalid audio format", None
294
-
295
- except Exception as e:
296
- print(f"🎀 AUDIO ERROR: {str(e)}")
297
- import traceback
298
- traceback.print_exc()
299
- return history, f"Audio processing error: {str(e)}", None
300
- finally:
301
- loop.close()
302
- else:
303
- print(f"🎀 AUDIO DEBUG: No audio received")
304
- return history, "No audio received", None
305
-
306
- def handle_quick_action(action_text, history, session):
307
- """Handle quick action button clicks."""
308
- loop = asyncio.new_event_loop()
309
- asyncio.set_event_loop(loop)
310
- try:
311
- result = loop.run_until_complete(
312
- app.process_message(action_text, history, session)
313
- )
314
- return result[0], "" # Return updated history and clear text input
315
- finally:
316
- loop.close()
317
-
318
- # Wire up the event handlers
319
- send_btn.click(
320
- fn=handle_text_submit,
321
- inputs=[text_input, chatbot, session_id],
322
- outputs=[chatbot, text_input]
323
- )
324
-
325
- text_input.submit(
326
- fn=handle_text_submit,
327
- inputs=[text_input, chatbot, session_id],
328
- outputs=[chatbot, text_input]
329
- )
330
-
331
- audio_input.change(
332
- fn=handle_audio_submit,
333
- inputs=[audio_input, chatbot, session_id],
334
- outputs=[chatbot, text_input, audio_output]
335
- )
336
-
337
- # Quick action handlers
338
- quick_meet.click(
339
- fn=lambda hist, sess: handle_quick_action(
340
- "Book a 30-minute Google Meet with Peter for next available time",
341
- hist, sess
342
- ),
343
- inputs=[chatbot, session_id],
344
- outputs=[chatbot, text_input]
345
- )
346
-
347
- quick_availability.click(
348
- fn=lambda hist, sess: handle_quick_action(
349
- "What is Peter's availability this week?",
350
- hist, sess
351
- ),
352
- inputs=[chatbot, session_id],
353
- outputs=[chatbot, text_input]
354
- )
355
-
356
- quick_cancel.click(
357
- fn=lambda hist, sess: handle_quick_action(
358
- "Cancel my upcoming meeting with Peter",
359
- hist, sess
360
- ),
361
- inputs=[chatbot, session_id],
362
- outputs=[chatbot, text_input]
363
- )
364
-
365
- # Version info handler
366
- def show_version():
367
- info = get_version_info()
368
- version_text = f"Version: {info['version']}\nBuild: {info['build_date']}\nDescription: {info['description']}\nStatus: {info['status']}"
369
- return version_text, gr.update(visible=True)
370
-
371
- version_btn.click(
372
- fn=show_version,
373
- outputs=[version_display, version_display]
374
- )
375
-
376
- return demo
377
-
378
- # Initialize GPU resources for Hugging Face Spaces
379
- gpu_status = initialize_gpu_resources()
380
- print(f"πŸš€ {gpu_status}")
381
-
382
- # Global app instance
383
- app = ChatCalVoiceApp()
384
-
385
- # Create and launch the interface
386
- if __name__ == "__main__":
387
- import uvicorn
388
-
389
- try:
390
- # Create WebRTC-enabled FastAPI app as main app
391
- webrtc_app = create_fastapi_app()
392
-
393
- # Hybrid approach: Mount Gradio on FastAPI for complete functionality
394
- from version import __version__
395
- print(f"πŸš€ VoiceCal - Voice-Enabled AI Scheduling Assistant v{__version__}")
396
- print("πŸ“‘ WebSocket endpoint: /ws/webrtc/{client_id}")
397
- print("πŸ§ͺ WebRTC demo page: /webrtc/demo")
398
- print("⚑ API status: /webrtc/test")
399
- print("🎀 Full Gradio interface with voice integration")
400
-
401
- # Create Gradio interface
402
- demo = app.create_interface()
403
-
404
- # Mount Gradio app on FastAPI for hybrid deployment
405
- webrtc_app.mount("/", demo.app)
406
-
407
- # Launch hybrid app with both WebRTC and Gradio
408
- uvicorn.run(webrtc_app, host="0.0.0.0", port=7860)
409
-
410
- except Exception as e:
411
- print(f"❌ WebRTC integration error: {e}")
412
- print("πŸ“‹ Falling back to Gradio-only deployment")
413
- import traceback
414
- traceback.print_exc()
415
-
416
- # Create stable Gradio interface fallback
417
- demo = app.create_interface()
418
-
419
- from version import __version__
420
- print(f"πŸš€ VoiceCal - Voice-Enabled AI Scheduling Assistant v{__version__} (Fallback)")
421
- print("πŸ“± Traditional voice input available via Gradio Audio component")
422
- print("βš™οΈ WebRTC real-time streaming: Error - using fallback mode")
423
-
424
- # Launch configuration for HF Spaces (stable fallback)
425
- demo.launch(
426
- server_name="0.0.0.0",
427
- server_port=7860,
428
- share=False, # HF handles sharing
429
- show_error=True
430
- )
 
1
+ import streamlit as st
 
 
2
 
3
+ st.set_page_config(
4
+ page_title="VoiceCal Fresh",
5
+ page_icon="🎀",
6
+ layout="centered"
7
+ )
8
 
9
+ st.title("πŸŽ€πŸ“… VoiceCal - Fresh Start")
10
+ st.write("Testing fresh deployment with clean configuration.")
 
 
 
 
11
 
12
+ if st.button("Test Basic Functionality"):
13
+ st.success("βœ… Basic Streamlit functionality working!")
14
+ st.balloons()
15
 
16
+ st.info("If you see this interface, the space infrastructure issues have been resolved.")