#!/usr/bin/env python3
"""
ChatCal Voice-Enabled AI Assistant - Hugging Face Gradio Implementation

A voice-enabled calendar booking assistant with real-time speech-to-text,
text-to-speech responses, and Google Calendar integration.
"""
import gradio as gr
import os
import asyncio
import json
from typing import Dict, List, Tuple, Optional
from datetime import datetime

# Core functionality imports
from core.chat_agent import ChatCalAgent
from core.session_manager import SessionManager
from core.mcp_audio_handler import MCPAudioHandler
from core.config import config
from version import get_version_info

# WebRTC imports - re-enabled for WebRTC-first approach
from webrtc.server.fastapi_integration import create_fastapi_app


class ChatCalVoiceApp:
    """Main application class for voice-enabled ChatCal."""

    def __init__(self):
        # All real work is delegated to these project-local services:
        # session store, LLM chat agent, and STT/TTS handler.
        self.session_manager = SessionManager()
        self.chat_agent = ChatCalAgent()
        self.audio_handler = MCPAudioHandler()

    async def process_message(
        self,
        message: str,
        history: List[Tuple[str, str]],
        session_id: str
    ) -> Tuple[List[Tuple[str, str]], str]:
        """Process a chat message and return (updated history, "").

        The second element clears the text input on success. Agent errors
        are rendered into the chat history instead of raised, so a failed
        turn never crashes the UI.
        """
        try:
            # Get or create session, then run the turn through the agent.
            session = await self.session_manager.get_session(session_id)
            response = await self.chat_agent.process_message(message, session)
            history.append((message, response))
            return history, ""
        except Exception as e:
            error_msg = f"Sorry, I encountered an error: {str(e)}"
            history.append((message, error_msg))
            return history, ""

    async def process_audio(
        self,
        audio_data: bytes,
        history: List[Tuple[str, str]],
        session_id: str
    ) -> Tuple[List[Tuple[str, str]], str, Optional[bytes]]:
        """Transcribe audio, answer it, and synthesize the spoken reply.

        Returns (updated history, transcription, response audio). The audio
        slot is None when no reply was produced or on error — the original
        annotation claimed plain ``bytes``, which was wrong.
        """
        try:
            # STT, then reuse the normal text pipeline for the reply.
            transcription = await self.audio_handler.speech_to_text(audio_data)
            history, _ = await self.process_message(transcription, history, session_id)

            # Speak the latest assistant response, if there is one.
            if history:
                latest_response = history[-1][1]
                response_audio = await self.audio_handler.text_to_speech(latest_response)
                return history, transcription, response_audio
            return history, transcription, None
        except Exception as e:
            error_msg = f"Audio processing error: {str(e)}"
            history.append(("(Audio input)", error_msg))
            return history, "", None

    def create_interface(self) -> gr.Blocks:
        """Build and return the Gradio Blocks UI.

        Sync Gradio callbacks run on worker threads with no event loop, so
        each handler drives the async agent with ``asyncio.run`` (which
        creates and correctly tears down a private loop — the previous
        hand-rolled new_event_loop/close sequence left a closed loop set
        on the thread).

        Note: the original annotation said ``gr.Interface``; this method
        has always returned a ``gr.Blocks``.
        """
        with gr.Blocks(
            theme=gr.themes.Soft(),
            title="ChatCal Voice Assistant",
            css="""
            .chat-container { max-height: 500px; overflow-y: auto; }
            .voice-controls {
                background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
                padding: 10px; border-radius: 10px; margin: 10px 0;
            }
            .status-indicator {
                display: inline-block; width: 12px; height: 12px;
                border-radius: 50%; margin-right: 8px;
            }
            .recording { background-color: #ff4444; }
            .idle { background-color: #44ff44; }
            """
        ) as demo:
            # Title and description
            gr.Markdown("""
            # 🎤📅 ChatCal Voice Assistant

            **Book your Google Calendar appointments with voice or text!**

            - 🗣️ **Voice Input**: Click record, speak naturally
            - 💬 **Text Input**: Type your message
            - 📅 **Smart Booking**: AI understands dates, times, and preferences
            - 🎥 **Google Meet**: Automatic video conference setup
            """)

            # Session state — callable default gives each page load a
            # fresh, timestamp-based session id.
            session_id = gr.State(value=lambda: f"session_{datetime.now().timestamp()}")

            with gr.Row():
                with gr.Column(scale=3):
                    # Chat history display
                    chatbot = gr.Chatbot(
                        label="Chat History",
                        height=400,
                        elem_classes=["chat-container"]
                    )

                    with gr.Row(elem_classes=["voice-controls"]):
                        # Traditional Voice input section
                        with gr.Column(scale=2):
                            audio_input = gr.Audio(
                                type="numpy",
                                label="🎤 Voice Input (Gradio)",
                                interactive=True
                            )
                            voice_status = gr.HTML(
                                value='Ready for voice input'
                            )

                        with gr.Column(scale=1):
                            # Audio output
                            audio_output = gr.Audio(
                                label="🔊 AI Response",
                                type="numpy",
                                interactive=False
                            )

                    # WebRTC Real-time Voice Section
                    # NOTE(review): the source for this HTML string was
                    # garbled — surrounding markup tags may have been lost;
                    # the visible text content is preserved verbatim.
                    with gr.Row():
                        gr.HTML("""
                        🚀 WebRTC Real-time Voice (Beta)
                        Enhanced real-time voice interaction with streaming transcription
                        📡 WebSocket endpoints: /ws/webrtc/{client_id} | 🧪 Test page: WebRTC Demo | ⚡ API Status: Test Endpoint
                        """)

                    # Text input section
                    with gr.Row():
                        text_input = gr.Textbox(
                            label="💬 Type your message or see voice transcription",
                            placeholder="Hi! I'm [Your Name]. Book a 30-minute meeting tomorrow at 2 PM...",
                            lines=2,
                            scale=4
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Column(scale=1):
                    # Quick action buttons
                    gr.Markdown("### 🚀 Quick Actions")
                    quick_meet = gr.Button(
                        "🎥 Google Meet (30m)",
                        variant="secondary"
                    )
                    quick_availability = gr.Button(
                        "📅 Check Availability",
                        variant="secondary"
                    )
                    quick_cancel = gr.Button(
                        "❌ Cancel Meeting",
                        variant="secondary"
                    )

                    # Version info
                    version_btn = gr.Button(
                        "ℹ️ Version Info",
                        variant="secondary"
                    )
                    version_display = gr.Textbox(
                        label="Version Information",
                        interactive=False,
                        visible=False
                    )

                    # Voice settings (not yet wired to any handler)
                    gr.Markdown("### 🎭 Voice Settings")
                    voice_enabled = gr.Checkbox(
                        label="Enable voice responses",
                        value=True
                    )
                    voice_selection = gr.Dropdown(
                        choices=[
                            "v2/en_speaker_0",
                            "v2/en_speaker_1",
                            "v2/en_speaker_2",
                            "v2/en_speaker_6",
                            "v2/en_speaker_9"
                        ],
                        value="v2/en_speaker_6",
                        label="AI Voice"
                    )

            # ---- Event handlers ------------------------------------------
            # Handlers close over `self` (previously they reached for the
            # module-global `app`, which is the same object but breaks if a
            # second instance is ever created).

            def handle_text_submit(message, history, session):
                """Send a typed message through the agent; clear the box."""
                if message.strip():
                    return asyncio.run(
                        self.process_message(message, history, session)
                    )
                # Nothing to send: leave history and typed text untouched.
                return history, message

            def handle_audio_submit(audio, history, session):
                """Transcribe a recorded clip and run it through the agent.

                Gradio delivers ``type="numpy"`` audio as a
                (sample_rate, ndarray) tuple, or None when cleared.
                """
                print(f"🎤 AUDIO DEBUG: Received audio input: {type(audio)}")
                print(f"🎤 AUDIO DEBUG: Audio data: {audio}")

                if audio is None:
                    print("🎤 AUDIO DEBUG: No audio received")
                    return history, "No audio received", None

                print("🎤 AUDIO DEBUG: Processing audio...")
                try:
                    if isinstance(audio, tuple) and len(audio) >= 2:
                        sample_rate, audio_array = audio
                        print(f"🎤 AUDIO DEBUG: Sample rate: {sample_rate}")
                        print(f"🎤 AUDIO DEBUG: Audio array type: {type(audio_array)}")
                        print(f"🎤 AUDIO DEBUG: Audio array shape: {audio_array.shape if hasattr(audio_array, 'shape') else 'No shape'}")

                        # Synchronous STT via the audio handler.
                        transcription = self.audio_handler.process_audio_input(audio)
                        print(f"🎤 AUDIO DEBUG: Transcription result: {transcription}")

                        if transcription and transcription != "No audio received":
                            result = asyncio.run(
                                self.process_message(transcription, history, session)
                            )
                            # Updated history, transcription shown in the
                            # text box, no TTS audio output for now.
                            return result[0], transcription, None

                        print("🎤 AUDIO DEBUG: No valid transcription received")
                        return history, "No audio transcription available", None

                    print("🎤 AUDIO DEBUG: Invalid audio format")
                    return history, "Invalid audio format", None
                except Exception as e:
                    print(f"🎤 AUDIO ERROR: {str(e)}")
                    import traceback
                    traceback.print_exc()
                    return history, f"Audio processing error: {str(e)}", None

            def handle_quick_action(action_text, history, session):
                """Handle quick action button clicks."""
                result = asyncio.run(
                    self.process_message(action_text, history, session)
                )
                # Updated history; clear the text input.
                return result[0], ""

            # Wire up the event handlers
            send_btn.click(
                fn=handle_text_submit,
                inputs=[text_input, chatbot, session_id],
                outputs=[chatbot, text_input]
            )
            text_input.submit(
                fn=handle_text_submit,
                inputs=[text_input, chatbot, session_id],
                outputs=[chatbot, text_input]
            )
            audio_input.change(
                fn=handle_audio_submit,
                inputs=[audio_input, chatbot, session_id],
                outputs=[chatbot, text_input, audio_output]
            )

            # Quick action handlers
            quick_meet.click(
                fn=lambda hist, sess: handle_quick_action(
                    "Book a 30-minute Google Meet with Peter for next available time",
                    hist, sess
                ),
                inputs=[chatbot, session_id],
                outputs=[chatbot, text_input]
            )
            quick_availability.click(
                fn=lambda hist, sess: handle_quick_action(
                    "What is Peter's availability this week?",
                    hist, sess
                ),
                inputs=[chatbot, session_id],
                outputs=[chatbot, text_input]
            )
            quick_cancel.click(
                fn=lambda hist, sess: handle_quick_action(
                    "Cancel my upcoming meeting with Peter",
                    hist, sess
                ),
                inputs=[chatbot, session_id],
                outputs=[chatbot, text_input]
            )

            # Version info handler
            def show_version():
                info = get_version_info()
                version_text = (
                    f"Version: {info['version']}\n"
                    f"Build: {info['build_date']}\n"
                    f"Description: {info['description']}\n"
                    f"Status: {info['status']}"
                )
                # Fix: the original listed version_display twice in
                # `outputs` and returned (text, update) — duplicate output
                # components are not reliably supported.  Set value and
                # visibility with one gr.update to a single output.
                return gr.update(value=version_text, visible=True)

            version_btn.click(
                fn=show_version,
                outputs=[version_display]
            )

        return demo


# Global app instance
app = ChatCalVoiceApp()

# Create and launch the interface
if __name__ == "__main__":
    import uvicorn

    try:
        # Create WebRTC-enabled FastAPI app as main app
        webrtc_app = create_fastapi_app()

        # Create Gradio interface (for future integration)
        demo = app.create_interface()

        # WebRTC-first approach: Launch FastAPI with WebSocket endpoints
        print("🚀 ChatCal WebRTC-First Deployment v0.4.3")
        print("📡 WebSocket endpoint: /ws/webrtc/{client_id}")
        print("🧪 WebRTC demo page: /webrtc/demo")
        print("⚡ API status: /webrtc/test")
        print("⚠️ Gradio interface development - WebRTC priority")

        # Launch WebRTC FastAPI app directly
        uvicorn.run(webrtc_app, host="0.0.0.0", port=7860)

    except Exception as e:
        print(f"❌ WebRTC integration error: {e}")
        print("📋 Falling back to Gradio-only deployment")
        import traceback
        traceback.print_exc()

        # Create stable Gradio interface fallback
        demo = app.create_interface()

        print("🚀 ChatCal Voice-Enabled Assistant v0.4.3")
        print("📱 Traditional voice input available via Gradio Audio component")
        print("⚙️ WebRTC real-time streaming: Debugging in progress")

        # Launch configuration for HF Spaces (stable fallback)
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,  # HF handles sharing
            show_error=True
        )