"""
Multi-Language Detection Demo with Gradio and the OpenAI SDK (OpenAI or Gemini models)
Supports text, audio, and video input for language detection
"""

import gradio as gr
import openai
from openai import OpenAI
import json
import os
from pydantic import BaseModel
import tempfile
import base64
from moviepy import VideoFileClip
# Module-level OpenAI client handle. It starts unset and is reassigned by
# transcribe_audio() and detect_language() on every call, using the API key
# supplied through the UI.
client = None

# Model configuration
# Base URL handed to the OpenAI SDK by get_client() when the selected model
# name starts with "gemini".
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"


def get_client(api_key, model_name):
    """Build an OpenAI SDK client suited to the requested model.

    Model names beginning with "gemini" (case-insensitive) are pointed at
    ``GEMINI_BASE_URL``; every other model uses the SDK's default endpoint.

    Args:
        api_key: API key forwarded to the client.
        model_name: Name of the model the client will be used with.

    Returns:
        A newly constructed ``OpenAI`` client.
    """
    client_kwargs = {"api_key": api_key}
    if model_name.lower().startswith("gemini"):
        # Only Gemini models need a non-default endpoint.
        client_kwargs["base_url"] = GEMINI_BASE_URL
    return OpenAI(**client_kwargs)


def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        Exception: If the video cannot be opened, has no audio track, or the
            audio cannot be written. The underlying error is chained as the
            cause.
    """
    # mkstemp creates the file atomically; tempfile.mktemp only returns a name
    # and is deprecated because another process can claim that name first.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # write_audiofile is given the path, not the descriptor
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("the video has no audio track")
            video.audio.write_audiofile(audio_path, codec='libmp3lame')
        finally:
            # Always release the resources held by the clip.
            video.close()
    except Exception as e:
        # Do not leave the placeholder file behind when extraction fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise Exception(f"Failed to extract audio from video: {str(e)}") from e
    return audio_path
    

def transcribe_audio(api_key, audio_path, model_name="gemini-2.5-flash"):
    """Transcribe an audio file with Whisper (OpenAI) or a Gemini model.

    Model names starting with "gemini" receive the audio base64-encoded inside
    a chat completion request; every other model name is transcribed with
    ``whisper-1``.

    Args:
        api_key: API key for the selected provider.
        audio_path: Path of the audio file to transcribe.
        model_name: Selected model; decides which provider path is used.

    Returns:
        The transcription text.

    Raises:
        Exception: If reading the file or the API call fails, or if Gemini
            returns no text. The underlying error is chained as the cause.
    """
    global client
    # Both provider paths use a client built the same way, so build it once.
    client = get_client(api_key, model_name)

    if not model_name.lower().startswith("gemini"):
        # Use Whisper for OpenAI models
        try:
            with open(audio_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text"
                )
            return transcript
        except Exception as e:
            raise Exception(f"Whisper transcription failed: {str(e)}") from e

    # Use Gemini audio understanding
    try:
        with open(audio_path, "rb") as audio_file:
            base64_audio = base64.b64encode(audio_file.read()).decode('utf-8')

        # Take the format from the file extension; anything outside the
        # known set is sent as "mp3".
        extension = audio_path.rsplit('.', 1)[-1].lower()
        audio_format = extension if extension in ("mp3", "wav", "webm", "ogg") else "mp3"

        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Transcribe this audio file exactly as spoken. Return only the transcription text, nothing else."
                        },
                        {
                            "type": "input_audio",
                            "input_audio": {
                                "data": base64_audio,
                                "format": audio_format
                            }
                        }
                    ]
                }
            ]
        )
        content = response.choices[0].message.content
        if content is None:
            # Report a missing answer explicitly instead of failing on .strip().
            raise ValueError("the model returned no transcription text")
        return content.strip()
    except Exception as e:
        raise Exception(f"Gemini transcription failed: {str(e)}") from e


# One detected language entry inside a LanguageDetection result.
# (Documented with comments rather than a docstring so the pydantic model
# definition itself stays untouched.)
class Language(BaseModel):
    # Language code; the prompt in detect_language() shows "en" as an example.
    code: str
    # Human-readable language name, e.g. "English" in the prompt example.
    name: str
    # Share of the text in this language; rendered with a trailing "%" by
    # format_results(), and the prompt example uses 100.
    percentage: float
    # Sample text from this language; format_results() omits it when empty.
    sample: str

# Structured result of a language-detection request; passed as
# response_format in detect_language() and consumed by format_results().
class LanguageDetection(BaseModel):
    # Every language found in the analyzed text.
    languages: list[Language]
    # Dominant language; the prompt example shows a code ("en").
    primary_language: str
    # True when more than one language was detected (selects the status line
    # in format_results()).
    is_multilingual: bool
    # Free-form confidence label; the prompt example uses "high".
    confidence: str
# NOTE(review): Language is already defined above, so this rebuild looks
# redundant - confirm before removing.
LanguageDetection.model_rebuild()

def detect_language(api_key, text, model_name="gemini-2.5-flash") -> LanguageDetection:
    """Detect the language(s) present in ``text`` using an OpenAI or Gemini model.

    The request uses the SDK's structured-output ``parse`` helper with
    ``LanguageDetection`` as the response format.

    Args:
        api_key: API key for the selected provider.
        text: Text to analyze.
        model_name: Model used for the detection request.

    Returns:
        The parsed ``LanguageDetection`` result (never ``None``).

    Raises:
        Exception: If the request fails, the response cannot be parsed, or the
            model returns no structured result (for example a refusal). The
            underlying error is chained as the cause.
    """
    global client
    client = get_client(api_key, model_name)
    
    prompt = f"""Analyze the following text and identify all languages present. 
If multiple languages are detected, provide the percentage breakdown.

Respond ONLY with valid JSON in this exact format (no markdown, no code blocks):
{{
    "languages": [
        {{
            "code": "en",
            "name": "English",
            "percentage": 100,
            "sample": "sample text from the language"
        }}
    ],
    "primary_language": "en",
    "is_multilingual": false,
    "confidence": "high"
}}

Text to analyze:
{text}"""
    
    try:
        response = client.beta.chat.completions.parse(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a language detection expert. Always respond with valid JSON only."},
                {"role": "user", "content": prompt}
            ],
            response_format=LanguageDetection,
            temperature=0.1,
            max_tokens=1000
        )

        message = response.choices[0].message
        result = message.parsed
        if result is None:
            # Without this check a None result would only fail later, in
            # format_results(), with an unrelated AttributeError.
            reason = getattr(message, "refusal", None) or "the model returned no structured result"
            raise ValueError(reason)
        return result
    except json.JSONDecodeError as e:
        raise Exception(f"Failed to parse language detection response: {str(e)}") from e
    except Exception as e:
        raise Exception(f"Language detection failed: {str(e)}") from e


def format_results(detection_result: LanguageDetection, transcribed_text=None):
    """Render a detection result as a Markdown report.

    Args:
        detection_result: Detection result to display.
        transcribed_text: Optional transcription; when truthy it is appended
            in a fenced block at the end of the report.

    Returns:
        The Markdown report as a single string.
    """
    pieces = ["# 🌍 Language Detection Results\n\n"]

    # Status line depends on whether several languages were found.
    pieces.append(
        "**📊 Status:** Multiple languages detected\n\n"
        if detection_result.is_multilingual
        else "**📊 Status:** Single language detected\n\n"
    )
    pieces.append(f"**🎯 Primary Language:** {detection_result.primary_language}\n\n")
    pieces.append(f"**✅ Confidence:** {detection_result.confidence}\n\n")
    pieces.append("---\n\n## Detected Languages:\n\n")

    # One sub-section per language; the sample line is skipped when empty.
    for entry in detection_result.languages:
        pieces.append(f"### {entry.name} ({entry.code})\n")
        pieces.append(f"- **Percentage:** {entry.percentage}%\n")
        if entry.sample:
            pieces.append(f"- **Sample:** *\"{entry.sample}\"*\n")
        pieces.append("\n")

    if transcribed_text:
        pieces.append("---\n\n## 📝 Transcribed Text:\n\n")
        pieces.append(f"```\n{transcribed_text}\n```\n")

    return "".join(pieces)


def process_text_input(api_key, text, model_name):
    """Run language detection on typed text, yielding progress as it goes.

    Each yielded item is a ``(progress_message, result_markdown)`` pair; the
    result is an empty string until the final step. Validation problems and
    runtime errors are reported as a single error pair instead of raising.

    Args:
        api_key: API key entered in the UI.
        text: Text to analyze.
        model_name: Model selected in the UI.
    """
    # Validate inputs first; the first failing check decides the message.
    problem = None
    if not api_key:
        problem = "❌ Error: Please enter your API key"
    elif not (text and text.strip()):
        problem = "❌ Error: Please enter some text to analyze"
    if problem is not None:
        yield problem, ""
        return

    try:
        yield "🔍 Starting language detection... (10%)", ""
        detection = detect_language(api_key, text, model_name)
        yield "🧠 Analyzing results... (70%)", ""
        report = format_results(detection)
        yield "✅ Done (100%)", report
    except Exception as err:
        yield f"❌ Error: {str(err)}", ""


def process_audio_input(api_key, audio_file, model_name):
    """Transcribe an audio file and detect its language(s), yielding progress.

    Each yielded item is a ``(progress_message, result_markdown)`` pair; the
    result is an empty string until the final step. Validation problems and
    runtime errors are reported as a single error pair instead of raising.

    Args:
        api_key: API key entered in the UI.
        audio_file: Path of the uploaded audio file, or ``None`` if missing.
        model_name: Model selected in the UI.
    """
    if not api_key:
        yield "❌ Error: Please enter your API key", ""
        return

    if audio_file is None:
        yield "❌ Error: Please upload an audio file", ""
        return

    try:
        yield "🎧 Upload received. Starting transcription... (10%)", ""
        transcribed_text = transcribe_audio(api_key, audio_file, model_name)
        # Mirror the text tab: never send an empty text to the detector.
        if not transcribed_text or not transcribed_text.strip():
            yield "❌ Error: No speech could be transcribed from the audio", ""
            return
        yield "📝 Transcription complete. Detecting language... (60%)", ""
        result = detect_language(api_key, transcribed_text, model_name)
        yield "🧾 Analysis complete. Formatting results... (90%)", ""
        formatted = format_results(result, transcribed_text)
        yield "✅ Done (100%)", formatted
    except Exception as e:
        yield f"❌ Error: {str(e)}", ""


def process_video_input(api_key, video_file, model_name):
    """Extract audio from a video, transcribe it and detect its language(s).

    Each yielded item is a ``(progress_message, result_markdown)`` pair; the
    result is an empty string until the final step. Validation problems and
    runtime errors are reported as a single error pair instead of raising.
    The temporary audio file is removed when the generator finishes.

    Args:
        api_key: API key entered in the UI.
        video_file: Path of the uploaded video file, or ``None`` if missing.
        model_name: Model selected in the UI.
    """
    if not api_key:
        yield "❌ Error: Please enter your API key", ""
        return

    if video_file is None:
        yield "❌ Error: Please upload a video file", ""
        return

    audio_path = None
    try:
        yield "🎬 Received video. Extracting audio... (10%)", ""
        audio_path = extract_audio_from_video(video_file)
        yield "🎧 Audio extracted. Starting transcription... (40%)", ""
        transcribed_text = transcribe_audio(api_key, audio_path, model_name)
        # Mirror the text tab: never send an empty text to the detector.
        if not transcribed_text or not transcribed_text.strip():
            yield "❌ Error: No speech could be transcribed from the video", ""
            return
        yield "📝 Transcription complete. Detecting language... (70%)", ""
        result = detect_language(api_key, transcribed_text, model_name)
        yield "🧾 Analysis complete. Formatting results... (90%)", ""
        formatted = format_results(result, transcribed_text)
        yield "✅ Done (100%)", formatted
    except Exception as e:
        yield f"❌ Error: {str(e)}", ""
    finally:
        # Clean up temporary audio file
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup: a file that cannot be removed must not
                # mask the result, but only filesystem errors are ignored.
                pass


# Create Gradio interface
# Top-level UI definition: shared credentials/model row, then one tab per
# input type, then the click handlers that connect buttons to the
# process_* generator functions defined above.
with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Language Detector") as demo:
    gr.Markdown("""
    # 🌍 Multi-Language Detector
    Detect and distinguish multiple languages from text, audio, or video input using OpenAI or Gemini APIs.
    """)

    # API Key input (shared by all three tabs)
    with gr.Row():
        api_key_input = gr.Textbox(
            label="API Key",
            placeholder="sk-... (OpenAI) or GEMINI_API_KEY",
            type="password",
            info="Enter your OpenAI API key or Gemini API key"
        )
        # Model selector (supports GPT and Gemini families); names starting
        # with "gemini" are routed to the Gemini endpoint by get_client().
        model_selector = gr.Dropdown(
            label="Model",
            choices=["gpt-4", "gpt-4o", "gemini-2.5-flash", "gemini-2.5-pro"],
            value="gemini-2.5-flash",
            info="Choose model. Gemini models use Google's API endpoint."
        )

    gr.Markdown("---")

    # Create tabs for different input types. Each tab has an input column
    # (input widget + button) and an output column (progress + results).
    with gr.Tabs():
        # Text Input Tab
        with gr.Tab("📝 Text Input"):
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type or paste text in any language...",
                        lines=8
                    )
                    text_button = gr.Button("🔍 Detect Language", variant="primary")
                with gr.Column():
                    text_progress = gr.Markdown(value="", label="Progress")
                    text_output = gr.Markdown(label="Results")

            # Examples: single- and mixed-language snippets that fill text_input
            gr.Examples(
                examples=[
                    ["Hello, how are you today?"],
                    ["Bonjour! Comment allez-vous?"],
                    ["こんにちは、お元気ですか？"],
                    ["Hola, ¿cómo estás? Hello, how are you?"],
                    ["Привет! مرحبا! 你好！"]
                ],
                inputs=text_input,
                label="Example Texts"
            )

        # Audio Input Tab
        with gr.Tab("🎤 Audio Input"):
            with gr.Row():
                with gr.Column():
                    # type="filepath": the handler receives a path, which
                    # transcribe_audio() opens from disk.
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath"
                    )
                    audio_button = gr.Button("🔍 Transcribe & Detect Language", variant="primary")
                with gr.Column():
                    audio_progress = gr.Markdown(value="", label="Progress")
                    audio_output = gr.Markdown(label="Results")

        # Video Input Tab
        with gr.Tab("🎥 Video Input"):
            with gr.Row():
                with gr.Column():
                    # NOTE(review): process_video_input() passes this value to
                    # VideoFileClip as a path - presumably gr.Video yields a
                    # file path by default; confirm for the Gradio version used.
                    video_input = gr.Video(
                        label="Upload Video File"
                    )
                    video_button = gr.Button("🔍 Extract Audio, Transcribe & Detect", variant="primary")
                with gr.Column():
                    video_progress = gr.Markdown(value="", label="Progress")
                    video_output = gr.Markdown(label="Results")

    # Set up event handlers. Every handler is a generator yielding
    # (progress_message, result_markdown) pairs, matching the two outputs.
    text_button.click(
        fn=process_text_input,
        inputs=[api_key_input, text_input, model_selector],
        outputs=[text_progress, text_output]
    )

    audio_button.click(
        fn=process_audio_input,
        inputs=[api_key_input, audio_input, model_selector],
        outputs=[audio_progress, audio_output]
    )

    video_button.click(
        fn=process_video_input,
        inputs=[api_key_input, video_input, model_selector],
        outputs=[video_progress, video_output]
    )

# Launch the demo only when this file is run as a script (not on import);
# share=False is passed explicitly to launch().
if __name__ == "__main__":
    demo.launch(share=False)