import gradio as gr
import requests
import os
from datetime import datetime
import pandas as pd

# Hugging Face token from environment
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# Four different LLM models from USA and China
MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct",    # 🇺🇸 USA (Meta)
    "google/gemma-2-9b-it",                 # 🇺🇸 USA (Google)
    "Qwen/Qwen2.5-72B-Instruct",            # 🇨🇳 China (Alibaba)
    "deepseek-ai/DeepSeek-R1",              # 🇨🇳 China (DeepSeek)
]

def query_model(model_id, prompt, max_tokens=300, temperature=0.7):
    """Send prompt to model and get response"""
    API_URL = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }
    
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }
    
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        
        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]
        else:
            error_detail = response.json() if response.text else response.text
            return f"Error {response.status_code}: {error_detail}"
            
    except Exception as e:
        return f"Exception: {str(e)}"

def collect_batch_responses(prompts_text, max_tokens=300, temperature=0.7):
    """Collect responses from all models for multiple prompts"""
    # Split prompts by newline and filter empty lines
    prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
    
    if not prompts:
        return pd.DataFrame(), None, "⚠️ No prompts provided"
    
    results = []
    status_msg = f"Processing {len(prompts)} prompt(s) across {len(MODELS)} model(s)..."
    
    # Process each prompt independently
    for prompt_idx, prompt_text in enumerate(prompts, 1):
        # Each prompt gets fresh responses from all models
        for model in MODELS:
            response = query_model(model, prompt_text, max_tokens, temperature)
            
            results.append({
                'timestamp': datetime.now().isoformat(),
                'prompt_number': prompt_idx,
                'prompt': prompt_text,
                'model': model.split('/')[-1],  # Short model name
                'full_model': model,
                'response': response
            })
    
    df = pd.DataFrame(results)
    csv_filename = f"batch_responses_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(csv_filename, index=False)
    
    completion_msg = f"✅ Completed! Processed {len(prompts)} prompt(s) × {len(MODELS)} models = {len(results)} total responses"
    
    return df, csv_filename, completion_msg

# Gradio interface
with gr.Blocks(title="Batch LLM Response Collector") as demo:
    gr.Markdown("""
    # 🤖 Batch Multi-LLM Response Collector
    
    Compare responses from 4 different LLMs (2 US, 2 China):
    - **Llama 3.3 70B** 🇺🇸 - Meta's latest model (USA)
    - **Gemma 2 9B** 🇺🇸 - Google's efficient model (USA)
    - **Qwen 2.5 72B** 🇨🇳 - Alibaba's flagship model (China)
    - **DeepSeek R1** 🇨🇳 - DeepSeek's reasoning model (China)
    
    **Batch Processing:** Enter multiple prompts (one per line). Each prompt is processed independently with no conversation history or cross-contamination.
    """)
    
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your prompts (one per line)",
                placeholder="What is artificial intelligence?\nExplain quantum computing.\nDescribe machine learning.",
                lines=8
            )
            
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=300,
                    step=50,
                    label="Max Response Length"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
            
            submit_btn = gr.Button("Process Batch", variant="primary", size="lg")
    
    status_output = gr.Textbox(label="Status", interactive=False)
    df_output = gr.Dataframe(label="Results", wrap=True)
    csv_output = gr.File(label="Download CSV")
    
    submit_btn.click(
        fn=collect_batch_responses,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=[df_output, csv_output, status_output]
    )
    
    gr.Markdown("""
    ---
    ### 📝 About
    - Uses Hugging Face Router API
    - **Each prompt is completely independent** - no conversation history
    - Multiple prompts processed sequentially (one per line)
    - Each prompt gets fresh responses from all 4 models
    - Results include prompt_number for easy tracking
    - All results saved to timestamped CSV for analysis
    
    ### 💡 Tips
    - Separate prompts with line breaks
    - Empty lines are automatically ignored
    - Processing time scales with: (number of prompts) × (number of models)
    """)

if __name__ == "__main__":
    demo.launch()