"""Multi-agent Gradio demo: fan a task out to specialized agents in parallel.

Each agent is a named persona (researcher / coder / analyzer / writer) backed
by the Hugging Face Inference API — no local model loading. Agents run
concurrently on a thread pool and results are aggregated into a Markdown
report, summary stats, and a downloadable JSON payload.
"""

import json
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import gradio as gr
from huggingface_hub import InferenceClient

# Use Hugging Face Inference API (no model loading needed!)
# This is FREE and much faster!
AGENT_CONFIGS = {
    "researcher": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Research and gather information",
        "system_prompt": "You are a research agent specialized in gathering and analyzing information. Provide detailed, well-researched responses.",
    },
    "coder": {
        "model": "bigcode/starcoder2-15b",
        "role": "Generate and explain code",
        "system_prompt": "You are an expert programmer. Generate clean, efficient, well-commented code.",
    },
    "analyzer": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Analyze data and provide insights",
        "system_prompt": "You are a data analyst. Provide clear insights and actionable recommendations.",
    },
    "writer": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Create content and documentation",
        "system_prompt": "You are a technical writer. Create clear, professional documentation and content.",
    },
}


class AgentSystem:
    """Owns one InferenceClient per agent and a shared thread pool."""

    def __init__(self):
        # No model loading! Using HF Inference API
        self.clients = {}
        self.executor = ThreadPoolExecutor(max_workers=4)
        # Initialize inference clients for each agent
        for agent_name in AGENT_CONFIGS:
            model = AGENT_CONFIGS[agent_name]["model"]
            self.clients[agent_name] = InferenceClient(model=model)
        print("✅ Agent system initialized with Inference API!")

    def generate_response(self, agent_name, task, max_tokens=300):
        """Generate one agent's response via the HF Inference API.

        Returns a dict with keys: agent, role, response, status.
        Never raises — API/network errors are folded into a status="error"
        result so a single failing agent cannot sink the whole batch.
        """
        try:
            config = AGENT_CONFIGS[agent_name]
            client = self.clients[agent_name]
            # Create prompt: persona system prompt + the user's task
            messages = [
                {"role": "system", "content": config["system_prompt"]},
                {"role": "user", "content": f"Task: {task}"},
            ]
            # Accumulate the streamed completion
            response_text = ""
            for message in client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7,
                stream=True,
            ):
                # BUGFIX: stream deltas may carry content=None (e.g. the final
                # chunk); concatenating None to str raises TypeError. Only
                # append real, non-empty text.
                delta_content = getattr(message.choices[0].delta, "content", None)
                if delta_content:
                    response_text += delta_content
            return {
                "agent": agent_name,
                "role": config["role"],
                "response": response_text.strip(),
                "status": "success",
            }
        except Exception as e:
            # Broad catch is deliberate: this runs on worker threads and the
            # caller expects a result dict, not an exception.
            return {
                "agent": agent_name,
                "role": AGENT_CONFIGS[agent_name]["role"],
                "response": f"Error: {str(e)}",
                "status": "error",
            }

    def run_agents_parallel(self, task, selected_agents, max_tokens=300):
        """Run the selected agents concurrently.

        Returns (results, total_time): a list of per-agent result dicts
        (each annotated with "time_taken" seconds) in completion order,
        and the total wall-clock time in seconds.
        """
        start_time = time.time()
        futures = {}
        results = []
        # Submit tasks to thread pool
        for agent_name in selected_agents:
            future = self.executor.submit(
                self.generate_response, agent_name, task, max_tokens
            )
            futures[future] = agent_name
        # Collect results as they complete
        for future in as_completed(futures):
            agent_name = futures[future]
            try:
                result = future.result(timeout=30)  # 30 second timeout per agent
                result["time_taken"] = round(time.time() - start_time, 2)
                results.append(result)
            except Exception as e:
                results.append({
                    "agent": agent_name,
                    "role": AGENT_CONFIGS[agent_name]["role"],
                    "response": f"Timeout or error: {str(e)}",
                    "status": "error",
                    "time_taken": round(time.time() - start_time, 2),
                })
        total_time = round(time.time() - start_time, 2)
        return results, total_time


# Initialize the agent system
print("🚀 Initializing AI Agent System...")
agent_system = AgentSystem()
print("✅ System ready!")


def process_task(task, researcher, coder, analyzer, writer, max_tokens,
                 progress=gr.Progress()):
    """Gradio callback: run the selected agents on *task*.

    Returns (markdown_report, stats_markdown, json_string) for the three
    output components. Empty task or no selected agent short-circuits with
    a warning message.
    """
    if not task.strip():
        return "⚠️ Please enter a task!", "", ""

    # Determine which agents to use from the checkbox states
    selected_agents = []
    if researcher:
        selected_agents.append("researcher")
    if coder:
        selected_agents.append("coder")
    if analyzer:
        selected_agents.append("analyzer")
    if writer:
        selected_agents.append("writer")

    if not selected_agents:
        return "⚠️ Please select at least one agent!", "", ""

    progress(0, desc="Starting agents...")
    # Run agents in parallel
    results, total_time = agent_system.run_agents_parallel(
        task, selected_agents, max_tokens
    )
    progress(1, desc="Complete!")

    # Format output as a Markdown report, one section per agent
    output = "# 🤖 AI Agent System Results\n\n"
    output += f"**Task:** {task}\n\n"
    output += f"**Agents Used:** {len(selected_agents)} agents running in parallel\n\n"
    output += f"**Total Time:** {total_time}s\n\n"
    output += "---\n\n"
    for idx, result in enumerate(results, 1):
        status_emoji = "✅" if result["status"] == "success" else "❌"
        output += f"## {status_emoji} Agent {idx}: {result['agent'].upper()}\n\n"
        output += f"**Role:** {result['role']}\n\n"
        output += f"**Response:**\n\n{result['response']}\n\n"
        output += f"*⏱️ Completed in {result['time_taken']}s*\n\n"
        output += "---\n\n"

    # Create summary stats
    success_count = sum(1 for r in results if r["status"] == "success")
    stats = f"""📊 **Execution Stats**
- Total Agents: {len(selected_agents)}
- Successful: {success_count}
- Failed: {len(selected_agents) - success_count}
- Total Time: {total_time}s
- Average per Agent: {round(total_time / len(selected_agents), 2)}s
"""

    # Detailed JSON for download
    details = {
        "task": task,
        "agents_used": selected_agents,
        "total_time": total_time,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": results,
    }
    json_output = json.dumps(details, indent=2)

    return output, stats, json_output


# Create Gradio Interface
custom_css = """
.gradio-container { font-family: 'Inter', sans-serif; }
.main-header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Agent System") as demo:
    # NOTE(review): the original HTML tags were lost in extraction; this
    # reconstruction uses the .main-header class defined in custom_css above
    # with the surviving heading/subtitle text — confirm against the original.
    gr.HTML("""
    <div class="main-header">
        <h1>🤖 Multi-Agent AI System</h1>
        <p>Parallel AI Processing with Specialized Agents | Powered by Hugging Face Inference API</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Task Configuration")
            task_input = gr.Textbox(
                label="What do you want the agents to work on?",
                placeholder="Example: Build a user authentication system with JWT tokens",
                lines=5,
            )

            gr.Markdown("### 🎯 Select Your Team")
            with gr.Group():
                researcher_check = gr.Checkbox(
                    label="🔍 Researcher Agent",
                    value=True,
                    info="Gathers information and best practices",
                )
                coder_check = gr.Checkbox(
                    label="💻 Coder Agent",
                    value=True,
                    info="Writes production-ready code",
                )
                analyzer_check = gr.Checkbox(
                    label="📊 Analyzer Agent",
                    value=True,
                    info="Provides insights and recommendations",
                )
                writer_check = gr.Checkbox(
                    label="✍️ Writer Agent",
                    value=True,
                    info="Creates documentation",
                )

            gr.Markdown("### ⚙️ Settings")
            max_tokens = gr.Slider(
                minimum=100,
                maximum=500,
                value=300,
                step=50,
                label="Response Length",
                info="Tokens per agent response",
            )

            process_btn = gr.Button(
                "🚀 Deploy Agents",
                variant="primary",
                size="lg",
            )

            gr.Markdown("""
### 💡 Pro Tips
- Use all 4 agents for comprehensive results
- Agents run simultaneously = 3-4x faster!
- Each agent brings unique expertise
- No model downloads = instant startup
""")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results Dashboard")
            output_display = gr.Markdown(
                value="*Results will appear here after running agents...*",
                label="Agent Outputs",
            )

            with gr.Accordion("📈 Execution Statistics", open=True):
                stats_display = gr.Markdown(value="*No data yet*")

            with gr.Accordion("💾 Download Results (JSON)", open=False):
                json_output = gr.Code(
                    label="Complete Results",
                    language="json",
                    lines=10,
                )

    gr.Markdown("### 📚 Quick Start Examples")
    gr.Examples(
        examples=[
            ["Create a REST API for a todo list application with authentication"],
            ["Build a machine learning pipeline for image classification"],
            ["Design a microservices architecture for an e-commerce platform"],
            ["Develop a real-time chat application using WebSockets"],
            ["Create a data visualization dashboard for sales analytics"],
        ],
        inputs=task_input,
    )

    gr.Markdown("""
---
## 🏗️ System Architecture

**How It Works:**
1. **Task Distribution** → Your task is sent to selected agents
2. **Parallel Processing** → All agents work simultaneously (not sequential!)
3. **Smart Aggregation** → Results are collected as they complete
4. **Instant Results** → See output from each agent in real-time

**Technology:**
- ⚡ Hugging Face Inference API (serverless, no model loading)
- 🔄 ThreadPoolExecutor for true parallelism
- 🚀 Free tier compatible
- 📊 Real-time progress tracking

**Models Used:**
- Mistral-7B-Instruct (Researcher, Analyzer, Writer)
- StarCoder2-15B (Coder)
""")

    # Connect button
    process_btn.click(
        fn=process_task,
        inputs=[
            task_input,
            researcher_check,
            coder_check,
            analyzer_check,
            writer_check,
            max_tokens,
        ],
        outputs=[output_display, stats_display, json_output],
    )

# Launch with optimized settings
if __name__ == "__main__":
    demo.queue(max_size=20)  # Handle multiple users
    demo.launch(
        show_error=True,
        share=False,
    )