import json
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the client.
# Qwen/Qwen2.5-72B-Instruct is SOTA and supported for chat.
token = os.getenv("HUGGING_FACE_HUB_TOKEN")
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=token)

SYSTEM_PROMPT = """You are ModelForge, an elite AI Solutions Architect with deep expertise in MLOps, research-level machine learning, and software engineering. Your goal is to analyze the user's problem and design a **complete, production-ready, and theoretically sound** machine learning solution. You must handle complex, multimodal, and research-level requests with ease.

### Output Format
You must return a SINGLE valid JSON object. Do not include any markdown formatting (like ```json) outside the object.

{
  "analysis": {
    "dataType": "image" | "text" | "tabular" | "audio" | "video" | "time_series" | "multimodal",
    "taskType": "classification" | "regression" | "nlp" | "vision" | "forecasting" | "multimodal_reasoning" | "reinforcement_learning" | "generative",
    "complexity": "low" | "medium" | "high" | "research",
    "domain": "string (e.g., 'High-Frequency Trading', 'Autonomous Driving', 'Genomic Research')",
    "keyChallenges": ["challenge 1", "challenge 2"]
  },
  "recommendations": [
    {
      "name": "Model/Architecture Name",
      "description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
      "pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
      "cons": ["Trade-off 1", "Trade-off 2"],
      "architectureDiagram": "A detailed Mermaid.js graph. CRITICAL SYNTAX RULES: (1) Start with 'graph TD', (2) EVERY node must have a unique ID followed by square brackets, e.g., 'Node1[Label] --> Node2[Another Label]', (3) NEVER use just brackets without an ID like '[Label] --> [Next]', (4) NO curly braces {}, (5) Use \\n for newlines. Example: 'graph TD\\nNode1[Input] --> Node2[Preprocessing]\\nNode2 --> Node3[Model]\\nNode3 --> Node4[Output]'",
      "mlopsBestPractices": [
        "Data Versioning: Strategy (e.g., DVC/Delta Lake)",
        "Experiment Tracking: Tools (e.g., MLflow/W&B)",
        "Deployment: Strategy (e.g., Canary, Blue-Green, Edge)",
        "Monitoring: Metrics to watch (e.g., Drift, Latency)",
        "Hardware: Recommended GPU/TPU"
      ],
      "trainingCode": "Production-grade Python code snippet (PyTorch/TensorFlow/JAX). Include imports, model definition, and a dummy training loop."
    }
  ]
}

### Guidelines for "Robust & Complex"
1. **Deep Analysis**: Don't just say "tabular". Say "High-dimensional tabular data with potential covariate shift".
2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
4. **Valid JSON**: Your response must be parseable by `json.loads()`.
5. **Mermaid Diagrams**: ALWAYS use proper node IDs. WRONG: '[Input] --> [Model]'. CORRECT: 'A[Input] --> B[Model]' or 'Node1[Input] --> Node2[Model]'.
"""

FEW_SHOT_EXAMPLES = """
User Input: "Create a unified multimodal reasoning model that composes and executes latent programs across vision, text, and audio."

JSON Response:
{
  "analysis": {
    "dataType": "multimodal",
    "taskType": "multimodal_reasoning",
    "complexity": "research",
    "domain": "Neuro-Symbolic AI",
    "keyChallenges": ["Cross-modal alignment", "Differentiable program synthesis", "Latent space collapse"]
  },
  "recommendations": [
    {
      "name": "Neuro-Symbolic Latent Programmer",
      "description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
      "pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
      "cons": ["Unstable training dynamics", "High computational cost during search"],
      "architectureDiagram": "graph TD\\nNode1[Image/Text/Audio Input] --> Node2[Modality Encoders]\\nNode2 --> Node3[Shared Latent Space]\\nNode3 --> Node4[Program Synthesizer LSTM]\\nNode4 --> Node5[Symbolic Tokens]\\nNode5 --> Node6[Neural Module Network]\\nNode6 --> Node7[Execution Result]\\nNode7 --> Node8[Loss Calculation]",
      "mlopsBestPractices": [
        "Data: WebDataset for sharded multimodal data",
        "Training: Distributed Data Parallel (DDP) on A100 cluster",
        "Monitoring: Track discrete token distribution entropy",
        "Versioning: Git LFS for checkpoints"
      ],
      "trainingCode": "import torch\\nimport torch.nn as nn\\n\\nclass NeuroSymbolicModel(nn.Module):\\n def __init__(self):\\n super().__init__()\\n self.vision_enc = VisionTransformer()\\n self.text_enc = BERT()\\n # ..."
    }
  ]
}
"""

_ERROR_ANALYSIS = {
    "dataType": "text",
    "taskType": "nlp",
    "complexity": "low",
    "domain": "error_fallback",
}


def _extract_json(content):
    """Best-effort extraction of a JSON object from a raw model reply.

    Strips markdown code fences (```json ... ``` or bare ``` ... ```) and,
    if the remainder still does not start with '{', slices from the first
    '{' to the last '}' so surrounding prose does not defeat json.loads.
    Returns a string; the caller is responsible for validating it.
    """
    json_str = content.strip()
    if "```json" in json_str:
        json_str = json_str.split("```json")[1].split("```")[0].strip()
    elif "```" in json_str:
        json_str = json_str.split("```")[1].split("```")[0].strip()
    if not json_str.startswith("{"):
        # Fallback: the model sometimes prefixes/suffixes chatter without
        # using fences; keep only the outermost object.
        start = json_str.find("{")
        end = json_str.rfind("}")
        if start != -1 and end > start:
            json_str = json_str[start:end + 1]
    return json_str


def generate_solution(description):
    """Produce an ML architecture recommendation for *description*.

    Returns a JSON string (validated as parseable) matching the schema
    described in SYSTEM_PROMPT, or a structured error payload if the model
    call fails or the reply cannot be parsed as JSON.
    """
    messages = [
        # FIX: FEW_SHOT_EXAMPLES was previously defined but never sent to the
        # model; append it so the worked example actually steers the output.
        {"role": "system", "content": SYSTEM_PROMPT + "\n" + FEW_SHOT_EXAMPLES},
        {"role": "user", "content": f"User Input: \"{description}\"\n\nProvide the JSON solution:"},
    ]
    try:
        # Use chat_completion for conversational models
        response = client.chat_completion(
            messages,
            max_tokens=2500,
            temperature=0.7,
            top_p=0.95,
        )
        content = response.choices[0].message.content
        json_str = _extract_json(content)
        # Validate before handing the payload to gr.JSON: previously an
        # unparseable reply was returned verbatim and broke the output widget.
        json.loads(json_str)
        return json_str
    except json.JSONDecodeError as e:
        return json.dumps({
            "error": f"Model returned invalid JSON: {e}",
            "analysis": _ERROR_ANALYSIS,
            "recommendations": [],
        })
    except Exception as e:  # network/auth/rate-limit errors from the API
        return json.dumps({
            "error": str(e),
            "analysis": _ERROR_ANALYSIS,
            "recommendations": [],
        })


demo = gr.Interface(
    fn=generate_solution,
    inputs=gr.Textbox(lines=5, placeholder="Describe your complex ML problem..."),
    outputs=gr.JSON(label="Recommendations"),
    title="ModelForge AI Backend v2",
    description="Advanced ML Solutions Architect (Powered by Qwen2.5-72B-Instruct)",
)

if __name__ == "__main__":
    demo.launch()