import json
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the client.
# Qwen/Qwen2.5-72B-Instruct is SOTA and supported for chat.
token = os.getenv("HUGGING_FACE_HUB_TOKEN")
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=token)

SYSTEM_PROMPT = """You are ModelForge, an elite AI Solutions Architect with deep expertise in MLOps, research-level machine learning, and software engineering. Your goal is to analyze the user's problem and design a **complete, production-ready, and theoretically sound** machine learning solution. You must handle complex, multimodal, and research-level requests with ease.

### Output Format
You must return a SINGLE valid JSON object. Do not include any markdown formatting (like ```json) outside the object.

{
  "analysis": {
    "dataType": "image" | "text" | "tabular" | "audio" | "video" | "time_series" | "multimodal",
    "taskType": "classification" | "regression" | "nlp" | "vision" | "forecasting" | "multimodal_reasoning" | "reinforcement_learning" | "generative",
    "complexity": "low" | "medium" | "high" | "research",
    "domain": "string (e.g., 'High-Frequency Trading', 'Autonomous Driving', 'Genomic Research')",
    "keyChallenges": ["challenge 1", "challenge 2"]
  },
  "recommendations": [
    {
      "name": "Model/Architecture Name",
      "description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
      "pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
      "cons": ["Trade-off 1", "Trade-off 2"],
      "architectureDiagram": "A detailed Mermaid.js graph. CRITICAL SYNTAX RULES: (1) Start with 'graph TD', (2) EVERY node must have a unique ID followed by square brackets, e.g., 'Node1[Label] --> Node2[Another Label]', (3) NEVER use just brackets without an ID like '[Label] --> [Next]', (4) NO curly braces {}, (5) Use \\n for newlines. Example: 'graph TD\\nNode1[Input] --> Node2[Preprocessing]\\nNode2 --> Node3[Model]\\nNode3 --> Node4[Output]'",
      "mlopsBestPractices": [
        "Data Versioning: Strategy (e.g., DVC/Delta Lake)",
        "Experiment Tracking: Tools (e.g., MLflow/W&B)",
        "Deployment: Strategy (e.g., Canary, Blue-Green, Edge)",
        "Monitoring: Metrics to watch (e.g., Drift, Latency)",
        "Hardware: Recommended GPU/TPU"
      ],
      "trainingCode": "Production-grade Python code snippet (PyTorch/TensorFlow/JAX). Include imports, model definition, and a dummy training loop."
    }
  ]
}

### Guidelines for "Robust & Complex"
1. **Deep Analysis**: Don't just say "tabular". Say "High-dimensional tabular data with potential covariate shift".
2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
4. **Valid JSON**: Your response must be parseable by `json.loads()`.
5. **Mermaid Diagrams**: ALWAYS use proper node IDs. WRONG: '[Input] --> [Model]'. CORRECT: 'A[Input] --> B[Model]' or 'Node1[Input] --> Node2[Model]'.
"""

FEW_SHOT_EXAMPLES = """
User Input: "Create a unified multimodal reasoning model that composes and executes latent programs across vision, text, and audio."

JSON Response:
{
  "analysis": {
    "dataType": "multimodal",
    "taskType": "multimodal_reasoning",
    "complexity": "research",
    "domain": "Neuro-Symbolic AI",
    "keyChallenges": ["Cross-modal alignment", "Differentiable program synthesis", "Latent space collapse"]
  },
  "recommendations": [
    {
      "name": "Neuro-Symbolic Latent Programmer",
      "description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
      "pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
      "cons": ["Unstable training dynamics", "High computational cost during search"],
      "architectureDiagram": "graph TD\\nNode1[Image/Text/Audio Input] --> Node2[Modality Encoders]\\nNode2 --> Node3[Shared Latent Space]\\nNode3 --> Node4[Program Synthesizer LSTM]\\nNode4 --> Node5[Symbolic Tokens]\\nNode5 --> Node6[Neural Module Network]\\nNode6 --> Node7[Execution Result]\\nNode7 --> Node8[Loss Calculation]",
      "mlopsBestPractices": [
        "Data: WebDataset for sharded multimodal data",
        "Training: Distributed Data Parallel (DDP) on A100 cluster",
        "Monitoring: Track discrete token distribution entropy",
        "Versioning: Git LFS for checkpoints"
      ],
      "trainingCode": "import torch\\nimport torch.nn as nn\\n\\nclass NeuroSymbolicModel(nn.Module):\\n def __init__(self):\\n super().__init__()\\n self.vision_enc = VisionTransformer()\\n self.text_enc = BERT()\\n # ..."
    }
  ]
}
"""

_ERROR_ANALYSIS = {
    "dataType": "text",
    "taskType": "nlp",
    "complexity": "low",
    "domain": "error_fallback",
}


def _extract_json(content):
    """Best-effort extraction of a JSON object from a raw model reply.

    Strips markdown code fences (```json ... ``` or bare ``` ... ```) and,
    if the remainder still does not start with '{', slices from the first
    '{' to the last '}' so surrounding prose does not defeat json.loads.
    Returns a string; the caller is responsible for validating it.
    """
    json_str = content.strip()
    if "```json" in json_str:
        json_str = json_str.split("```json")[1].split("```")[0].strip()
    elif "```" in json_str:
        json_str = json_str.split("```")[1].split("```")[0].strip()
    if not json_str.startswith("{"):
        # Fallback: the model sometimes prefixes/suffixes chatter without
        # using fences; keep only the outermost object.
        start = json_str.find("{")
        end = json_str.rfind("}")
        if start != -1 and end > start:
            json_str = json_str[start:end + 1]
    return json_str


def generate_solution(description):
    """Produce an ML architecture recommendation for *description*.

    Returns a JSON string (validated as parseable) matching the schema
    described in SYSTEM_PROMPT, or a structured error payload if the model
    call fails or the reply cannot be parsed as JSON.
    """
    messages = [
        # FIX: FEW_SHOT_EXAMPLES was previously defined but never sent to the
        # model; append it so the worked example actually steers the output.
        {"role": "system", "content": SYSTEM_PROMPT + "\n" + FEW_SHOT_EXAMPLES},
        {"role": "user", "content": f"User Input: \"{description}\"\n\nProvide the JSON solution:"},
    ]
    try:
        # Use chat_completion for conversational models
        response = client.chat_completion(
            messages,
            max_tokens=2500,
            temperature=0.7,
            top_p=0.95,
        )
        content = response.choices[0].message.content
        json_str = _extract_json(content)
        # Validate before handing the payload to gr.JSON: previously an
        # unparseable reply was returned verbatim and broke the output widget.
        json.loads(json_str)
        return json_str
    except json.JSONDecodeError as e:
        return json.dumps({
            "error": f"Model returned invalid JSON: {e}",
            "analysis": _ERROR_ANALYSIS,
            "recommendations": [],
        })
    except Exception as e:  # network/auth/rate-limit errors from the API
        return json.dumps({
            "error": str(e),
            "analysis": _ERROR_ANALYSIS,
            "recommendations": [],
        })


demo = gr.Interface(
    fn=generate_solution,
    inputs=gr.Textbox(lines=5, placeholder="Describe your complex ML problem..."),
    outputs=gr.JSON(label="Recommendations"),
    title="ModelForge AI Backend v2",
    description="Advanced ML Solutions Architect (Powered by Qwen2.5-72B-Instruct)",
)

if __name__ == "__main__":
    demo.launch()