File size: 6,789 Bytes
dce7a6a
 
 
 
 
c03608f
3be6273
ec83d0c
3be6273
dce7a6a
c03608f
 
 
 
 
 
dce7a6a
 
 
 
c03608f
dce7a6a
c03608f
 
dce7a6a
 
 
c03608f
 
 
 
aa7f9ab
c03608f
 
 
 
 
 
 
 
dce7a6a
 
 
 
c03608f
 
 
 
 
aa7f9ab
dce7a6a
 
 
c03608f
 
 
dce7a6a
c03608f
 
 
 
 
 
 
dce7a6a
 
c03608f
 
 
 
aa7f9ab
c03608f
 
 
 
 
 
 
dce7a6a
 
 
 
 
 
c03608f
 
 
 
dce7a6a
 
3be6273
c03608f
 
 
dce7a6a
c03608f
dce7a6a
 
c03608f
 
 
 
 
dce7a6a
c03608f
 
 
dce7a6a
 
 
 
c03608f
dce7a6a
 
 
 
 
c03608f
dce7a6a
c03608f
3be6273
dce7a6a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
from huggingface_hub import InferenceClient
import json
import os

# Initialize the client
# Switching to Qwen/Qwen2.5-72B-Instruct as requested. It is SOTA and supported for chat.
# Token is read from the environment; if HUGGING_FACE_HUB_TOKEN is unset,
# `token` is None and the client falls back to unauthenticated (rate-limited) access.
token = os.getenv("HUGGING_FACE_HUB_TOKEN")
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=token)

# System prompt sent with every request in generate_solution. It instructs the
# model to act as an ML solutions architect and to reply with a single JSON
# object matching the schema embedded below (analysis + recommendations,
# Mermaid-diagram syntax rules, MLOps checklist, training-code snippet).
# The literal itself is runtime data consumed by the model — do not edit its
# wording casually; the JSON-cleanup logic in generate_solution depends on the
# "no markdown fences" instruction mostly holding.
SYSTEM_PROMPT = """You are ModelForge, an elite AI Solutions Architect with deep expertise in MLOps, research-level machine learning, and software engineering.

Your goal is to analyze the user's problem and design a **complete, production-ready, and theoretically sound** machine learning solution. You must handle complex, multimodal, and research-level requests with ease.

### Output Format
You must return a SINGLE valid JSON object. Do not include any markdown formatting (like ```json) outside the object.

{
  "analysis": {
    "dataType": "image" | "text" | "tabular" | "audio" | "video" | "time_series" | "multimodal",
    "taskType": "classification" | "regression" | "nlp" | "vision" | "forecasting" | "multimodal_reasoning" | "reinforcement_learning" | "generative",
    "complexity": "low" | "medium" | "high" | "research",
    "domain": "string (e.g., 'High-Frequency Trading', 'Autonomous Driving', 'Genomic Research')",
    "keyChallenges": ["challenge 1", "challenge 2"]
  },
  "recommendations": [
    {
      "name": "Model/Architecture Name",
      "description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
      "pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
      "cons": ["Trade-off 1", "Trade-off 2"],
      "architectureDiagram": "A detailed Mermaid.js graph. CRITICAL SYNTAX RULES: (1) Start with 'graph TD', (2) EVERY node must have a unique ID followed by square brackets, e.g., 'Node1[Label] --> Node2[Another Label]', (3) NEVER use just brackets without an ID like '[Label] --> [Next]', (4) NO curly braces {}, (5) Use \\n for newlines. Example: 'graph TD\\nNode1[Input] --> Node2[Preprocessing]\\nNode2 --> Node3[Model]\\nNode3 --> Node4[Output]'",
      "mlopsBestPractices": [
        "Data Versioning: Strategy (e.g., DVC/Delta Lake)",
        "Experiment Tracking: Tools (e.g., MLflow/W&B)",
        "Deployment: Strategy (e.g., Canary, Blue-Green, Edge)",
        "Monitoring: Metrics to watch (e.g., Drift, Latency)",
        "Hardware: Recommended GPU/TPU"
      ],
      "trainingCode": "Production-grade Python code snippet (PyTorch/TensorFlow/JAX). Include imports, model definition, and a dummy training loop."
    }
  ]
}

### Guidelines for "Robust & Complex"
1. **Deep Analysis**: Don't just say "tabular". Say "High-dimensional tabular data with potential covariate shift".
2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
4. **Valid JSON**: Your response must be parseable by `json.loads()`.
5. **Mermaid Diagrams**: ALWAYS use proper node IDs. WRONG: '[Input] --> [Model]'. CORRECT: 'A[Input] --> B[Model]' or 'Node1[Input] --> Node2[Model]'.
"""

# A worked example (user input + ideal JSON response) in the exact schema the
# system prompt demands, suitable for few-shot prompting.
# NOTE(review): this constant is defined but never referenced anywhere in this
# file — presumably it was meant to be appended to the `messages` list in
# generate_solution as a few-shot example; confirm the intent before wiring
# it in or removing it.
FEW_SHOT_EXAMPLES = """
User Input: "Create a unified multimodal reasoning model that composes and executes latent programs across vision, text, and audio."

JSON Response:
{
  "analysis": {
    "dataType": "multimodal",
    "taskType": "multimodal_reasoning",
    "complexity": "research",
    "domain": "Neuro-Symbolic AI",
    "keyChallenges": ["Cross-modal alignment", "Differentiable program synthesis", "Latent space collapse"]
  },
  "recommendations": [
    {
      "name": "Neuro-Symbolic Latent Programmer",
      "description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
      "pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
      "cons": ["Unstable training dynamics", "High computational cost during search"],
      "architectureDiagram": "graph TD\\nNode1[Image/Text/Audio Input] --> Node2[Modality Encoders]\\nNode2 --> Node3[Shared Latent Space]\\nNode3 --> Node4[Program Synthesizer LSTM]\\nNode4 --> Node5[Symbolic Tokens]\\nNode5 --> Node6[Neural Module Network]\\nNode6 --> Node7[Execution Result]\\nNode7 --> Node8[Loss Calculation]",
      "mlopsBestPractices": [
        "Data: WebDataset for sharded multimodal data",
        "Training: Distributed Data Parallel (DDP) on A100 cluster",
        "Monitoring: Track discrete token distribution entropy",
        "Versioning: Git LFS for checkpoints"
      ],
      "trainingCode": "import torch\\nimport torch.nn as nn\\n\\nclass NeuroSymbolicModel(nn.Module):\\n    def __init__(self):\\n        super().__init__()\\n        self.vision_enc = VisionTransformer()\\n        self.text_enc = BERT()\\n        # ..."
    }
  ]
}
"""

def _strip_code_fences(text):
    """Return *text* with a surrounding Markdown code fence removed, if any.

    The model sometimes wraps its reply in ```json ... ``` fences despite the
    system prompt forbidding it; this extracts the fenced payload. Text with
    no fences is returned stripped of surrounding whitespace but otherwise
    unchanged.
    """
    payload = text.strip()
    if "```json" in payload:
        payload = payload.split("```json")[1].split("```")[0].strip()
    elif "```" in payload:
        payload = payload.split("```")[1].split("```")[0].strip()
    return payload


def generate_solution(description):
    """Query the LLM for an ML-solution recommendation and return a JSON string.

    Args:
        description: Free-text problem description entered by the user.

    Returns:
        A JSON string. On success it is the model's fence-stripped, validated
        JSON object; on any failure (network/API error, malformed model
        output) it is a fallback object with an "error" key plus a minimal
        analysis/recommendations skeleton, so the gr.JSON output component
        always receives parseable JSON.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"User Input: \"{description}\"\n\nProvide the JSON solution:"}
    ]

    try:
        # Use chat_completion for conversational models
        response = client.chat_completion(
            messages,
            max_tokens=2500,
            temperature=0.7,
            top_p=0.95
        )

        content = response.choices[0].message.content

        # Robust JSON cleanup: strip any Markdown fences the model added.
        json_str = _strip_code_fences(content)

        # Fix: the raw string was previously returned without validation, so
        # a malformed model reply propagated straight to the UI. Parse it
        # here; a json.JSONDecodeError falls through to the error payload.
        json.loads(json_str)
        return json_str
    except Exception as e:
        # Top-level boundary handler: convert any failure into a well-formed
        # error document rather than letting an exception surface in the UI.
        return json.dumps({
            "error": str(e),
            "analysis": {"dataType": "text", "taskType": "nlp", "complexity": "low", "domain": "error_fallback"},
            "recommendations": []
        })

# Gradio UI wiring: a single multi-line textbox in, a JSON viewer out,
# backed by generate_solution. gr.JSON parses the JSON string the handler
# returns and renders it as a collapsible tree.
demo = gr.Interface(
    fn=generate_solution,
    inputs=gr.Textbox(lines=5, placeholder="Describe your complex ML problem..."),
    outputs=gr.JSON(label="Recommendations"),
    title="ModelForge AI Backend v2",
    description="Advanced ML Solutions Architect (Powered by Qwen2.5-72B-Instruct)"
)

# Launch the local server only when run as a script (Spaces imports the
# module and picks up `demo` itself).
if __name__ == "__main__":
    demo.launch()