# (Removed: Hugging Face Spaces web-page artifacts — "Spaces:"/"Paused" labels,
#  a file-size line, commit hashes, and a line-number gutter — that were scraped
#  into this file and are not valid Python.)
import gradio as gr
from huggingface_hub import InferenceClient
import json
import os
# Initialize the client
# Switching to Qwen/Qwen2.5-72B-Instruct as requested. It is SOTA and supported for chat.
# Token is read from the environment; when the variable is unset, token is None
# and the client presumably falls back to unauthenticated access — TODO confirm
# rate-limit implications for this Space.
token = os.getenv("HUGGING_FACE_HUB_TOKEN")
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct", token=token)
# System prompt: instructs the model to emit a single JSON object whose schema
# (analysis + recommendations) is consumed by the gr.JSON output component.
# The embedded Mermaid syntax rules exist to prevent diagrams that fail to
# render in the frontend. Do not edit the string casually — the schema keys
# below are part of the app's runtime contract.
SYSTEM_PROMPT = """You are ModelForge, an elite AI Solutions Architect with deep expertise in MLOps, research-level machine learning, and software engineering.
Your goal is to analyze the user's problem and design a **complete, production-ready, and theoretically sound** machine learning solution. You must handle complex, multimodal, and research-level requests with ease.
### Output Format
You must return a SINGLE valid JSON object. Do not include any markdown formatting (like ```json) outside the object.
{
"analysis": {
"dataType": "image" | "text" | "tabular" | "audio" | "video" | "time_series" | "multimodal",
"taskType": "classification" | "regression" | "nlp" | "vision" | "forecasting" | "multimodal_reasoning" | "reinforcement_learning" | "generative",
"complexity": "low" | "medium" | "high" | "research",
"domain": "string (e.g., 'High-Frequency Trading', 'Autonomous Driving', 'Genomic Research')",
"keyChallenges": ["challenge 1", "challenge 2"]
},
"recommendations": [
{
"name": "Model/Architecture Name",
"description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
"pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
"cons": ["Trade-off 1", "Trade-off 2"],
"architectureDiagram": "A detailed Mermaid.js graph. CRITICAL SYNTAX RULES: (1) Start with 'graph TD', (2) EVERY node must have a unique ID followed by square brackets, e.g., 'Node1[Label] --> Node2[Another Label]', (3) NEVER use just brackets without an ID like '[Label] --> [Next]', (4) NO curly braces {}, (5) Use \\n for newlines. Example: 'graph TD\\nNode1[Input] --> Node2[Preprocessing]\\nNode2 --> Node3[Model]\\nNode3 --> Node4[Output]'",
"mlopsBestPractices": [
"Data Versioning: Strategy (e.g., DVC/Delta Lake)",
"Experiment Tracking: Tools (e.g., MLflow/W&B)",
"Deployment: Strategy (e.g., Canary, Blue-Green, Edge)",
"Monitoring: Metrics to watch (e.g., Drift, Latency)",
"Hardware: Recommended GPU/TPU"
],
"trainingCode": "Production-grade Python code snippet (PyTorch/TensorFlow/JAX). Include imports, model definition, and a dummy training loop."
}
]
}
### Guidelines for "Robust & Complex"
1. **Deep Analysis**: Don't just say "tabular". Say "High-dimensional tabular data with potential covariate shift".
2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
4. **Valid JSON**: Your response must be parseable by `json.loads()`.
5. **Mermaid Diagrams**: ALWAYS use proper node IDs. WRONG: '[Input] --> [Model]'. CORRECT: 'A[Input] --> B[Model]' or 'Node1[Input] --> Node2[Model]'.
"""
# Worked example of a conforming JSON response for a research-level request.
# NOTE(review): this constant is not referenced anywhere in the visible file —
# generate_solution() builds its messages from SYSTEM_PROMPT only. Confirm
# whether it was meant to be appended to the prompt or is intentionally unused.
FEW_SHOT_EXAMPLES = """
User Input: "Create a unified multimodal reasoning model that composes and executes latent programs across vision, text, and audio."
JSON Response:
{
"analysis": {
"dataType": "multimodal",
"taskType": "multimodal_reasoning",
"complexity": "research",
"domain": "Neuro-Symbolic AI",
"keyChallenges": ["Cross-modal alignment", "Differentiable program synthesis", "Latent space collapse"]
},
"recommendations": [
{
"name": "Neuro-Symbolic Latent Programmer",
"description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
"pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
"cons": ["Unstable training dynamics", "High computational cost during search"],
"architectureDiagram": "graph TD\\nNode1[Image/Text/Audio Input] --> Node2[Modality Encoders]\\nNode2 --> Node3[Shared Latent Space]\\nNode3 --> Node4[Program Synthesizer LSTM]\\nNode4 --> Node5[Symbolic Tokens]\\nNode5 --> Node6[Neural Module Network]\\nNode6 --> Node7[Execution Result]\\nNode7 --> Node8[Loss Calculation]",
"mlopsBestPractices": [
"Data: WebDataset for sharded multimodal data",
"Training: Distributed Data Parallel (DDP) on A100 cluster",
"Monitoring: Track discrete token distribution entropy",
"Versioning: Git LFS for checkpoints"
],
"trainingCode": "import torch\\nimport torch.nn as nn\\n\\nclass NeuroSymbolicModel(nn.Module):\\n def __init__(self):\\n super().__init__()\\n self.vision_enc = VisionTransformer()\\n self.text_enc = BERT()\\n # ..."
}
]
}
"""
def generate_solution(description: str) -> str:
    """Ask the hosted chat model for an ML solution and return it as JSON text.

    Parameters
    ----------
    description : str
        The user's free-text problem statement from the Gradio textbox.

    Returns
    -------
    str
        A JSON string matching the schema declared in SYSTEM_PROMPT. On any
        failure — network/model errors, or model output that is not valid
        JSON — a fallback JSON string with an "error" key and an empty
        "recommendations" list is returned, so the gr.JSON output component
        always receives something renderable.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"User Input: \"{description}\"\n\nProvide the JSON solution:"}
    ]
    try:
        # Use chat_completion for conversational models
        response = client.chat_completion(
            messages,
            max_tokens=2500,
            temperature=0.7,
            top_p=0.95
        )
        content = response.choices[0].message.content
        json_str = _strip_code_fences(content)
        # SYSTEM_PROMPT demands parseable JSON, but that contract is not
        # guaranteed — validate here instead of shipping garbage to the UI.
        json.loads(json_str)
        return json_str
    except json.JSONDecodeError as e:
        return json.dumps(_error_payload(f"Model returned invalid JSON: {e}"))
    except Exception as e:
        return json.dumps(_error_payload(str(e)))


def _strip_code_fences(text: str) -> str:
    """Return *text* with any surrounding ```json / ``` markdown fence removed."""
    cleaned = text.strip()
    if "```json" in cleaned:
        return cleaned.split("```json")[1].split("```")[0].strip()
    if "```" in cleaned:
        return cleaned.split("```")[1].split("```")[0].strip()
    return cleaned


def _error_payload(message: str) -> dict:
    """Build the fallback response object, shaped like the SYSTEM_PROMPT schema."""
    return {
        "error": message,
        "analysis": {"dataType": "text", "taskType": "nlp", "complexity": "low", "domain": "error_fallback"},
        "recommendations": []
    }
# Gradio UI wiring: one multiline textbox in, one JSON panel out.
# generate_solution returns a JSON *string*; the gr.JSON component renders it.
demo = gr.Interface(
    fn=generate_solution,
    inputs=gr.Textbox(lines=5, placeholder="Describe your complex ML problem..."),
    outputs=gr.JSON(label="Recommendations"),
    title="ModelForge AI Backend v2",
    description="Advanced ML Solutions Architect (Powered by Qwen2.5-72B-Instruct)"
)
# Launch the app only when run as a script (Spaces runs this module directly).
if __name__ == "__main__":
    demo.launch()