Ali Mohsin committed on
Commit
aa7f9ab
·
1 Parent(s): 3be6273

Final updates

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -29,7 +29,7 @@ You must return a SINGLE valid JSON object. Do not include any markdown formatti
29
  "description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
30
  "pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
31
  "cons": ["Trade-off 1", "Trade-off 2"],
32
- "architectureDiagram": "A detailed Mermaid.js graph (graph TD). Use square brackets [] for ALL nodes. NO curly braces {}. Include data ingestion, preprocessing, backbone, heads, and post-processing.",
33
  "mlopsBestPractices": [
34
  "Data Versioning: Strategy (e.g., DVC/Delta Lake)",
35
  "Experiment Tracking: Tools (e.g., MLflow/W&B)",
@@ -47,6 +47,7 @@ You must return a SINGLE valid JSON object. Do not include any markdown formatti
47
  2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
48
  3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
49
  4. **Valid JSON**: Your response must be parseable by `json.loads()`.
 
50
  """
51
 
52
  FEW_SHOT_EXAMPLES = """
@@ -67,7 +68,7 @@ JSON Response:
67
  "description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
68
  "pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
69
  "cons": ["Unstable training dynamics", "High computational cost during search"],
70
- "architectureDiagram": "graph TD\\nA[Image/Text/Audio Input] --> B[Modality Encoders]\\nB --> C[Shared Latent Space]\\nC --> D[Program Synthesizer LSTM]\\nD --> E[Symbolic Tokens]\\nE --> F[Neural Module Network]\\nF --> G[Execution Result]\\nG --> H[Loss Calculation]",
71
  "mlopsBestPractices": [
72
  "Data: WebDataset for sharded multimodal data",
73
  "Training: Distributed Data Parallel (DDP) on A100 cluster",
 
29
  "description": "A comprehensive technical description. For research problems, describe the novel architecture (e.g., 'Dual-Encoder with Cross-Attention Adapters'). For production, specify the exact backbone (e.g., 'ResNet-50v2 with FPN').",
30
  "pros": ["Critical advantage 1", "Critical advantage 2", "Critical advantage 3"],
31
  "cons": ["Trade-off 1", "Trade-off 2"],
32
+ "architectureDiagram": "A detailed Mermaid.js graph. CRITICAL SYNTAX RULES: (1) Start with 'graph TD', (2) EVERY node must have a unique ID followed by square brackets, e.g., 'Node1[Label] --> Node2[Another Label]', (3) NEVER use just brackets without an ID like '[Label] --> [Next]', (4) NO curly braces {}, (5) Use \\n for newlines. Example: 'graph TD\\nNode1[Input] --> Node2[Preprocessing]\\nNode2 --> Node3[Model]\\nNode3 --> Node4[Output]'",
33
  "mlopsBestPractices": [
34
  "Data Versioning: Strategy (e.g., DVC/Delta Lake)",
35
  "Experiment Tracking: Tools (e.g., MLflow/W&B)",
 
47
  2. **Novel Architectures**: If the user asks for "latent program synthesis", design a "Neural Module Network with Discrete Latent Variables". Do not recommend generic models for research problems.
48
  3. **Complete Pipelines**: The MLOps section must be actionable and specific to the problem (e.g., "Use ONNX Runtime for <10ms latency").
49
  4. **Valid JSON**: Your response must be parseable by `json.loads()`.
50
+ 5. **Mermaid Diagrams**: ALWAYS use proper node IDs. WRONG: '[Input] --> [Model]'. CORRECT: 'A[Input] --> B[Model]' or 'Node1[Input] --> Node2[Model]'.
51
  """
52
 
53
  FEW_SHOT_EXAMPLES = """
 
68
  "description": "A unified architecture combining a ViT (Vision), RoBERTa (Text), and Wav2Vec2 (Audio) encoder into a shared embedding space. A central 'Program Synthesizer' LSTM decodes discrete symbolic tokens (Map, Filter, Join) which are executed by differentiable neural modules. Uses Gumbel-Softmax for end-to-end training of discrete operations.",
69
  "pros": ["Interpretable reasoning steps", "Generalizes to new combinations", "End-to-end differentiable"],
70
  "cons": ["Unstable training dynamics", "High computational cost during search"],
71
+ "architectureDiagram": "graph TD\\nNode1[Image/Text/Audio Input] --> Node2[Modality Encoders]\\nNode2 --> Node3[Shared Latent Space]\\nNode3 --> Node4[Program Synthesizer LSTM]\\nNode4 --> Node5[Symbolic Tokens]\\nNode5 --> Node6[Neural Module Network]\\nNode6 --> Node7[Execution Result]\\nNode7 --> Node8[Loss Calculation]",
72
  "mlopsBestPractices": [
73
  "Data: WebDataset for sharded multimodal data",
74
  "Training: Distributed Data Parallel (DDP) on A100 cluster",