File size: 5,758 Bytes
b7a466b
 
31e3087
4602161
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4602161
b7a466b
45c1706
b7a466b
 
 
 
 
 
 
 
 
 
fc9e8cf
b7a466b
 
45c1706
 
b7a466b
fc9e8cf
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367c357
b7a466b
31e3087
 
 
 
4602161
31e3087
 
 
 
 
 
 
 
 
 
 
 
 
 
b7a466b
 
 
 
45c1706
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31e3087
b7a466b
31e3087
b7a466b
31e3087
 
04c0bde
31e3087
 
b7a466b
 
 
 
 
 
31e3087
b7a466b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
{
  "title": "Ropedia Xperience-10M Research Roadmap",
  "summary": "Staged path from the public-sample task lab to multi-episode held-out evaluation, foundation-model selection, and larger omni/world-model extensions.",
  "current_decision_point": "Keep the public-sample task suite as the development harness, prepare the selected official Xperience-10M episodes for the held-out Qwen3-Omni pilot, then branch into Cosmos 3 world modeling and policy-model experiments after the data preparation path is stable.",
  "phases": [
    {
      "id": "public_sample_task_lab",
      "name": "Public-Sample Task Lab",
      "status": "implemented",
      "entry_condition": "One public Xperience-10M sample episode is available.",
      "deliverables": [
        "1161 aligned windows",
        "12 task contracts",
        "minimal baseline heads",
        "neural MLP heads",
        "modality atlas",
        "task walkthroughs",
        "derived figures"
      ],
      "completion_evidence": [
        "PROJECT_STATUS.md",
        "EVALUATION_PROTOCOL.md",
        "RESEARCH_TAKEAWAYS.md",
        "docs/data/summary_metrics.json",
        "results/episode_task_suite/summary_report.json"
      ],
      "reader_takeaway": "The public sample supports task design, feature contracts, walkthroughs, and baseline comparisons."
    },
    {
      "id": "multi_episode_data_staging",
      "name": "Multi-Episode Data Staging",
      "status": "active",
      "entry_condition": "Access to the gated dataset is available and there is enough storage for the selected episodes.",
      "deliverables": [
        "128 selected episodes",
        "episode manifest",
        "missing-view manifest",
        "held-out episode split",
        "source-discovery report"
      ],
      "completion_evidence": [
        "results/omni_finetune/DATA_ACCESS_STATUS.md",
        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
        "results/omni_finetune/source_discovery.json"
      ],
      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level."
    },
    {
      "id": "qwen3_omni_lora_pilot",
      "name": "Qwen3-Omni LoRA Pilot",
      "status": "next",
      "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
      "deliverables": [
        "dataset JSONL/media manifests",
        "LoRA adapter checkpoint",
        "progress logs",
        "held-out predictions",
        "metrics",
        "confusion matrices",
        "run report"
      ],
      "completion_evidence": [
        "dataset_manifest.json",
        "training_metadata.json",
        "progress.jsonl",
        "metrics.json",
        "predictions.jsonl",
        "RUN_REPORT.md"
      ],
      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop."
    },
    {
      "id": "foundation_model_selection_matrix",
      "name": "Foundation-Model Selection Matrix",
      "status": "next",
      "entry_condition": "The selected episodes are prepared, or a dry run of 3-8 episodes is available for preprocessing checks.",
      "deliverables": [
        "backbone registry",
        "Cosmos 3 world-model branch plan",
        "Qwen3-Omni LoRA baseline plan",
        "OpenVLA/openpi/GR00T policy-branch candidates",
        "model-specific evaluation additions"
      ],
      "completion_evidence": [
        "FOUNDATION_MODEL_PLAN.md",
        "docs/data/foundation_model_plan.json",
        "research_roadmap_interactive.json"
      ],
      "reader_takeaway": "Qwen3-Omni remains the first trainable held-out pilot; Cosmos 3 is the first world-model branch; VLA/policy models wait for explicit action targets."
    },
    {
      "id": "robustness_run_64_128_episode",
      "name": "64-128 Episode Robustness Run",
      "status": "planned",
      "entry_condition": "The selected-episode pilot trains and evaluates cleanly.",
      "deliverables": [
        "split-by-session metrics",
        "modality ablations",
        "calibration/object/language error analysis",
        "missing-view sensitivity analysis"
      ],
      "completion_evidence": [
        "held-out metrics by session",
        "held-out metrics by task",
        "held-out metrics by modality",
        "ablation tables",
        "qualitative error analysis"
      ],
      "reader_takeaway": "The robustness run tests whether the pilot conclusions survive broader sessions and missing modalities."
    },
    {
      "id": "foundation_world_model_extensions",
      "name": "Cosmos 3 and Policy-Model Extensions",
      "status": "planned",
      "entry_condition": "Enough multi-episode data and compute budget are available, and model-specific action/world-state targets are defined.",
      "deliverables": [
        "Cosmos 3 future-window or action-conditioned world-model probe",
        "OpenVLA/openpi/GR00T action-policy baseline",
        "audio/video/depth/pose/mocap conditioning checks",
        "affordance and object-interaction tasks",
        "synthetic-data usefulness test"
      ],
      "completion_evidence": [
        "task-specific held-out evaluations",
        "qualitative inspection",
        "updated model cards"
      ],
      "reader_takeaway": "The long-term direction is richer multimodal representation learning for embodied-AI reasoning, with model branches chosen by task fit rather than by a single default backbone."
    }
  ],
  "public_surfaces_to_update": [
    "README.md",
    "PROJECT_STATUS.md",
    "RESEARCH_TAKEAWAYS.md",
    "EVALUATION_PROTOCOL.md",
    "ARTIFACT_GUIDE.md",
    "docs/index.html",
    "docs/data/research_roadmap.json",
    "Hugging Face Space card",
    "Hugging Face artifact dataset card",
    "Hugging Face model card"
  ]
}