File size: 9,233 Bytes
540e67a
 
 
45c1706
1cd1f8d
540e67a
 
 
 
a8124a8
540e67a
 
 
a8124a8
540e67a
 
 
 
 
 
 
 
 
 
 
 
45c1706
540e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca4ac1c
45c1706
ca4ac1c
 
 
 
 
 
45c1706
ca4ac1c
540e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7a466b
 
 
 
 
 
 
45c1706
31e3087
 
 
 
 
 
 
 
 
b7a466b
540e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a1869c
540e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45c1706
540e67a
476e8e8
540e67a
 
1cd1f8d
540e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
b7a466b
 
31e3087
540e67a
ca4ac1c
540e67a
 
 
476e8e8
540e67a
 
 
45c1706
540e67a
45c1706
 
31e3087
540e67a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
{
  "title": "Ropedia Xperience-10M Task Suite Project Status",
  "version": "2026-06-01",
  "decision": "public_sample_pipeline_verified_multi_episode_omni_data_staging",
  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and keeps later multi-episode model-quality claims separate from current single-episode evidence.",
  "scope_boundary": {
    "validated_episode_count": 1,
    "aligned_frames": 5821,
    "sliding_windows": 1161,
    "current_feature_dimensions": 8546,
    "core_task_count": 12,
    "neural_head_count": 12,
    "direction_extension_probe_count": 4,
    "audio_featurized": true,
    "raw_xperience10m_data_redistributed": false,
    "qwen3_omni_32_episode_claim": false
  },
  "rows": [
    {
      "area": "Public-sample pipeline",
      "status": "verified",
      "evidence": [
        "results/episode_task_suite/summary_report.json",
        "results/episode_task_suite/windows.csv",
        "results/episode_task_suite/feature_manifest.json"
      ],
      "readout": "One public Xperience-10M sample episode is converted into 5,821 frames, 1,161 aligned 20-frame windows, and an 8,546-dimensional representation for repeatable task evaluation."
    },
    {
      "area": "Task suite",
      "status": "verified",
      "evidence": [
        "scripts/episode_task_suite.py",
        "results/episode_task_suite/",
        "docs/data/summary_metrics.json"
      ],
      "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
    },
    {
      "area": "Neural heads",
      "status": "verified",
      "evidence": [
        "scripts/neural_task_models.py",
        "results/episode_task_suite/neural_mlp/"
      ],
      "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
    },
    {
      "area": "Audio contribution study",
      "status": "verified",
      "evidence": [
        "scripts/audio_ablation_and_raw_upgrade.py",
        "results/audio_ablation/",
        "docs/data/audio_ablation_summary.json"
      ],
      "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
    },
    {
      "area": "Evaluation protocol",
      "status": "verified",
      "evidence": [
        "EVALUATION_PROTOCOL.md",
        "docs/data/evaluation_protocol.json",
        "scripts/build_evaluation_protocol.py"
      ],
      "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
    },
    {
      "area": "Research takeaways",
      "status": "verified",
      "evidence": [
        "RESEARCH_TAKEAWAYS.md",
        "docs/data/research_takeaways.json",
        "scripts/build_research_takeaways.py"
      ],
      "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
    },
    {
      "area": "Research roadmap",
      "status": "current",
      "evidence": [
        "RESEARCH_ROADMAP.md",
        "docs/data/research_roadmap.json"
      ],
      "readout": "The staged path connects public-sample task development to 128-episode data staging, Qwen3-Omni LoRA, foundation-model selection, robustness runs, and larger omni/world-model extensions."
    },
    {
      "area": "Foundation-model plan",
      "status": "current",
      "evidence": [
        "FOUNDATION_MODEL_PLAN.md",
        "docs/data/foundation_model_plan.json"
      ],
      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
    },
    {
      "area": "Official dataset wording",
      "status": "verified",
      "evidence": [
        "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
        "docs/data/xperience10m_dataset_card_alignment.json"
      ],
      "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
    },
    {
      "area": "Source alignment",
      "status": "verified",
      "evidence": [
        "SOURCE_ALIGNMENT_AUDIT.md",
        "docs/data/source_alignment_audit.json",
        "scripts/validate_source_alignment.py"
      ],
      "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
    },
    {
      "area": "Website and HF mirrors",
      "status": "verified",
      "evidence": [
        "docs/data/website_integrity.json",
        "docs/data/mirror_parity.json",
        "docs/data/live_publication_status.json"
      ],
      "readout": "Local website links/assets pass, prepared mirrors match, and public GitHub/HF URLs have been checked after upload."
    },
    {
      "area": "Publication package",
      "status": "verified",
      "evidence": [
        "docs/data/publication_audit.json",
        "QUALITY_GATES.md",
        "docs/data/quality_gates.json"
      ],
      "readout": "Public bundles are checked for raw-data exclusion, cache exclusion, heavy-archive exclusion, token-string scanning, and stale presentation copy."
    },
    {
      "area": "Reproducibility",
      "status": "verified_for_public_sample",
      "evidence": [
        "REPRODUCIBILITY.md",
        "docs/data/reproducibility_matrix.json",
        "notes/reproducibility_audit.md"
      ],
      "readout": "The public sample workflow has explicit commands, expected outputs, and exact-match reproduction evidence."
    },
    {
      "area": "Qwen3-Omni fine-tuning",
      "status": "data_staging_full_metrics_pending",
      "evidence": [
        "results/omni_finetune/DATA_ACCESS_STATUS.md",
        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
      ],
      "readout": "Full-dataset access is granted and a 128-episode selected relay is in progress with chunked parallel transfer and overlapping batch prefetch; final held-out metrics require completed staging, manifest construction, training, and held-out evaluation."
    },
    {
      "area": "Raw Xperience-10M redistribution",
      "status": "not_included",
      "evidence": [
        "DATA_NOTICE.md",
        "docs/data/publication_audit.json"
      ],
      "readout": "Raw MP4, HDF5, RRD files, private gated data, and full Qwen weights are intentionally excluded."
    }
  ],
  "fast_research_route": [
    "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
    "Open docs/data/project_packet.json for the machine-readable project path.",
    "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
    "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the staged path from public-sample task work to multi-episode modeling.",
    "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
    "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
    "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
    "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
    "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
    "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
    "Inspect results/omni_finetune/DATA_ACCESS_STATUS.md before judging Qwen3-Omni scale-up status."
  ],
  "current_reading_notes": [
    "Cross-episode generalization is evaluated in the later multi-episode stage.",
    "Older pilot path names refer to setup files, not completed held-out training results.",
    "The current reconstruction task reconstructs feature vectors; it does not perform pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
    "Audio is one of the synchronized source modalities in the current task representation.",
    "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",
    "Foundation-model selection is explicit: Qwen3-Omni is the immediate trainable pilot, Cosmos 3 is the first world-model branch, and policy models such as OpenVLA/openpi/GR00T wait for action-target conversion."
  ]
}