FangSen9000 committed on
Commit
8e263cf
·
verified ·
1 Parent(s): 7f585cf

Upload nano_WaveGen

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. nano_WaveGen/.gitignore +45 -0
  3. nano_WaveGen/checkpoints_text2wave/best.pt +3 -0
  4. nano_WaveGen/checkpoints_text2wave/latest.pt +3 -0
  5. nano_WaveGen/checkpoints_text2wave/losses.png +3 -0
  6. nano_WaveGen/checkpoints_text2wave/training_log.txt +9 -0
  7. nano_WaveGen/checkpoints_text2wave/training_stats.npz +3 -0
  8. nano_WaveGen/configs/accelerate_config.yaml +15 -0
  9. nano_WaveGen/configs/default.yaml +154 -0
  10. nano_WaveGen/core_space/20251205_184253_step5_text2wave/batch_statistics.json +151 -0
  11. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/error_statistics.json +4 -0
  12. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/info.txt +10 -0
  13. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/Full_Sample_Data_for_Learning_Target.npz +3 -0
  14. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/camera_trajectory.npz +3 -0
  15. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/depth/depth_merge.npz +3 -0
  16. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/file_manifest.txt +38 -0
  17. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/metadata.json +78 -0
  18. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/normal/normal_merge.npz +3 -0
  19. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/object_coordinates/object_coordinates_merge.npz +3 -0
  20. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/point_clouds/point_clouds_merge.npz +3 -0
  21. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_000.png +0 -0
  22. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_001.png +0 -0
  23. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_002.png +0 -0
  24. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_003.png +0 -0
  25. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_004.png +0 -0
  26. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_005.png +0 -0
  27. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_006.png +0 -0
  28. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_007.png +0 -0
  29. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_008.png +0 -0
  30. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_009.png +0 -0
  31. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_010.png +0 -0
  32. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_011.png +0 -0
  33. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_012.png +0 -0
  34. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_013.png +0 -0
  35. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_014.png +0 -0
  36. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_015.png +0 -0
  37. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_016.png +0 -0
  38. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_017.png +0 -0
  39. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_018.png +0 -0
  40. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_019.png +0 -0
  41. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_020.png +0 -0
  42. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_021.png +0 -0
  43. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_022.png +0 -0
  44. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_023.png +0 -0
  45. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/segmentation/segmentation_merge.npz +3 -0
  46. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/predictions.npz +3 -0
  47. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/targets.npz +3 -0
  48. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/error_statistics.json +4 -0
  49. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/info.txt +10 -0
  50. nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/original_data/Full_Sample_Data_for_Learning_Target.npz +3 -0
.gitattributes CHANGED
@@ -43,3 +43,4 @@ EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.Distance-286
43
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.SwitchCost-265.py312.1.nbc filter=lfs diff=lfs merge=lfs -text
44
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.SimilarityCandidates-138.py312.1.nbc filter=lfs diff=lfs merge=lfs -text
45
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.CostFunc-307.py311.1.nbc filter=lfs diff=lfs merge=lfs -text
 
 
43
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.SwitchCost-265.py312.1.nbc filter=lfs diff=lfs merge=lfs -text
44
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.SimilarityCandidates-138.py312.1.nbc filter=lfs diff=lfs merge=lfs -text
45
  EMS-superquadric_fitting_inference/src/EMS/__pycache__/EMS_recovery.CostFunc-307.py311.1.nbc filter=lfs diff=lfs merge=lfs -text
46
+ nano_WaveGen/checkpoints_text2wave/losses.png filter=lfs diff=lfs merge=lfs -text
nano_WaveGen/.gitignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.pyc
6
+
7
+ # PyTorch
8
+ *.pth
9
+ *.pt
10
+ checkpoint*.pth
11
+ best_model*.pth
12
+ final_model.pth
13
+
14
+ # Logs and outputs
15
+ logs/
16
+ *.log
17
+ core_space/
18
+ outputs/
19
+
20
+ # Jupyter Notebook
21
+ .ipynb_checkpoints/
22
+
23
+ # Environment
24
+ .env
25
+ venv/
26
+
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # OS
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Temporary files
40
+ *.tmp
41
+ *.bak
42
+
43
+ data/movi_a_128x128
44
+ wandb
45
+ *.npz
nano_WaveGen/checkpoints_text2wave/best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a83304beb70d3e77c15709df1d486ce35e2a32dbebc13285914d763a54e74ee
3
+ size 926425298
nano_WaveGen/checkpoints_text2wave/latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f5e09c520f8fde415c6f95a7347dd0d3fa49f03fb1342da95f04a436c736567
3
+ size 926458332
nano_WaveGen/checkpoints_text2wave/losses.png ADDED

Git LFS Details

  • SHA256: 74f7b41963b5ce8524acc2c5e2bb53d5f332494838411f710f53df54e52908c6
  • Pointer size: 131 Bytes
  • Size of remote file: 269 kB
nano_WaveGen/checkpoints_text2wave/training_log.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-05 18:40:09 ============================================================
2
+ 2025-12-05 18:40:09 Dataset Information:
3
+ 2025-12-05 18:40:09 - Training samples: 100
4
+ 2025-12-05 18:40:09 - Batch size: 24
5
+ 2025-12-05 18:40:09 - Steps per epoch (full dataset): 5
6
+ 2025-12-05 18:40:09 - Total training steps: 50000
7
+ 2025-12-05 18:40:09 - Will traverse dataset: 10000.00 times
8
+ 2025-12-05 18:40:09 ============================================================
9
+ 2025-12-05 18:41:07 New best checkpoint at step 5: train_loss=2085.487305
nano_WaveGen/checkpoints_text2wave/training_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc87b2676cd86e2d19f72c320931e39eeba832d8e385b937c4f10c7261a752cf
3
+ size 3496
nano_WaveGen/configs/accelerate_config.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ distributed_type: MULTI_GPU
3
+ downcast_bf16: 'no'
4
+ gpu_ids: all
5
+ machine_rank: 0
6
+ main_training_function: main
7
+ mixed_precision: fp16
8
+ num_machines: 1
9
+ num_processes: 8
10
+ rdzv_backend: static
11
+ same_network: true
12
+ tpu_env: []
13
+ tpu_use_cluster: false
14
+ tpu_use_sudo: false
15
+ use_cpu: false
nano_WaveGen/configs/default.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # configs/default.yaml
2
+
3
+ # ==================== 核心配置 ====================
4
+
5
+ # 训练配置
6
+ training:
7
+ batch_size: 24 # Reduced for faster first step (8 per GPU)
8
+
9
+ # 基于步数的训练(v30新增)
10
+ max_steps: 50000 # 总训练步数(可以根据需要调整)
11
+
12
+ # 时序预测配置
13
+ max_history_frames: 3 # 最大历史帧数(1-3帧)
14
+ bidirectional_training: true # 双向预测训练(从中间帧开始)
15
+ # max_prediction_frames: 1 # <deprecated> 由模型覆盖整段序列,这里保留用于兼容
16
+ initial_teacher_forcing_prob: 0.5 # 初始锚点帧教师强制概率
17
+ initial_frame:
18
+ strategy: "middle" # 可选: middle | fixed | random
19
+ offset: 0 # 在基础策略上的偏移
20
+ random_history_sampling: true # 是否在推理时随机选择0-历史帧数量作为参考帧
21
+ freeze_static_from_anchor: true # 是否在预测序列时固定0-5维的静态参数沿用锚点帧
22
+ multi_sample_attempts: 5 # 每个样本的随机尝试次数(仅训练时启用)
23
+ decoder_noise_std: 0.2 # 解码阶段加入的高斯噪声标准差(0表示禁用)
24
+ frame_rate: 8.0 # MOVi 样本在预处理阶段统一为 8fps,速度计算与训练假设保持一致
25
+
26
+ # 调试和日志配置
27
+ debug_print_interval: 1 # 每多少步打印一次调试信息(损失值等)
28
+ log_interval: 50 # 每多少步记录一次日志
29
+
30
+ learning_rate: 0.001 # Standard T5 learning rate
31
+ gradient_clip_val: 1.0 # Gradient clipping value
32
+
33
+ # GPU配置
34
+ use_multi_gpu: true # 启用多GPU训练
35
+ gpu_list: [0, 1, 2, 3, 4, 5, 6, 7] # 可用GPU列表
36
+ use_free_gpus: true # 自动选择空闲GPU
37
+
38
+ # 保存训练的模型和生成结果 配置
39
+ evaluation:
40
+ max_batches: 0 # 0 表示禁用验证,只比较训练 loss;改成 null(YAML 空值,对应 Python 的 None)则验证全量;任何正整数则限制评估批次数。
41
+
42
+ save_generation:
43
+ enabled: true #保存训练的模型
44
+ save_gt: true # 是否保存GT数据
45
+ fixed_samples: 5 # 固定样本数量,用于对比
46
+ save_interval: 100 # 每100步保存一次
47
+ save_dir: "core_space" # 保存目录
48
+
49
+
50
+ # ==================== Text2Wave 配置 ====================
51
+
52
+ # 模型设置
53
+ text2wave_model:
54
+ # 原始模型: google/long-t5-tglobal-base
55
+ model_name: "google/t5-v1_1-small"
56
+
57
+ # 损失函数配置
58
+ loss:
59
+
60
+ # 损失权重
61
+ weights:
62
+ wave_loss: 4.0 # 波损失(超二次曲面参数)权重
63
+ wave_contrastive_loss: 2.0 # 序列级对比损失权重
64
+ world_info_loss: 0.5 # 世界信息损失(相机,缩放,时间)权重
65
+ controllable_info_loss: 0.1 # 可控制信息损失(质量,摩擦,弹性)权重
66
+ pla_loss: 3.0 # 最小作用量约束损失权重
67
+
68
+ wave_contrastive:
69
+ temperature: 0.2 # 对比分布温度
70
+
71
+ # 数据配置
72
+ data:
73
+ # MOVi数据集配置
74
+ num_workers: 32 # 数据加载线程数
75
+ max_sequences: 100 # 最大序列数,-1表示使用所有数据,设置较小值用于快速测试
76
+
77
+ physics:
78
+ gravity: 9.81 # 自由落体重力加速度(单位:m/s^2)
79
+ collision_buffer: 1.05 # 判定碰撞时的半径放大系数
80
+
81
+ # ==================== Wave2Pixel 配置 ====================
82
+
83
+ # 网格配置
84
+ grid:
85
+ size: 64 # 3D网格分辨率
86
+ prob_threshold: 0.5
87
+
88
+ # 世界坐标系配置
89
+ world_coordinate_system:
90
+ enabled: true # 是否启用世界坐标系
91
+ world_scale: 10.0 # 世界坐标范围 ±10米
92
+ voxel_size: 0.05 # 体素大小 5cm
93
+ near_plane: 0.1 # 近平面距离
94
+ far_plane: 50.0 # 远平面距离
95
+ predict_world_scale: true # 让模型预测世界缩放比例
96
+ world_scale_loss_weight: 0.1 # 世界缩放比例损失权重(当前为0.1;如需加快学习可增加到1.0)
97
+
98
+ # 相机配置
99
+ camera:
100
+ default_view: "front"
101
+ fov: 60
102
+ near: 0.1
103
+ far: 100.0
104
+ # 世界坐标系中的相机位置
105
+ views:
106
+ front: [0, 0, 2]
107
+ back: [0, 0, -2]
108
+ left: [-2, 0, 0]
109
+ right: [2, 0, 0]
110
+ top: [0, 2, 0]
111
+ bottom: [0, -2, 0]
112
+ # 相机旋转角度 (pitch, yaw, roll)
113
+ view_rotations:
114
+ front: [0, 0, 0]
115
+ back: [0, 3.14159, 0] # 180度旋转
116
+ left: [0, -1.5708, 0] # -90度旋转
117
+ right: [0, 1.5708, 0] # 90度旋转
118
+ top: [-1.5708, 0, 0] # -90度俯视
119
+ bottom: [1.5708, 0, 0] # 90度仰视
120
+
121
+ # 生成配置
122
+ generation:
123
+ mode: "image" # "image" 或 "video"
124
+ time:
125
+ start: 0.0
126
+ end: 12.0
127
+ fps: 30 # 外部观察频率
128
+ timestep: 0.0 # 用于单帧图像生成
129
+ compute_wsf: false # 是否默认计算完整WSF场
130
+ output_dir: "core_space" # 默认输出目录
131
+
132
+ # 输出格式配置
133
+ output:
134
+ format: "triple_channel" # 可选: "complex", "dual_channel", "triple_channel"
135
+ third_channel: "amplitude" # 如果format为"triple_channel",第三通道的内容: "amplitude", "phase", "none"
136
+
137
+ # Wave2Pixel相关的模型组件
138
+ model:
139
+ wave_encoder:
140
+ hidden_dim: 256
141
+ dropout: 0.1
142
+
143
+ feature_extractor:
144
+ input_dim: 4 # 实部、虚部、振幅、相位
145
+ hidden_dim: 64
146
+ output_dim: 32
147
+ dropout: 0.1
148
+
149
+ # 重命名为pixel_net以匹配代码中的使用
150
+ pixel_net:
151
+ channels: [32, 64, 128, 64, 4] # 最后4通道: RGB + 概率
152
+ kernel_size: 3
153
+ padding: 1
154
+ dropout: 0.1
nano_WaveGen/core_space/20251205_184253_step5_text2wave/batch_statistics.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "batch_size": 5,
3
+ "num_frames": 24,
4
+ "timestamp": "2025-12-05T18:42:58.362258",
5
+ "objects_per_frame": [
6
+ {
7
+ "frame": 0,
8
+ "mean_objects": 3.4,
9
+ "max_objects": 5,
10
+ "min_objects": 2
11
+ },
12
+ {
13
+ "frame": 1,
14
+ "mean_objects": 3.6,
15
+ "max_objects": 6,
16
+ "min_objects": 2
17
+ },
18
+ {
19
+ "frame": 2,
20
+ "mean_objects": 3.8,
21
+ "max_objects": 6,
22
+ "min_objects": 3
23
+ },
24
+ {
25
+ "frame": 3,
26
+ "mean_objects": 3.6,
27
+ "max_objects": 6,
28
+ "min_objects": 2
29
+ },
30
+ {
31
+ "frame": 4,
32
+ "mean_objects": 4.0,
33
+ "max_objects": 7,
34
+ "min_objects": 3
35
+ },
36
+ {
37
+ "frame": 5,
38
+ "mean_objects": 3.8,
39
+ "max_objects": 7,
40
+ "min_objects": 2
41
+ },
42
+ {
43
+ "frame": 6,
44
+ "mean_objects": 4.0,
45
+ "max_objects": 7,
46
+ "min_objects": 3
47
+ },
48
+ {
49
+ "frame": 7,
50
+ "mean_objects": 3.6,
51
+ "max_objects": 6,
52
+ "min_objects": 2
53
+ },
54
+ {
55
+ "frame": 8,
56
+ "mean_objects": 4.2,
57
+ "max_objects": 8,
58
+ "min_objects": 3
59
+ },
60
+ {
61
+ "frame": 9,
62
+ "mean_objects": 3.6,
63
+ "max_objects": 7,
64
+ "min_objects": 2
65
+ },
66
+ {
67
+ "frame": 10,
68
+ "mean_objects": 3.4,
69
+ "max_objects": 5,
70
+ "min_objects": 2
71
+ },
72
+ {
73
+ "frame": 11,
74
+ "mean_objects": 3.6,
75
+ "max_objects": 7,
76
+ "min_objects": 2
77
+ },
78
+ {
79
+ "frame": 12,
80
+ "mean_objects": 3.4,
81
+ "max_objects": 6,
82
+ "min_objects": 2
83
+ },
84
+ {
85
+ "frame": 13,
86
+ "mean_objects": 3.8,
87
+ "max_objects": 7,
88
+ "min_objects": 2
89
+ },
90
+ {
91
+ "frame": 14,
92
+ "mean_objects": 2.6,
93
+ "max_objects": 6,
94
+ "min_objects": 0
95
+ },
96
+ {
97
+ "frame": 15,
98
+ "mean_objects": 3.2,
99
+ "max_objects": 6,
100
+ "min_objects": 1
101
+ },
102
+ {
103
+ "frame": 16,
104
+ "mean_objects": 2.8,
105
+ "max_objects": 7,
106
+ "min_objects": 1
107
+ },
108
+ {
109
+ "frame": 17,
110
+ "mean_objects": 2.8,
111
+ "max_objects": 5,
112
+ "min_objects": 1
113
+ },
114
+ {
115
+ "frame": 18,
116
+ "mean_objects": 3.0,
117
+ "max_objects": 6,
118
+ "min_objects": 1
119
+ },
120
+ {
121
+ "frame": 19,
122
+ "mean_objects": 2.8,
123
+ "max_objects": 5,
124
+ "min_objects": 1
125
+ },
126
+ {
127
+ "frame": 20,
128
+ "mean_objects": 2.4,
129
+ "max_objects": 4,
130
+ "min_objects": 0
131
+ },
132
+ {
133
+ "frame": 21,
134
+ "mean_objects": 3.0,
135
+ "max_objects": 5,
136
+ "min_objects": 1
137
+ },
138
+ {
139
+ "frame": 22,
140
+ "mean_objects": 2.8,
141
+ "max_objects": 5,
142
+ "min_objects": 1
143
+ },
144
+ {
145
+ "frame": 23,
146
+ "mean_objects": 3.0,
147
+ "max_objects": 5,
148
+ "min_objects": 1
149
+ }
150
+ ]
151
+ }
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/error_statistics.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "object_mae": 1.520950198173523,
3
+ "world_mae": 3.048270600940062
4
+ }
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/info.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Text: large yellow metal cylinder, small yellow metal cylinder, small gray metal sphere
2
+ Generated at step: 5
3
+ Number of frames: 24
4
+ Sequence: sample_00000
5
+
6
+ --- Model Output Summary ---
7
+ Max objects: 10
8
+ Object parameters: 15 (exists + shape[2] + scale[3] + translation[3] + rotation[3] + velocity[3])
9
+ World parameters: 8 (camera_pos[3] + camera_quat[4] + scene_scale[1])
10
+ Physics parameters: 3 (mass + friction + restitution)
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/Full_Sample_Data_for_Learning_Target.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a99d3bd56b39f896e3f1febab643edf5c312fbc73d82073da95288a8b61a0ee
3
+ size 30977
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/camera_trajectory.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fd4cdb82402db1d9b33317c378ad09319a97c25c846d6d7435e4f7ff1d9786
3
+ size 450
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/depth/depth_merge.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f17e9fda8898016fa785a6d002e2fb865c4945f38a486be7520263cf4704e02f
3
+ size 2225336
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/file_manifest.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Original sequence: sample_00000
2
+ Data split: validation
3
+ Original path: ../data/movi_a_128x128/validation/sample_00000
4
+ Copied at: 2025-12-05T18:42:54.238918
5
+
6
+ Files included:
7
+ - Full_Sample_Data_for_Learning_Target.npz
8
+ - camera_trajectory.npz
9
+ - depth/depth_merge.npz
10
+ - metadata.json
11
+ - normal/normal_merge.npz
12
+ - object_coordinates/object_coordinates_merge.npz
13
+ - point_clouds/point_clouds_merge.npz
14
+ - rgb/frame_000.png
15
+ - rgb/frame_001.png
16
+ - rgb/frame_002.png
17
+ - rgb/frame_003.png
18
+ - rgb/frame_004.png
19
+ - rgb/frame_005.png
20
+ - rgb/frame_006.png
21
+ - rgb/frame_007.png
22
+ - rgb/frame_008.png
23
+ - rgb/frame_009.png
24
+ - rgb/frame_010.png
25
+ - rgb/frame_011.png
26
+ - rgb/frame_012.png
27
+ - rgb/frame_013.png
28
+ - rgb/frame_014.png
29
+ - rgb/frame_015.png
30
+ - rgb/frame_016.png
31
+ - rgb/frame_017.png
32
+ - rgb/frame_018.png
33
+ - rgb/frame_019.png
34
+ - rgb/frame_020.png
35
+ - rgb/frame_021.png
36
+ - rgb/frame_022.png
37
+ - rgb/frame_023.png
38
+ - segmentation/segmentation_merge.npz
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/metadata.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_frames": 24,
3
+ "num_instances": 3,
4
+ "resolution": 128,
5
+ "depth_range": [
6
+ 7.109129905700684,
7
+ 67.49893951416016
8
+ ],
9
+ "camera": {
10
+ "focal_length": 35.0,
11
+ "sensor_width": 32.0,
12
+ "field_of_view": 0.8575560450553894,
13
+ "K": [
14
+ [
15
+ 140.0,
16
+ 0.0,
17
+ 64.0
18
+ ],
19
+ [
20
+ 0.0,
21
+ 140.0,
22
+ 64.0
23
+ ],
24
+ [
25
+ 0.0,
26
+ 0.0,
27
+ 1.0
28
+ ]
29
+ ]
30
+ },
31
+ "instances": [
32
+ {
33
+ "id": 1,
34
+ "shape": "cylinder",
35
+ "size": "large",
36
+ "color": "yellow",
37
+ "color_rgb": [
38
+ 1.0,
39
+ 0.9333333373069763,
40
+ 0.019607843831181526
41
+ ],
42
+ "material": "metal",
43
+ "mass": 5.784790992736816,
44
+ "friction": 0.4000000059604645,
45
+ "restitution": 0.30000001192092896
46
+ },
47
+ {
48
+ "id": 2,
49
+ "shape": "cylinder",
50
+ "size": "small",
51
+ "color": "yellow",
52
+ "color_rgb": [
53
+ 1.0,
54
+ 0.9333333373069763,
55
+ 0.019607843831181526
56
+ ],
57
+ "material": "metal",
58
+ "mass": 0.723098874092102,
59
+ "friction": 0.4000000059604645,
60
+ "restitution": 0.30000001192092896
61
+ },
62
+ {
63
+ "id": 3,
64
+ "shape": "sphere",
65
+ "size": "small",
66
+ "color": "gray",
67
+ "color_rgb": [
68
+ 0.34117648005485535,
69
+ 0.34117648005485535,
70
+ 0.34117648005485535
71
+ ],
72
+ "material": "metal",
73
+ "mass": 0.483887255191803,
74
+ "friction": 0.4000000059604645,
75
+ "restitution": 0.30000001192092896
76
+ }
77
+ ]
78
+ }
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/normal/normal_merge.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01026452044e3bccdf88dae92dca52d818e95795b4e813b5ce567604de090a23
3
+ size 126632
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/object_coordinates/object_coordinates_merge.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e29f3ba063dceb54307959ad67c82ba1b0803b2901515a5a17cbddb407c2661
3
+ size 1620075
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/point_clouds/point_clouds_merge.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3bc0a862953979939ec1e974e0f22cfcf78b09b384c20ba0a523f1b817fa86
3
+ size 8503662
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_000.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_001.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_002.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_003.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_004.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_005.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_006.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_007.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_008.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_009.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_010.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_011.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_012.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_013.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_014.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_015.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_016.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_017.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_018.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_019.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_020.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_021.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_022.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/rgb/frame_023.png ADDED
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/original_data/segmentation/segmentation_merge.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:147566467e6cf53cd5478853e69d816161f9dd3e5b3198021a40de9e4405272d
3
+ size 9184
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/predictions.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f36d0a5cf1afa6e06e29b9e6fe572848d6e0fbf9193e0c26a319bf6dc9d95653
3
+ size 124404
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_0/targets.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee90f8ec231b1c46f276e2de5164df93d2d09f7750f2d80964dc9fc15361408
3
+ size 115668
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/error_statistics.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "object_mae": 1.5807031393051147,
3
+ "world_mae": 2.9718214426902705
4
+ }
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/info.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Text: large purple metal cylinder, large blue rubber cylinder, small green metal cube, small red rubber sphere, small gray rubber cylinder, small blue rubber cylinder, small yellow rubber cube, small purple rubber sphere, small red metal cylinder
2
+ Generated at step: 5
3
+ Number of frames: 24
4
+ Sequence: sample_00001
5
+
6
+ --- Model Output Summary ---
7
+ Max objects: 10
8
+ Object parameters: 15 (exists + shape[2] + scale[3] + translation[3] + rotation[3] + velocity[3])
9
+ World parameters: 8 (camera_pos[3] + camera_quat[4] + scene_scale[1])
10
+ Physics parameters: 3 (mass + friction + restitution)
nano_WaveGen/core_space/20251205_184253_step5_text2wave/sample_1/original_data/Full_Sample_Data_for_Learning_Target.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da40da407c8b7f95f661a141ed6b6c32a340af5ab4d291c91889b89b5631504e
3
+ size 42494