Spaces:
Running
Running
Upload ai/data_generation/consolidate_data.py with huggingface_hub
Browse files
ai/data_generation/consolidate_data.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def consolidate_data(files, output_file):
    """Merge several .npz training-data files into one compressed archive.

    Each input file is expected to contain "states", "policies", and
    "winners" arrays. The arrays from every readable file are concatenated
    along axis 0 and written to *output_file* via ``np.savez_compressed``.

    Paths in *files* that do not exist are skipped with a message; if no
    file contributes data, nothing is written and the function returns
    early. Returns ``None`` in all cases (progress is reported via print).
    """
    all_states = []
    all_policies = []
    all_winners = []

    for path in files:
        if not os.path.exists(path):
            print(f"Skipping {path}, not found.")
            continue
        print(f"Loading {path}...")
        # np.load on an .npz returns an NpzFile that keeps the underlying
        # zip handle open; the original never closed it (resource leak).
        # The context manager closes it, and indexing with data[...] fully
        # materializes each array before the file is closed.
        with np.load(path) as data:
            all_states.append(data["states"])
            all_policies.append(data["policies"])
            all_winners.append(data["winners"])

    if not all_states:
        print("No data to consolidate.")
        return

    np_states = np.concatenate(all_states, axis=0)
    np_policies = np.concatenate(all_policies, axis=0)
    np_winners = np.concatenate(all_winners, axis=0)

    np.savez_compressed(output_file, states=np_states, policies=np_policies, winners=np_winners)
    print(f"Consolidated {len(np_states)} samples to {output_file}")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
    # Data batches produced by separate generation runs; missing files
    # are skipped by consolidate_data itself.
    batch_files = [
        "ai/data/data_poc_800.npz",
        "ai/data/data_batch_strat_1.npz",
        "ai/data/data_batch_0.npz",
        "ai/data/data_batch_strat_0.npz",
    ]
    consolidate_data(batch_files, "ai/data/data_consolidated.npz")
|