MasteredUltraInstinct committed on
Commit
bef49c6
·
verified ·
1 Parent(s): ff36eb5

Update download_data.py

Browse files
Files changed (1) hide show
  1. download_data.py +18 -0
download_data.py CHANGED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Export a Hugging Face dataset to a local folder layout.
#
# Loads the "train" split of the dataset below and writes each row as a pair
# of files -- an image (.png) and a text file (.txt) holding the row's label --
# into dataset/train or dataset/val.
import os

from datasets import load_dataset

# Load from HF
dataset = load_dataset("Azu/Handwritten-Mathematical-Expression-Convert-LaTeX")

# Create both output folders up front; exist_ok makes re-runs safe.
os.makedirs("dataset/train", exist_ok=True)
os.makedirs("dataset/val", exist_ok=True)

# Split: 90% train, 10% val
# Every row whose index is a multiple of 10 goes to "val"; the rest go to
# "train". The split is therefore deterministic and depends only on row order.
for i, row in enumerate(dataset["train"]):
    subset = "val" if i % 10 == 0 else "train"
    # File names are the zero-padded (5 digits) row index, so the image and
    # its label share a stem and indices are never reused across subsets.
    image_path = f"dataset/{subset}/{i:05d}.png"
    latex_path = f"dataset/{subset}/{i:05d}.txt"

    # NOTE(review): assumes row["image"] is a PIL-style image exposing
    # .save(path) and row["label"] is a str (presumably the LaTeX source)
    # -- confirm against the dataset's schema.
    row["image"].save(image_path)
    with open(latex_path, "w", encoding="utf-8") as f:
        f.write(row["label"])