DarshanScripts committed on
Commit
188b16d
·
verified ·
1 Parent(s): ef90a8e

Upload stratego\datasets\uploader.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. stratego//datasets//uploader.py +96 -0
stratego//datasets//uploader.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # stratego/datasets/uploader.py
2
+ """
3
+ Upload Stratego datasets to Hugging Face Hub.
4
+ """
5
+
6
+ from __future__ import annotations
7
+ import os
8
+ from typing import Optional
9
+
10
+ try:
11
+ from datasets import Dataset
12
+ from huggingface_hub import HfApi
13
+ HF_AVAILABLE = True
14
+ except ImportError:
15
+ HF_AVAILABLE = False
16
+
17
+
18
+ # Default Hub dataset repository ("namespace/name") used when no repo_id is passed - change this to your repo
19
+ DEFAULT_REPO_ID = "STRATEGO-LLM-TRAINING/stratego"
20
+
21
+
22
def push_to_hub(
    dataset: "Dataset",
    repo_id: str = DEFAULT_REPO_ID,
    private: bool = False,
    token: Optional[str] = None,
) -> str:
    """
    Push a dataset to Hugging Face Hub.

    The dataset repo is created first if it does not exist yet. A failure in
    that step is printed and ignored so the upload itself is still attempted;
    any error raised by the upload propagates to the caller.

    Args:
        dataset: The dataset to upload
        repo_id: Hub repo ID like "username/stratego-games"
        private: Whether to make the dataset private
        token: HF token (optional, uses cached login). The same token is
            used for both the repo creation and the upload.

    Returns:
        URL to the dataset on the Hub

    Raises:
        ImportError: If `datasets` / `huggingface_hub` could not be imported.
    """
    if not HF_AVAILABLE:
        raise ImportError("Run: pip install datasets huggingface_hub")

    print(f"Pushing dataset to: {repo_id}")

    # Ensure repo exists. Best-effort: a failure here (e.g. no permission to
    # create, but the repo already exists) must not block the upload below.
    try:
        # Authenticate repo creation with the caller's token; previously the
        # token was only forwarded to the upload, so create_repo could run
        # unauthenticated or under a different cached account.
        api = HfApi(token=token)
        api.create_repo(repo_id, repo_type="dataset", exist_ok=True, private=private)
    except Exception as e:
        print(f"Note: {e}")

    dataset.push_to_hub(repo_id, private=private, token=token)

    url = f"https://huggingface.co/datasets/{repo_id}"
    print(f"Dataset uploaded: {url}")

    return url
58
+
59
+
60
def auto_push_after_game(
    logs_dir: str = "logs/games",
    repo_id: str = DEFAULT_REPO_ID,
    silent: bool = False,
) -> bool:
    """
    Build a dataset from the game logs and upload it to Hugging Face Hub.

    Intended to be invoked once a game has finished. Never raises for
    ordinary failures: any exception while building or uploading is turned
    into a ``False`` return value.

    Args:
        logs_dir: Directory handed to ``build_dataset_from_logs``
        repo_id: Hugging Face repo ID to upload to
        silent: If True, this function prints nothing when it skips or
            fails (``push_to_hub`` still prints its own progress lines)

    Returns:
        True if the dataset was built and pushed, False otherwise
    """
    verbose = not silent

    # Without the optional HF packages there is nothing we can do.
    if not HF_AVAILABLE:
        if verbose:
            print("HuggingFace not installed. Skipping auto-push.")
        return False

    try:
        # Imported lazily, inside the guarded region, so that an import
        # problem in the builder is reported like any other failure.
        from .builder import build_dataset_from_logs

        # Build from every log under logs_dir, then upload in one step.
        push_to_hub(build_dataset_from_logs(logs_dir), repo_id=repo_id)
    except Exception as exc:
        if verbose:
            print(f"Auto-push failed: {exc}")
        return False
    else:
        return True