cheekeong2025 committed
Commit b8b6ab1 · verified · 1 Parent(s): f677d23

Upload streamlit_app.py

Files changed (1)
  1. streamlit_app.py +43 -132
streamlit_app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import time
 import uuid
 import json
 from datetime import datetime, timezone
@@ -27,32 +28,11 @@ with st.expander("ℹ️ How it works", expanded=False):
     """
     )

-# ------------------------------
-# Hugging Face token + dataset
-# ------------------------------
-
-# Prefer Space secret HF_TOKEN if present
-default_token = os.getenv("HF_TOKEN", "").strip()
-
 st.subheader("🔑 Hugging Face Hub")
-hf_token = st.text_input(
-    "HF write token (fine-grained or classic write)",
-    type="password",
-    value=default_token,
-    help=(
-        "On a private Space, set HF_TOKEN in Settings → Repository secrets. "
-        "This token must have read/write access to the dataset repo."
-    ),
-)
-dataset_repo = st.text_input(
-    "Dataset repo to store jobs (e.g. yourname/llm_classroom_jobs)",
-    "",
-    help="Use the full ID: username/repo_name. It can be public or private."
-)
+hf_token = st.text_input("HF write token (fine-grained or classic write)", type="password")
+dataset_repo = st.text_input("Dataset repo to store jobs (e.g. yourname/llm_classroom_jobs)", "")

-# ------------------------------
-# Training configuration
-# ------------------------------
+# Defaults for classroom
 st.subheader("⚙️ Training configuration")
 colA, colB = st.columns(2)
 with colA:
@@ -73,59 +53,26 @@ with advanced:
     eval_samples = st.text_input("Eval subset size (blank = full)", "")
     random_seed = st.number_input("Random seed", 0, 10000, 42)

-
-# ------------------------------
-# Helper functions
-# ------------------------------
 def ensure_dataset_repo(api: HfApi, repo_id: str, token: str):
-    # Create dataset repo if it doesn't exist
     create_repo(repo_id, repo_type="dataset", token=token, exist_ok=True)

-
 def upload_json(api: HfApi, repo_id: str, token: str, path_in_repo: str, obj: dict):
     tmp = "tmp_upload.json"
     with open(tmp, "w", encoding="utf-8") as f:
         json.dump(obj, f, indent=2)
-    try:
-        upload_file(
-            path_or_fileobj=tmp,
-            repo_id=repo_id,
-            path_in_repo=path_in_repo,
-            repo_type="dataset",
-            token=token,
-        )
-    finally:
-        try:
-            os.remove(tmp)
-        except FileNotFoundError:
-            pass
-
-
-def explain_404(prefix: str = ""):
-    st.error(
-        (prefix + "\n\n" if prefix else "")
-        + "Got a **404 from Hugging Face Hub**.\n\n"
-        "This usually means **one of these**:\n"
-        "- The dataset repo name is wrong (must be `username/repo_name`).\n"
-        "- The repo is **private** and your token does **not** have access.\n"
-        "- The file path (e.g. `jobs/<job_id>/status.json`) does not exist yet.\n\n"
-        "Check:\n"
-        "1. The exact spelling of `dataset_repo`.\n"
-        "2. That your HF token has **read & write** permissions to that repo.\n"
-        "3. That the worker has already pushed the job files (for status/metrics)."
+    upload_file(
+        path_or_fileobj=tmp,
+        repo_id=repo_id,
+        path_in_repo=path_in_repo,
+        repo_type="dataset",
+        token=token,
     )
+    os.remove(tmp)

-
-def missing_creds():
-    return not hf_token or not dataset_repo
-
-
-# ------------------------------
 # Create job
-# ------------------------------
 if st.button("📝 Create Job"):
-    if missing_creds():
-        st.error("Please provide HF token and dataset repo before creating a job.")
+    if not hf_token or not dataset_repo:
+        st.error("Please provide HF token and dataset repo.")
     else:
         try:
             api = HfApi(token=hf_token)
@@ -153,82 +100,46 @@ if st.button("📝 Create Job"):
                 "outputs_path": f"jobs/{job_id}/outputs/",
             }

-            status = {
-                "state": "pending",
-                "updated_at": now_iso,
-                "message": "waiting for worker",
-            }
-
+            # initial status
+            status = {"state": "pending", "updated_at": now_iso, "message": "waiting for worker"}
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/job.json", cfg)
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/status.json", status)

             st.success(f"✅ Job created! ID: {job_id}")
             st.code(json.dumps(cfg, indent=2))
-            st.info(
-                "Open the Colab/Kaggle worker, set the SAME dataset repo + token, and press Run (GPU)."
-            )
+            st.info("Open the Colab/Kaggle worker, set the SAME dataset repo + token, and press Run (GPU).")
         except Exception as e:
-            msg = str(e)
-            if "404" in msg:
-                explain_404("Error while creating the job.")
-            else:
-                st.exception(e)
+            st.exception(e)

 st.divider()
-
-# ------------------------------
-# Monitor / refresh status
-# ------------------------------
 st.subheader("🔎 Monitor a Job")
 job_id_input = st.text_input("Enter a Job ID to check status/artifacts", "")
+if st.button("🔄 Refresh status") and job_id_input and hf_token and dataset_repo:
+    try:
+        api = HfApi(token=hf_token)
+        # download status.json and maybe metrics
+        status_path = hf_hub_download(
+            repo_id=dataset_repo, repo_type="dataset",
+            filename=f"jobs/{job_id_input}/status.json", token=hf_token
+        )
+        with open(status_path, "r", encoding="utf-8") as f:
+            status = json.load(f)
+        st.write("**Status:**", status)

-if st.button("🔄 Refresh status"):
-    if missing_creds() or not job_id_input.strip():
-        st.error("Please provide HF token, dataset repo, and a Job ID.")
-    else:
         try:
-            api = HfApi(token=hf_token)
-
-            status_path = hf_hub_download(
-                repo_id=dataset_repo,
-                repo_type="dataset",
-                filename=f"jobs/{job_id_input}/status.json",
-                token=hf_token,
-            )
-            with open(status_path, "r", encoding="utf-8") as f:
-                status = json.load(f)
-            st.write("**Status:**", status)
-
-            # metrics.json is optional
-            try:
-                metrics_path = hf_hub_download(
-                    repo_id=dataset_repo,
-                    repo_type="dataset",
-                    filename=f"jobs/{job_id_input}/outputs/metrics.json",
-                    token=hf_token,
-                )
-                with open(metrics_path, "r", encoding="utf-8") as f:
-                    metrics = json.load(f)
-                st.write("**Metrics:**", metrics)
-            except Exception as e_metrics:
-                if "404" in str(e_metrics):
-                    st.info("No `metrics.json` yet — the worker might still be training.")
-                else:
-                    st.warning(f"Could not load metrics: {e_metrics}")
-
-            st.write("Artifacts folder (on HF):")
-            st.write(
-                f"https://huggingface.co/datasets/{dataset_repo}/tree/main/jobs/{job_id_input}/outputs"
+            metrics_path = hf_hub_download(
+                repo_id=dataset_repo, repo_type="dataset",
+                filename=f"jobs/{job_id_input}/outputs/metrics.json", token=hf_token
             )
-
-        except Exception as e:
-            msg = str(e)
-            if "404" in msg:
-                explain_404("Error while reading job status.")
-            else:
-                st.exception(e)
-
-st.caption(
-    "Tip: If your worker can't auto-restart/auto-pull, "
-    "you can open the dataset repo on Hugging Face to confirm files are there."
-)
+            with open(metrics_path, "r", encoding="utf-8") as f:
+                metrics = json.load(f)
+            st.write("**Metrics:**", metrics)
+        except Exception:
+            st.info("No metrics.json yet. The worker might still be training.")
+
+        st.write("Artifacts folder (on HF):")
+        st.write(f"https://huggingface.co/datasets/{dataset_repo}/tree/main/jobs/{job_id_input}/outputs")
+    except Exception as e:
+        st.exception(e)
+
+st.caption("Tip: If your worker can't auto-restart/auto-pull, you can open the dataset repo to confirm files are there.")