Spaces:

cheekeong2025
/

LoRA-Fine-Tuning-with-Colab

Paused

App Files Files Community

cheekeong2025 committed on Nov 18, 2025

Commit

b8b6ab1

verified ·

1 Parent(s): f677d23

Upload streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +43 -132

streamlit_app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import uuid
 import json
 from datetime import datetime, timezone
@@ -27,32 +28,11 @@ with st.expander("ℹ️ How it works", expanded=False):
         """
     )
-# ------------------------------
-# Hugging Face token + dataset
-# ------------------------------
-# Prefer Space secret HF_TOKEN if present
-default_token = os.getenv("HF_TOKEN", "").strip()
 st.subheader("🔑 Hugging Face Hub")
-hf_token = st.text_input(
-    "HF write token (fine-grained or classic write)",
-    type="password",
-    value=default_token,
-    help=(
-        "On a private Space, set HF_TOKEN in Settings → Repository secrets. "
-        "This token must have read/write access to the dataset repo."
-    ),
-)
-dataset_repo = st.text_input(
-    "Dataset repo to store jobs (e.g. yourname/llm_classroom_jobs)",
-    "",
-    help="Use the full ID: username/repo_name. It can be public or private."
-)
-# ------------------------------
-# Training configuration
-# ------------------------------
 st.subheader("⚙️ Training configuration")
 colA, colB = st.columns(2)
 with colA:
@@ -73,59 +53,26 @@ with advanced:
     eval_samples = st.text_input("Eval subset size (blank = full)", "")
     random_seed = st.number_input("Random seed", 0, 10000, 42)
-# ------------------------------
-# Helper functions
-# ------------------------------
 def ensure_dataset_repo(api: HfApi, repo_id: str, token: str):
-    # Create dataset repo if it doesn't exist
     create_repo(repo_id, repo_type="dataset", token=token, exist_ok=True)
 def upload_json(api: HfApi, repo_id: str, token: str, path_in_repo: str, obj: dict):
     tmp = "tmp_upload.json"
     with open(tmp, "w", encoding="utf-8") as f:
         json.dump(obj, f, indent=2)
-    try:
-        upload_file(
-            path_or_fileobj=tmp,
-            repo_id=repo_id,
-            path_in_repo=path_in_repo,
-            repo_type="dataset",
-            token=token,
-        )
-    finally:
-        try:
-            os.remove(tmp)
-        except FileNotFoundError:
-            pass
-def explain_404(prefix: str = ""):
-    st.error(
-        (prefix + "\n\n" if prefix else "")
-        + "Got a **404 from Hugging Face Hub**.\n\n"
-        "This usually means **one of these**:\n"
-        "- The dataset repo name is wrong (must be `username/repo_name`).\n"
-        "- The repo is **private** and your token does **not** have access.\n"
-        "- The file path (e.g. `jobs/<job_id>/status.json`) does not exist yet.\n\n"
-        "Check:\n"
-        "1. The exact spelling of `dataset_repo`.\n"
-        "2. That your HF token has **read & write** permissions to that repo.\n"
-        "3. That the worker has already pushed the job files (for status/metrics)."
     )
-def missing_creds():
-    return not hf_token or not dataset_repo
-# ------------------------------
 # Create job
-# ------------------------------
 if st.button("📝 Create Job"):
-    if missing_creds():
-        st.error("Please provide HF token and dataset repo before creating a job.")
     else:
         try:
             api = HfApi(token=hf_token)
@@ -153,82 +100,46 @@ if st.button("📝 Create Job"):
                 "outputs_path": f"jobs/{job_id}/outputs/",
             }
-            status = {
-                "state": "pending",
-                "updated_at": now_iso,
-                "message": "waiting for worker",
-            }
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/job.json", cfg)
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/status.json", status)
             st.success(f"✅ Job created! ID: {job_id}")
             st.code(json.dumps(cfg, indent=2))
-            st.info(
-                "Open the Colab/Kaggle worker, set the SAME dataset repo + token, and press Run (GPU)."
-            )
         except Exception as e:
-            msg = str(e)
-            if "404" in msg:
-                explain_404("Error while creating the job.")
-            else:
-                st.exception(e)
 st.divider()
-# ------------------------------
-# Monitor / refresh status
-# ------------------------------
 st.subheader("🔎 Monitor a Job")
 job_id_input = st.text_input("Enter a Job ID to check status/artifacts", "")
-if st.button("🔄 Refresh status"):
-    if missing_creds() or not job_id_input.strip():
-        st.error("Please provide HF token, dataset repo, and a Job ID.")
-    else:
         try:
-            api = HfApi(token=hf_token)
-            status_path = hf_hub_download(
-                repo_id=dataset_repo,
-                repo_type="dataset",
-                filename=f"jobs/{job_id_input}/status.json",
-                token=hf_token,
-            )
-            with open(status_path, "r", encoding="utf-8") as f:
-                status = json.load(f)
-            st.write("**Status:**", status)
-            # metrics.json is optional
-            try:
-                metrics_path = hf_hub_download(
-                    repo_id=dataset_repo,
-                    repo_type="dataset",
-                    filename=f"jobs/{job_id_input}/outputs/metrics.json",
-                    token=hf_token,
-                )
-                with open(metrics_path, "r", encoding="utf-8") as f:
-                    metrics = json.load(f)
-                st.write("**Metrics:**", metrics)
-            except Exception as e_metrics:
-                if "404" in str(e_metrics):
-                    st.info("No `metrics.json` yet — the worker might still be training.")
-                else:
-                    st.warning(f"Could not load metrics: {e_metrics}")
-            st.write("Artifacts folder (on HF):")
-            st.write(
-                f"https://huggingface.co/datasets/{dataset_repo}/tree/main/jobs/{job_id_input}/outputs"
             )
-        except Exception as e:
-            msg = str(e)
-            if "404" in msg:
-                explain_404("Error while reading job status.")
-            else:
-                st.exception(e)
-st.caption(
-    "Tip: If your worker can't auto-restart/auto-pull, "
-    "you can open the dataset repo on Hugging Face to confirm files are there."
-)

 import os
+import time
 import uuid
 import json
 from datetime import datetime, timezone
         """
     )
 st.subheader("🔑 Hugging Face Hub")
+hf_token = st.text_input("HF write token (fine-grained or classic write)", type="password")
+dataset_repo = st.text_input("Dataset repo to store jobs (e.g. yourname/llm_classroom_jobs)", "")
+# Defaults for classroom
 st.subheader("⚙️ Training configuration")
 colA, colB = st.columns(2)
 with colA:
     eval_samples = st.text_input("Eval subset size (blank = full)", "")
     random_seed = st.number_input("Random seed", 0, 10000, 42)
 def ensure_dataset_repo(api: HfApi, repo_id: str, token: str):
     create_repo(repo_id, repo_type="dataset", token=token, exist_ok=True)
 def upload_json(api: HfApi, repo_id: str, token: str, path_in_repo: str, obj: dict):
     tmp = "tmp_upload.json"
     with open(tmp, "w", encoding="utf-8") as f:
         json.dump(obj, f, indent=2)
+    upload_file(
+        path_or_fileobj=tmp,
+        repo_id=repo_id,
+        path_in_repo=path_in_repo,
+        repo_type="dataset",
+        token=token,
     )
+    os.remove(tmp)
 # Create job
 if st.button("📝 Create Job"):
+    if not hf_token or not dataset_repo:
+        st.error("Please provide HF token and dataset repo.")
     else:
         try:
             api = HfApi(token=hf_token)
                 "outputs_path": f"jobs/{job_id}/outputs/",
             }
+            # initial status
+            status = {"state": "pending", "updated_at": now_iso, "message": "waiting for worker"}
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/job.json", cfg)
             upload_json(api, dataset_repo, hf_token, f"jobs/{job_id}/status.json", status)
             st.success(f"✅ Job created! ID: {job_id}")
             st.code(json.dumps(cfg, indent=2))
+            st.info("Open the Colab/Kaggle worker, set the SAME dataset repo + token, and press Run (GPU).")
         except Exception as e:
+            st.exception(e)
 st.divider()
 st.subheader("🔎 Monitor a Job")
 job_id_input = st.text_input("Enter a Job ID to check status/artifacts", "")
+if st.button("🔄 Refresh status") and job_id_input and hf_token and dataset_repo:
+    try:
+        api = HfApi(token=hf_token)
+        # download status.json and maybe metrics
+        status_path = hf_hub_download(
+            repo_id=dataset_repo, repo_type="dataset",
+            filename=f"jobs/{job_id_input}/status.json", token=hf_token
+        )
+        with open(status_path, "r", encoding="utf-8") as f:
+            status = json.load(f)
+        st.write("**Status:**", status)
         try:
+            metrics_path = hf_hub_download(
+                repo_id=dataset_repo, repo_type="dataset",
+                filename=f"jobs/{job_id_input}/outputs/metrics.json", token=hf_token
             )
+            with open(metrics_path, "r", encoding="utf-8") as f:
+                metrics = json.load(f)
+            st.write("**Metrics:**", metrics)
+        except Exception:
+            st.info("No metrics.json yet. The worker might still be training.")
+        st.write("Artifacts folder (on HF):")
+        st.write(f"https://huggingface.co/datasets/{dataset_repo}/tree/main/jobs/{job_id_input}/outputs")
+    except Exception as e:
+        st.exception(e)
+st.caption("Tip: If your worker can't auto-restart/auto-pull, you can open the dataset repo to confirm files are there.")