Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,6 +43,20 @@ def tail_file(path: str, n=200):
|
|
| 43 |
return "".join(lines[-n:])
|
| 44 |
|
| 45 |
# ---------- RUN DIR HELPERS ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def new_run_dir():
|
| 47 |
"""Return a unique run dir path WITHOUT creating it (so LeRobot can create it)."""
|
| 48 |
base = pathlib.Path(RUN_ROOT) / f"pusht_{int(time.time())}"
|
|
@@ -55,12 +69,43 @@ def new_run_dir():
|
|
| 55 |
return str(d)
|
| 56 |
|
| 57 |
def current_run_dir(user_override: str | None):
|
| 58 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
if user_override and user_override.strip():
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
if LAST_PTR.exists():
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
return ""
|
|
|
|
| 64 |
|
| 65 |
def has_checkpoint(run_dir: str):
|
| 66 |
"""We consider a checkpoint present once checkpoints/last/ exists (first save is at step 500)."""
|
|
@@ -105,12 +150,12 @@ def start_training(steps, batch_size, push_to_hub, repo_id):
|
|
| 105 |
def resume_training(extra_steps, push_to_hub, repo_id, run_dir_text):
|
| 106 |
run_dir = current_run_dir(run_dir_text)
|
| 107 |
if not run_dir:
|
| 108 |
-
return "No run found
|
| 109 |
log = train_log_path(run_dir)
|
| 110 |
|
| 111 |
if not has_checkpoint(run_dir):
|
| 112 |
-
return f"
|
| 113 |
-
|
| 114 |
push_flags = (f"--policy.push_to_hub=true --policy.repo_id='{repo_id.strip()}'"
|
| 115 |
if push_to_hub and repo_id.strip() else
|
| 116 |
"--policy.push_to_hub=false")
|
|
|
|
| 43 |
return "".join(lines[-n:])
|
| 44 |
|
| 45 |
# ---------- RUN DIR HELPERS ----------
|
| 46 |
+
def newest_run(prefer_checkpoint: bool = True) -> str:
    """Return the most recently modified ``pusht_*`` run directory under RUN_ROOT.

    Args:
        prefer_checkpoint: When true, return the newest run for which
            ``has_checkpoint`` is true; if no run qualifies, fall back to the
            newest run overall.

    Returns:
        The run directory path as a string, or ``""`` when RUN_ROOT does not
        exist or holds no ``pusht_*`` directory.
    """
    root = pathlib.Path(RUN_ROOT)
    if not root.exists():
        return ""

    # Collect (mtime, path) pairs up front so each entry is stat'ed once.
    # Only directories count as runs: a stray file such as "pusht_123.log"
    # matches the glob but must never be returned as a run directory.
    stamped = []
    for entry in root.glob("pusht_*"):
        try:
            if entry.is_dir():
                stamped.append((entry.stat().st_mtime, entry))
        except OSError:
            # The entry disappeared between listing and stat; ignore it.
            continue
    if not stamped:
        return ""

    # Newest first.
    stamped.sort(key=lambda pair: pair[0], reverse=True)
    runs = [entry for _, entry in stamped]

    if prefer_checkpoint:
        for run in runs:
            if has_checkpoint(str(run)):
                return str(run)
    # Either checkpoints were not requested or no run has one yet.
    return str(runs[0])
|
| 59 |
+
|
| 60 |
def new_run_dir():
|
| 61 |
"""Return a unique run dir path WITHOUT creating it (so LeRobot can create it)."""
|
| 62 |
base = pathlib.Path(RUN_ROOT) / f"pusht_{int(time.time())}"
|
|
|
|
| 69 |
return str(d)
|
| 70 |
|
| 71 |
def current_run_dir(user_override: str | None):
    """Resolve which run directory to use.

    Resolution order:
      1. Explicit user input. An absolute path is returned as typed (after
         stripping whitespace); anything else is treated as a folder name
         relative to RUN_ROOT. The path is not checked for existence here.
      2. The path stored in the LAST pointer file, if it names an existing
         directory.
      3. The result of ``newest_run()``, i.e. the newest run with a
         checkpoint, or the newest run overall when none has a checkpoint
         yet. The choice is written back to the LAST pointer.
      4. ``""`` when no run can be found.

    Args:
        user_override: Text typed by the user (folder name or full path), or
            None/blank to auto-detect.

    Returns:
        The resolved run directory as a string, or ``""`` if none exists.
    """
    # 1) Explicit user input wins.
    if user_override and user_override.strip():
        typed = user_override.strip()
        # Accept a bare "pusht_123..." folder name as well as an absolute path.
        if not os.path.isabs(typed):
            typed = str(pathlib.Path(RUN_ROOT) / typed)
        return typed

    # 2) LAST pointer, but only if it still points at a real directory.
    if LAST_PTR.exists():
        remembered = LAST_PTR.read_text().strip()
        if remembered and os.path.isdir(remembered):
            return remembered

    # 3) Newest run on disk. newest_run(prefer_checkpoint=True) already falls
    #    back to the newest run without a checkpoint, so a second lookup with
    #    prefer_checkpoint=False could never find anything new.
    found = newest_run(prefer_checkpoint=True)
    if found:
        LAST_PTR.write_text(found)
        return found

    # 4) Nothing found.
    return ""
|
| 108 |
+
|
| 109 |
|
| 110 |
def has_checkpoint(run_dir: str):
|
| 111 |
"""We consider a checkpoint present once checkpoints/last/ exists (first save is at step 500)."""
|
|
|
|
| 150 |
def resume_training(extra_steps, push_to_hub, repo_id, run_dir_text):
|
| 151 |
run_dir = current_run_dir(run_dir_text)
|
| 152 |
if not run_dir:
|
| 153 |
+
return "No run found on disk. Start a fresh training first (let it pass step 500 to create a checkpoint).", "", "(no log)"
|
| 154 |
log = train_log_path(run_dir)
|
| 155 |
|
| 156 |
if not has_checkpoint(run_dir):
|
| 157 |
+
return f"Selected run: {run_dir}\nNo checkpoint in {run_dir}/checkpoints/last/ yet — run at least 500 steps once.", run_dir, tail_file(log)
|
| 158 |
+
|
| 159 |
push_flags = (f"--policy.push_to_hub=true --policy.repo_id='{repo_id.strip()}'"
|
| 160 |
if push_to_hub and repo_id.strip() else
|
| 161 |
"--policy.push_to_hub=false")
|