zino36 committed on
Commit
6a460fa
·
verified ·
1 Parent(s): 8d037cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -6
app.py CHANGED
@@ -43,6 +43,20 @@ def tail_file(path: str, n=200):
43
  return "".join(lines[-n:])
44
 
45
  # ---------- RUN DIR HELPERS ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def new_run_dir():
47
  """Return a unique run dir path WITHOUT creating it (so LeRobot can create it)."""
48
  base = pathlib.Path(RUN_ROOT) / f"pusht_{int(time.time())}"
@@ -55,12 +69,43 @@ def new_run_dir():
55
  return str(d)
56
 
57
  def current_run_dir(user_override: str | None):
58
- """Prefer user text if given, else use the LAST pointer if present."""
 
 
 
 
 
 
 
 
59
  if user_override and user_override.strip():
60
- return user_override.strip()
 
 
 
 
 
 
61
  if LAST_PTR.exists():
62
- return LAST_PTR.read_text().strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  return ""
 
64
 
65
  def has_checkpoint(run_dir: str):
66
  """We consider a checkpoint present once checkpoints/last/ exists (first save is at step 500)."""
@@ -105,12 +150,12 @@ def start_training(steps, batch_size, push_to_hub, repo_id):
105
  def resume_training(extra_steps, push_to_hub, repo_id, run_dir_text):
106
  run_dir = current_run_dir(run_dir_text)
107
  if not run_dir:
108
- return "No run found yet. Start a fresh training first.", "", "(no log)"
109
  log = train_log_path(run_dir)
110
 
111
  if not has_checkpoint(run_dir):
112
- return f"No checkpoint in {run_dir}/checkpoints/last/ yet — run at least 500 steps once.", run_dir, tail_file(log)
113
-
114
  push_flags = (f"--policy.push_to_hub=true --policy.repo_id='{repo_id.strip()}'"
115
  if push_to_hub and repo_id.strip() else
116
  "--policy.push_to_hub=false")
 
43
  return "".join(lines[-n:])
44
 
45
  # ---------- RUN DIR HELPERS ----------
46
def newest_run(prefer_checkpoint: bool = True) -> str:
    """Return the most recently modified ``pusht_*`` run directory under RUN_ROOT.

    Args:
        prefer_checkpoint: When True, return the newest run for which
            ``has_checkpoint`` is true. If no run has a checkpoint yet, fall
            back to the newest run overall, so a non-empty result does NOT
            guarantee that a checkpoint exists.

    Returns:
        The run directory path as a string, or "" when RUN_ROOT does not
        exist or contains no ``pusht_*`` directories.
    """
    root = pathlib.Path(RUN_ROOT)
    if not root.exists():
        return ""
    # Only directories can be runs; a stray file matching the pattern
    # (e.g. a log or archive named pusht_*) must not be picked up.
    runs = sorted(
        (r for r in root.glob("pusht_*") if r.is_dir()),
        key=lambda r: r.stat().st_mtime,
        reverse=True,
    )
    if not runs:
        return ""
    if prefer_checkpoint:
        for r in runs:
            if has_checkpoint(str(r)):
                return str(r)
    # Either checkpoints were not requested or none exist yet: newest run wins.
    return str(runs[0])
59
+
60
  def new_run_dir():
61
  """Return a unique run dir path WITHOUT creating it (so LeRobot can create it)."""
62
  base = pathlib.Path(RUN_ROOT) / f"pusht_{int(time.time())}"
 
69
  return str(d)
70
 
71
  def current_run_dir(user_override: str | None):
72
+ """
73
+ Resolve which run to use:
74
+ - If user typed something, accept folder name or full path.
75
+ - Else try LAST pointer.
76
+ - Else pick newest run with a checkpoint.
77
+ - Else pick newest run (even without checkpoint).
78
+ - Else return "" (none).
79
+ """
80
+ # A) explicit user input
81
  if user_override and user_override.strip():
82
+ p = user_override.strip()
83
+ # allow just "pusht_123..." as well as absolute path
84
+ if not p.startswith("/"):
85
+ p = str(pathlib.Path(RUN_ROOT) / p)
86
+ return p
87
+
88
+ # B) LAST pointer if present and valid
89
  if LAST_PTR.exists():
90
+ p = LAST_PTR.read_text().strip()
91
+ if p and os.path.isdir(p):
92
+ return p
93
+
94
+ # C) newest run WITH checkpoint
95
+ p = newest_run(prefer_checkpoint=True)
96
+ if p:
97
+ LAST_PTR.write_text(p)
98
+ return p
99
+
100
+ # D) newest run (no checkpoint yet)
101
+ p = newest_run(prefer_checkpoint=False)
102
+ if p:
103
+ LAST_PTR.write_text(p)
104
+ return p
105
+
106
+ # E) nothing found
107
  return ""
108
+
109
 
110
  def has_checkpoint(run_dir: str):
111
  """We consider a checkpoint present once checkpoints/last/ exists (first save is at step 500)."""
 
150
  def resume_training(extra_steps, push_to_hub, repo_id, run_dir_text):
151
  run_dir = current_run_dir(run_dir_text)
152
  if not run_dir:
153
+ return "No run found on disk. Start a fresh training first (let it pass step 500 to create a checkpoint).", "", "(no log)"
154
  log = train_log_path(run_dir)
155
 
156
  if not has_checkpoint(run_dir):
157
+ return f"Selected run: {run_dir}\nNo checkpoint in {run_dir}/checkpoints/last/ yet — run at least 500 steps once.", run_dir, tail_file(log)
158
+
159
  push_flags = (f"--policy.push_to_hub=true --policy.repo_id='{repo_id.strip()}'"
160
  if push_to_hub and repo_id.strip() else
161
  "--policy.push_to_hub=false")