ehejin committed on
Commit
6ceb30f
·
1 Parent(s): 4c987b0

cloning error fix

Browse files
Files changed (1) hide show
  1. src/app.py +57 -91
src/app.py CHANGED
@@ -34,43 +34,51 @@ def _init_submodule() -> None:
34
  raise RuntimeError("GH_TOKEN secret is not set.")
35
 
36
  import shutil
 
37
 
38
- # Aggressively remove any partial/corrupt lsp directory
39
  if _LSP_PATH.exists():
40
  shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
41
- # Also nuke any leftover .git/modules/lsp entry
42
  git_modules = _BASE / ".git" / "modules" / "lsp"
43
  if git_modules.exists():
44
  shutil.rmtree(str(git_modules), ignore_errors=True)
45
 
 
 
46
  clone_url = f"https://ehejin:{token}@github.com/batu-el/lsp.git"
 
47
 
48
- for attempt in range(1, 4):
49
- print(f"[SUBMODULE] clone attempt {attempt}/3 ...")
50
- # Remove any partial clone from previous attempt
51
- if _LSP_PATH.exists():
52
- shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
53
- result = subprocess.run(
54
- [
55
- "git", "clone",
56
- "--branch", "0412_train",
57
- "--depth", "1",
58
- clone_url,
59
- str(_LSP_PATH),
60
- ],
61
- capture_output=True, text=True,
62
- )
63
  print(f"[SUBMODULE] returncode: {result.returncode}")
64
  if result.stderr:
65
- # Scrub token from log
66
  print(f"[SUBMODULE] stderr: {result.stderr.replace(token, '***')}")
67
- if result.returncode == 0 and (_LSP_PATH / "src" / "prompts").exists():
68
- print("[SUBMODULE] clone succeeded.")
 
 
 
 
 
69
  break
70
- print(f"[SUBMODULE] attempt {attempt} failed, retrying...")
 
 
71
  else:
72
  raise RuntimeError(
73
- f"Failed to clone lsp after 3 attempts. "
74
  f"Last stderr: {result.stderr.replace(token, '***')}"
75
  )
76
 
@@ -83,6 +91,17 @@ def _init_submodule() -> None:
83
 
84
  _init_submodule()
85
 
 
 
 
 
 
 
 
 
 
 
 
86
  # ---------------------------------------------------------------------------
87
  # 2. App imports (only after submodule is initialised)
88
  # ---------------------------------------------------------------------------
@@ -108,13 +127,9 @@ from src.ui.screens_preference import screen_pair_intro
108
  # 3. Admin dashboard — visit ?admin=1
109
  # ---------------------------------------------------------------------------
110
  def _screen_admin(cfg: dict) -> None:
111
- """
112
- Coverage dashboard — visit ?admin=1 to see this.
113
- Always scans the HF repo directly — ignores local completions cache
114
- so the count reflects real accepted submissions only.
115
- """
116
  from src.data import (
117
- _load_pool, _pool_path, _data_dir,
118
  _load_reservations, _expire_reservations,
119
  )
120
 
@@ -126,74 +141,29 @@ def _screen_admin(cfg: dict) -> None:
126
  )
127
 
128
  if st.button("πŸ”„ Refresh", type="primary"):
 
 
 
 
129
  st.rerun()
130
 
131
- hf_token = cfg.get("hf_token", "")
132
- output_repo = cfg.get("output_dataset_repo", "")
133
-
134
  for cat_cfg in cfg["categories"]:
135
- cat = cat_cfg["name"]
136
- pool = _load_pool(str(_pool_path(cat, cfg)))
137
  total = len(pool)
138
 
139
- # ── Scan HF directly (no cache) ──────────────────────────────────────
140
- hf_counts = {str(i): 0 for i in range(total)}
141
- n_json = 0
142
- if hf_token and output_repo:
143
- try:
144
- from huggingface_hub import HfApi
145
- api = HfApi(token=hf_token)
146
- files = list(api.list_repo_files(repo_id=output_repo, repo_type="dataset"))
147
- json_files = [f for f in files if f.startswith("json/") and f.endswith(".json")]
148
- n_json = len(json_files)
149
- # Build a pair_id → pool_index lookup for fallback matching
150
- id_to_index = {}
151
- for i, p in enumerate(pool):
152
- pid = p.get("pair_id") or p.get("item_id", "")
153
- if pid:
154
- id_to_index[pid] = i
155
-
156
- for filepath in json_files:
157
- try:
158
- content = api.hf_hub_download(
159
- repo_id=output_repo,
160
- filename=filepath,
161
- repo_type="dataset",
162
- token=hf_token,
163
- )
164
- with open(content) as f:
165
- submission = json.load(f)
166
- for item in submission.get("items", []):
167
- if item.get("category") != cat:
168
- continue
169
- # Use _pool_index if present (new submissions),
170
- # fall back to pair_id/item_id matching (old submissions)
171
- idx = item.get("_pool_index")
172
- if idx is None:
173
- pid = item.get("pair_id") or item.get("item_id", "")
174
- idx = id_to_index.get(pid)
175
- if idx is not None:
176
- hf_counts[str(idx)] = hf_counts.get(str(idx), 0) + 1
177
- except Exception as e:
178
- st.warning(f"Could not parse {filepath}: {e}")
179
- except Exception as e:
180
- st.error(f"Could not scan HF repo: {e}")
181
-
182
- # ── Reservations (active in-progress users) ───────────────────────────
183
  reservations = _load_reservations(cfg)
184
  _expire_reservations(reservations)
 
 
185
  reserved_uncovered = sum(
186
- 1 for k, v in reservations.items()
187
- if hf_counts.get(k, 0) == 0
188
  )
189
-
190
- covered = sum(1 for v in hf_counts.values() if v >= 1)
191
- uncovered = total - covered
192
- truly_uncovered = uncovered - reserved_uncovered
193
 
194
  st.markdown(f"### {cat.capitalize()}")
195
- st.caption(f"{n_json} submission file(s) in HF repo")
196
-
197
  col1, col2, col3, col4 = st.columns(4)
198
  col1.metric("Total items", total)
199
  col2.metric("Covered βœ…", covered)
@@ -206,15 +176,11 @@ def _screen_admin(cfg: dict) -> None:
206
  if truly_uncovered == 0 and reserved_uncovered == 0:
207
  st.success(f"βœ… All {total} items covered!")
208
  elif truly_uncovered == 0:
209
- st.info(
210
- f"πŸ”„ {reserved_uncovered} item(s) in progress β€” "
211
- f"waiting for active participants to finish."
212
- )
213
  else:
214
  st.warning(
215
  f"⚠️ {truly_uncovered} item(s) still need a participant. "
216
- f"Send more Prolific slots or wait for in-progress "
217
- f"reservations to expire (up to 80 min)."
218
  )
219
 
220
  st.markdown("---")
 
34
  raise RuntimeError("GH_TOKEN secret is not set.")
35
 
36
  import shutil
37
+ import time as _time
38
 
39
+ # Clean any stale state
40
  if _LSP_PATH.exists():
41
  shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
 
42
  git_modules = _BASE / ".git" / "modules" / "lsp"
43
  if git_modules.exists():
44
  shutil.rmtree(str(git_modules), ignore_errors=True)
45
 
46
+ # Clone to /tmp first (always writable, avoids HF Space fs quirks),
47
+ # then copy to /app/lsp. If shallow clone fails, fall back to full.
48
  clone_url = f"https://ehejin:{token}@github.com/batu-el/lsp.git"
49
+ tmp_lsp = Path("/tmp/lsp_clone")
50
 
51
+ for attempt in range(1, 6):
52
+ use_shallow = attempt <= 3
53
+ print(f"[SUBMODULE] clone attempt {attempt}/5 to /tmp "
54
+ f"({'shallow' if use_shallow else 'full'})...")
55
+
56
+ if tmp_lsp.exists():
57
+ shutil.rmtree(str(tmp_lsp), ignore_errors=True)
58
+
59
+ cmd = ["git", "clone", "--branch", "0412_train"]
60
+ if use_shallow:
61
+ cmd += ["--depth", "1"]
62
+ cmd += [clone_url, str(tmp_lsp)]
63
+
64
+ result = subprocess.run(cmd, capture_output=True, text=True)
 
65
  print(f"[SUBMODULE] returncode: {result.returncode}")
66
  if result.stderr:
 
67
  print(f"[SUBMODULE] stderr: {result.stderr.replace(token, '***')}")
68
+
69
+ if result.returncode == 0 and (tmp_lsp / "src" / "prompts").exists():
70
+ # Copy from /tmp to /app/lsp
71
+ print("[SUBMODULE] clone succeeded, copying to /app/lsp...")
72
+ shutil.copytree(str(tmp_lsp), str(_LSP_PATH))
73
+ shutil.rmtree(str(tmp_lsp), ignore_errors=True)
74
+ print("[SUBMODULE] ready.")
75
  break
76
+
77
+ print(f"[SUBMODULE] attempt {attempt} failed, waiting 3s...")
78
+ _time.sleep(3)
79
  else:
80
  raise RuntimeError(
81
+ f"Failed to clone lsp after 5 attempts. "
82
  f"Last stderr: {result.stderr.replace(token, '***')}"
83
  )
84
 
 
91
 
92
  _init_submodule()
93
 
94
+ # Wipe any stale local state on container startup.
95
+ # Completions stay durable in HF; we re-scan HF fresh after wipe.
96
+ _data_root = _BASE / "data"
97
+ for pattern in ("reservations.json", "local_completions_*.json", "completion_cache_*.json"):
98
+ for f in _data_root.glob(pattern):
99
+ try:
100
+ f.unlink()
101
+ print(f"[STARTUP] Wiped stale file: {f.name}")
102
+ except Exception as e:
103
+ print(f"[STARTUP] Could not wipe {f.name}: {e}")
104
+
105
  # ---------------------------------------------------------------------------
106
  # 2. App imports (only after submodule is initialised)
107
  # ---------------------------------------------------------------------------
 
127
  # 3. Admin dashboard — visit ?admin=1
128
  # ---------------------------------------------------------------------------
129
  def _screen_admin(cfg: dict) -> None:
130
+ """Coverage dashboard — visit ?admin=1 to see this."""
 
 
 
 
131
  from src.data import (
132
+ _get_accepted_counts, _load_pool, _pool_path,
133
  _load_reservations, _expire_reservations,
134
  )
135
 
 
141
  )
142
 
143
  if st.button("πŸ”„ Refresh", type="primary"):
144
+ # Invalidate HF completion caches so we re-scan
145
+ from src.data import _data_dir
146
+ for f in _data_dir(cfg).glob("completion_cache*"):
147
+ f.unlink()
148
  st.rerun()
149
 
 
 
 
150
  for cat_cfg in cfg["categories"]:
151
+ cat = cat_cfg["name"]
152
+ pool = _load_pool(str(_pool_path(cat, cfg)))
153
  total = len(pool)
154
 
155
+ counts = _get_accepted_counts(cat, cfg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  reservations = _load_reservations(cfg)
157
  _expire_reservations(reservations)
158
+
159
+ covered = sum(1 for v in counts.values() if v >= 1)
160
  reserved_uncovered = sum(
161
+ 1 for k in reservations
162
+ if counts.get(k, 0) == 0
163
  )
164
+ truly_uncovered = total - covered - reserved_uncovered
 
 
 
165
 
166
  st.markdown(f"### {cat.capitalize()}")
 
 
167
  col1, col2, col3, col4 = st.columns(4)
168
  col1.metric("Total items", total)
169
  col2.metric("Covered βœ…", covered)
 
176
  if truly_uncovered == 0 and reserved_uncovered == 0:
177
  st.success(f"βœ… All {total} items covered!")
178
  elif truly_uncovered == 0:
179
+ st.info(f"πŸ”„ {reserved_uncovered} item(s) in progress.")
 
 
 
180
  else:
181
  st.warning(
182
  f"⚠️ {truly_uncovered} item(s) still need a participant. "
183
+ f"Send more Prolific slots."
 
184
  )
185
 
186
  st.markdown("---")