cloning error fix
Browse files- src/app.py +57 -91
src/app.py
CHANGED
|
@@ -34,43 +34,51 @@ def _init_submodule() -> None:
|
|
| 34 |
raise RuntimeError("GH_TOKEN secret is not set.")
|
| 35 |
|
| 36 |
import shutil
|
|
|
|
| 37 |
|
| 38 |
-
#
|
| 39 |
if _LSP_PATH.exists():
|
| 40 |
shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
|
| 41 |
-
# Also nuke any leftover .git/modules/lsp entry
|
| 42 |
git_modules = _BASE / ".git" / "modules" / "lsp"
|
| 43 |
if git_modules.exists():
|
| 44 |
shutil.rmtree(str(git_modules), ignore_errors=True)
|
| 45 |
|
|
|
|
|
|
|
| 46 |
clone_url = f"https://ehejin:{token}@github.com/batu-el/lsp.git"
|
|
|
|
| 47 |
|
| 48 |
-
for attempt in range(1,
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
)
|
| 63 |
print(f"[SUBMODULE] returncode: {result.returncode}")
|
| 64 |
if result.stderr:
|
| 65 |
-
# Scrub token from log
|
| 66 |
print(f"[SUBMODULE] stderr: {result.stderr.replace(token, '***')}")
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
break
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
else:
|
| 72 |
raise RuntimeError(
|
| 73 |
-
f"Failed to clone lsp after
|
| 74 |
f"Last stderr: {result.stderr.replace(token, '***')}"
|
| 75 |
)
|
| 76 |
|
|
@@ -83,6 +91,17 @@ def _init_submodule() -> None:
|
|
| 83 |
|
| 84 |
_init_submodule()
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
# ---------------------------------------------------------------------------
|
| 87 |
# 2. App imports (only after submodule is initialised)
|
| 88 |
# ---------------------------------------------------------------------------
|
|
@@ -108,13 +127,9 @@ from src.ui.screens_preference import screen_pair_intro
|
|
| 108 |
# 3. Admin dashboard — visit ?admin=1
|
| 109 |
# ---------------------------------------------------------------------------
|
| 110 |
def _screen_admin(cfg: dict) -> None:
|
| 111 |
-
"""
|
| 112 |
-
Coverage dashboard — visit ?admin=1 to see this.
|
| 113 |
-
Always scans the HF repo directly — ignores local completions cache
|
| 114 |
-
so the count reflects real accepted submissions only.
|
| 115 |
-
"""
|
| 116 |
from src.data import (
|
| 117 |
-
_load_pool, _pool_path,
|
| 118 |
_load_reservations, _expire_reservations,
|
| 119 |
)
|
| 120 |
|
|
@@ -126,74 +141,29 @@ def _screen_admin(cfg: dict) -> None:
|
|
| 126 |
)
|
| 127 |
|
| 128 |
if st.button("π Refresh", type="primary"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
st.rerun()
|
| 130 |
|
| 131 |
-
hf_token = cfg.get("hf_token", "")
|
| 132 |
-
output_repo = cfg.get("output_dataset_repo", "")
|
| 133 |
-
|
| 134 |
for cat_cfg in cfg["categories"]:
|
| 135 |
-
cat
|
| 136 |
-
pool
|
| 137 |
total = len(pool)
|
| 138 |
|
| 139 |
-
|
| 140 |
-
hf_counts = {str(i): 0 for i in range(total)}
|
| 141 |
-
n_json = 0
|
| 142 |
-
if hf_token and output_repo:
|
| 143 |
-
try:
|
| 144 |
-
from huggingface_hub import HfApi
|
| 145 |
-
api = HfApi(token=hf_token)
|
| 146 |
-
files = list(api.list_repo_files(repo_id=output_repo, repo_type="dataset"))
|
| 147 |
-
json_files = [f for f in files if f.startswith("json/") and f.endswith(".json")]
|
| 148 |
-
n_json = len(json_files)
|
| 149 |
-
# Build a pair_id → pool_index lookup for fallback matching
|
| 150 |
-
id_to_index = {}
|
| 151 |
-
for i, p in enumerate(pool):
|
| 152 |
-
pid = p.get("pair_id") or p.get("item_id", "")
|
| 153 |
-
if pid:
|
| 154 |
-
id_to_index[pid] = i
|
| 155 |
-
|
| 156 |
-
for filepath in json_files:
|
| 157 |
-
try:
|
| 158 |
-
content = api.hf_hub_download(
|
| 159 |
-
repo_id=output_repo,
|
| 160 |
-
filename=filepath,
|
| 161 |
-
repo_type="dataset",
|
| 162 |
-
token=hf_token,
|
| 163 |
-
)
|
| 164 |
-
with open(content) as f:
|
| 165 |
-
submission = json.load(f)
|
| 166 |
-
for item in submission.get("items", []):
|
| 167 |
-
if item.get("category") != cat:
|
| 168 |
-
continue
|
| 169 |
-
# Use _pool_index if present (new submissions),
|
| 170 |
-
# fall back to pair_id/item_id matching (old submissions)
|
| 171 |
-
idx = item.get("_pool_index")
|
| 172 |
-
if idx is None:
|
| 173 |
-
pid = item.get("pair_id") or item.get("item_id", "")
|
| 174 |
-
idx = id_to_index.get(pid)
|
| 175 |
-
if idx is not None:
|
| 176 |
-
hf_counts[str(idx)] = hf_counts.get(str(idx), 0) + 1
|
| 177 |
-
except Exception as e:
|
| 178 |
-
st.warning(f"Could not parse {filepath}: {e}")
|
| 179 |
-
except Exception as e:
|
| 180 |
-
st.error(f"Could not scan HF repo: {e}")
|
| 181 |
-
|
| 182 |
-
# ── Reservations (active in-progress users) ───────────────────────────
|
| 183 |
reservations = _load_reservations(cfg)
|
| 184 |
_expire_reservations(reservations)
|
|
|
|
|
|
|
| 185 |
reserved_uncovered = sum(
|
| 186 |
-
1 for k
|
| 187 |
-
if
|
| 188 |
)
|
| 189 |
-
|
| 190 |
-
covered = sum(1 for v in hf_counts.values() if v >= 1)
|
| 191 |
-
uncovered = total - covered
|
| 192 |
-
truly_uncovered = uncovered - reserved_uncovered
|
| 193 |
|
| 194 |
st.markdown(f"### {cat.capitalize()}")
|
| 195 |
-
st.caption(f"{n_json} submission file(s) in HF repo")
|
| 196 |
-
|
| 197 |
col1, col2, col3, col4 = st.columns(4)
|
| 198 |
col1.metric("Total items", total)
|
| 199 |
col2.metric("Covered β
", covered)
|
|
@@ -206,15 +176,11 @@ def _screen_admin(cfg: dict) -> None:
|
|
| 206 |
if truly_uncovered == 0 and reserved_uncovered == 0:
|
| 207 |
st.success(f"β
All {total} items covered!")
|
| 208 |
elif truly_uncovered == 0:
|
| 209 |
-
st.info(
|
| 210 |
-
f"π {reserved_uncovered} item(s) in progress β "
|
| 211 |
-
f"waiting for active participants to finish."
|
| 212 |
-
)
|
| 213 |
else:
|
| 214 |
st.warning(
|
| 215 |
f"β οΈ {truly_uncovered} item(s) still need a participant. "
|
| 216 |
-
f"Send more Prolific slots
|
| 217 |
-
f"reservations to expire (up to 80 min)."
|
| 218 |
)
|
| 219 |
|
| 220 |
st.markdown("---")
|
|
|
|
| 34 |
raise RuntimeError("GH_TOKEN secret is not set.")
|
| 35 |
|
| 36 |
import shutil
|
| 37 |
+
import time as _time
|
| 38 |
|
| 39 |
+
# Clean any stale state
|
| 40 |
if _LSP_PATH.exists():
|
| 41 |
shutil.rmtree(str(_LSP_PATH), ignore_errors=True)
|
|
|
|
| 42 |
git_modules = _BASE / ".git" / "modules" / "lsp"
|
| 43 |
if git_modules.exists():
|
| 44 |
shutil.rmtree(str(git_modules), ignore_errors=True)
|
| 45 |
|
| 46 |
+
# Clone to /tmp first (always writable, avoids HF Space fs quirks),
|
| 47 |
+
# then copy to /app/lsp. If shallow clone fails, fall back to full.
|
| 48 |
clone_url = f"https://ehejin:{token}@github.com/batu-el/lsp.git"
|
| 49 |
+
tmp_lsp = Path("/tmp/lsp_clone")
|
| 50 |
|
| 51 |
+
for attempt in range(1, 6):
|
| 52 |
+
use_shallow = attempt <= 3
|
| 53 |
+
print(f"[SUBMODULE] clone attempt {attempt}/5 to /tmp "
|
| 54 |
+
f"({'shallow' if use_shallow else 'full'})...")
|
| 55 |
+
|
| 56 |
+
if tmp_lsp.exists():
|
| 57 |
+
shutil.rmtree(str(tmp_lsp), ignore_errors=True)
|
| 58 |
+
|
| 59 |
+
cmd = ["git", "clone", "--branch", "0412_train"]
|
| 60 |
+
if use_shallow:
|
| 61 |
+
cmd += ["--depth", "1"]
|
| 62 |
+
cmd += [clone_url, str(tmp_lsp)]
|
| 63 |
+
|
| 64 |
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
|
|
| 65 |
print(f"[SUBMODULE] returncode: {result.returncode}")
|
| 66 |
if result.stderr:
|
|
|
|
| 67 |
print(f"[SUBMODULE] stderr: {result.stderr.replace(token, '***')}")
|
| 68 |
+
|
| 69 |
+
if result.returncode == 0 and (tmp_lsp / "src" / "prompts").exists():
|
| 70 |
+
# Copy from /tmp to /app/lsp
|
| 71 |
+
print("[SUBMODULE] clone succeeded, copying to /app/lsp...")
|
| 72 |
+
shutil.copytree(str(tmp_lsp), str(_LSP_PATH))
|
| 73 |
+
shutil.rmtree(str(tmp_lsp), ignore_errors=True)
|
| 74 |
+
print("[SUBMODULE] ready.")
|
| 75 |
break
|
| 76 |
+
|
| 77 |
+
print(f"[SUBMODULE] attempt {attempt} failed, waiting 3s...")
|
| 78 |
+
_time.sleep(3)
|
| 79 |
else:
|
| 80 |
raise RuntimeError(
|
| 81 |
+
f"Failed to clone lsp after 5 attempts. "
|
| 82 |
f"Last stderr: {result.stderr.replace(token, '***')}"
|
| 83 |
)
|
| 84 |
|
|
|
|
| 91 |
|
| 92 |
_init_submodule()
|
| 93 |
|
| 94 |
+
# Wipe any stale local state on container startup.
|
| 95 |
+
# Completions stay durable in HF; we re-scan HF fresh after wipe.
|
| 96 |
+
_data_root = _BASE / "data"
|
| 97 |
+
for pattern in ("reservations.json", "local_completions_*.json", "completion_cache_*.json"):
|
| 98 |
+
for f in _data_root.glob(pattern):
|
| 99 |
+
try:
|
| 100 |
+
f.unlink()
|
| 101 |
+
print(f"[STARTUP] Wiped stale file: {f.name}")
|
| 102 |
+
except Exception as e:
|
| 103 |
+
print(f"[STARTUP] Could not wipe {f.name}: {e}")
|
| 104 |
+
|
| 105 |
# ---------------------------------------------------------------------------
|
| 106 |
# 2. App imports (only after submodule is initialised)
|
| 107 |
# ---------------------------------------------------------------------------
|
|
|
|
| 127 |
# 3. Admin dashboard — visit ?admin=1
|
| 128 |
# ---------------------------------------------------------------------------
|
| 129 |
def _screen_admin(cfg: dict) -> None:
|
| 130 |
+
"""Coverage dashboard — visit ?admin=1 to see this."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
from src.data import (
|
| 132 |
+
_get_accepted_counts, _load_pool, _pool_path,
|
| 133 |
_load_reservations, _expire_reservations,
|
| 134 |
)
|
| 135 |
|
|
|
|
| 141 |
)
|
| 142 |
|
| 143 |
if st.button("π Refresh", type="primary"):
|
| 144 |
+
# Invalidate HF completion caches so we re-scan
|
| 145 |
+
from src.data import _data_dir
|
| 146 |
+
for f in _data_dir(cfg).glob("completion_cache*"):
|
| 147 |
+
f.unlink()
|
| 148 |
st.rerun()
|
| 149 |
|
|
|
|
|
|
|
|
|
|
| 150 |
for cat_cfg in cfg["categories"]:
|
| 151 |
+
cat = cat_cfg["name"]
|
| 152 |
+
pool = _load_pool(str(_pool_path(cat, cfg)))
|
| 153 |
total = len(pool)
|
| 154 |
|
| 155 |
+
counts = _get_accepted_counts(cat, cfg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
reservations = _load_reservations(cfg)
|
| 157 |
_expire_reservations(reservations)
|
| 158 |
+
|
| 159 |
+
covered = sum(1 for v in counts.values() if v >= 1)
|
| 160 |
reserved_uncovered = sum(
|
| 161 |
+
1 for k in reservations
|
| 162 |
+
if counts.get(k, 0) == 0
|
| 163 |
)
|
| 164 |
+
truly_uncovered = total - covered - reserved_uncovered
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
st.markdown(f"### {cat.capitalize()}")
|
|
|
|
|
|
|
| 167 |
col1, col2, col3, col4 = st.columns(4)
|
| 168 |
col1.metric("Total items", total)
|
| 169 |
col2.metric("Covered β
", covered)
|
|
|
|
| 176 |
if truly_uncovered == 0 and reserved_uncovered == 0:
|
| 177 |
st.success(f"β
All {total} items covered!")
|
| 178 |
elif truly_uncovered == 0:
|
| 179 |
+
st.info(f"π {reserved_uncovered} item(s) in progress.")
|
|
|
|
|
|
|
|
|
|
| 180 |
else:
|
| 181 |
st.warning(
|
| 182 |
f"β οΈ {truly_uncovered} item(s) still need a participant. "
|
| 183 |
+
f"Send more Prolific slots."
|
|
|
|
| 184 |
)
|
| 185 |
|
| 186 |
st.markdown("---")
|