Spaces:
Sleeping
Sleeping
add manual recovery from failed CR
Browse files- app.py +84 -0
- scripts/orchestrate_cr.py +62 -5
app.py
CHANGED
|
@@ -851,6 +851,90 @@ elif status in ("done", "error"):
|
|
| 851 |
else:
|
| 852 |
st.warning("No TSs available yet — retry download or upload DOCX files above.")
|
| 853 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 854 |
# Navigation
|
| 855 |
st.divider()
|
| 856 |
col_restart, col_new = st.columns(2)
|
|
|
|
| 851 |
else:
|
| 852 |
st.warning("No TSs available yet — retry download or upload DOCX files above.")
|
| 853 |
|
| 854 |
+
# ββ CR Recovery βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 855 |
+
failed_cr_path = output_dir / "failed_cr.json"
|
| 856 |
+
if failed_cr_path.exists():
|
| 857 |
+
failed_cr_entries = json.loads(failed_cr_path.read_text())
|
| 858 |
+
if failed_cr_entries:
|
| 859 |
+
st.divider()
|
| 860 |
+
st.subheader("⚠️ Recover failed CR downloads")
|
| 861 |
+
st.info(
|
| 862 |
+
f"{len(failed_cr_entries)} CR(s) could not be downloaded. "
|
| 863 |
+
"Retry or upload each one manually, then apply."
|
| 864 |
+
)
|
| 865 |
+
|
| 866 |
+
for entry in failed_cr_entries:
|
| 867 |
+
uid = entry["uid"]
|
| 868 |
+
cr_dir_path = Path(entry["cr_dir"])
|
| 869 |
+
expected = cr_dir_path / entry["expected_filename"]
|
| 870 |
+
ready = expected.exists() or (cr_dir_path / f"{uid}_extracted.docx").exists()
|
| 871 |
+
ts_label = (
|
| 872 |
+
f"TS {entry['ts_spec_number']} v{entry['ts_version']}"
|
| 873 |
+
if entry.get("ts_spec_number") else "TS unknown"
|
| 874 |
+
)
|
| 875 |
+
label = f"{'✅' if ready else '❌'} CR {uid} — {ts_label}"
|
| 876 |
+
with st.expander(label, expanded=not ready):
|
| 877 |
+
col1, col2 = st.columns(2)
|
| 878 |
+
|
| 879 |
+
with col1:
|
| 880 |
+
if st.button("π Retry download", key=f"retry_cr_{uid}"):
|
| 881 |
+
from fetch_crs import download_cr as _dl_cr
|
| 882 |
+
with st.spinner(f"Downloading CR {uid}…"):
|
| 883 |
+
fn, note = _dl_cr(
|
| 884 |
+
uid, cr_dir_path,
|
| 885 |
+
st.session_state.eol_user,
|
| 886 |
+
st.session_state.eol_password,
|
| 887 |
+
)
|
| 888 |
+
if fn:
|
| 889 |
+
st.success(f"Downloaded: {fn.name}")
|
| 890 |
+
st.rerun()
|
| 891 |
+
else:
|
| 892 |
+
st.error(f"Failed: {note}")
|
| 893 |
+
|
| 894 |
+
with col2:
|
| 895 |
+
uploaded_cr = st.file_uploader(
|
| 896 |
+
f"Or upload `{entry['expected_filename']}`",
|
| 897 |
+
type=["docx"],
|
| 898 |
+
key=f"upload_cr_{uid}",
|
| 899 |
+
)
|
| 900 |
+
if uploaded_cr is not None:
|
| 901 |
+
cr_dir_path.mkdir(parents=True, exist_ok=True)
|
| 902 |
+
expected.write_bytes(uploaded_cr.read())
|
| 903 |
+
st.success("Saved β")
|
| 904 |
+
st.rerun()
|
| 905 |
+
|
| 906 |
+
ready_cr_entries = [
|
| 907 |
+
e for e in failed_cr_entries
|
| 908 |
+
if (Path(e["cr_dir"]) / e["expected_filename"]).exists()
|
| 909 |
+
or (Path(e["cr_dir"]) / f"{e['uid']}_extracted.docx").exists()
|
| 910 |
+
]
|
| 911 |
+
remaining_cr = len(failed_cr_entries) - len(ready_cr_entries)
|
| 912 |
+
|
| 913 |
+
if ready_cr_entries:
|
| 914 |
+
if remaining_cr:
|
| 915 |
+
st.warning(f"{len(ready_cr_entries)} ready, {remaining_cr} still missing.")
|
| 916 |
+
else:
|
| 917 |
+
st.success(f"All {len(ready_cr_entries)} CR(s) ready.")
|
| 918 |
+
|
| 919 |
+
if st.button("▶ Apply recovered CRs", type="primary", key="apply_recovered_crs"):
|
| 920 |
+
retry_log = str(session_dir(sid) / f"pipeline_{int(time.time())}_retry.log")
|
| 921 |
+
cmd = [
|
| 922 |
+
sys.executable,
|
| 923 |
+
str(SCRIPTS_DIR / "orchestrate_cr.py"),
|
| 924 |
+
"--output-dir", state["output_dir"],
|
| 925 |
+
"--retry-mode",
|
| 926 |
+
]
|
| 927 |
+
env = os.environ.copy()
|
| 928 |
+
env["EOL_USER"] = st.session_state.eol_user
|
| 929 |
+
env["EOL_PASSWORD"] = st.session_state.eol_password
|
| 930 |
+
_launch_proc(cmd, env, retry_log, sid, state, {
|
| 931 |
+
"status": "running",
|
| 932 |
+
"log_path": retry_log,
|
| 933 |
+
"run_log_paths": state.get("run_log_paths", []) + [retry_log],
|
| 934 |
+
})
|
| 935 |
+
else:
|
| 936 |
+
st.warning("No CRs recovered yet — retry download or upload DOCX files above.")
|
| 937 |
+
|
| 938 |
# Navigation
|
| 939 |
st.divider()
|
| 940 |
col_restart, col_new = st.columns(2)
|
scripts/orchestrate_cr.py
CHANGED
|
@@ -345,6 +345,29 @@ def _run_steps_2_to_6(cr_list, ts_groups, output_dir, cr_dir, ts_dir,
|
|
| 345 |
json.dumps(failed_ts_entries, indent=2)
|
| 346 |
)
|
| 347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
# ββ Steps 5 & 6: Apply CRs + Finalise each TS ββββββββββββββββββββββββββββ
|
| 349 |
_section('Steps 5 & 6 — Applying CRs and Finalising Metadata')
|
| 350 |
report = [] # (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors)
|
|
@@ -445,11 +468,11 @@ def main():
|
|
| 445 |
# ββ Retry mode β skip steps 1-4, reconstruct state from failed_ts.json βββ
|
| 446 |
if args.retry_mode:
|
| 447 |
failed_ts_path = output_dir / 'failed_ts.json'
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
if not failed_ts_entries:
|
| 452 |
-
print('No failed TSs
|
| 453 |
return
|
| 454 |
|
| 455 |
_section('Retry mode — Steps 5 & 6 only')
|
|
@@ -482,6 +505,37 @@ def main():
|
|
| 482 |
elif plain.exists():
|
| 483 |
cr_paths[uid] = plain
|
| 484 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
# ββ Steps 5 & 6 (retry mode) βββββββββββββββββββββββββββββββββββββββββ
|
| 486 |
report = []
|
| 487 |
|
|
@@ -505,7 +559,10 @@ def main():
|
|
| 505 |
n_success = sum(1 for r in report if r[4] is not None and not r[6])
|
| 506 |
n_partial = sum(1 for r in report if r[4] is not None and r[6])
|
| 507 |
n_failed = sum(1 for r in report if r[4] is None)
|
|
|
|
| 508 |
print(f'TSs processed: {n_success} fully OK, {n_partial} with warnings, {n_failed} skipped/failed')
|
|
|
|
|
|
|
| 509 |
_print_report(report, detailed=False)
|
| 510 |
return
|
| 511 |
|
|
|
|
| 345 |
json.dumps(failed_ts_entries, indent=2)
|
| 346 |
)
|
| 347 |
|
| 348 |
+
# Write failed_cr.json
|
| 349 |
+
failed_cr_entries = []
|
| 350 |
+
for uid, _ in cr_list:
|
| 351 |
+
if uid not in cr_paths:
|
| 352 |
+
sn, v = next(
|
| 353 |
+
((sn, v) for (sn, v), uids in ts_groups.items() if uid in uids),
|
| 354 |
+
(None, None),
|
| 355 |
+
)
|
| 356 |
+
sc = sn.replace(' ', '') if sn else None
|
| 357 |
+
sd = spec_dirs.get((sn, v)) if sn else None
|
| 358 |
+
failed_cr_entries.append({
|
| 359 |
+
"uid": uid,
|
| 360 |
+
"cr_dir": str(cr_dir),
|
| 361 |
+
"expected_filename": f"{uid}.docx",
|
| 362 |
+
"ts_spec_number": sn,
|
| 363 |
+
"ts_version": v,
|
| 364 |
+
"ts_spec_dir": str(sd) if sd else None,
|
| 365 |
+
"ts_expected_filename": f"ts_{sc}_v{v}.docx" if sc and v else None,
|
| 366 |
+
})
|
| 367 |
+
(output_dir / "failed_cr.json").write_text(json.dumps(failed_cr_entries, indent=2))
|
| 368 |
+
if failed_cr_entries:
|
| 369 |
+
print(f' {len(failed_cr_entries)} failed CR download(s) written to failed_cr.json')
|
| 370 |
+
|
| 371 |
# ββ Steps 5 & 6: Apply CRs + Finalise each TS ββββββββββββββββββββββββββββ
|
| 372 |
_section('Steps 5 & 6 — Applying CRs and Finalising Metadata')
|
| 373 |
report = [] # (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors)
|
|
|
|
| 468 |
# ββ Retry mode β skip steps 1-4, reconstruct state from failed_ts.json βββ
|
| 469 |
if args.retry_mode:
|
| 470 |
failed_ts_path = output_dir / 'failed_ts.json'
|
| 471 |
+
failed_cr_path = output_dir / 'failed_cr.json'
|
| 472 |
+
failed_ts_entries = json.loads(failed_ts_path.read_text()) if failed_ts_path.exists() else []
|
| 473 |
+
failed_cr_entries = json.loads(failed_cr_path.read_text()) if failed_cr_path.exists() else []
|
| 474 |
+
if not failed_ts_entries and not failed_cr_entries:
|
| 475 |
+
print('No failed TSs or CRs — nothing to retry.')
|
| 476 |
return
|
| 477 |
|
| 478 |
_section('Retry mode — Steps 5 & 6 only')
|
|
|
|
| 505 |
elif plain.exists():
|
| 506 |
cr_paths[uid] = plain
|
| 507 |
|
| 508 |
+
# ββ Recover CRs from failed_cr.json ββββββββββββββββββββββββββββββββββ
|
| 509 |
+
still_failed_cr = []
|
| 510 |
+
for entry in failed_cr_entries:
|
| 511 |
+
uid = entry['uid']
|
| 512 |
+
cr_entry_dir = Path(entry['cr_dir'])
|
| 513 |
+
extracted = cr_entry_dir / f'{uid}_extracted.docx'
|
| 514 |
+
plain = cr_entry_dir / entry['expected_filename']
|
| 515 |
+
if extracted.exists():
|
| 516 |
+
cr_paths[uid] = extracted
|
| 517 |
+
elif plain.exists():
|
| 518 |
+
cr_paths[uid] = plain
|
| 519 |
+
else:
|
| 520 |
+
still_failed_cr.append(entry)
|
| 521 |
+
continue
|
| 522 |
+
sn, v = entry['ts_spec_number'], entry['ts_version']
|
| 523 |
+
if sn and v:
|
| 524 |
+
key = (sn, v)
|
| 525 |
+
ts_groups.setdefault(key, [])
|
| 526 |
+
if uid not in ts_groups[key]:
|
| 527 |
+
ts_groups[key].append(uid)
|
| 528 |
+
if key not in spec_dirs and entry['ts_spec_dir']:
|
| 529 |
+
spec_dirs[key] = Path(entry['ts_spec_dir'])
|
| 530 |
+
if key not in ts_paths and entry['ts_spec_dir'] and entry['ts_expected_filename']:
|
| 531 |
+
ts_file = Path(entry['ts_spec_dir']) / entry['ts_expected_filename']
|
| 532 |
+
if ts_file.exists():
|
| 533 |
+
ts_paths[key] = ts_file
|
| 534 |
+
print(f' [{uid}] CR recovered — will apply to TS {sn} v{v}')
|
| 535 |
+
else:
|
| 536 |
+
print(f' [{uid}] CR recovered but TS group unknown — skipping apply')
|
| 537 |
+
failed_cr_path.write_text(json.dumps(still_failed_cr, indent=2))
|
| 538 |
+
|
| 539 |
# ββ Steps 5 & 6 (retry mode) βββββββββββββββββββββββββββββββββββββββββ
|
| 540 |
report = []
|
| 541 |
|
|
|
|
| 559 |
n_success = sum(1 for r in report if r[4] is not None and not r[6])
|
| 560 |
n_partial = sum(1 for r in report if r[4] is not None and r[6])
|
| 561 |
n_failed = sum(1 for r in report if r[4] is None)
|
| 562 |
+
n_cr_still = len(still_failed_cr)
|
| 563 |
print(f'TSs processed: {n_success} fully OK, {n_partial} with warnings, {n_failed} skipped/failed')
|
| 564 |
+
if n_cr_still:
|
| 565 |
+
print(f'CRs still missing: {n_cr_still} (see failed_cr.json)')
|
| 566 |
_print_report(report, detailed=False)
|
| 567 |
return
|
| 568 |
|