#!/usr/bin/env python3
"""
orchestrate_cr.py - Fully automated CR application pipeline.

Reads an Excel contribution list, downloads all Accepted CRs and their target
TSs, parses tracked changes from each CR, applies them to the TS, and
finalises the document metadata - all without any per-CR manual scripting.

Usage:
    python3 orchestrate_cr.py <excel_path> [person_name] [--output-dir DIR] [--author NAME]

Arguments:
    excel_path   Path to .xls or .xlsx contribution list (Windows paths OK)
    person_name  Name to match in SubmittedBy column (default: "Ly Thanh PHAN")

Options:
    --output-dir  Base output folder (default: ~/CR_Processing)
    --author      Tracked-change author name (default: "CR Application")
    --retry-mode  Skip steps 1-4; apply CRs listed in failed_ts.json
    --ts-mode     Apply all CRs for a given spec number across all versions
    --ts-id       Spec number to process in ts-mode (e.g. "102 267")
    --excel-hash  Excel hash used to filter the HF index in ts-mode
    --hf-repo     HuggingFace dataset repo containing the CR index
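
Environment:
    EOL_USER, EOL_PASSWORD   Must be set; used to download CRs and TSs
    HF_TOKEN                 Required only with --ts-mode (HuggingFace index access)

Examples (illustrative; the Excel path and hash below are placeholders):
    python3 orchestrate_cr.py ./contribution_list.xlsx "Ly Thanh PHAN"
    python3 orchestrate_cr.py --retry-mode
    python3 orchestrate_cr.py --ts-mode --ts-id "102 267" --excel-hash <hash>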
"""

import argparse
import contextlib
import datetime
import io
import json
import os
import re
import shutil
import sys
import time
from pathlib import Path

import docx as docx_lib

# ── sys.path setup ────────────────────────────────────────────────────────────
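# Make the local modules and the shared fetch-crs helper scripts importable.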
SCRIPT_DIR = Path(__file__).parent
FETCH_SCRIPTS = SCRIPT_DIR.parent.parent / 'fetch-crs' / 'scripts'
sys.path.insert(0, str(SCRIPT_DIR))
sys.path.insert(0, str(FETCH_SCRIPTS))

from fetch_crs import parse_excel, download_cr, parse_cr_cover, download_ts, wsl_path
from cr_parser import parse_cr
from ts_applicator import apply_manifest
from finalize_ts import (
    extract_cr_metadata,
    compute_pub_date,
    derive_new_version,
    update_change_history_table,
    update_history_table,
    update_title_para,
    NoChangeHistoryTable,
)
from docx_helpers import RevCounter, AUTHOR as DEFAULT_AUTHOR, DATE as DEFAULT_DATE


# ── Display / logging helpers ─────────────────────────────────────────────────

def _section(title):
    bar = '=' * 60
    print(f'\n{bar}')
    print(f'  {title}')
    print(bar)


class _TeeWriter:
    """Writes to both real stdout and a StringIO buffer simultaneously."""
    def __init__(self, real, buf):
        self._real = real
        self._buf = buf

    def write(self, s):
        self._real.write(s)
        self._buf.write(s)

    def flush(self):
        self._real.flush()


# ── Small report / cache helpers ─────────────────────────────────────────────

def _print_report(report, *, detailed=True):
    """Print per-TS result lines from a report list."""
    for ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors in report:
        status = 'OK' if out_path and not errors else ('WARN' if out_path else 'FAIL')
        print(f'  [{status}] {ts_key}')
        if detailed:
            print(f'         CRs: {n_crs}  |  Body changes applied: {n_ok}  |  Skipped: {n_skip}')
            if out_path:
                print(f'         Output: {out_path.parent.name}/{out_path.name}')
            if log_path and log_path.exists():
                print(f'         Log:    {log_path.parent.name}/{log_path.name}')
        for err in errors:
            print(f'         ! {err}')


def _copy_cr_cache_if_needed(cr_paths, cr_dir, output_dir):
    """Copy downloaded CRs into output_dir/CRs when a shared cache is used."""
    run_cr_dir = output_dir / 'CRs'
    if cr_dir.resolve() != run_cr_dir.resolve():
        run_cr_dir.mkdir(parents=True, exist_ok=True)
        for p in cr_paths.values():
            if p.exists():
                shutil.copy2(p, run_cr_dir / p.name)


# ── Per-TS-group apply helper ─────────────────────────────────────────────────

def _apply_ts_group(spec_number, version, uids, ts_paths, cr_paths, spec_dir,
                    author, tc_date, log_label='Pipeline Log'):
    """Parse, apply, and finalise one TS group. Returns one report tuple."""
    ts_key = f'TS {spec_number} v{version}'
    spec_compact = spec_number.replace(' ', '')
    new_v = derive_new_version(version)
    stem = f'ts_{spec_compact}_v{new_v}_was_v{version}'
    ts_applied = spec_dir / f'ts_{spec_compact}_v{version}_applied.docx'
    ts_final   = spec_dir / f'{stem}.docx'
    log_path   = spec_dir / f'{stem}.log'
    errors = []

    print(f'\n-- {ts_key} ({len(uids)} CR(s): {", ".join(uids)}) --')

    if (spec_number, version) not in ts_paths:
        msg = 'TS download failed - skipping'
        print(f'  SKIP: {msg}')
        return (ts_key, 0, 0, len(uids), None, log_path, [msg])

    ts_in = ts_paths[(spec_number, version)]

    log_buf = io.StringIO()
    tee = _TeeWriter(sys.stdout, log_buf)

    with contextlib.redirect_stdout(tee):
        log_header = (
            f'{log_label}\n'
            f'TS:   {spec_number}  v{version} -> v{new_v}\n'
            f'CRs:  {", ".join(uids)}\n'
            f'Date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n'
            f'{"=" * 60}\n'
        )
        print(log_header, end='')

        combined_manifest = []
        participating_uids = []

        for uid in uids:
            if uid not in cr_paths:
                errors.append(f'[{uid}] CR download had failed - skipped')
                continue
            print(f'  Parsing {uid}... ', end='', flush=True)
            try:
                changes = parse_cr(cr_paths[uid])
                combined_manifest.extend(changes)
                participating_uids.append(uid)
                print(f'{len(changes)} change(s)')
            except Exception as e:
                errors.append(f'[{uid}] parse ERROR: {e}')
                print(f'ERROR: {e}')

        if not combined_manifest:
            print('  No changes parsed - skipping apply step.')
            log_path.write_text(log_buf.getvalue(), encoding='utf-8')
            return (ts_key, 0, 0, len(uids), None, log_path,
                    errors + ['No changes parsed'])

        print(f'  Applying {len(combined_manifest)} change(s) to {ts_in.name}...')
        try:
            n_ok, n_skip, log_lines, n_parsed, n_merged = apply_manifest(
                ts_in, combined_manifest, ts_applied, author=author, date=tc_date
            )
        except Exception as e:
            errors.append(f'apply_manifest ERROR: {e}')
            print(f'  ERROR: {e}')
            log_path.write_text(log_buf.getvalue(), encoding='utf-8')
            return (ts_key, 0, 0, len(uids), None, log_path, errors)

        for line in log_lines:
            print(f'  {line}')
        for line in log_lines:
            if line.strip().startswith('ERROR'):
                errors.append(line.strip())
        print(f'  Parsed: {n_parsed} body changes (merged to {n_merged} groups)'
              f' -> Applied: {n_ok}  Skipped: {n_skip}')

        print('  Finalising metadata...')
        ts_final_or_applied = ts_applied  # fallback if finalise raises
        try:
            ts_doc = docx_lib.Document(str(ts_applied))
            rev = RevCounter(ts_doc)

            pub_ym, pub_month_year = compute_pub_date()
            old_v = version

            title_text = ts_doc.paragraphs[0].text
            date_match = re.search(r'\((\d{4}-\d{2})\)', title_text)
            old_date_str = date_match.group(1) if date_match else ''

            print(f'    Version:     {old_v} -> {new_v}')
            print(f'    Publication: {pub_month_year} ({pub_ym})')

            for uid in participating_uids:
                try:
                    meta = extract_cr_metadata(str(cr_paths[uid]))
                    ch_cells = update_change_history_table(
                        ts_doc, meta, pub_ym, old_v, new_v, rev, author, tc_date
                    )
                    print(f'    [Change History] {uid}: {ch_cells}')
                except NoChangeHistoryTable:
                    print(f'    [Change History] {uid}: NOT PRESENT - this document has no Change History table (History table only)')
                except Exception as e:
                    errors.append(f'[{uid}] Change History ERROR: {e}')
                    print(f'    [Change History] {uid}: ERROR - {e}')

            try:
                h_cells = update_history_table(
                    ts_doc, new_v, pub_month_year, rev, author, tc_date
                )
                print(f'    [History] {h_cells}')
            except Exception as e:
                errors.append(f'History table ERROR: {e}')
                print(f'    [History] ERROR - {e}')

            if old_date_str:
                try:
                    update_title_para(
                        ts_doc, old_v, new_v, old_date_str, pub_ym, rev, author, tc_date
                    )
                    print(f'    [Title] V{old_v} -> V{new_v}, ({old_date_str}) -> ({pub_ym})')
                except Exception as e:
                    errors.append(f'Title update ERROR: {e}')
                    print(f'    [Title] ERROR - {e}')
            else:
                print(f'    [Title] SKIP - no (YYYY-MM) pattern in: {title_text!r}')

            ts_doc.save(str(ts_final))
            print(f'  Saved: {spec_compact}/{ts_final.name}')
            print(f'  Log:   {spec_compact}/{log_path.name}')
            ts_final_or_applied = ts_final

        except Exception as e:
            errors.append(f'Finalisation ERROR: {e}')
            print(f'  Finalisation ERROR: {e}')

    log_path.write_text(log_buf.getvalue(), encoding='utf-8')
    return (ts_key, n_ok, n_skip, len(uids), ts_final_or_applied, log_path, errors)


# ── Shared Steps 2, 4, 5, 6 ──────────────────────────────────────────────────

def _run_steps_2_to_6(cr_list, ts_groups, output_dir, cr_dir, ts_dir,
                       eol_user, eol_password, author, tc_date):
    """
    Execute steps 2 (download CRs), 4 (download TSs), 5 & 6 (apply + finalise).

    cr_list    : list of (uid, title)
    ts_groups  : dict {(spec_number, version): [uid, ...]} - may be pre-built
                 (ts-mode) or None to trigger Step 3 (cover page parse).
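
    Returns (report, cr_paths, ts_paths, spec_dirs).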
    """
    # ── Step 2: Download CR DOCXs ─────────────────────────────────────────────
    _section('Step 2 - Downloading CR DOCXs')
    cr_paths = {}  # uid -> Path

    for uid, _ in cr_list:
        docx_path, note = download_cr(uid, cr_dir, eol_user, eol_password)
        if docx_path:
            cr_paths[uid] = docx_path
            print(f'  [{uid}] OK ({note}) - {docx_path.name}')
        else:
            print(f'  [{uid}] FAILED - {note}')

    n_cr_failed = len(cr_list) - len(cr_paths)
    if n_cr_failed:
        print(f'  {len(cr_paths)}/{len(cr_list)} downloaded ({n_cr_failed} failed)')
    else:
        print(f'  All {len(cr_list)} CR(s) downloaded successfully')

    # ── Step 3: Parse cover pages (only when ts_groups not pre-built) ─────────
    if ts_groups is None:
        _section('Step 3 - Parsing CR cover pages')
        ts_groups = {}
        uid_cover_failed = []

        for uid in cr_paths:
            spec_number, version = parse_cr_cover(cr_paths[uid])
            if spec_number and version:
                key = (spec_number, version)
                ts_groups.setdefault(key, []).append(uid)
                print(f'  [{uid}] -> TS {spec_number}  v{version}')
            else:
                uid_cover_failed.append(uid)
                print(f'  [{uid}] WARNING: could not parse cover page - skipping')

        if not ts_groups:
            print('\nNo TSs identified. Nothing to apply.')
            return [], {}, {}, {}

    # ── Step 4: Download TSs ──────────────────────────────────────────────────
    _section('Step 4 - Downloading TSs')
    ts_paths = {}   # (spec_number, version) -> Path
    spec_dirs = {}  # (spec_number, version) -> Path (per-spec subfolder)

    for (spec_number, version) in ts_groups:
        spec_compact = spec_number.replace(' ', '')
        spec_dir = ts_dir / spec_compact
        spec_dir.mkdir(parents=True, exist_ok=True)
        spec_dirs[(spec_number, version)] = spec_dir

        print(f'  [TS {spec_number} v{version}] ', end='', flush=True)
        filename, note = None, "not attempted"
        for attempt in range(1, 4):
            filename, note = download_ts(spec_number, version, spec_dir, eol_user, eol_password)
            if filename:
                break
            if attempt < 3:
                print(f'\n    [attempt {attempt}/3 failed - retrying in 5s: {note}]', flush=True)
                print(f'  [TS {spec_number} v{version}] ', end='', flush=True)
                time.sleep(5)
            else:
                print(f'\n    [all 3 attempts failed]', flush=True)
        if filename:
            ts_paths[(spec_number, version)] = spec_dir / filename
            print(f'OK ({note}) - {spec_compact}/{filename}')
        else:
            print(f'FAILED - {note}')

    # Write failed_ts.json (TS downloads that failed; consumed by --retry-mode)
    failed_ts_entries = [
        {
            "spec_number":       spec_number,
            "version":           version,
            "spec_compact":      spec_number.replace(' ', ''),
            "spec_dir":          str(spec_dirs[(spec_number, version)]),
            "expected_filename": f"ts_{spec_number.replace(' ', '')}_v{version}.docx",
            "cr_uids":           ts_groups[(spec_number, version)],
            "cr_dir":            str(cr_dir),
        }
        for (spec_number, version) in ts_groups
        if (spec_number, version) not in ts_paths
    ]
    (output_dir / "failed_ts.json").write_text(
        json.dumps(failed_ts_entries, indent=2)
    )

    # Write failed_cr.json (CR downloads that failed; consumed by --retry-mode)
    failed_cr_entries = []
    for uid, _ in cr_list:
        if uid not in cr_paths:
            sn, v = next(
                ((sn, v) for (sn, v), uids in ts_groups.items() if uid in uids),
                (None, None),
            )
            sc = sn.replace(' ', '') if sn else None
            sd = spec_dirs.get((sn, v)) if sn else None
            failed_cr_entries.append({
                "uid":                  uid,
                "cr_dir":               str(cr_dir),
                "expected_filename":    f"{uid}.docx",
                "ts_spec_number":       sn,
                "ts_version":           v,
                "ts_spec_dir":          str(sd) if sd else None,
                "ts_expected_filename": f"ts_{sc}_v{v}.docx" if sc and v else None,
            })
    (output_dir / "failed_cr.json").write_text(json.dumps(failed_cr_entries, indent=2))
    if failed_cr_entries:
        print(f'  {len(failed_cr_entries)} failed CR download(s) written to failed_cr.json')

    # ── Steps 5 & 6: Apply CRs + Finalise each TS ────────────────────────────
    _section('Steps 5 & 6 - Applying CRs and Finalising Metadata')
    report = []  # (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors)

    for (spec_number, version), uids in ts_groups.items():
        spec_compact = spec_number.replace(' ', '')
        spec_dir = spec_dirs.get((spec_number, version), ts_dir / spec_compact)
        spec_dir.mkdir(parents=True, exist_ok=True)
        report.append(_apply_ts_group(
            spec_number, version, uids, ts_paths, cr_paths, spec_dir, author, tc_date
        ))

    return report, cr_paths, ts_paths, spec_dirs


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    ap = argparse.ArgumentParser(
        description='Fully automated CR application pipeline.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument(
        'excel_path',
        nargs='?',
        default=None,
        help='Path to .xls or .xlsx contribution list (not required in --retry-mode or --ts-mode)',
    )
    ap.add_argument(
        'person_name',
        nargs='?',
        default='Ly Thanh PHAN',
        help='Name to match in SubmittedBy column (default: "Ly Thanh PHAN")',
    )
    ap.add_argument(
        '--output-dir',
        default=str(Path.home() / 'CR_Processing'),
        help='Base output directory (default: ~/CR_Processing)',
    )
    ap.add_argument(
        '--author',
        default=DEFAULT_AUTHOR,
        help=f'Tracked change author name (default: "{DEFAULT_AUTHOR}")',
    )
    ap.add_argument(
        '--retry-mode',
        action='store_true',
        help='Skip steps 1-4; apply CRs to TSs listed in failed_ts.json that now have their DOCX on disk',
    )
    ap.add_argument(
        '--ts-mode',
        action='store_true',
        help='Apply all CRs for a given spec number across all versions (uses HF index)',
    )
    ap.add_argument(
        '--ts-id',
        default='',
        help='Spec number to process in ts-mode, e.g. "102 267"',
    )
    ap.add_argument(
        '--excel-hash',
        default='',
        help='Excel hash used to filter the HF index in ts-mode',
    )
    ap.add_argument(
        '--hf-repo',
        default='OrganizedProgrammers/CR_Index',
        help='HuggingFace dataset repo containing the CR index',
    )
    ap.add_argument(
        '--cr-cache-dir',
        default='',
        help='Shared directory for caching downloaded CR DOCXs across runs '
             '(default: <output-dir>/CRs)',
    )
    args = ap.parse_args()

    if args.ts_mode and not args.ts_id:
        ap.error('--ts-id is required when using --ts-mode')
    if not args.ts_mode and not args.retry_mode and not args.excel_path:
        ap.error('excel_path is required when not in --retry-mode or --ts-mode')

    eol_user = os.environ.get("EOL_USER", "")
    eol_password = os.environ.get("EOL_PASSWORD", "")
    if not eol_user or not eol_password:
        sys.exit("ERROR: EOL_USER and EOL_PASSWORD must be set")

    output_dir = Path(wsl_path(args.output_dir)).expanduser()
    cr_cache = args.cr_cache_dir.strip()
    cr_dir = Path(wsl_path(cr_cache)).expanduser() if cr_cache else output_dir / 'CRs'
    ts_dir = output_dir / 'TS'
    cr_dir.mkdir(parents=True, exist_ok=True)
    ts_dir.mkdir(parents=True, exist_ok=True)

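    # Tracked-change author and date recorded on every edit made by this run.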
    author = args.author
    tc_date = DEFAULT_DATE

    # ── Retry mode - skip steps 1-4, reconstruct state from failed_ts.json ───
    if args.retry_mode:
        failed_ts_path = output_dir / 'failed_ts.json'
        failed_cr_path = output_dir / 'failed_cr.json'
        failed_ts_entries = json.loads(failed_ts_path.read_text()) if failed_ts_path.exists() else []
        failed_cr_entries = json.loads(failed_cr_path.read_text()) if failed_cr_path.exists() else []
        if not failed_ts_entries and not failed_cr_entries:
            print('No failed TSs or CRs - nothing to retry.')
            return

        _section('Retry mode - Steps 5 & 6 only')
        print(f'Retrying {len(failed_ts_entries)} TS(s) from failed_ts.json')

        ts_groups = {}
        spec_dirs = {}
        ts_paths = {}
        cr_paths = {}

        for entry in failed_ts_entries:
            spec_number = entry['spec_number']
            version = entry['version']
            key = (spec_number, version)
            ts_groups[key] = entry['cr_uids']
            spec_dir = Path(entry['spec_dir'])
            spec_dirs[key] = spec_dir
            expected = spec_dir / entry['expected_filename']
            if expected.exists():
                ts_paths[key] = expected
                print(f'  [TS {spec_number} v{version}] DOCX found - will apply')
            else:
                print(f'  [TS {spec_number} v{version}] DOCX missing - skipping')
            cr_entry_dir = Path(entry['cr_dir'])
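            # Prefer the *_extracted.docx variant when present, else the plain download.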
            for uid in entry['cr_uids']:
                extracted = cr_entry_dir / f'{uid}_extracted.docx'
                plain = cr_entry_dir / f'{uid}.docx'
                if extracted.exists():
                    cr_paths[uid] = extracted
                elif plain.exists():
                    cr_paths[uid] = plain

        # ── Recover CRs from failed_cr.json ──────────────────────────────────
        still_failed_cr = []
        for entry in failed_cr_entries:
            uid = entry['uid']
            cr_entry_dir = Path(entry['cr_dir'])
            extracted = cr_entry_dir / f'{uid}_extracted.docx'
            plain = cr_entry_dir / entry['expected_filename']
            if extracted.exists():
                cr_paths[uid] = extracted
            elif plain.exists():
                cr_paths[uid] = plain
            else:
                still_failed_cr.append(entry)
                continue
            sn, v = entry['ts_spec_number'], entry['ts_version']
            if sn and v:
                key = (sn, v)
                ts_groups.setdefault(key, [])
                if uid not in ts_groups[key]:
                    ts_groups[key].append(uid)
                if key not in spec_dirs and entry['ts_spec_dir']:
                    spec_dirs[key] = Path(entry['ts_spec_dir'])
                if key not in ts_paths and entry['ts_spec_dir'] and entry['ts_expected_filename']:
                    ts_file = Path(entry['ts_spec_dir']) / entry['ts_expected_filename']
                    if ts_file.exists():
                        ts_paths[key] = ts_file
                print(f'  [{uid}] CR recovered - will apply to TS {sn} v{v}')
            else:
                print(f'  [{uid}] CR recovered but TS group unknown - skipping apply')
        failed_cr_path.write_text(json.dumps(still_failed_cr, indent=2))

        # ── Steps 5 & 6 (retry mode) ─────────────────────────────────────────
        report = []

        for (spec_number, version), uids in ts_groups.items():
            spec_compact = spec_number.replace(' ', '')
            spec_dir = spec_dirs.get((spec_number, version), ts_dir / spec_compact)
            spec_dir.mkdir(parents=True, exist_ok=True)
            report.append(_apply_ts_group(
                spec_number, version, uids, ts_paths, cr_paths, spec_dir, author, tc_date,
                log_label='Pipeline Log (retry)'
            ))

        # Update failed_ts.json - remove entries that are now resolved
        still_failed = [
            e for e in failed_ts_entries
            if not (Path(e['spec_dir']) / e['expected_filename']).exists()
        ]
        failed_ts_path.write_text(json.dumps(still_failed, indent=2))

        _section('Retry Summary')
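        # report entries are (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors);
        # out_path is None when a TS was skipped/failed, errors is non-empty on warnings.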
        n_success = sum(1 for r in report if r[4] is not None and not r[6])
        n_partial  = sum(1 for r in report if r[4] is not None and r[6])
        n_failed   = sum(1 for r in report if r[4] is None)
        n_cr_still = len(still_failed_cr)
        print(f'TSs processed:  {n_success} fully OK, {n_partial} with warnings, {n_failed} skipped/failed')
        if n_cr_still:
            print(f'CRs still missing: {n_cr_still} (see failed_cr.json)')
        _print_report(report, detailed=False)
        return

    # ── TS mode - load HF index, skip Steps 1 & 3 ────────────────────────────
    if args.ts_mode:
        hf_token = os.environ.get("HF_TOKEN", "")
        if not hf_token:
            sys.exit("ERROR: HF_TOKEN must be set in ts-mode")

        from hf_cr_index import load_hf_index

        _section(f'TS mode - spec {args.ts_id!r}')
        print(f'Loading HF index from {args.hf_repo}...')
        try:
            all_records = load_hf_index(hf_token, args.hf_repo)
        except Exception as e:
            sys.exit(f'ERROR loading HF index: {e}')

        records = [
            r for r in all_records
            if r.get("excel_hash") == args.excel_hash
            and r.get("spec_number") == args.ts_id
        ]

        if not records:
            sys.exit(
                f'ERROR: no indexed CRs found for spec {args.ts_id!r} '
                f'with excel_hash={args.excel_hash!r}'
            )

        # Build ts_groups from index (bypasses Step 3)
        ts_groups = {}
        for r in records:
            key = (r["spec_number"], r["version"])
            ts_groups.setdefault(key, []).append(r["uid"])

        # Build cr_list for Step 2 download
        cr_list = [(r["uid"], r["title"]) for r in records]

        print(f'Found {len(records)} CR(s) across {len(ts_groups)} version(s):')
        for (spec, ver), uids in ts_groups.items():
            print(f'  TS {spec} v{ver}: {", ".join(uids)}')

        report, cr_paths, ts_paths, spec_dirs = _run_steps_2_to_6(
            cr_list, ts_groups, output_dir, cr_dir, ts_dir,
            eol_user, eol_password, author, tc_date,
        )

        # Copy the CRs actually applied into the run output dir so the ZIP
        # contains exactly the CRs used for this TS (only needed when using
        # a shared CR cache that lives outside output_dir).
        _copy_cr_cache_if_needed(cr_paths, cr_dir, output_dir)

        _section('Final Report (TS mode)')
        n_success = sum(1 for r in report if r[4] is not None and not r[6])
        n_partial  = sum(1 for r in report if r[4] is not None and r[6])
        n_failed   = sum(1 for r in report if r[4] is None)

        print(f'Spec:         {args.ts_id}')
        print(f'CRs found:    {len(cr_list)}')
        print(f'TSs updated:  {n_success} fully OK, {n_partial} with warnings, {n_failed} failed')
        print()

        _print_report(report)

        print()
        print(f'Output directory: {output_dir}/')
        return

    # ── Normal mode ───────────────────────────────────────────────────────────
    excel_path = wsl_path(args.excel_path)

    # ── Step 1: Parse Excel ───────────────────────────────────────────────────
    _section('Step 1 - Parsing Excel')
    print(f'Excel:   {excel_path}')
    print(f'Person:  {args.person_name!r}')

    try:
        cr_list = parse_excel(excel_path, args.person_name)
    except Exception as e:
        sys.exit(f'ERROR parsing Excel: {e}')

    print(f'Found {len(cr_list)} Accepted CR(s)')

    if not cr_list:
        print('Nothing to process.')
        return

    # Steps 2, 3 (cover page parse), 4, 5, 6
    report, cr_paths, ts_paths, spec_dirs = _run_steps_2_to_6(
        cr_list, None, output_dir, cr_dir, ts_dir,
        eol_user, eol_password, author, tc_date,
    )

    # Copy the CRs actually applied into the run output dir so the ZIP
    # contains exactly the CRs used for this run (only needed when using
    # a shared CR cache that lives outside output_dir).
    _copy_cr_cache_if_needed(cr_paths, cr_dir, output_dir)

    # ── Final Report ──────────────────────────────────────────────────────────
    _section('Final Report')
    n_success = sum(1 for r in report if r[4] is not None and not r[6])
    n_partial  = sum(1 for r in report if r[4] is not None and r[6])
    n_failed   = sum(1 for r in report if r[4] is None)

    print(f'Person:       {args.person_name}')
    print(f'Excel:        {excel_path}')
    print(f'CRs found:    {len(cr_list)}')
    print(f'TSs updated:  {n_success} fully OK, {n_partial} with warnings, {n_failed} failed')
    print()

    _print_report(report)

    print()
    print(f'Output directory: {output_dir}/')


if __name__ == '__main__':
    main()