Spaces:
Running
Running
| """Command-line interface.""" | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from pathlib import Path | |
| # Force transformers to be torch-only. Some Windows environments ship a | |
| # TensorFlow build pinned to NumPy 1.x; importing it under NumPy 2.x crashes | |
| # the whole process. We never use TF or Flax, so disable them up front. | |
| os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1") | |
| os.environ.setdefault("USE_TF", "0") | |
| os.environ.setdefault("USE_FLAX", "0") | |
| os.environ.setdefault("TRANSFORMERS_NO_TF", "1") | |
| import click | |
| from .config import load_config | |
| from .data_loaders.custom_jsonl import load_jsonl | |
| from .pipeline import CiteGuardPipeline | |
| def main() -> None: | |
| """CiteGuard-RAG command-line entry point.""" | |
| def evaluate(input_path: str, config_path: str | None, output_dir: str, no_progress: bool) -> None: | |
| """Run the full pipeline on a JSONL of RAG examples.""" | |
| cfg = load_config(config_path) | |
| # Re-root output dirs under the requested --output if provided. | |
| out_root = Path(output_dir) | |
| cfg.output.tables_dir = str(out_root / "tables") | |
| cfg.output.figures_dir = str(out_root / "figures") | |
| cfg.output.cards_dir = str(out_root / "evidence_cards") | |
| cfg.output.reports_dir = str(out_root / "reports") | |
| examples = load_jsonl(input_path) | |
| if not examples: | |
| click.echo(f"No examples loaded from {input_path}.", err=True) | |
| sys.exit(1) | |
| pipeline = CiteGuardPipeline(cfg) | |
| results = pipeline.run(examples, show_progress=not no_progress) | |
| paths = pipeline.write_outputs(results) | |
| click.echo(f"Wrote {len(results['evaluations'])} claim evaluations.") | |
| for k, v in paths.items(): | |
| click.echo(f" {k}: {v}") | |
| agg = results["aggregate"] | |
| click.echo( | |
| f"macro-F1={agg['macro_f1']:.4f} weighted-F1={agg['weighted_f1']:.4f} " | |
| f"(n_with_gold={agg['n_with_gold_label']}/{agg['n_claims']})" | |
| ) | |
| def split(input_path: str) -> None: | |
| """Print claim splits for each example. Useful for debugging the splitter.""" | |
| from .preprocess.claim_splitter import ClaimSplitter | |
| splitter = ClaimSplitter() | |
| for ex in load_jsonl(input_path): | |
| claims = splitter.split(ex.example_id, ex.answer) | |
| click.echo(f"=== {ex.example_id} ({len(claims)} claims) ===") | |
| for c in claims: | |
| click.echo(f" [{c.claim_id}] cited={c.cited_doc_ids} :: {c.text}") | |
| if __name__ == "__main__": # pragma: no cover | |
| main() | |