# (removed non-code extraction artifact: "Spaces: Running on CPU Upgrade" hosting-page banner)
"""Entry point for the Text2SPARQL repair pipeline.

Handles CLI argument parsing, loading inputs, running the pipeline,
and saving JSON traces. No business logic here.
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path

from .config import load_config
from .models import QueryRequest, RunTrace
from .pipeline import Text2SPARQLPipeline
from .utils import load_json, make_run_dir, save_json
# Module-level logger shared by every helper in this CLI module.
logger = logging.getLogger("text2sparql_repair")
| def _setup_logging(verbose: bool = False) -> None: | |
| """Configure logging for the pipeline.""" | |
| level = logging.DEBUG if verbose else logging.INFO | |
| logging.basicConfig( | |
| level=level, | |
| format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", | |
| datefmt="%Y-%m-%d %H:%M:%S", | |
| ) | |
def _save_trace(trace: RunTrace, run_dir: str) -> None:
    """Persist every artifact of *trace* as an individual JSON file.

    Args:
        trace: The completed pipeline trace to serialize.
        run_dir: Directory the JSON files are written into.

    Files written: request.json, context.json, initial_candidates.json,
    validations.json, committee.json, decisions.json, repairs.json,
    final_trace.json.
    """
    save_json(trace.request.model_dump(), f"{run_dir}/request.json")
    save_json(trace.context.model_dump(), f"{run_dir}/context.json")
    # Table of history attributes -> output filename; dict order matches
    # the original sequential write order.
    history_outputs = {
        "initial_candidates.json": trace.initial_candidates,
        "validations.json": trace.validation_history,
        "committee.json": trace.committee_history,
        "decisions.json": trace.decision_history,
        "repairs.json": trace.repair_history,
    }
    for filename, records in history_outputs.items():
        save_json([record.model_dump() for record in records], f"{run_dir}/{filename}")
    save_json(trace.model_dump(), f"{run_dir}/final_trace.json")
def run_single(request_path: str, config_path: str) -> None:
    """Execute the pipeline for one request and persist its trace.

    Args:
        request_path: Path to a JSON file containing a QueryRequest.
        config_path: Path to the YAML config file.
    """
    config = load_config(config_path)
    pipeline = Text2SPARQLPipeline(config)
    request = QueryRequest(**load_json(request_path))
    logger.info("Processing single request: %s", request.request_id)
    trace = pipeline.run(request)
    run_dir = make_run_dir("runs", request.request_id)
    _save_trace(trace, run_dir)
    # Truncate the query for log readability; fall back when it is empty.
    preview = trace.final_query[:120] if trace.final_query else "(empty)"
    logger.info("Done. Status: %s | Final query: %s", trace.final_status, preview)
    logger.info("Trace saved to: %s", run_dir)
def run_batch(input_path: str, config_path: str) -> None:
    """Run the pipeline on a batch of requests.

    The input file can be either:
      - a JSON array of QueryRequest objects, or
      - a JSONL file (one QueryRequest object per line).

    Each request is processed independently: a failure — including a
    malformed request record — is logged and does not abort the batch.

    Args:
        input_path: Path to the input file.
        config_path: Path to the YAML config file.
    """
    config = load_config(config_path)
    pipeline = Text2SPARQLPipeline(config)
    # Sniff the format from the first character: '[' means a JSON array,
    # anything else is treated as JSONL. Fix an encoding bug: without an
    # explicit encoding, open() uses the platform locale, which can break
    # UTF-8 JSON on some systems.
    with open(input_path, "r", encoding="utf-8") as f:
        first_char = f.read(1)
        f.seek(0)
        if first_char == "[":
            requests_data = json.load(f)
        else:
            requests_data = [json.loads(line) for line in f if line.strip()]
    total = len(requests_data)
    logger.info("Processing batch of %d requests", total)
    for i, req_data in enumerate(requests_data, start=1):
        try:
            # Parse inside the try so a malformed record is logged and
            # skipped instead of crashing the whole batch (previously the
            # QueryRequest construction happened outside the handler).
            request = QueryRequest(**req_data)
            logger.info("[%d/%d] Processing: %s", i, total, request.request_id)
            trace = pipeline.run(request)
            run_dir = make_run_dir("runs", request.request_id)
            _save_trace(trace, run_dir)
            logger.info("[%d/%d] Done. Status: %s", i, total, trace.final_status)
        except Exception as exc:
            logger.error("[%d/%d] Failed: %s", i, total, exc, exc_info=True)
    logger.info("Batch processing complete.")
def main() -> None:
    """Parse CLI arguments and dispatch to single or batch processing."""
    parser = argparse.ArgumentParser(
        prog="text2sparql_repair",
        description="Text2SPARQL Post-Generation Repair Pipeline",
    )
    parser.add_argument(
        "--config", "-c",
        default="configs/default.yaml",
        help="Path to YAML config file (default: configs/default.yaml)",
    )
    parser.add_argument("--request", "-r", help="Path to a single request JSON file")
    parser.add_argument("--batch", "-b", help="Path to a batch input file (JSON array or JSONL)")
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose (DEBUG) logging",
    )
    args = parser.parse_args()
    _setup_logging(verbose=args.verbose)
    if args.request:
        run_single(args.request, args.config)
    elif args.batch:
        run_batch(args.batch, args.config)
    else:
        # Neither mode was selected: show usage and exit non-zero.
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()