# (removed non-code extraction artifact: "Spaces: Running on CPU Upgrade" hosting-page banner)
"""Entry point for the Text2SPARQL repair pipeline.

Handles CLI argument parsing, loading inputs, running the pipeline,
and saving JSON traces. No business logic here.
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path

from .config import load_config
from .models import QueryRequest, RunTrace
from .pipeline import Text2SPARQLPipeline
from .utils import load_json, make_run_dir, save_json
# Module-level logger shared by every helper in this CLI module.
logger = logging.getLogger("text2sparql_repair")
| def _setup_logging(verbose: bool = False) -> None: | |
| """Configure logging for the pipeline.""" | |
| level = logging.DEBUG if verbose else logging.INFO | |
| logging.basicConfig( | |
| level=level, | |
| format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", | |
| datefmt="%Y-%m-%d %H:%M:%S", | |
| ) | |
def _save_trace(trace: RunTrace, run_dir: str) -> None:
    """Persist every artifact of *trace* as an individual JSON file.

    Args:
        trace: The completed pipeline trace to serialize.
        run_dir: Directory the JSON files are written into.

    Files written: request.json, context.json, initial_candidates.json,
    validations.json, committee.json, decisions.json, repairs.json,
    final_trace.json.
    """
    save_json(trace.request.model_dump(), f"{run_dir}/request.json")
    save_json(trace.context.model_dump(), f"{run_dir}/context.json")
    # Table of history attributes -> output filename; dict order matches
    # the original sequential write order.
    history_outputs = {
        "initial_candidates.json": trace.initial_candidates,
        "validations.json": trace.validation_history,
        "committee.json": trace.committee_history,
        "decisions.json": trace.decision_history,
        "repairs.json": trace.repair_history,
    }
    for filename, records in history_outputs.items():
        save_json([record.model_dump() for record in records], f"{run_dir}/{filename}")
    save_json(trace.model_dump(), f"{run_dir}/final_trace.json")
def run_single(request_path: str, config_path: str) -> None:
    """Execute the pipeline for one request and persist its trace.

    Args:
        request_path: Path to a JSON file containing a QueryRequest.
        config_path: Path to the YAML config file.
    """
    config = load_config(config_path)
    pipeline = Text2SPARQLPipeline(config)
    request = QueryRequest(**load_json(request_path))
    logger.info("Processing single request: %s", request.request_id)
    trace = pipeline.run(request)
    run_dir = make_run_dir("runs", request.request_id)
    _save_trace(trace, run_dir)
    # Truncate the query for log readability; fall back when it is empty.
    preview = trace.final_query[:120] if trace.final_query else "(empty)"
    logger.info("Done. Status: %s | Final query: %s", trace.final_status, preview)
    logger.info("Trace saved to: %s", run_dir)
def run_batch(input_path: str, config_path: str) -> None:
    """Run the pipeline on a batch of requests.

    The input file can be either:
      - a JSON array of QueryRequest objects, or
      - a JSONL file (one QueryRequest object per line).

    Each request is processed independently: a failure — including a
    malformed request record — is logged and does not abort the batch.

    Args:
        input_path: Path to the input file.
        config_path: Path to the YAML config file.
    """
    config = load_config(config_path)
    pipeline = Text2SPARQLPipeline(config)
    # Sniff the format from the first character: '[' means a JSON array,
    # anything else is treated as JSONL. Fix an encoding bug: without an
    # explicit encoding, open() uses the platform locale, which can break
    # UTF-8 JSON on some systems.
    with open(input_path, "r", encoding="utf-8") as f:
        first_char = f.read(1)
        f.seek(0)
        if first_char == "[":
            requests_data = json.load(f)
        else:
            requests_data = [json.loads(line) for line in f if line.strip()]
    total = len(requests_data)
    logger.info("Processing batch of %d requests", total)
    for i, req_data in enumerate(requests_data, start=1):
        try:
            # Parse inside the try so a malformed record is logged and
            # skipped instead of crashing the whole batch (previously the
            # QueryRequest construction happened outside the handler).
            request = QueryRequest(**req_data)
            logger.info("[%d/%d] Processing: %s", i, total, request.request_id)
            trace = pipeline.run(request)
            run_dir = make_run_dir("runs", request.request_id)
            _save_trace(trace, run_dir)
            logger.info("[%d/%d] Done. Status: %s", i, total, trace.final_status)
        except Exception as exc:
            logger.error("[%d/%d] Failed: %s", i, total, exc, exc_info=True)
    logger.info("Batch processing complete.")
def main() -> None:
    """Parse CLI arguments and dispatch to single or batch processing."""
    parser = argparse.ArgumentParser(
        prog="text2sparql_repair",
        description="Text2SPARQL Post-Generation Repair Pipeline",
    )
    parser.add_argument(
        "--config", "-c",
        default="configs/default.yaml",
        help="Path to YAML config file (default: configs/default.yaml)",
    )
    parser.add_argument("--request", "-r", help="Path to a single request JSON file")
    parser.add_argument("--batch", "-b", help="Path to a batch input file (JSON array or JSONL)")
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose (DEBUG) logging",
    )
    args = parser.parse_args()
    _setup_logging(verbose=args.verbose)
    if args.request:
        run_single(args.request, args.config)
    elif args.batch:
        run_batch(args.batch, args.config)
    else:
        # Neither mode was selected: show usage and exit non-zero.
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()