from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from pathlib import Path

os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")

PROJECT_ROOT = Path(__file__).resolve().parents[1]


def run_python_script(script_name: str, *args: str) -> int:
    cmd = [sys.executable, str(PROJECT_ROOT / "scripts" / script_name), *args]
    completed = subprocess.run(cmd, cwd=PROJECT_ROOT)
    return int(completed.returncode)


def print_status() -> None:
    reports_dir = PROJECT_ROOT / "artifacts" / "real_qa" / "reports"
    manifest_path = reports_dir / "manifest.json"
    eval_path = reports_dir / "evaluation_report.json"
    payload = {
        "project_root": str(PROJECT_ROOT),
        "manifest_exists": manifest_path.exists(),
        "evaluation_exists": eval_path.exists(),
        "artifacts_dir": str(PROJECT_ROOT / "artifacts" / "real_qa"),
    }
    if manifest_path.exists():
        payload["manifest_path"] = str(manifest_path)
    if eval_path.exists():
        payload["evaluation_path"] = str(eval_path)
    print(json.dumps(payload, indent=2, ensure_ascii=False))


def main() -> None:
    parser = argparse.ArgumentParser(description="Unified CLI for the real QA project.")
    subparsers = parser.add_subparsers(dest="command", required=True)

    subparsers.add_parser("build", help="Build the real QA artifacts and local model snapshots.")
    subparsers.add_parser("generate-train", help="Generate domain-specific training pairs for retriever tuning.")
    subparsers.add_parser("train", help="Fine-tune the dense retriever on project-specific training data.")
    subparsers.add_parser("serve", help="Run the FastAPI server and browser interface.")

    ask_parser = subparsers.add_parser("ask", help="Ask a question to the real QA system.")
    ask_parser.add_argument("question", help="Question to ask.")
    ask_parser.add_argument("--threshold", type=float, default=0.01, help="Answer confidence threshold.")
    ask_parser.add_argument(
        "--style",
        choices=["auto", "extractive", "explanatory"],
        default="auto",
        help="Answering style.",
    )

    eval_parser = subparsers.add_parser("eval", help="Run evaluation on the gold benchmark set.")
    eval_parser.add_argument("--threshold", type=float, default=0.01, help="Answer confidence threshold.")
    eval_parser.add_argument("--sweep", action="store_true", help="Run a threshold sweep.")

    subparsers.add_parser("status", help="Print build/evaluation status.")
    subparsers.add_parser("analyze", help="Generate an error analysis report from the latest evaluation.")

    args = parser.parse_args()

    if args.command == "build":
        raise SystemExit(run_python_script("build_real_qa_artifacts.py"))
    if args.command == "generate-train":
        raise SystemExit(run_python_script("generate_domain_training_data.py"))
    if args.command == "train":
        raise SystemExit(run_python_script("fine_tune_dense_retriever.py"))
    if args.command == "serve":
        raise SystemExit(run_python_script("run_real_qa_api.py"))
    if args.command == "ask":
        raise SystemExit(
            run_python_script(
                "run_real_qa.py",
                "--threshold",
                str(args.threshold),
                "--style",
                str(args.style),
                args.question,
            )
        )
    if args.command == "eval":
        cmd_args = ["--threshold", str(args.threshold)]
        if args.sweep:
            cmd_args.append("--sweep")
        raise SystemExit(run_python_script("evaluate_real_qa.py", *cmd_args))
    if args.command == "status":
        print_status()
        return
    if args.command == "analyze":
        raise SystemExit(run_python_script("analyze_real_qa_errors.py"))


if __name__ == "__main__":
    main()