Spaces:

JaydeepR
/

TenderIQ

Sleeping

File size: 6,148 Bytes

661eb14
76e0cee

"""Step 13 — programmatic end-to-end check; exits 0 on success."""

import sys
from pathlib import Path

# Directory two levels above this file's resolved (absolute, symlink-free)
# path, i.e. the parent of the directory that contains this script.
# NOTE(review): presumably the project root holding the ``core`` package - confirm.
BASE_DIR = Path(__file__).resolve().parent.parent
# Prepend it to the import path; this must run before main() performs its
# ``core`` imports so that they are looked up in BASE_DIR first.
sys.path.insert(0, str(BASE_DIR))


def check(condition: bool, msg: str) -> None:
    """Report one smoke-test assertion and abort the run if it failed.

    Args:
        condition: Outcome of the assertion; evaluated for truthiness.
        msg: Human-readable description printed next to the result.

    Raises:
        SystemExit: With exit status 1 when ``condition`` is falsy, right
            after a ``FAIL:`` line has been printed.
    """
    if condition:
        print(f"  OK: {msg}")
        return

    # First failure ends the whole run; later checks usually depend on it.
    print(f"FAIL: {msg}")
    sys.exit(1)


def main() -> None:
    """Run every TenderIQ smoke-test section in order.

    The sections are numbered 1-11 in the output: core imports, config
    constants, schema construction, mock data files, PDF utilities, the
    chunker, the OCR pipeline, precomputed fallbacks, the audit log, the
    evaluator's threshold helpers and the precomputed JSON files.

    Every assertion is routed through ``check``, so the first failing
    condition prints a ``FAIL:`` line and terminates the process with exit
    status 1. Returning normally means all checks passed.

    Project modules are imported inside this function rather than at module
    level, so import errors surface after the banner has been printed.

    Side effects: prints progress to stdout and calls ``audit.log`` once
    with the action ``"smoke_test"`` (NOTE(review): presumably this persists
    a row on every run - confirm that is acceptable for the target store).
    """
    print("TenderIQ Smoke Test")
    print("=" * 50)

    # Single source of truth for the mock bidders used in sections 4 and 11.
    bidder_ids = ("bidder_a", "bidder_b", "bidder_c")

    # 1. Core imports
    # Several of these names are never used below; importing them IS the check.
    print("\n1. Core module imports")
    from core import config, schemas, prompts
    from core.llm_client import LLM, LLMUnavailable
    from core.pdf_utils import extract_pages, is_text_pdf
    from core.ocr_pipeline import extract_document, ExtractedPage
    from core.chunker import chunk_tender, chunk_bidder
    from core.schemas import Criterion, Verdict, Evidence
    from core import audit
    from core.fallback import load_criteria, load_evaluation
    check(True, "All core modules import without error")

    # 2. Config
    print("\n2. Config")
    check(config.MODEL_VERSION.startswith("deepseek-chat"), "MODEL_VERSION set")
    check(config.CONFIDENCE_HIGH == 0.80, "CONFIDENCE_HIGH = 0.80")
    check(config.CONFIDENCE_REVIEW == 0.55, "CONFIDENCE_REVIEW = 0.55")

    # 3. Schemas
    print("\n3. Schemas")
    criterion = Criterion(
        id="C1",
        title="Turnover",
        category="financial",
        mandatory=True,
        description="test",
        rule={
            "type": "numeric_threshold",
            "field": "t",
            "operator": ">=",
            "value": 50000000,
            "unit": "INR",
        },
        query_hints=["turnover"],
        source_page=3,
        source_clause="3.2(a)",
    )
    check(criterion.mandatory is True, "Criterion schema validates")

    # verdict_id and review_status are not passed, so these two checks
    # exercise whatever defaults the Verdict model supplies.
    verdict = Verdict(bidder_id="b", criterion_id="C1", verdict="eligible")
    check(verdict.verdict_id.startswith("V-"), "Verdict auto-generates verdict_id")
    check(verdict.review_status == "pending", "Verdict defaults to pending")

    # 4. Mock data files
    print("\n4. Mock data files")
    from core.config import DATA_DIR
    tender_pdf = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
    check(tender_pdf.exists(), "Tender PDF exists")
    for bidder in bidder_ids:
        # Placeholder .gitkeep entries do not count as bidder documents.
        documents = [
            entry
            for entry in (DATA_DIR / "bidders" / bidder).glob("*")
            if not entry.name.endswith(".gitkeep")
        ]
        check(len(documents) >= 4, f"{bidder} has at least 4 documents")
    scan = DATA_DIR / "bidders" / "bidder_c" / "turnover_certificate_scan.png"
    check(scan.exists(), "Bidder C noisy scan exists")

    # 5. PDF utils
    print("\n5. PDF utils")
    pages = extract_pages(tender_pdf)
    check(len(pages) >= 3, f"Tender PDF has {len(pages)} pages")
    check(is_text_pdf(tender_pdf), "Tender PDF detected as text_pdf")
    # Imported here (not in section 1) so a missing renderer is reported at
    # this step, after the two checks above have run.
    from core.pdf_utils import render_page_to_image
    img = render_page_to_image(tender_pdf, 1)
    check(img.size[0] > 0, f"Page render returns {img.size} image")

    # 6. Chunker
    print("\n6. Chunker")
    chunks = chunk_tender(pages, "tender_001")
    check(len(chunks) > 0, f"chunk_tender returns {len(chunks)} chunks")
    check("text" in chunks[0] and "chunk_id" in chunks[0], "Chunk has text and chunk_id")

    # 7. OCR pipeline
    print("\n7. OCR pipeline")
    fin_pdf = DATA_DIR / "bidders" / "bidder_a" / "audited_financials.pdf"
    ep = extract_document(fin_pdf)
    check(len(ep) > 0, f"extract_document returns {len(ep)} pages")
    check(ep[0].source_type == "text_pdf", "Typed PDF uses Tier 1")
    check(ep[0].confidence == 1.0, "Typed PDF confidence = 1.0")

    # The scan may be handled by any extraction tier; only the tier name
    # is validated, not which one was chosen.
    ep_scan = extract_document(scan)
    check(len(ep_scan) == 1, "Noisy scan returns 1 page")
    check(ep_scan[0].source_type in ("text_pdf", "tesseract", "vision_llm"),
          f"Scan source_type = {ep_scan[0].source_type}")

    # 8. Fallback
    print("\n8. Fallback")
    criteria = load_criteria()
    check(len(criteria) == 5, f"load_criteria returns {len(criteria)} criteria")
    check(criteria[0].id == "C1", "First criterion is C1")
    mandatory_count = sum(1 for crit in criteria if crit.mandatory)
    check(mandatory_count == 4, f"{mandatory_count} mandatory criteria")
    optional_count = sum(1 for crit in criteria if not crit.mandatory)
    check(optional_count == 1, f"{optional_count} optional criterion (C5)")

    va = load_evaluation("bidder_a", "C1")
    check(va.verdict == "eligible", f"Bidder A C1 = {va.verdict}")
    vb = load_evaluation("bidder_b", "C1")
    check(vb.verdict == "not_eligible", f"Bidder B C1 = {vb.verdict}")
    vc = load_evaluation("bidder_c", "C1")
    check(vc.verdict == "needs_review", f"Bidder C C1 = {vc.verdict}")

    # 9. Audit
    print("\n9. Audit")
    rid = audit.log("smoke_test", actor="smoke_test")
    check(isinstance(rid, int) and rid > 0, f"audit.log returns row id {rid}")
    rows = audit.query({"action": "smoke_test"})
    check(len(rows) >= 1, "audit.query filters by action")

    # 10. Evaluator threshold logic
    print("\n10. Evaluator threshold logic")
    from core.evaluator import _apply_thresholds, _combined_confidence
    check(_apply_thresholds("eligible", 0.9) == "eligible", "eligible@0.9 stays eligible")
    check(_apply_thresholds("not_eligible", 0.9) == "not_eligible", "not_eligible@0.9 stays")
    check(_apply_thresholds("not_eligible", 0.6) == "needs_review", "not_eligible@0.6 -> needs_review")
    check(_apply_thresholds("eligible", 0.4) == "needs_review", "eligible@0.4 -> needs_review")
    check(_combined_confidence(0.9, "text_pdf", None) == 0.9, "text_pdf combined = llm_conf")
    c_vis = _combined_confidence(0.9, "vision_llm", None)
    check(0.8 < c_vis < 0.96, f"vision_llm combined = {c_vis:.3f}")

    # 11. Precomputed files
    print("\n11. Precomputed JSON files")
    from core.config import PRECOMPUTED_DIR
    check((PRECOMPUTED_DIR / "criteria.json").exists(), "criteria.json exists")
    for bidder in bidder_ids:
        check((PRECOMPUTED_DIR / f"eval_{bidder}.json").exists(), f"eval_{bidder}.json exists")

    print("\n" + "=" * 50)
    print("All checks passed. Smoke test: SUCCESS")
    print("=" * 50)


# Run the smoke test only when this file is executed as a script; importing
# the module does not trigger the checks.
if __name__ == "__main__":
    main()