File size: 6,148 Bytes
661eb14
76e0cee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""Step 13 — programmatic end-to-end check; exits 0 on success."""

import sys
from pathlib import Path

# Directory two levels above this script (the script's parent's parent).
_SCRIPT_PATH = Path(__file__).resolve()
BASE_DIR = _SCRIPT_PATH.parent.parent
# Put that directory first on the import path so the `core` imports in main()
# are looked up there before anything else (presumably where `core` lives).
sys.path.insert(0, str(BASE_DIR))


def check(condition: bool, msg: str) -> None:
    """Report one smoke-test assertion and abort the run if it failed.

    Args:
        condition: Result of the assertion; any truthy value counts as a pass.
        msg: Human-readable description printed next to the OK/FAIL marker.

    Raises:
        SystemExit: With exit status 1 when ``condition`` is falsy.
    """
    if condition:
        print(f"  OK: {msg}")
        return

    # First failure ends the whole run; later checks are never executed.
    print(f"FAIL: {msg}")
    sys.exit(1)


def main() -> None:
    """Run every smoke-test section in order, stopping at the first failure.

    Each section prints a numbered heading and then passes its assertions to
    ``check``, which ends the process with a non-zero status on the first
    falsy condition. Reaching the end prints the success banner.

    Project modules are imported inside the function, after the module-level
    ``sys.path`` adjustment has run. Several names imported in section 1 are
    not used afterwards; they appear to be imported purely to prove that the
    modules load.

    Side effects: section 9 calls ``audit.log("smoke_test", ...)``, which
    presumably persists a row in the audit store (confirm against
    ``core.audit``).
    """
    print("TenderIQ Smoke Test")
    print("=" * 50)

    # Bidder fixtures checked in sections 4 and 11.
    bidder_ids = ("bidder_a", "bidder_b", "bidder_c")

    # 1. Core imports
    print("\n1. Core module imports")
    from core import config, schemas, prompts
    from core.llm_client import LLM, LLMUnavailable
    from core.pdf_utils import extract_pages, is_text_pdf
    from core.ocr_pipeline import extract_document, ExtractedPage
    from core.chunker import chunk_tender, chunk_bidder
    from core.schemas import Criterion, Verdict, Evidence
    from core import audit
    from core.fallback import load_criteria, load_evaluation
    # Reaching this line means none of the imports above raised.
    check(True, "All core modules import without error")

    # 2. Config
    print("\n2. Config")
    check(config.MODEL_VERSION.startswith("deepseek-chat"), "MODEL_VERSION set")
    check(config.CONFIDENCE_HIGH == 0.80, "CONFIDENCE_HIGH = 0.80")
    check(config.CONFIDENCE_REVIEW == 0.55, "CONFIDENCE_REVIEW = 0.55")

    # 3. Schemas
    print("\n3. Schemas")
    c = Criterion(**{
        "id": "C1", "title": "Turnover", "category": "financial",
        "mandatory": True, "description": "test",
        "rule": {"type": "numeric_threshold", "field": "t", "operator": ">=",
                 "value": 50000000, "unit": "INR"},
        "query_hints": ["turnover"], "source_page": 3, "source_clause": "3.2(a)",
    })
    check(c.mandatory is True, "Criterion schema validates")

    v = Verdict(bidder_id="b", criterion_id="C1", verdict="eligible")
    check(v.verdict_id.startswith("V-"), "Verdict auto-generates verdict_id")
    check(v.review_status == "pending", "Verdict defaults to pending")

    # 4. Mock data files
    print("\n4. Mock data files")
    from core.config import DATA_DIR
    tender_pdf = DATA_DIR / "tender" / "crpf_construction_tender.pdf"
    check(tender_pdf.exists(), "Tender PDF exists")
    for bidder in bidder_ids:
        bidder_dir = DATA_DIR / "bidders" / bidder
        # Ignore .gitkeep placeholders when counting documents.
        files = [f for f in bidder_dir.glob("*") if not f.name.endswith(".gitkeep")]
        check(len(files) >= 4, f"{bidder} has at least 4 documents")
    scan = DATA_DIR / "bidders" / "bidder_c" / "turnover_certificate_scan.png"
    check(scan.exists(), "Bidder C noisy scan exists")

    # 5. PDF utils
    print("\n5. PDF utils")
    pages = extract_pages(tender_pdf)
    check(len(pages) >= 3, f"Tender PDF has {len(pages)} pages")
    check(is_text_pdf(tender_pdf), "Tender PDF detected as text_pdf")
    # Imported here, right before use, so a missing renderer still fails in
    # this section rather than during the section-1 import checks.
    from core.pdf_utils import render_page_to_image
    img = render_page_to_image(tender_pdf, 1)
    check(img.size[0] > 0, f"Page render returns {img.size} image")

    # 6. Chunker
    print("\n6. Chunker")
    chunks = chunk_tender(pages, "tender_001")
    check(len(chunks) > 0, f"chunk_tender returns {len(chunks)} chunks")
    check("text" in chunks[0] and "chunk_id" in chunks[0], "Chunk has text and chunk_id")

    # 7. OCR pipeline
    print("\n7. OCR pipeline")
    fin_pdf = DATA_DIR / "bidders" / "bidder_a" / "audited_financials.pdf"
    ep = extract_document(fin_pdf)
    check(len(ep) > 0, f"extract_document returns {len(ep)} pages")
    check(ep[0].source_type == "text_pdf", "Typed PDF uses Tier 1")
    check(ep[0].confidence == 1.0, "Typed PDF confidence = 1.0")

    ep_scan = extract_document(scan)
    check(len(ep_scan) == 1, "Noisy scan returns 1 page")
    check(ep_scan[0].source_type in ("text_pdf", "tesseract", "vision_llm"),
          f"Scan source_type = {ep_scan[0].source_type}")

    # 8. Fallback
    print("\n8. Fallback")
    criteria = load_criteria()
    check(len(criteria) == 5, f"load_criteria returns {len(criteria)} criteria")
    check(criteria[0].id == "C1", "First criterion is C1")
    mandatory_count = sum(1 for crit in criteria if crit.mandatory)
    check(mandatory_count == 4, f"{mandatory_count} mandatory criteria")
    optional_count = sum(1 for crit in criteria if not crit.mandatory)
    check(optional_count == 1, f"{optional_count} optional criterion (C5)")

    va = load_evaluation("bidder_a", "C1")
    check(va.verdict == "eligible", f"Bidder A C1 = {va.verdict}")
    vb = load_evaluation("bidder_b", "C1")
    check(vb.verdict == "not_eligible", f"Bidder B C1 = {vb.verdict}")
    vc = load_evaluation("bidder_c", "C1")
    check(vc.verdict == "needs_review", f"Bidder C C1 = {vc.verdict}")

    # 9. Audit
    print("\n9. Audit")
    rid = audit.log("smoke_test", actor="smoke_test")
    check(isinstance(rid, int) and rid > 0, f"audit.log returns row id {rid}")
    rows = audit.query({"action": "smoke_test"})
    check(len(rows) >= 1, "audit.query filters by action")

    # 10. Evaluator threshold logic
    print("\n10. Evaluator threshold logic")
    from core.evaluator import _apply_thresholds, _combined_confidence
    check(_apply_thresholds("eligible", 0.9) == "eligible", "eligible@0.9 stays eligible")
    check(_apply_thresholds("not_eligible", 0.9) == "not_eligible", "not_eligible@0.9 stays")
    check(_apply_thresholds("not_eligible", 0.6) == "needs_review", "not_eligible@0.6 -> needs_review")
    check(_apply_thresholds("eligible", 0.4) == "needs_review", "eligible@0.4 -> needs_review")
    check(_combined_confidence(0.9, "text_pdf", None) == 0.9, "text_pdf combined = llm_conf")
    c_vis = _combined_confidence(0.9, "vision_llm", None)
    check(0.8 < c_vis < 0.96, f"vision_llm combined = {c_vis:.3f}")

    # 11. Precomputed files
    print("\n11. Precomputed JSON files")
    from core.config import PRECOMPUTED_DIR
    check((PRECOMPUTED_DIR / "criteria.json").exists(), "criteria.json exists")
    for bidder in bidder_ids:
        check((PRECOMPUTED_DIR / f"eval_{bidder}.json").exists(), f"eval_{bidder}.json exists")

    print("\n" + "=" * 50)
    print("All checks passed. Smoke test: SUCCESS")
    print("=" * 50)


# Run the smoke test only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()