|
|
import asyncio
|
|
|
import sys
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
sys.path.append(str(Path(__file__).parent))
|
|
|
|
|
|
from test_accuracy import AccuracyTester
|
|
|
|
|
|
|
|
|
async def run_quick_test():
|
|
|
"""run quick accuracy test"""
|
|
|
|
|
|
print("\n" + "="*80)
|
|
|
print("QUICK ACCURACY TEST")
|
|
|
print("="*80 + "\n")
|
|
|
|
|
|
|
|
|
bureau_pdf = "test_data/JEET_ARORA_PARK251217CR671901414.pdf"
|
|
|
gst_pdf = "test_data/GSTR3B_06AAICK4577H1Z8_012025.pdf"
|
|
|
num_runs = 2
|
|
|
|
|
|
print(f"Configuration:")
|
|
|
print(f" Bureau PDF: {bureau_pdf}")
|
|
|
print(f" GST PDF: {gst_pdf}")
|
|
|
print(f" Number of runs: {num_runs}")
|
|
|
print(f" API URL: http://localhost:8000\n")
|
|
|
|
|
|
|
|
|
if not Path(bureau_pdf).exists():
|
|
|
print(f"β ERROR: Bureau PDF not found: {bureau_pdf}")
|
|
|
print(f" Please update the path or create test_data/ folder\n")
|
|
|
return
|
|
|
|
|
|
if not Path(gst_pdf).exists():
|
|
|
print(f"β ERROR: GST PDF not found: {gst_pdf}")
|
|
|
print(f" Please update the path or create test_data/ folder\n")
|
|
|
return
|
|
|
|
|
|
|
|
|
print("Creating tester...\n")
|
|
|
tester = AccuracyTester(api_url="http://localhost:8000")
|
|
|
|
|
|
|
|
|
print(f"Running {num_runs} extractions...")
|
|
|
print("(This may take a few minutes)\n")
|
|
|
|
|
|
try:
|
|
|
await tester.run_multiple_extractions(
|
|
|
bureau_path=bureau_pdf,
|
|
|
gst_path=gst_pdf,
|
|
|
num_runs=num_runs
|
|
|
)
|
|
|
|
|
|
|
|
|
output_file = "quick_test_report.json"
|
|
|
tester.generate_report(
|
|
|
output_path=output_file,
|
|
|
bureau_filename=Path(bureau_pdf).name,
|
|
|
gst_filename=Path(gst_pdf).name
|
|
|
)
|
|
|
|
|
|
print("\n" + "="*80)
|
|
|
print("β
TESTING COMPLETE!")
|
|
|
print("="*80 + "\n")
|
|
|
|
|
|
print(f"π Report saved: {output_file}")
|
|
|
print(f"\nNext steps:")
|
|
|
print(f" 1. Review the report JSON file")
|
|
|
print(f" 2. Check consistency and accuracy metrics")
|
|
|
print(f" 3. Run full test with --runs 100 for production\n")
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"\nβ ERROR: {str(e)}\n")
|
|
|
print("Troubleshooting:")
|
|
|
print(" 1. Is API running? Check: http://localhost:8000/docs")
|
|
|
print(" 2. Are PDF paths correct?")
|
|
|
print(" 3. Is ground_truth.py configured?\n")
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
print("\n" + "="*80)
|
|
|
print("QUICK START: Accuracy Testing")
|
|
|
print("="*80)
|
|
|
print("\nThis will:")
|
|
|
print(" 1. Run 10 quick extractions")
|
|
|
print(" 2. Measure consistency")
|
|
|
print(" 3. Compare against ground truth")
|
|
|
print(" 4. Generate test report\n")
|
|
|
|
|
|
print("Prerequisites:")
|
|
|
print(" β API must be running (python main.py)")
|
|
|
print(" β Test PDFs must exist")
|
|
|
print(" β ground_truth.py configured\n")
|
|
|
|
|
|
input("Press Enter to start...")
|
|
|
|
|
|
asyncio.run(run_quick_test())
|
|
|
|