Spaces:
Sleeping
Sleeping
File size: 2,668 Bytes
64da886 c62a9fc 64da886 c62a9fc 64da886 c62a9fc 64da886 c62a9fc 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 777e3bb 64da886 dc56b59 64da886 777e3bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# run_all_samples.py
import json
import os
from pathlib import Path
from fastapi.testclient import TestClient
from app import app
# -----------------------------
# HuggingFace dataset URLs
# -----------------------------
# The samples are numbered train_sample_1.pdf through train_sample_15.pdf and
# all live under the same dataset path, so the list is generated in numeric
# order instead of being spelled out entry by entry.
SAMPLE_URLS = [
    "https://huggingface.co/datasets/Sathvik-kota/samples/resolve/main/"
    f"train_sample_{sample_number}.pdf"
    for sample_number in range(1, 16)
]
# Per-sample JSON responses are written to a "results" folder that sits next
# to this script; the folder is created on first run and reused afterwards.
BASE_DIR = Path(__file__).parent
RESULTS_DIR = BASE_DIR.joinpath("results")
RESULTS_DIR.mkdir(exist_ok=True)

# Test client wrapping the FastAPI application imported from `app`; every
# request made by this script goes through it.
client = TestClient(app)
def main():
    """Run the extractor on every sample URL and print a summary.

    For each URL in ``SAMPLE_URLS`` this posts ``{"document": url}`` to the
    ``/extract-bill-data`` route through ``client``, writes the JSON response
    to ``RESULTS_DIR/<file name>.json`` and records the value found at
    ``data.total_item_count`` in the response.

    Non-200 or malformed responses are recorded in the summary under the
    sample's file name instead of aborting the run:

    * ``"API_ERROR"`` - the endpoint answered with a non-200 status.
    * ``"INVALID_JSON"`` - the 200 response body could not be decoded.
    * ``"MISSING_ITEM_COUNT"`` - the response was saved, but it does not
      contain ``data.total_item_count``.

    The summary (file name -> item count or error marker) is printed as
    indented JSON once all samples have been processed.
    """
    results = {}

    print("\n🚀 Running extractor on HuggingFace dataset...\n")

    for url in SAMPLE_URLS:
        # The last path segment of the URL doubles as the key in the summary
        # and as the stem of the saved result file.
        fname = url.split("/")[-1]
        print(f"🔍 Processing: {fname}")

        resp = client.post("/extract-bill-data", json={"document": url})

        if resp.status_code != 200:
            print(f"❌ Error: {resp.status_code}")
            results[fname] = "API_ERROR"
            continue

        try:
            data = resp.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; one undecodable
            # body must not throw away the results gathered so far.
            print("❌ Error: response body is not valid JSON")
            results[fname] = "INVALID_JSON"
            continue

        # Save the raw response first so it can be inspected even when the
        # item count turns out to be missing.
        out_file = RESULTS_DIR / f"{fname}.json"
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f" → Saved: {out_file}")

        # A 200 response is not guaranteed to carry the expected shape, so
        # walk it defensively rather than indexing straight into it.
        payload = data.get("data") if isinstance(data, dict) else None
        item_count = (
            payload.get("total_item_count") if isinstance(payload, dict) else None
        )
        if item_count is None:
            print(" ⚠️ Response has no data.total_item_count")
            results[fname] = "MISSING_ITEM_COUNT"
            continue

        results[fname] = item_count
        print(f" → Items Extracted: {item_count}")

    print("\n📊 FINAL SUMMARY:")
    print(json.dumps(results, indent=2))


# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|