Sathvik-kota committed on
Commit
777e3bb
·
verified ·
1 Parent(s): 9ef45f2

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. run_all_samples.py +66 -62
run_all_samples.py CHANGED
@@ -6,82 +6,86 @@ from pathlib import Path
6
  from fastapi.testclient import TestClient
7
  from app import app # import your FastAPI app
8
 
 
9
  # -----------------------------
10
  # 1. Paths
11
  # -----------------------------
12
  BASE_DIR = Path(__file__).parent
13
- SAMPLES_DIR = BASE_DIR / "samples" # <-- UPDATED
14
  RESULTS_DIR = BASE_DIR / "results"
15
 
16
  RESULTS_DIR.mkdir(exist_ok=True)
17
 
18
- # -----------------------------
19
- # 2. Init FastAPI client
20
- # -----------------------------
21
- client = TestClient(app)
22
 
23
  # -----------------------------
24
- # 3. File filter
25
  # -----------------------------
26
  def is_bill_file(path: Path):
27
  return path.suffix.lower() in [".pdf", ".png", ".jpg", ".jpeg"]
28
 
29
- print("\n🚀 Running extractor on local sample files...\n")
30
- print(f"📁 Using samples from: {SAMPLES_DIR}\n")
31
-
32
- if not SAMPLES_DIR.exists():
33
- raise RuntimeError("❌ ERROR: 'samples/' folder does not exist in your Space.")
34
-
35
- summary = {}
36
 
37
  # -----------------------------
38
- # 4. Process all files
39
  # -----------------------------
40
- for fp in sorted(SAMPLES_DIR.rglob("*")):
41
- if not fp.is_file() or not is_bill_file(fp):
42
- continue
43
-
44
- print(f"🔍 Processing: {fp.name}")
45
-
46
- # Build a usable "file://" URL for extractor
47
- file_url = f"file://{fp.resolve()}"
48
-
49
- # Call extractor API
50
- try:
51
- res = client.post(
52
- "/extract-bill-data",
53
- json={"document": file_url},
54
- timeout=120
55
- )
56
- except Exception as e:
57
- print(f"❌ Error calling extractor: {e}")
58
- summary[fp.name] = "EXTRACTOR_ERROR"
59
- continue
60
-
61
- if res.status_code != 200:
62
- print(f"❌ API error {res.status_code}")
63
- summary[fp.name] = "API_ERROR"
64
- continue
65
-
66
- data = res.json()
67
-
68
- # Save JSON
69
- out_path = RESULTS_DIR / (fp.stem + ".json")
70
- with open(out_path, "w", encoding="utf-8") as f:
71
- json.dump(data, f, indent=2, ensure_ascii=False)
72
-
73
- # Count items
74
- try:
75
- total_items = data["data"]["total_item_count"]
76
- except:
77
- total_items = "N/A"
78
-
79
- summary[fp.name] = total_items
80
- print(f" → Saved: {out_path}")
81
- print(f" → Items Extracted: {total_items}\n")
82
-
83
- print("\n🎉 DONE! All sample bills processed.\n")
84
-
85
- print("📊 Summary:")
86
- for fn, items in summary.items():
87
- print(f" {fn}: {items}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from fastapi.testclient import TestClient
7
  from app import app # import your FastAPI app
8
 
9
+
10
  # -----------------------------
11
  # 1. Paths
12
  # -----------------------------
13
  BASE_DIR = Path(__file__).parent
14
+ SAMPLES_DIR = BASE_DIR / "samples" # <-- Your uploaded folder
15
  RESULTS_DIR = BASE_DIR / "results"
16
 
17
  RESULTS_DIR.mkdir(exist_ok=True)
18
 
 
 
 
 
19
 
20
  # -----------------------------
21
+ # 2. File filter
22
  # -----------------------------
23
  def is_bill_file(path: Path):
24
  return path.suffix.lower() in [".pdf", ".png", ".jpg", ".jpeg"]
25
 
 
 
 
 
 
 
 
26
 
27
  # -----------------------------
28
+ # 3. MAIN EXTRACTION FUNCTION
29
  # -----------------------------
30
+ def main():
31
+ print("\n🚀 Running extractor on local sample files...\n")
32
+ print(f"📁 Using samples from: {SAMPLES_DIR}\n")
33
+
34
+ if not SAMPLES_DIR.exists():
35
+ raise RuntimeError("❌ ERROR: 'samples/' folder does not exist in your Space.")
36
+
37
+ client = TestClient(app)
38
+
39
+ summary = {}
40
+
41
+ # -----------------------------
42
+ # 4. Process all sample files
43
+ # -----------------------------
44
+ for fp in sorted(SAMPLES_DIR.rglob("*")):
45
+ if not fp.is_file() or not is_bill_file(fp):
46
+ continue
47
+
48
+ print(f"🔍 Processing: {fp.name}")
49
+
50
+ file_url = f"file://{fp.resolve()}"
51
+
52
+ try:
53
+ res = client.post(
54
+ "/extract-bill-data",
55
+ json={"document": file_url},
56
+ timeout=120
57
+ )
58
+ except Exception as e:
59
+ print(f"❌ Error calling extractor: {e}")
60
+ summary[fp.name] = "EXTRACTOR_ERROR"
61
+ continue
62
+
63
+ if res.status_code != 200:
64
+ print(f"❌ API error {res.status_code}")
65
+ summary[fp.name] = "API_ERROR"
66
+ continue
67
+
68
+ data = res.json()
69
+
70
+ out_path = RESULTS_DIR / (fp.stem + ".json")
71
+ with open(out_path, "w", encoding="utf-8") as f:
72
+ json.dump(data, f, indent=2, ensure_ascii=False)
73
+
74
+ try:
75
+ total_items = data["data"]["total_item_count"]
76
+ except:
77
+ total_items = "N/A"
78
+
79
+ summary[fp.name] = total_items
80
+ print(f" → Saved: {out_path}")
81
+ print(f" → Items Extracted: {total_items}\n")
82
+
83
+ print("\n🎉 DONE! All sample bills processed.\n")
84
+ print("📊 Summary:")
85
+ for fn, items in summary.items():
86
+ print(f" {fn}: {items}")
87
+
88
+
89
+ # Required so FastAPI background task can call main()
90
+ if __name__ == "__main__":
91
+ main()