Sathvik-kota committed on
Commit
dc56b59
·
verified ·
1 Parent(s): 4497c00

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. run_all_samples.py +44 -26
run_all_samples.py CHANGED
@@ -1,40 +1,51 @@
1
- # run_all_samples.py
2
  import os
3
  import json
4
  from pathlib import Path
 
5
 
6
  from fastapi.testclient import TestClient
7
- from app import app as fastapi_app # FIXED IMPORT
8
 
9
 
10
- # -----------------------------
11
- # 1. Paths
12
- # -----------------------------
13
  BASE_DIR = Path(__file__).parent
14
  SAMPLES_DIR = BASE_DIR / "samples"
15
  RESULTS_DIR = BASE_DIR / "results"
 
16
 
17
  RESULTS_DIR.mkdir(exist_ok=True)
18
 
 
 
 
 
 
 
 
19
 
20
- # -----------------------------
21
- # 2. File filter
22
- # -----------------------------
 
23
  def is_bill_file(path: Path):
24
  return path.suffix.lower() in [".pdf", ".png", ".jpg", ".jpeg"]
25
 
26
 
27
- # -----------------------------
28
- # 3. MAIN EXTRACTION FUNCTION
29
- # -----------------------------
30
  def main():
31
- print("\nπŸš€ Running extractor on local sample files...\n")
32
- print(f"πŸ“ Using samples from: {SAMPLES_DIR}\n")
33
 
34
  if not SAMPLES_DIR.exists():
35
- raise RuntimeError("❌ ERROR: 'samples/' folder does not exist in your Space.")
 
36
 
37
- client = TestClient(fastapi_app) # FIXED
38
 
39
  summary = {}
40
 
@@ -42,7 +53,7 @@ def main():
42
  if not fp.is_file() or not is_bill_file(fp):
43
  continue
44
 
45
- print(f"πŸ” Processing: {fp.name}")
46
 
47
  file_url = f"file://{fp.resolve()}"
48
 
@@ -50,23 +61,28 @@ def main():
50
  res = client.post(
51
  "/extract-bill-data",
52
  json={"document": file_url},
53
- timeout=120
54
  )
55
  except Exception as e:
56
- print(f"❌ Error calling extractor: {e}")
57
  summary[fp.name] = "EXTRACTOR_ERROR"
58
  continue
59
 
60
  if res.status_code != 200:
61
- print(f"❌ API error {res.status_code}")
62
  summary[fp.name] = "API_ERROR"
63
  continue
64
 
65
  data = res.json()
66
 
67
  out_path = RESULTS_DIR / (fp.stem + ".json")
68
- with open(out_path, "w", encoding="utf-8") as f:
69
- json.dump(data, f, indent=2, ensure_ascii=False)
 
 
 
 
 
70
 
71
  try:
72
  total_items = data["data"]["total_item_count"]
@@ -74,14 +90,16 @@ def main():
74
  total_items = "N/A"
75
 
76
  summary[fp.name] = total_items
77
- print(f" β†’ Saved: {out_path}")
78
- print(f" β†’ Items Extracted: {total_items}\n")
79
 
80
- print("\nπŸŽ‰ DONE! All sample bills processed.\n")
81
- print("πŸ“Š Summary:")
82
  for fn, items in summary.items():
83
- print(f" {fn}: {items}")
 
 
84
 
85
 
 
86
  if __name__ == "__main__":
87
  main()
 
1
+ # run_all_samples.py (with logging)
2
  import os
3
  import json
4
  from pathlib import Path
5
+ from datetime import datetime
6
 
7
  from fastapi.testclient import TestClient
8
+ from app import app as fastapi_app # FIXED IMPORT
9
 
10
 
11
+ # ----------------------------------------------------
12
+ # Paths
13
+ # ----------------------------------------------------
14
  BASE_DIR = Path(__file__).parent
15
  SAMPLES_DIR = BASE_DIR / "samples"
16
  RESULTS_DIR = BASE_DIR / "results"
17
+ LOG_FILE = BASE_DIR / "run_all_samples.log"
18
 
19
  RESULTS_DIR.mkdir(exist_ok=True)
20
 
21
+ # ----------------------------------------------------
22
+ # Logging helper
23
+ # ----------------------------------------------------
24
def log(msg: str):
    """Append one timestamped line containing *msg* to LOG_FILE.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] msg`` followed by a
    newline. The file is opened in append mode with UTF-8 encoding on
    every call and closed again before returning.
    """
    # Take the timestamp before touching the file, as the line's prefix.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {msg}\n"
    with open(LOG_FILE, "a", encoding="utf-8") as handle:
        handle.write(entry)
28
 
29
+
30
+ # ----------------------------------------------------
31
+ # File filter
32
+ # ----------------------------------------------------
33
  def is_bill_file(path: Path):
34
  return path.suffix.lower() in [".pdf", ".png", ".jpg", ".jpeg"]
35
 
36
 
37
+ # ----------------------------------------------------
38
+ # Main extraction function
39
+ # ----------------------------------------------------
40
  def main():
41
+ log("========== RUN STARTED ==========")
42
+ log(f"Samples directory: {SAMPLES_DIR}")
43
 
44
  if not SAMPLES_DIR.exists():
45
+ log("ERROR: samples/ folder does not exist!")
46
+ return
47
 
48
+ client = TestClient(fastapi_app)
49
 
50
  summary = {}
51
 
 
53
  if not fp.is_file() or not is_bill_file(fp):
54
  continue
55
 
56
+ log(f"Processing file: {fp.name}")
57
 
58
  file_url = f"file://{fp.resolve()}"
59
 
 
61
  res = client.post(
62
  "/extract-bill-data",
63
  json={"document": file_url},
64
+ timeout=180
65
  )
66
  except Exception as e:
67
+ log(f"ERROR calling extractor: {e}")
68
  summary[fp.name] = "EXTRACTOR_ERROR"
69
  continue
70
 
71
  if res.status_code != 200:
72
+ log(f"ERROR API returned status {res.status_code}")
73
  summary[fp.name] = "API_ERROR"
74
  continue
75
 
76
  data = res.json()
77
 
78
  out_path = RESULTS_DIR / (fp.stem + ".json")
79
+ try:
80
+ with open(out_path, "w", encoding="utf-8") as f:
81
+ json.dump(data, f, indent=2, ensure_ascii=False)
82
+ log(f"Saved result JSON: {out_path}")
83
+ except Exception as e:
84
+ log(f"ERROR writing output JSON: {e}")
85
+ continue
86
 
87
  try:
88
  total_items = data["data"]["total_item_count"]
 
90
  total_items = "N/A"
91
 
92
  summary[fp.name] = total_items
93
+ log(f"Items Extracted: {total_items}")
 
94
 
95
+ # Summary
96
+ log("======== SUMMARY ========")
97
  for fn, items in summary.items():
98
+ log(f"{fn}: {items}")
99
+
100
+ log("========== RUN FINISHED ==========\n")
101
 
102
 
103
+ # Run manually if executed directly
104
  if __name__ == "__main__":
105
  main()