Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -205,28 +205,44 @@ def process_with_gemini(model, text):
|
|
| 205 |
return resp.text
|
| 206 |
raise
|
| 207 |
|
| 208 |
-
def process_pdf_pages(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
reader, total_pages = read_pdf_pages(pdf_file)
|
| 210 |
all_txns = []
|
|
|
|
| 211 |
for pg in range(total_pages):
|
| 212 |
txt = extract_page_text(reader, pg).strip()
|
| 213 |
if not txt:
|
| 214 |
continue
|
|
|
|
|
|
|
| 215 |
try:
|
| 216 |
-
raw = process_with_gemini(
|
| 217 |
except Exception:
|
|
|
|
| 218 |
continue
|
| 219 |
-
|
|
|
|
| 220 |
start = raw.find("{")
|
| 221 |
end = raw.rfind("}") + 1
|
| 222 |
if start < 0 or end <= 0:
|
| 223 |
continue
|
|
|
|
|
|
|
| 224 |
js = raw[start:end].replace("```json", "").replace("```", "")
|
| 225 |
try:
|
| 226 |
data = json.loads(js)
|
| 227 |
-
all_txns.extend(data.get("transactions", []))
|
| 228 |
except json.JSONDecodeError:
|
| 229 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
return all_txns
|
| 231 |
|
| 232 |
# --------- Chat Endpoint ---------
|
|
@@ -275,7 +291,7 @@ def upload_statements():
|
|
| 275 |
f.seek(0)
|
| 276 |
|
| 277 |
# extract + store transactions
|
| 278 |
-
txns = process_pdf_pages(
|
| 279 |
for txn in txns:
|
| 280 |
try:
|
| 281 |
dt = datetime.strptime(txn["Date"], "%d/%m/%Y")
|
|
|
|
| 205 |
return resp.text
|
| 206 |
raise
|
| 207 |
|
| 208 |
+
def process_pdf_pages(pdf_file):
    """
    Extract transactions from every page of a PDF via Gemini.

    The text of each page is sent through ``process_with_gemini``; the JSON
    object found in the reply is parsed and its "transactions" list is
    collected. Processing is best effort: a page whose text is empty, whose
    Gemini call fails, or whose reply holds no parseable JSON object is
    skipped, and the reason is logged instead of being dropped silently.

    Args:
        pdf_file: The PDF to read; passed unchanged to ``read_pdf_pages``.

    Returns:
        A list with the "transactions" entries of all successfully parsed
        pages, in page order. Empty when nothing could be extracted.
    """
    # Imported locally so this function does not rely on a module-level import.
    import logging

    log = logging.getLogger(__name__)

    reader, total_pages = read_pdf_pages(pdf_file)
    all_txns = []

    for pg in range(total_pages):
        txt = extract_page_text(reader, pg).strip()
        if not txt:
            continue

        # 1) Call Gemini
        # NOTE(review): confirm that process_with_gemini accepts a single
        # text argument. A signature mismatch raises TypeError here, which
        # the handler below catches, so every page would be skipped.
        try:
            raw = process_with_gemini(txt)
        except Exception:
            # Skip this page on any error (including retries inside
            # process_with_gemini), but leave a trace of what went wrong.
            log.warning(
                "Gemini call failed for page index %d; skipping page",
                pg,
                exc_info=True,
            )
            continue

        # A non-string reply (e.g. None) would make raw.find() raise and
        # abort the whole document, so treat it as a skipped page.
        if not isinstance(raw, str):
            log.warning(
                "Gemini returned a non-text reply for page index %d; skipping page",
                pg,
            )
            continue

        # 2) Locate the JSON payload: first "{" through last "}"
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start < 0 or end <= 0:
            continue

        # 3) Clean up any markdown fences and parse
        js = raw[start:end].replace("```json", "").replace("```", "")
        try:
            data = json.loads(js)
        except json.JSONDecodeError:
            log.warning(
                "Could not parse JSON from Gemini reply for page index %d; skipping page",
                pg,
            )
            continue

        # 4) Append all found transactions (ignore a non-list value)
        txns = data.get("transactions", [])
        if isinstance(txns, list):
            all_txns.extend(txns)

    return all_txns
|
| 247 |
|
| 248 |
# --------- Chat Endpoint ---------
|
|
|
|
| 291 |
f.seek(0)
|
| 292 |
|
| 293 |
# extract + store transactions
|
| 294 |
+
txns = process_pdf_pages(f)
|
| 295 |
for txn in txns:
|
| 296 |
try:
|
| 297 |
dt = datetime.strptime(txn["Date"], "%d/%m/%Y")
|