Update main.py
Browse files
main.py
CHANGED
|
@@ -32,14 +32,12 @@ CORS(app)
|
|
| 32 |
# Get API key securely
|
| 33 |
api_key = os.getenv('Gemini')
|
| 34 |
if not api_key:
|
| 35 |
-
# Fallback for local testing if env var not set, though env var is preferred
|
| 36 |
logging.warning("Gemini API key not found in environment variables.")
|
| 37 |
|
| 38 |
def configure_gemini(api_key):
|
| 39 |
"""Configure Gemini AI model."""
|
| 40 |
try:
|
| 41 |
genai.configure(api_key=api_key)
|
| 42 |
-
# Using 2.0 Flash as it has superior vision and long-context capabilities
|
| 43 |
return genai.GenerativeModel('gemini-2.0-flash')
|
| 44 |
except Exception as e:
|
| 45 |
logging.error(f"Error configuring Gemini: {str(e)}")
|
|
@@ -49,8 +47,6 @@ def configure_gemini(api_key):
|
|
| 49 |
# PROMPTS
|
| 50 |
# -------------------------------------------------------------------------
|
| 51 |
|
| 52 |
-
# Enhanced Prompt for General Financial Documents (Statements, Invoices, Receipts)
|
| 53 |
-
# Addresses Point 1 (Rounding/Dates) & Point 3 (Document Types)
|
| 54 |
FINANCIAL_DOC_PROMPT = """Analyze this financial document (which could be a Bank Statement, Invoice, Receipt, or Transaction List).
|
| 55 |
Extract all relevant transactions/items in JSON format.
|
| 56 |
|
|
@@ -90,6 +86,49 @@ RETURN STRUCTURE:
|
|
| 90 |
Return ONLY raw JSON. No markdown formatting.
|
| 91 |
"""
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def get_text_prompt_with_fallback_date():
|
| 94 |
"""
|
| 95 |
Generate prompt for raw text snippets where context might be missing.
|
|
@@ -103,46 +142,30 @@ If the text below does not specify a year or date, reasonable assume {current_da
|
|
| 103 |
"""
|
| 104 |
|
| 105 |
# -------------------------------------------------------------------------
|
| 106 |
-
# CATEGORIZATION LOGIC - TYPE-BASED
|
| 107 |
# -------------------------------------------------------------------------
|
| 108 |
|
| 109 |
def categorize_transaction(transaction):
|
| 110 |
"""
|
| 111 |
Categorizes a transaction based strictly on its Type field.
|
| 112 |
-
This prevents keyword-based misclassification.
|
| 113 |
-
|
| 114 |
-
Args:
|
| 115 |
-
transaction: dict with keys including 'Type', 'Description', 'Destination_of_funds'
|
| 116 |
-
|
| 117 |
-
Returns:
|
| 118 |
-
dict with added 'Account_Category' field
|
| 119 |
"""
|
| 120 |
tx_type = transaction.get('Type', '').lower()
|
| 121 |
description = transaction.get('Description', '').lower()
|
| 122 |
destination = transaction.get('Destination_of_funds', '').lower()
|
| 123 |
-
|
| 124 |
-
# Add the categorized account field
|
| 125 |
account_category = "Uncategorized"
|
| 126 |
-
|
| 127 |
-
# ========== INCOME TYPE ==========
|
| 128 |
if tx_type == 'income':
|
| 129 |
-
# All income should map to revenue accounts, NOT expenses
|
| 130 |
if any(keyword in description for keyword in ['sales', 'service', 'revenue', 'invoice']):
|
| 131 |
account_category = "Sales Revenue"
|
| 132 |
elif any(keyword in description for keyword in ['interest', 'dividend']):
|
| 133 |
account_category = "Interest Income"
|
| 134 |
elif any(keyword in description for keyword in ['transfer', 'deposit', 'payment']):
|
| 135 |
-
# This fixes the "Income Trap" - transfers FROM others are income
|
| 136 |
account_category = "Other Income"
|
| 137 |
else:
|
| 138 |
account_category = "Other Income"
|
| 139 |
-
|
| 140 |
-
# ========== EXPENSE TYPE ==========
|
| 141 |
elif tx_type == 'expense':
|
| 142 |
-
# Map based on Destination_of_funds or description keywords
|
| 143 |
-
# This is TYPE-FIRST, so "cash" in description won't make it an asset
|
| 144 |
-
|
| 145 |
-
# Specific expense categories based on your system
|
| 146 |
if 'salaries' in destination or 'wages' in destination or 'salary' in description:
|
| 147 |
account_category = "Salaries and Wages"
|
| 148 |
elif 'water' in destination or 'electricity' in destination:
|
|
@@ -177,21 +200,16 @@ def categorize_transaction(transaction):
|
|
| 177 |
account_category = "Travel and Accommodation"
|
| 178 |
elif 'depreciation' in destination:
|
| 179 |
account_category = "Depreciation"
|
| 180 |
-
|
| 181 |
-
# Special cases based on description (but still respecting expense type)
|
| 182 |
elif 'atm' in description and 'cash' in description:
|
| 183 |
-
# This fixes the "Cash Trap" - ATM withdrawals are drawings, not assets
|
| 184 |
account_category = "Owner's Drawings"
|
| 185 |
elif 'payment to' in description:
|
| 186 |
-
# Payment to suppliers/vendors
|
| 187 |
if any(word in description for word in ['fabric', 'printing', 'material']):
|
| 188 |
account_category = "Cost of Sales"
|
| 189 |
else:
|
| 190 |
account_category = "Miscellaneous Expense"
|
| 191 |
else:
|
| 192 |
account_category = "Miscellaneous Expense"
|
| 193 |
-
|
| 194 |
-
# ========== ASSET TYPE ==========
|
| 195 |
elif tx_type == 'asset':
|
| 196 |
if 'equipment' in destination or 'equipment' in description:
|
| 197 |
account_category = "Equipment"
|
|
@@ -205,8 +223,7 @@ def categorize_transaction(transaction):
|
|
| 205 |
account_category = "Furniture"
|
| 206 |
else:
|
| 207 |
account_category = "Other Assets"
|
| 208 |
-
|
| 209 |
-
# ========== LIABILITY TYPE ==========
|
| 210 |
elif tx_type == 'liability':
|
| 211 |
if 'bank loan' in destination or 'loan' in description:
|
| 212 |
account_category = "Bank Loan"
|
|
@@ -214,8 +231,7 @@ def categorize_transaction(transaction):
|
|
| 214 |
account_category = "Credit Facility"
|
| 215 |
else:
|
| 216 |
account_category = "Other Liabilities"
|
| 217 |
-
|
| 218 |
-
# ========== EQUITY TYPE ==========
|
| 219 |
elif tx_type == 'equity':
|
| 220 |
if 'owner' in destination or 'capital' in description:
|
| 221 |
account_category = "Owner Investment"
|
|
@@ -223,12 +239,10 @@ def categorize_transaction(transaction):
|
|
| 223 |
account_category = "Retained Earnings"
|
| 224 |
else:
|
| 225 |
account_category = "Other Equity"
|
| 226 |
-
|
| 227 |
-
# ========== TRANSFER TYPE ==========
|
| 228 |
elif tx_type == 'transfer':
|
| 229 |
account_category = "Internal Transfer"
|
| 230 |
-
|
| 231 |
-
# ========== INVESTMENT TYPE ==========
|
| 232 |
elif tx_type == 'investment':
|
| 233 |
if 'securities' in destination or 'stock' in description:
|
| 234 |
account_category = "Securities"
|
|
@@ -236,16 +250,13 @@ def categorize_transaction(transaction):
|
|
| 236 |
account_category = "Mutual Funds"
|
| 237 |
else:
|
| 238 |
account_category = "Other Investments"
|
| 239 |
-
|
| 240 |
-
# ========== LOAN REPAYMENT TYPE ==========
|
| 241 |
elif tx_type == 'loan_repayment':
|
| 242 |
account_category = "Loan Repayment"
|
| 243 |
-
|
| 244 |
-
# ========== CAPITAL INJECTION TYPE ==========
|
| 245 |
elif tx_type == 'capital_injection':
|
| 246 |
account_category = "Capital Injection"
|
| 247 |
-
|
| 248 |
-
# Add the category to the transaction
|
| 249 |
transaction['Account_Category'] = account_category
|
| 250 |
return transaction
|
| 251 |
|
|
@@ -255,69 +266,60 @@ def categorize_transaction(transaction):
|
|
| 255 |
|
| 256 |
def extract_json_from_response(response_text):
|
| 257 |
"""Extract valid JSON from Gemini's response, handling Markdown fences."""
|
| 258 |
-
# Remove markdown code blocks
|
| 259 |
cleaned_text = re.sub(r'```json\s*', '', response_text)
|
| 260 |
cleaned_text = re.sub(r'```\s*', '', cleaned_text)
|
| 261 |
-
|
| 262 |
-
# Find JSON object
|
| 263 |
match = re.search(r'(\{.*\})', cleaned_text, re.DOTALL)
|
| 264 |
if match:
|
| 265 |
json_string = match.group(1)
|
| 266 |
else:
|
| 267 |
-
# Fallback: assume the whole text is JSON
|
| 268 |
json_string = cleaned_text
|
| 269 |
|
| 270 |
try:
|
| 271 |
return json.loads(json_string)
|
| 272 |
except json.JSONDecodeError:
|
| 273 |
logging.warning("JSON parsing failed, attempting repair.")
|
| 274 |
-
raise ValueError(json_string)
|
| 275 |
|
| 276 |
-
def repair_json_with_gemini(model, broken_json_string):
|
| 277 |
"""Uses Gemini to fix broken JSON syntax."""
|
| 278 |
repair_prompt = f"""Fix this broken JSON string. Return ONLY valid JSON.
|
| 279 |
-
|
| 280 |
try:
|
| 281 |
resp = model.generate_content(repair_prompt)
|
| 282 |
return extract_json_from_response(resp.text)
|
| 283 |
except Exception as e:
|
| 284 |
logging.error(f"JSON repair failed: {e}")
|
| 285 |
-
return {
|
| 286 |
|
| 287 |
def call_gemini_with_retry(model, content, prompt, retries=2):
|
| 288 |
"""
|
| 289 |
-
Generic runner for Gemini.
|
| 290 |
-
Args:
|
| 291 |
-
content: Can be a String (text) or a PIL.Image object (vision).
|
| 292 |
"""
|
| 293 |
for attempt in range(retries + 1):
|
| 294 |
try:
|
| 295 |
-
# Gemini Python SDK handles [Prompt, Image] or [Prompt, Text] automatically
|
| 296 |
response = model.generate_content([prompt, content])
|
| 297 |
-
|
| 298 |
try:
|
| 299 |
result = extract_json_from_response(response.text)
|
| 300 |
-
|
| 301 |
-
# POST-PROCESSING: Categorize each transaction based on Type
|
| 302 |
if 'transactions' in result:
|
| 303 |
result['transactions'] = [
|
| 304 |
categorize_transaction(tx) for tx in result['transactions']
|
| 305 |
]
|
| 306 |
-
|
| 307 |
return result
|
| 308 |
except ValueError as ve:
|
| 309 |
-
# Value error here contains the broken JSON string
|
| 310 |
broken_json = str(ve)
|
| 311 |
-
repaired = repair_json_with_gemini(model, broken_json)
|
| 312 |
-
|
| 313 |
-
# Categorize repaired transactions too
|
| 314 |
if 'transactions' in repaired:
|
| 315 |
repaired['transactions'] = [
|
| 316 |
categorize_transaction(tx) for tx in repaired['transactions']
|
| 317 |
]
|
| 318 |
-
|
| 319 |
return repaired
|
| 320 |
-
|
| 321 |
except Exception as e:
|
| 322 |
if "429" in str(e) or "ResourceExhausted" in str(e):
|
| 323 |
time.sleep(2 * (attempt + 1))
|
|
@@ -328,26 +330,391 @@ def call_gemini_with_retry(model, content, prompt, retries=2):
|
|
| 328 |
|
| 329 |
return {"transactions": []}
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
def is_file_empty(file_path):
|
| 332 |
"""Check if file is empty."""
|
| 333 |
return os.path.getsize(file_path) == 0
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
# -------------------------------------------------------------------------
|
| 336 |
# CORE LOGIC: PDF PROCESSING (HYBRID TEXT + VISION)
|
| 337 |
# -------------------------------------------------------------------------
|
| 338 |
|
| 339 |
def process_pdf_page_as_image(model, pdf_path, page_num):
|
| 340 |
-
"""
|
| 341 |
if not PDF_IMAGE_SUPPORT:
|
| 342 |
raise ImportError("pdf2image/poppler not installed")
|
| 343 |
|
| 344 |
-
# Convert specific page to image
|
| 345 |
-
# first_page=page_num, last_page=page_num ensures we only convert 1 page at a time to save RAM
|
| 346 |
images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
|
| 347 |
if not images:
|
| 348 |
return []
|
| 349 |
-
|
| 350 |
-
# Process the image
|
| 351 |
result = call_gemini_with_retry(model, images[0], FINANCIAL_DOC_PROMPT)
|
| 352 |
return result.get('transactions', [])
|
| 353 |
|
|
@@ -356,58 +723,48 @@ def process_pdf():
|
|
| 356 |
"""
|
| 357 |
Smart PDF Processor:
|
| 358 |
1. Checks if empty.
|
| 359 |
-
2. Tries standard Text extraction
|
| 360 |
-
3. If Text fails
|
| 361 |
"""
|
| 362 |
temp_path = None
|
| 363 |
try:
|
| 364 |
-
# 1. Validation
|
| 365 |
if 'file' not in request.files:
|
| 366 |
return jsonify({'error': 'No file uploaded'}), 400
|
| 367 |
file = request.files['file']
|
| 368 |
if file.filename == '':
|
| 369 |
return jsonify({'error': 'No file selected'}), 400
|
| 370 |
|
| 371 |
-
# Save Temp
|
| 372 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 373 |
file.save(tmp.name)
|
| 374 |
temp_path = tmp.name
|
| 375 |
|
| 376 |
-
# Point 2: Empty File Check
|
| 377 |
if is_file_empty(temp_path):
|
| 378 |
-
|
| 379 |
|
| 380 |
model = configure_gemini(api_key)
|
| 381 |
all_transactions = []
|
| 382 |
-
|
| 383 |
-
# Determine strategy: Try reading PDF structure first
|
| 384 |
try:
|
| 385 |
reader = pypdf.PdfReader(temp_path)
|
| 386 |
num_pages = len(reader.pages)
|
| 387 |
-
|
| 388 |
for i in range(num_pages):
|
| 389 |
logging.info(f"Processing page {i+1}/{num_pages}")
|
| 390 |
-
|
| 391 |
-
# Attempt Text Extraction
|
| 392 |
try:
|
| 393 |
text_content = reader.pages[i].extract_text()
|
| 394 |
except Exception:
|
| 395 |
-
text_content = ""
|
| 396 |
|
| 397 |
-
# LOGIC: Check if text is sufficient. If < 50 chars, it's likely a scan or image-heavy.
|
| 398 |
if text_content and len(text_content.strip()) > 50:
|
| 399 |
-
# Strategy A: Text Mode
|
| 400 |
logging.info("Text detected. Using Text Strategy.")
|
| 401 |
result = call_gemini_with_retry(model, text_content, FINANCIAL_DOC_PROMPT)
|
| 402 |
else:
|
| 403 |
-
# Strategy B: Vision Fallback (Point 4)
|
| 404 |
logging.info("Low text/Encryption detected. Switching to Vision Strategy.")
|
| 405 |
if PDF_IMAGE_SUPPORT:
|
| 406 |
-
|
| 407 |
-
# but convert_from_path handles slicing via first_page/last_page (1-based)
|
| 408 |
-
txs = process_pdf_page_as_image(model, temp_path, i+1)
|
| 409 |
all_transactions.extend(txs)
|
| 410 |
-
continue
|
| 411 |
else:
|
| 412 |
logging.warning("Cannot process scanned PDF - pdf2image missing.")
|
| 413 |
result = {"transactions": []}
|
|
@@ -416,10 +773,8 @@ def process_pdf():
|
|
| 416 |
all_transactions.extend(txs)
|
| 417 |
|
| 418 |
except pypdf.errors.PdfReadError:
|
| 419 |
-
# If pypdf fails completely (e.g., highly corrupted or weird encryption), try Vision on whole file
|
| 420 |
logging.warning("pypdf failed to read file. Attempting full Vision fallback.")
|
| 421 |
if PDF_IMAGE_SUPPORT:
|
| 422 |
-
# Warning: Processing all pages as images might be slow
|
| 423 |
images = convert_from_path(temp_path)
|
| 424 |
for img in images:
|
| 425 |
result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
|
|
@@ -437,7 +792,7 @@ def process_pdf():
|
|
| 437 |
os.remove(temp_path)
|
| 438 |
|
| 439 |
# -------------------------------------------------------------------------
|
| 440 |
-
# TEXT & IMAGE ENDPOINTS
|
| 441 |
# -------------------------------------------------------------------------
|
| 442 |
|
| 443 |
@app.route('/process-text', methods=['POST'])
|
|
@@ -447,18 +802,17 @@ def process_text():
|
|
| 447 |
data = request.get_json()
|
| 448 |
if not data or 'text' not in data:
|
| 449 |
return jsonify({'error': 'No text provided'}), 400
|
| 450 |
-
|
| 451 |
text_input = data['text']
|
| 452 |
if not text_input.strip():
|
| 453 |
-
return jsonify({'error': 'Text input cannot be empty'}), 400
|
| 454 |
-
|
| 455 |
model = configure_gemini(api_key)
|
| 456 |
-
# Use specific prompt with date fallback for raw text
|
| 457 |
prompt = get_text_prompt_with_fallback_date()
|
| 458 |
-
|
| 459 |
result = call_gemini_with_retry(model, text_input, prompt)
|
| 460 |
return jsonify({'transactions': result.get('transactions', [])})
|
| 461 |
-
|
| 462 |
except Exception as e:
|
| 463 |
logging.error(f"Error: {e}")
|
| 464 |
return jsonify({'error': str(e)}), 500
|
|
@@ -471,8 +825,7 @@ def process_image():
|
|
| 471 |
if 'file' not in request.files:
|
| 472 |
return jsonify({'error': 'No file uploaded'}), 400
|
| 473 |
file = request.files['file']
|
| 474 |
-
|
| 475 |
-
# Point 2: Empty check
|
| 476 |
file.seek(0, os.SEEK_END)
|
| 477 |
size = file.tell()
|
| 478 |
file.seek(0)
|
|
@@ -484,15 +837,11 @@ def process_image():
|
|
| 484 |
temp_path = tmp.name
|
| 485 |
|
| 486 |
model = configure_gemini(api_key)
|
| 487 |
-
|
| 488 |
-
# Load image with PIL
|
| 489 |
img = Image.open(temp_path)
|
| 490 |
-
|
| 491 |
-
# Use the General Financial Prompt
|
| 492 |
result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
|
| 493 |
-
|
| 494 |
return jsonify({'transactions': result.get('transactions', [])})
|
| 495 |
-
|
| 496 |
except Exception as e:
|
| 497 |
logging.error(f"Error: {e}")
|
| 498 |
return jsonify({'error': str(e)}), 500
|
|
@@ -500,10 +849,189 @@ def process_image():
|
|
| 500 |
if temp_path and os.path.exists(temp_path):
|
| 501 |
os.remove(temp_path)
|
| 502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
@app.route('/transaction-types', methods=['GET'])
|
| 504 |
def get_transaction_types():
|
| 505 |
"""Return available transaction types and their categories."""
|
| 506 |
-
# Kept identical for backwards compatibility
|
| 507 |
transaction_types = {
|
| 508 |
"types": [
|
| 509 |
{
|
|
@@ -577,10 +1105,9 @@ def health_check():
|
|
| 577 |
return jsonify({
|
| 578 |
'status': 'healthy',
|
| 579 |
'timestamp': datetime.now().isoformat(),
|
| 580 |
-
'version': '2.
|
| 581 |
'vision_support': PDF_IMAGE_SUPPORT
|
| 582 |
})
|
| 583 |
|
| 584 |
if __name__ == '__main__':
|
| 585 |
-
# Ensure this port matches your server configuration
|
| 586 |
app.run(debug=True, host="0.0.0.0", port=7860)
|
|
|
|
| 32 |
# Get API key securely
|
| 33 |
api_key = os.getenv('Gemini')
|
| 34 |
if not api_key:
|
|
|
|
| 35 |
logging.warning("Gemini API key not found in environment variables.")
|
| 36 |
|
| 37 |
def configure_gemini(api_key):
|
| 38 |
"""Configure Gemini AI model."""
|
| 39 |
try:
|
| 40 |
genai.configure(api_key=api_key)
|
|
|
|
| 41 |
return genai.GenerativeModel('gemini-2.0-flash')
|
| 42 |
except Exception as e:
|
| 43 |
logging.error(f"Error configuring Gemini: {str(e)}")
|
|
|
|
| 47 |
# PROMPTS
|
| 48 |
# -------------------------------------------------------------------------
|
| 49 |
|
|
|
|
|
|
|
| 50 |
FINANCIAL_DOC_PROMPT = """Analyze this financial document (which could be a Bank Statement, Invoice, Receipt, or Transaction List).
|
| 51 |
Extract all relevant transactions/items in JSON format.
|
| 52 |
|
|
|
|
| 86 |
Return ONLY raw JSON. No markdown formatting.
|
| 87 |
"""
|
| 88 |
|
| 89 |
+
STUDENT_IMPORT_PROMPT = """Analyze this student document and extract student records into JSON.
|
| 90 |
+
|
| 91 |
+
The document may be:
|
| 92 |
+
- a class list
|
| 93 |
+
- an admission register
|
| 94 |
+
- a handwritten register
|
| 95 |
+
- a scanned student form
|
| 96 |
+
- a camera-captured document
|
| 97 |
+
- a PDF page
|
| 98 |
+
- an uploaded image
|
| 99 |
+
|
| 100 |
+
RULES:
|
| 101 |
+
1. Return ONLY raw JSON. No markdown.
|
| 102 |
+
2. Extract as many student rows as possible.
|
| 103 |
+
3. Support both printed and handwritten text.
|
| 104 |
+
4. If a field is missing, return an empty string.
|
| 105 |
+
5. Do not invent students.
|
| 106 |
+
6. Ignore page numbers, signatures, totals, decorations, and repeated headers.
|
| 107 |
+
7. Normalize similar fields as follows:
|
| 108 |
+
- class / stream / class_name -> class_name
|
| 109 |
+
- grade / form / level -> grade
|
| 110 |
+
- admission number / admission no / reg no / student no -> admission_number
|
| 111 |
+
- phone / mobile / contact -> phone_number
|
| 112 |
+
|
| 113 |
+
RETURN STRUCTURE:
|
| 114 |
+
{
|
| 115 |
+
"students": [
|
| 116 |
+
{
|
| 117 |
+
"name": "Student Name",
|
| 118 |
+
"admission_number": "ADM-001",
|
| 119 |
+
"class_name": "A",
|
| 120 |
+
"grade": "Grade 7",
|
| 121 |
+
"gender": "Female",
|
| 122 |
+
"email": "student@example.com",
|
| 123 |
+
"phone_number": "+2637...",
|
| 124 |
+
"extra_fields": [
|
| 125 |
+
{ "name": "guardian_name", "value": "John Doe" }
|
| 126 |
+
]
|
| 127 |
+
}
|
| 128 |
+
]
|
| 129 |
+
}
|
| 130 |
+
"""
|
| 131 |
+
|
| 132 |
def get_text_prompt_with_fallback_date():
|
| 133 |
"""
|
| 134 |
Generate prompt for raw text snippets where context might be missing.
|
|
|
|
| 142 |
"""
|
| 143 |
|
| 144 |
# -------------------------------------------------------------------------
|
| 145 |
+
# CATEGORIZATION LOGIC - TYPE-BASED
|
| 146 |
# -------------------------------------------------------------------------
|
| 147 |
|
| 148 |
def categorize_transaction(transaction):
|
| 149 |
"""
|
| 150 |
Categorizes a transaction based strictly on its Type field.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
"""
|
| 152 |
tx_type = transaction.get('Type', '').lower()
|
| 153 |
description = transaction.get('Description', '').lower()
|
| 154 |
destination = transaction.get('Destination_of_funds', '').lower()
|
| 155 |
+
|
|
|
|
| 156 |
account_category = "Uncategorized"
|
| 157 |
+
|
|
|
|
| 158 |
if tx_type == 'income':
|
|
|
|
| 159 |
if any(keyword in description for keyword in ['sales', 'service', 'revenue', 'invoice']):
|
| 160 |
account_category = "Sales Revenue"
|
| 161 |
elif any(keyword in description for keyword in ['interest', 'dividend']):
|
| 162 |
account_category = "Interest Income"
|
| 163 |
elif any(keyword in description for keyword in ['transfer', 'deposit', 'payment']):
|
|
|
|
| 164 |
account_category = "Other Income"
|
| 165 |
else:
|
| 166 |
account_category = "Other Income"
|
| 167 |
+
|
|
|
|
| 168 |
elif tx_type == 'expense':
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
if 'salaries' in destination or 'wages' in destination or 'salary' in description:
|
| 170 |
account_category = "Salaries and Wages"
|
| 171 |
elif 'water' in destination or 'electricity' in destination:
|
|
|
|
| 200 |
account_category = "Travel and Accommodation"
|
| 201 |
elif 'depreciation' in destination:
|
| 202 |
account_category = "Depreciation"
|
|
|
|
|
|
|
| 203 |
elif 'atm' in description and 'cash' in description:
|
|
|
|
| 204 |
account_category = "Owner's Drawings"
|
| 205 |
elif 'payment to' in description:
|
|
|
|
| 206 |
if any(word in description for word in ['fabric', 'printing', 'material']):
|
| 207 |
account_category = "Cost of Sales"
|
| 208 |
else:
|
| 209 |
account_category = "Miscellaneous Expense"
|
| 210 |
else:
|
| 211 |
account_category = "Miscellaneous Expense"
|
| 212 |
+
|
|
|
|
| 213 |
elif tx_type == 'asset':
|
| 214 |
if 'equipment' in destination or 'equipment' in description:
|
| 215 |
account_category = "Equipment"
|
|
|
|
| 223 |
account_category = "Furniture"
|
| 224 |
else:
|
| 225 |
account_category = "Other Assets"
|
| 226 |
+
|
|
|
|
| 227 |
elif tx_type == 'liability':
|
| 228 |
if 'bank loan' in destination or 'loan' in description:
|
| 229 |
account_category = "Bank Loan"
|
|
|
|
| 231 |
account_category = "Credit Facility"
|
| 232 |
else:
|
| 233 |
account_category = "Other Liabilities"
|
| 234 |
+
|
|
|
|
| 235 |
elif tx_type == 'equity':
|
| 236 |
if 'owner' in destination or 'capital' in description:
|
| 237 |
account_category = "Owner Investment"
|
|
|
|
| 239 |
account_category = "Retained Earnings"
|
| 240 |
else:
|
| 241 |
account_category = "Other Equity"
|
| 242 |
+
|
|
|
|
| 243 |
elif tx_type == 'transfer':
|
| 244 |
account_category = "Internal Transfer"
|
| 245 |
+
|
|
|
|
| 246 |
elif tx_type == 'investment':
|
| 247 |
if 'securities' in destination or 'stock' in description:
|
| 248 |
account_category = "Securities"
|
|
|
|
| 250 |
account_category = "Mutual Funds"
|
| 251 |
else:
|
| 252 |
account_category = "Other Investments"
|
| 253 |
+
|
|
|
|
| 254 |
elif tx_type == 'loan_repayment':
|
| 255 |
account_category = "Loan Repayment"
|
| 256 |
+
|
|
|
|
| 257 |
elif tx_type == 'capital_injection':
|
| 258 |
account_category = "Capital Injection"
|
| 259 |
+
|
|
|
|
| 260 |
transaction['Account_Category'] = account_category
|
| 261 |
return transaction
|
| 262 |
|
|
|
|
| 266 |
|
| 267 |
def extract_json_from_response(response_text):
|
| 268 |
"""Extract valid JSON from Gemini's response, handling Markdown fences."""
|
|
|
|
| 269 |
cleaned_text = re.sub(r'```json\s*', '', response_text)
|
| 270 |
cleaned_text = re.sub(r'```\s*', '', cleaned_text)
|
| 271 |
+
|
|
|
|
| 272 |
match = re.search(r'(\{.*\})', cleaned_text, re.DOTALL)
|
| 273 |
if match:
|
| 274 |
json_string = match.group(1)
|
| 275 |
else:
|
|
|
|
| 276 |
json_string = cleaned_text
|
| 277 |
|
| 278 |
try:
|
| 279 |
return json.loads(json_string)
|
| 280 |
except json.JSONDecodeError:
|
| 281 |
logging.warning("JSON parsing failed, attempting repair.")
|
| 282 |
+
raise ValueError(json_string)
|
| 283 |
|
| 284 |
+
def repair_json_with_gemini(model, broken_json_string, fallback_key="transactions"):
|
| 285 |
"""Uses Gemini to fix broken JSON syntax."""
|
| 286 |
repair_prompt = f"""Fix this broken JSON string. Return ONLY valid JSON.
|
| 287 |
+
Broken JSON: {broken_json_string}"""
|
| 288 |
try:
|
| 289 |
resp = model.generate_content(repair_prompt)
|
| 290 |
return extract_json_from_response(resp.text)
|
| 291 |
except Exception as e:
|
| 292 |
logging.error(f"JSON repair failed: {e}")
|
| 293 |
+
return {fallback_key: []}
|
| 294 |
|
| 295 |
def call_gemini_with_retry(model, content, prompt, retries=2):
|
| 296 |
"""
|
| 297 |
+
Generic runner for financial Gemini extraction.
|
|
|
|
|
|
|
| 298 |
"""
|
| 299 |
for attempt in range(retries + 1):
|
| 300 |
try:
|
|
|
|
| 301 |
response = model.generate_content([prompt, content])
|
| 302 |
+
|
| 303 |
try:
|
| 304 |
result = extract_json_from_response(response.text)
|
| 305 |
+
|
|
|
|
| 306 |
if 'transactions' in result:
|
| 307 |
result['transactions'] = [
|
| 308 |
categorize_transaction(tx) for tx in result['transactions']
|
| 309 |
]
|
| 310 |
+
|
| 311 |
return result
|
| 312 |
except ValueError as ve:
|
|
|
|
| 313 |
broken_json = str(ve)
|
| 314 |
+
repaired = repair_json_with_gemini(model, broken_json, fallback_key="transactions")
|
| 315 |
+
|
|
|
|
| 316 |
if 'transactions' in repaired:
|
| 317 |
repaired['transactions'] = [
|
| 318 |
categorize_transaction(tx) for tx in repaired['transactions']
|
| 319 |
]
|
| 320 |
+
|
| 321 |
return repaired
|
| 322 |
+
|
| 323 |
except Exception as e:
|
| 324 |
if "429" in str(e) or "ResourceExhausted" in str(e):
|
| 325 |
time.sleep(2 * (attempt + 1))
|
|
|
|
| 330 |
|
| 331 |
return {"transactions": []}
|
| 332 |
|
| 333 |
+
def call_gemini_students_with_retry(model, content, prompt, retries=2):
|
| 334 |
+
"""
|
| 335 |
+
Generic runner for student Gemini extraction.
|
| 336 |
+
"""
|
| 337 |
+
for attempt in range(retries + 1):
|
| 338 |
+
try:
|
| 339 |
+
response = model.generate_content([prompt, content])
|
| 340 |
+
|
| 341 |
+
try:
|
| 342 |
+
result = extract_json_from_response(response.text)
|
| 343 |
+
if 'students' not in result or not isinstance(result.get('students'), list):
|
| 344 |
+
return {"students": []}
|
| 345 |
+
return result
|
| 346 |
+
except ValueError as ve:
|
| 347 |
+
broken_json = str(ve)
|
| 348 |
+
repaired = repair_json_with_gemini(model, broken_json, fallback_key="students")
|
| 349 |
+
if 'students' not in repaired or not isinstance(repaired.get('students'), list):
|
| 350 |
+
return {"students": []}
|
| 351 |
+
return repaired
|
| 352 |
+
|
| 353 |
+
except Exception as e:
|
| 354 |
+
if "429" in str(e) or "ResourceExhausted" in str(e):
|
| 355 |
+
time.sleep(2 * (attempt + 1))
|
| 356 |
+
continue
|
| 357 |
+
logging.error(f"Gemini Student Import Error: {e}")
|
| 358 |
+
if attempt == retries:
|
| 359 |
+
raise
|
| 360 |
+
|
| 361 |
+
return {"students": []}
|
| 362 |
+
|
| 363 |
def is_file_empty(file_path):
|
| 364 |
"""Check if file is empty."""
|
| 365 |
return os.path.getsize(file_path) == 0
|
| 366 |
|
| 367 |
+
def parse_json_safely(value, default=None):
|
| 368 |
+
if default is None:
|
| 369 |
+
default = {}
|
| 370 |
+
if value is None:
|
| 371 |
+
return default
|
| 372 |
+
if isinstance(value, (dict, list)):
|
| 373 |
+
return value
|
| 374 |
+
if not isinstance(value, str):
|
| 375 |
+
return default
|
| 376 |
+
value = value.strip()
|
| 377 |
+
if not value:
|
| 378 |
+
return default
|
| 379 |
+
try:
|
| 380 |
+
return json.loads(value)
|
| 381 |
+
except Exception:
|
| 382 |
+
return default
|
| 383 |
+
|
| 384 |
+
def normalize_key(key: str) -> str:
|
| 385 |
+
return re.sub(r"[^a-z0-9]+", "_", str(key).strip().lower()).strip("_")
|
| 386 |
+
|
| 387 |
+
def title_case_name(value: str) -> str:
|
| 388 |
+
value = re.sub(r"\s+", " ", str(value or "").strip())
|
| 389 |
+
return value.title() if value else ""
|
| 390 |
+
|
| 391 |
+
def normalize_gender(value: str) -> str:
|
| 392 |
+
raw = str(value or "").strip().lower()
|
| 393 |
+
if raw in {"m", "male", "boy"}:
|
| 394 |
+
return "Male"
|
| 395 |
+
if raw in {"f", "female", "girl"}:
|
| 396 |
+
return "Female"
|
| 397 |
+
if not raw:
|
| 398 |
+
return ""
|
| 399 |
+
return str(value).strip().title()
|
| 400 |
+
|
| 401 |
+
def ensure_extra_fields_list(value):
|
| 402 |
+
if isinstance(value, list):
|
| 403 |
+
cleaned = []
|
| 404 |
+
for item in value:
|
| 405 |
+
if isinstance(item, dict):
|
| 406 |
+
name = str(item.get("name", "")).strip()
|
| 407 |
+
val = str(item.get("value", "")).strip()
|
| 408 |
+
if name:
|
| 409 |
+
cleaned.append({"name": name, "value": val})
|
| 410 |
+
return cleaned
|
| 411 |
+
return []
|
| 412 |
+
|
| 413 |
+
def build_student_prompt(template_fields=None):
    """Compose the student-import prompt plus a JSON "pre-import configuration" block.

    The configuration carries per-import defaults (grade/class/gender),
    admission-number generation settings, and any operator instructions,
    serialized as compact JSON appended below the base prompt.
    """
    fields = template_fields or {}

    config = {
        "global_defaults": {
            "grade": fields.get("grade"),
            "class_name": fields.get("class_name") or fields.get("class"),
            "gender": fields.get("gender"),
        },
        "generate_admission_numbers": bool(fields.get("generate_admission_numbers")),
        "admission_prefix": fields.get("admission_prefix", "ADM"),
        "ai_instructions": fields.get("ai_instructions", ""),
        "expected_fields": fields.get("expected_fields", []),
        "custom_fields": fields.get("custom_fields", []),
    }

    return f"""{STUDENT_IMPORT_PROMPT}

PRE-IMPORT CONFIGURATION:
{json.dumps(config, ensure_ascii=False)}
"""
|
| 434 |
+
|
| 435 |
+
def normalize_student_record(student, template_fields=None, sequence=None):
    """
    Normalizes one parsed student record into the required shape.

    Args:
        student: Raw dict of extracted fields (keys in any spelling/casing).
        template_fields: Import-wide defaults and admission-number settings.
        sequence: 1-based position of this record within the import; used
            when admission numbers are auto-generated.

    Returns:
        Dict with fixed keys: name, admission_number, class_name, grade,
        gender, email, phone_number, extra_fields.
    """
    template_fields = template_fields or {}
    raw = student or {}

    mapped = {}
    extra_fields = []

    # Canonical field -> accepted spellings (already run through normalize_key).
    alias_map = {
        "name": ["name", "student_name", "full_name", "learner_name", "pupil_name"],
        "admission_number": [
            "admission_number", "admission_no", "admission_no_", "student_no",
            "student_number", "reg_no", "registration_number"
        ],
        "class_name": ["class_name", "class", "stream", "classroom"],
        "grade": ["grade", "form", "level"],
        "gender": ["gender", "sex"],
        "email": ["email", "email_address"],
        "phone_number": ["phone_number", "phone", "mobile", "contact", "contact_number"],
    }

    # Normalize incoming keys once so alias matching is spelling-insensitive.
    normalized_raw = {normalize_key(k): v for k, v in raw.items()}

    # First non-blank alias wins for each canonical field.
    for canonical, aliases in alias_map.items():
        for alias in aliases:
            if alias in normalized_raw and str(normalized_raw[alias]).strip():
                mapped[canonical] = str(normalized_raw[alias]).strip()
                break

    # Everything that is not a known alias (and not blank) is preserved
    # as an extra field rather than silently discarded.
    for key, value in normalized_raw.items():
        if key in {a for aliases in alias_map.values() for a in aliases}:
            continue
        if key == "extra_fields":
            continue
        if value is None or str(value).strip() == "":
            continue
        extra_fields.append({
            "name": key,
            "value": str(value).strip()
        })

    # Merge any pre-structured extra_fields the parser already produced.
    extra_fields.extend(ensure_extra_fields_list(raw.get("extra_fields")))

    # Fill gaps from the import-wide template defaults.
    mapped["name"] = title_case_name(mapped.get("name", ""))
    mapped["class_name"] = mapped.get("class_name") or str(
        template_fields.get("class_name") or template_fields.get("class") or ""
    ).strip()
    mapped["grade"] = mapped.get("grade") or str(template_fields.get("grade") or "").strip()
    mapped["gender"] = normalize_gender(mapped.get("gender") or template_fields.get("gender") or "")
    mapped["email"] = str(mapped.get("email", "")).strip()
    mapped["phone_number"] = str(mapped.get("phone_number", "")).strip()

    # Auto-generate an admission number only when the record lacks one and
    # the template opted in; serial = admission_start + (sequence - 1).
    if not mapped.get("admission_number") and template_fields.get("generate_admission_numbers"):
        prefix = str(template_fields.get("admission_prefix") or "ADM").strip() or "ADM"
        start = int(template_fields.get("admission_start", 1) or 1)
        width = int(template_fields.get("admission_width", 3) or 3)
        serial = start + ((sequence or 1) - 1)
        mapped["admission_number"] = f"{prefix}-{str(serial).zfill(width)}"
    else:
        mapped["admission_number"] = str(mapped.get("admission_number", "")).strip()

    # Template-level custom fields are appended only when they do not
    # collide (by normalized name) with an extra field already present.
    existing_extra = {normalize_key(x["name"]): True for x in extra_fields if x.get("name")}
    for item in template_fields.get("custom_fields", []) or []:
        if not isinstance(item, dict):
            continue
        name = str(item.get("name", "")).strip()
        value = str(item.get("value", "")).strip()
        if name and normalize_key(name) not in existing_extra and value:
            extra_fields.append({"name": name, "value": value})

    cleaned = {
        "name": mapped.get("name", ""),
        "admission_number": mapped.get("admission_number", ""),
        "class_name": mapped.get("class_name", ""),
        "grade": mapped.get("grade", ""),
        "gender": mapped.get("gender", ""),
        "email": mapped.get("email", ""),
        "phone_number": mapped.get("phone_number", ""),
        "extra_fields": extra_fields
    }

    return cleaned
|
| 519 |
+
|
| 520 |
+
def validate_student_records(students):
    """Apply business rules to normalized student rows.

    Rules: ``name`` is required; ``admission_number`` (when present) must
    be unique case-insensitively within the batch.

    Returns ``(validated, errors)``: every row is copied and annotated
    with ``_row`` / ``_valid`` / ``_errors``, and *errors* lists only the
    failing rows with their messages.
    """
    validated = []
    errors = []
    admission_seen = set()

    for position, record in enumerate(students, start=1):
        problems = []

        if not str(record.get("name", "")).strip():
            problems.append("name is required")

        admission = str(record.get("admission_number", "")).strip().lower()
        if admission:
            if admission in admission_seen:
                problems.append("admission_number must be unique")
            admission_seen.add(admission)

        annotated = dict(record)
        annotated["_row"] = position
        annotated["_valid"] = not problems
        annotated["_errors"] = problems

        if problems:
            errors.append({
                "row": position,
                "student": annotated,
                "errors": problems
            })

        validated.append(annotated)

    return validated, errors
|
| 561 |
+
|
| 562 |
+
def dedupe_students(students):
    """Remove duplicates within one import batch, preserving order.

    A row's identity is its admission number when present, otherwise the
    combination of name + class + grade; all comparisons are
    case-insensitive on stripped values. First occurrence wins.
    """
    def _identity(record):
        adm = str(record.get("admission_number", "")).strip().lower()
        if adm:
            return f"adm:{adm}"
        name = str(record.get("name", "")).strip().lower()
        klass = str(record.get("class_name", "")).strip().lower()
        grade = str(record.get("grade", "")).strip().lower()
        return f"name:{name}|class:{klass}|grade:{grade}"

    seen = set()
    deduped = []
    for record in students:
        key = _identity(record)
        if key not in seen:
            seen.add(key)
            deduped.append(record)
    return deduped
|
| 588 |
+
|
| 589 |
+
def allowed_student_import_file(filename):
    """Return True when *filename* carries an importable extension (image/PDF/spreadsheet)."""
    allowed = {".jpg", ".jpeg", ".png", ".webp", ".pdf", ".xlsx", ".xls", ".csv"}
    return os.path.splitext(filename.lower())[1] in allowed
|
| 592 |
+
|
| 593 |
+
def parse_students_from_dataframe(df, template_fields=None):
    """Turn a spreadsheet DataFrame into normalized student records.

    Fully-empty rows and columns are discarded; each remaining cell is
    stringified and stripped, and each non-empty row is run through
    ``normalize_student_record`` with its 1-based sequence number.
    """
    template_fields = template_fields or {}
    frame = df.copy().dropna(how="all").dropna(axis=1, how="all")

    rows = []
    for _, series in frame.iterrows():
        record = {
            str(col): str(series[col]).strip()
            for col in frame.columns
            if not pd.isna(series[col])
        }
        if record:
            rows.append(record)

    return [
        normalize_student_record(row, template_fields=template_fields, sequence=idx + 1)
        for idx, row in enumerate(rows)
    ]
|
| 616 |
+
|
| 617 |
+
def process_student_pdf_page_as_image(model, pdf_path, page_num, template_fields=None):
    """Render one PDF page to an image and extract students from it via Gemini Vision.

    Args:
        model: Configured Gemini model instance.
        pdf_path: Path to the PDF on disk.
        page_num: 1-based page number to render.
        template_fields: Optional import configuration merged into the prompt.

    Returns:
        List of normalized student records (possibly empty).

    Raises:
        ImportError: When pdf2image/poppler support is unavailable.
    """
    if not PDF_IMAGE_SUPPORT:
        raise ImportError("pdf2image/poppler not installed")

    # Render only the requested page to keep memory bounded on large PDFs.
    images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
    if not images:
        return []

    prompt = build_student_prompt(template_fields)
    result = call_gemini_students_with_retry(model, images[0], prompt)
    students = result.get('students', []) or []

    return [
        normalize_student_record(student, template_fields=template_fields, sequence=i + 1)
        for i, student in enumerate(students)
    ]
|
| 633 |
+
|
| 634 |
+
def parse_students_from_pdf(model, pdf_path, template_fields=None):
    """Extract student records from a PDF using a hybrid text/vision strategy.

    Per page: if pypdf extracts a meaningful amount of text (> 50 chars),
    send the text to Gemini; otherwise fall back to rendering the page as
    an image (when pdf2image is available). If pypdf cannot read the file
    at all, every page is processed via Vision.

    Args:
        model: Configured Gemini model instance.
        pdf_path: Path to the PDF on disk.
        template_fields: Optional import configuration merged into the prompt.

    Returns:
        List of normalized student records from all pages.

    Raises:
        ValueError: When the PDF is unreadable and the pdf2image fallback
            is unavailable.
    """
    template_fields = template_fields or {}
    all_students = []
    prompt = build_student_prompt(template_fields)

    try:
        reader = pypdf.PdfReader(pdf_path)
        num_pages = len(reader.pages)

        for i in range(num_pages):
            logging.info(f"Processing student PDF page {i+1}/{num_pages}")

            # Text extraction can fail per-page (encryption, fonts); treat
            # a failure as "no text" and let the Vision fallback handle it.
            try:
                text_content = reader.pages[i].extract_text() or ""
            except Exception:
                text_content = ""

            # > 50 chars is the heuristic for "real" extractable text vs.
            # a scanned page that only yields noise.
            if text_content and len(text_content.strip()) > 50:
                result = call_gemini_students_with_retry(model, text_content, prompt)
                page_students = result.get('students', []) or []
                all_students.extend(page_students)
            else:
                if PDF_IMAGE_SUPPORT:
                    page_students = process_student_pdf_page_as_image(
                        model, pdf_path, i + 1, template_fields=template_fields
                    )
                    all_students.extend(page_students)
                else:
                    logging.warning("Skipped scanned PDF page because pdf2image is unavailable.")

    except pypdf.errors.PdfReadError:
        # Whole-file fallback: pypdf could not open the document at all.
        logging.warning("pypdf failed to read student PDF. Attempting full Vision fallback.")
        if not PDF_IMAGE_SUPPORT:
            raise ValueError("Unreadable PDF and pdf2image fallback unavailable.")

        images = convert_from_path(pdf_path)
        for img in images:
            result = call_gemini_students_with_retry(model, img, prompt)
            all_students.extend(result.get('students', []) or [])

    # Normalize once at the end so sequence numbers are global across pages.
    normalized = [
        normalize_student_record(student, template_fields=template_fields, sequence=i + 1)
        for i, student in enumerate(all_students)
    ]
    return normalized
|
| 679 |
+
|
| 680 |
+
def parse_students_from_image_file(model, file_path, template_fields=None):
    """Extract normalized student records from a single image via Gemini Vision.

    Args:
        model: Configured Gemini model instance.
        file_path: Path to the image on disk (opened with PIL).
        template_fields: Optional import configuration merged into the prompt.

    Returns:
        List of normalized student records (possibly empty).
    """
    template_fields = template_fields or {}
    prompt = build_student_prompt(template_fields)

    img = Image.open(file_path)
    result = call_gemini_students_with_retry(model, img, prompt)
    students = result.get('students', []) or []

    return [
        normalize_student_record(student, template_fields=template_fields, sequence=i + 1)
        for i, student in enumerate(students)
    ]
|
| 692 |
+
|
| 693 |
+
def read_spreadsheet_students(file_path, filename, template_fields=None):
    """Load a CSV/XLS(X) file and convert its rows into normalized students.

    Dispatches on the extension of *filename* (not *file_path*, which is a
    temp file) and raises ``ValueError`` for anything else.
    """
    extension = os.path.splitext(filename.lower())[1]

    if extension == ".csv":
        frame = pd.read_csv(file_path)
    elif extension in {".xlsx", ".xls"}:
        frame = pd.read_excel(file_path)
    else:
        raise ValueError("Unsupported spreadsheet format")

    return parse_students_from_dataframe(frame, template_fields=template_fields)
|
| 704 |
+
|
| 705 |
# -------------------------------------------------------------------------
|
| 706 |
# CORE LOGIC: PDF PROCESSING (HYBRID TEXT + VISION)
|
| 707 |
# -------------------------------------------------------------------------
|
| 708 |
|
| 709 |
def process_pdf_page_as_image(model, pdf_path, page_num):
    """Convert specific PDF page to image and process with Vision.

    Args:
        model: Configured Gemini model instance.
        pdf_path: Path to the PDF on disk.
        page_num: 1-based page number to render.

    Returns:
        List of extracted transactions (possibly empty).

    Raises:
        ImportError: When pdf2image/poppler support is unavailable.
    """
    if not PDF_IMAGE_SUPPORT:
        raise ImportError("pdf2image/poppler not installed")

    # Render only the requested page to keep memory bounded.
    images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
    if not images:
        return []

    result = call_gemini_with_retry(model, images[0], FINANCIAL_DOC_PROMPT)
    return result.get('transactions', [])
|
| 720 |
|
|
|
|
| 723 |
"""
|
| 724 |
Smart PDF Processor:
|
| 725 |
1. Checks if empty.
|
| 726 |
+
2. Tries standard Text extraction.
|
| 727 |
+
3. If Text fails or is empty, falls back to Vision.
|
| 728 |
"""
|
| 729 |
temp_path = None
|
| 730 |
try:
|
|
|
|
| 731 |
if 'file' not in request.files:
|
| 732 |
return jsonify({'error': 'No file uploaded'}), 400
|
| 733 |
file = request.files['file']
|
| 734 |
if file.filename == '':
|
| 735 |
return jsonify({'error': 'No file selected'}), 400
|
| 736 |
|
|
|
|
| 737 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 738 |
file.save(tmp.name)
|
| 739 |
temp_path = tmp.name
|
| 740 |
|
|
|
|
| 741 |
if is_file_empty(temp_path):
|
| 742 |
+
return jsonify({'error': 'Uploaded file is empty'}), 400
|
| 743 |
|
| 744 |
model = configure_gemini(api_key)
|
| 745 |
all_transactions = []
|
| 746 |
+
|
|
|
|
| 747 |
try:
|
| 748 |
reader = pypdf.PdfReader(temp_path)
|
| 749 |
num_pages = len(reader.pages)
|
| 750 |
+
|
| 751 |
for i in range(num_pages):
|
| 752 |
logging.info(f"Processing page {i+1}/{num_pages}")
|
| 753 |
+
|
|
|
|
| 754 |
try:
|
| 755 |
text_content = reader.pages[i].extract_text()
|
| 756 |
except Exception:
|
| 757 |
+
text_content = ""
|
| 758 |
|
|
|
|
| 759 |
if text_content and len(text_content.strip()) > 50:
|
|
|
|
| 760 |
logging.info("Text detected. Using Text Strategy.")
|
| 761 |
result = call_gemini_with_retry(model, text_content, FINANCIAL_DOC_PROMPT)
|
| 762 |
else:
|
|
|
|
| 763 |
logging.info("Low text/Encryption detected. Switching to Vision Strategy.")
|
| 764 |
if PDF_IMAGE_SUPPORT:
|
| 765 |
+
txs = process_pdf_page_as_image(model, temp_path, i + 1)
|
|
|
|
|
|
|
| 766 |
all_transactions.extend(txs)
|
| 767 |
+
continue
|
| 768 |
else:
|
| 769 |
logging.warning("Cannot process scanned PDF - pdf2image missing.")
|
| 770 |
result = {"transactions": []}
|
|
|
|
| 773 |
all_transactions.extend(txs)
|
| 774 |
|
| 775 |
except pypdf.errors.PdfReadError:
|
|
|
|
| 776 |
logging.warning("pypdf failed to read file. Attempting full Vision fallback.")
|
| 777 |
if PDF_IMAGE_SUPPORT:
|
|
|
|
| 778 |
images = convert_from_path(temp_path)
|
| 779 |
for img in images:
|
| 780 |
result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
|
|
|
|
| 792 |
os.remove(temp_path)
|
| 793 |
|
| 794 |
# -------------------------------------------------------------------------
|
| 795 |
+
# TEXT & IMAGE ENDPOINTS
|
| 796 |
# -------------------------------------------------------------------------
|
| 797 |
|
| 798 |
@app.route('/process-text', methods=['POST'])
|
|
|
|
| 802 |
data = request.get_json()
|
| 803 |
if not data or 'text' not in data:
|
| 804 |
return jsonify({'error': 'No text provided'}), 400
|
| 805 |
+
|
| 806 |
text_input = data['text']
|
| 807 |
if not text_input.strip():
|
| 808 |
+
return jsonify({'error': 'Text input cannot be empty'}), 400
|
| 809 |
+
|
| 810 |
model = configure_gemini(api_key)
|
|
|
|
| 811 |
prompt = get_text_prompt_with_fallback_date()
|
| 812 |
+
|
| 813 |
result = call_gemini_with_retry(model, text_input, prompt)
|
| 814 |
return jsonify({'transactions': result.get('transactions', [])})
|
| 815 |
+
|
| 816 |
except Exception as e:
|
| 817 |
logging.error(f"Error: {e}")
|
| 818 |
return jsonify({'error': str(e)}), 500
|
|
|
|
| 825 |
if 'file' not in request.files:
|
| 826 |
return jsonify({'error': 'No file uploaded'}), 400
|
| 827 |
file = request.files['file']
|
| 828 |
+
|
|
|
|
| 829 |
file.seek(0, os.SEEK_END)
|
| 830 |
size = file.tell()
|
| 831 |
file.seek(0)
|
|
|
|
| 837 |
temp_path = tmp.name
|
| 838 |
|
| 839 |
model = configure_gemini(api_key)
|
|
|
|
|
|
|
| 840 |
img = Image.open(temp_path)
|
|
|
|
|
|
|
| 841 |
result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
|
| 842 |
+
|
| 843 |
return jsonify({'transactions': result.get('transactions', [])})
|
| 844 |
+
|
| 845 |
except Exception as e:
|
| 846 |
logging.error(f"Error: {e}")
|
| 847 |
return jsonify({'error': str(e)}), 500
|
|
|
|
| 849 |
if temp_path and os.path.exists(temp_path):
|
| 850 |
os.remove(temp_path)
|
| 851 |
|
| 852 |
+
# -------------------------------------------------------------------------
|
| 853 |
+
# STUDENT IMPORT ENDPOINTS
|
| 854 |
+
# -------------------------------------------------------------------------
|
| 855 |
+
|
| 856 |
+
@app.route('/api/customers/parse-students-images', methods=['POST'])
def parse_students_images():
    """
    Parse uploaded documents into normalized, validated student rows.

    Supports:
    - images
    - PDFs
    - CSV
    - XLSX
    - camera-captured images

    multipart/form-data:
    - files
    - template_fields (JSON string)

    Response JSON: "students" (annotated rows), "summary" (counts),
    "file_summaries" (per-file status), "validation_errors".
    """
    # Every saved upload is tracked here so the finally block can always
    # remove it, even when a later file fails mid-loop.
    temp_paths = []

    try:
        uploaded_files = request.files.getlist("files")
        if not uploaded_files:
            return jsonify({"error": "No files uploaded"}), 400

        # template_fields arrives as a JSON string in the form data;
        # parse_json_safely falls back to {} on bad input.
        template_fields = parse_json_safely(request.form.get("template_fields"), default={})
        model = configure_gemini(api_key)

        all_students = []
        file_summaries = []

        for uploaded_file in uploaded_files:
            if not uploaded_file or uploaded_file.filename == "":
                continue

            # Unsupported types are reported, not silently dropped.
            if not allowed_student_import_file(uploaded_file.filename):
                file_summaries.append({
                    "file": uploaded_file.filename,
                    "students_extracted": 0,
                    "status": "skipped",
                    "reason": "unsupported file type"
                })
                continue

            # Keep the original suffix so downstream parsers can dispatch
            # on extension; delete=False because the file is consumed
            # after the context manager closes it.
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix=os.path.splitext(uploaded_file.filename)[1]
            ) as tmp:
                uploaded_file.save(tmp.name)
                temp_paths.append(tmp.name)
                temp_path = tmp.name

            if os.path.getsize(temp_path) == 0:
                file_summaries.append({
                    "file": uploaded_file.filename,
                    "students_extracted": 0,
                    "status": "skipped",
                    "reason": "empty file"
                })
                continue

            ext = os.path.splitext(uploaded_file.filename.lower())[1]
            parsed_students = []

            # Dispatch by extension: Vision for images, hybrid for PDFs,
            # pandas for spreadsheets.
            if ext in {".jpg", ".jpeg", ".png", ".webp"}:
                parsed_students = parse_students_from_image_file(
                    model, temp_path, template_fields=template_fields
                )
            elif ext == ".pdf":
                parsed_students = parse_students_from_pdf(
                    model, temp_path, template_fields=template_fields
                )
            elif ext in {".csv", ".xlsx", ".xls"}:
                parsed_students = read_spreadsheet_students(
                    temp_path, uploaded_file.filename, template_fields=template_fields
                )

            file_summaries.append({
                "file": uploaded_file.filename,
                "students_extracted": len(parsed_students),
                "status": "processed"
            })

            all_students.extend(parsed_students)

        # Dedupe across ALL files in the batch, then validate once.
        all_students = dedupe_students(all_students)
        validated_students, validation_errors = validate_student_records(all_students)

        valid_students = [s for s in validated_students if s["_valid"]]
        invalid_students = [s for s in validated_students if not s["_valid"]]

        return jsonify({
            "students": validated_students,
            "summary": {
                "files_received": len(uploaded_files),
                "files_processed": len([x for x in file_summaries if x["status"] == "processed"]),
                "total_students_extracted": len(all_students),
                "valid_students": len(valid_students),
                "invalid_students": len(invalid_students)
            },
            "file_summaries": file_summaries,
            "validation_errors": validation_errors
        })

    except Exception as e:
        logging.error(f"Student import server error: {e}")
        return jsonify({"error": str(e)}), 500

    finally:
        # Best-effort cleanup of every temp file created this request.
        for path in temp_paths:
            try:
                if path and os.path.exists(path):
                    os.remove(path)
            except Exception:
                pass
|
| 967 |
+
|
| 968 |
+
@app.route('/api/customers/validate-students-import', methods=['POST'])
def validate_students_import():
    """
    Re-validate already-parsed student rows from the preview table.

    Accepts JSON {"students": [...]}, runs each row through
    normalization, dedupe, and the business-rule validator, and returns
    the annotated rows plus an overall "valid" flag.
    """
    try:
        payload = request.get_json(silent=True) or {}
        students = payload.get("students", [])

        if not isinstance(students, list):
            return jsonify({"error": "students must be an array"}), 400

        normalized = dedupe_students([
            normalize_student_record(student, template_fields={}, sequence=i + 1)
            for i, student in enumerate(students)
        ])
        validated_students, validation_errors = validate_student_records(normalized)

        return jsonify({
            "students": validated_students,
            "valid": len(validation_errors) == 0,
            "validation_errors": validation_errors
        })

    except Exception as e:
        logging.error(f"Student validation error: {e}")
        return jsonify({"error": str(e)}), 500
|
| 997 |
+
|
| 998 |
+
@app.route('/api/customers/parse-students-manual', methods=['POST'])
def parse_students_manual():
    """
    Handle manual row entry from the UI.

    Accepts JSON {"students": [...], "template_fields": {...}} and pushes
    the rows through the same normalization + dedupe + validation
    pipeline as the file-based import.
    """
    try:
        payload = request.get_json(silent=True) or {}
        students = payload.get("students", [])
        template_fields = payload.get("template_fields", {}) or {}

        if not isinstance(students, list):
            return jsonify({"error": "students must be an array"}), 400

        normalized = dedupe_students([
            normalize_student_record(student, template_fields=template_fields, sequence=i + 1)
            for i, student in enumerate(students)
        ])
        validated_students, validation_errors = validate_student_records(normalized)

        return jsonify({
            "students": validated_students,
            "validation_errors": validation_errors
        })

    except Exception as e:
        logging.error(f"Manual student parse error: {e}")
        return jsonify({"error": str(e)}), 500
|
| 1027 |
+
|
| 1028 |
+
# -------------------------------------------------------------------------
|
| 1029 |
+
# OTHER ENDPOINTS
|
| 1030 |
+
# -------------------------------------------------------------------------
|
| 1031 |
+
|
| 1032 |
@app.route('/transaction-types', methods=['GET'])
|
| 1033 |
def get_transaction_types():
|
| 1034 |
"""Return available transaction types and their categories."""
|
|
|
|
| 1035 |
transaction_types = {
|
| 1036 |
"types": [
|
| 1037 |
{
|
|
|
|
| 1105 |
return jsonify({
|
| 1106 |
'status': 'healthy',
|
| 1107 |
'timestamp': datetime.now().isoformat(),
|
| 1108 |
+
'version': '2.3.0',
|
| 1109 |
'vision_support': PDF_IMAGE_SUPPORT
|
| 1110 |
})
|
| 1111 |
|
| 1112 |
if __name__ == '__main__':
    # NOTE(review): debug=True combined with host 0.0.0.0 exposes the
    # Werkzeug interactive debugger to the network — confirm this entry
    # point is only used for local development, never production.
    app.run(debug=True, host="0.0.0.0", port=7860)
|