rairo committed on
Commit
c997e02
·
verified ·
1 Parent(s): a14a444

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +258 -254
main.py CHANGED
@@ -5,286 +5,342 @@ import re
5
  import tempfile
6
  import time
7
  from datetime import datetime
 
 
 
8
  from flask import Flask, request, jsonify
9
  from flask_cors import CORS
10
  import pandas as pd
11
  import pypdf
12
  import google.generativeai as genai
 
 
 
 
 
 
 
 
 
13
 
14
  # Setup logging
15
- logging.basicConfig(level=logging.INFO)
16
 
17
  app = Flask(__name__)
18
- CORS(app) # Enable CORS for all routes
19
 
20
  # Get API key securely
21
  api_key = os.getenv('Gemini')
22
  if not api_key:
23
- raise ValueError("Gemini API key not found in environment variables")
 
24
 
25
  def configure_gemini(api_key):
26
  """Configure Gemini AI model."""
27
  try:
28
  genai.configure(api_key=api_key)
 
29
  return genai.GenerativeModel('gemini-2.0-flash')
30
  except Exception as e:
31
  logging.error(f"Error configuring Gemini: {str(e)}")
32
  raise
33
 
34
def read_pdf_pages(file_path):
    """Return the extracted text of every page in a PDF, one string per page.

    Pages for which ``extract_text()`` yields a falsy value contribute an
    empty string, so the result always has one entry per page.

    Any exception raised while opening or parsing the file is logged and
    re-raised unchanged.
    """
    try:
        with open(file_path, 'rb') as pdf_file:
            document = pypdf.PdfReader(pdf_file)
            # Extraction must happen while the file handle is still open.
            return [(page.extract_text() or "") for page in document.pages]
    except Exception as err:
        logging.error(f"Error reading PDF: {str(err)}")
        raise
47
 
48
- PROMPT = """Analyze this bank statement and extract transactions in JSON format with these fields:
49
- - Date (format DD/MM/YYYY)
50
- - Description
51
- - Amount (just the integer value)
52
- - Type (categorize into one of the following based on the transaction nature):
53
- * 'income' - money received from customers, sales, services rendered
54
- * 'expense' - operational costs, purchases, payments made
55
- * 'asset' - purchase of equipment, property, vehicles, or other assets
56
- * 'liability' - taking on debt, loans received, credit facilities
57
- * 'equity' - owner investments, capital contributions, retained earnings transfers
58
- * 'transfer' - money moved between own accounts, internal transfers
59
- * 'investment' - securities purchases, investment account funding, portfolio additions
60
- * 'loan_repayment' - paying back borrowed money, loan principal payments
61
- * 'capital_injection' - owner or investor adding money to the business
62
- - Customer Name (Only if Type is 'income' and if no name is extracted write 'general income'. For all other types, extract relevant party name or write 'N/A')
63
- - City (In address of bank statement)
64
- - Destination_of_funds (categorize based on Type and description):
65
- * If 'expense': Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses
66
- * If 'income': 'income'
67
- * If 'asset': Equipment, Property, Vehicles, Technology, Furniture, Other assets
68
- * If 'liability': Bank loan, Credit facility, Supplier credit, Other liabilities
69
- * If 'equity': Owner investment, Retained earnings, Share capital, Other equity
70
- * If 'transfer': Internal transfer
71
- * If 'investment': Securities, Mutual funds, Fixed deposits, Other investments
72
- * If 'loan_repayment': Loan repayment
73
- * If 'capital_injection': Capital injection
74
- - ignore opening or closing balances.
75
- - extract the amount in full including decimals.
76
- Return ONLY the raw JSON object, without any surrounding text, explanations, or markdown fences like ```json.
77
-
78
- Return ONLY valid JSON with this structure:
79
- {
80
- "transactions": [
81
  {
82
- "Date": "string",
83
- "Description": "string",
84
- "Customer_name": "string",
85
- "City": "string",
86
- "Amount": number,
87
- "Type": "string",
88
- "Destination_of_funds": "string"
 
 
 
 
 
89
  }
90
- ]
91
- }"""
92
 
93
- def get_text_prompt_with_date():
94
- """Generate TEXT_PROMPT with current date context."""
 
 
 
 
 
 
95
  current_date = datetime.now().strftime("%d/%m/%Y")
96
- return f"""IMPORTANT: Today's date is {current_date}. If the user does not specify a date for a transaction, use {current_date} as the default date.
97
-
98
- Analyze the following natural language text and extract transactions in JSON format with these fields:
99
- - Date (format DD/MM/YYYY) - USE {current_date} IF NO DATE IS SPECIFIED
100
- - Description
101
- - Amount (just the integer value)
102
- - Type (categorize into one of the following based on the transaction nature):
103
- * 'income' - money received from customers, sales, services rendered
104
- * 'expense' - operational costs, purchases, payments made
105
- * 'asset' - purchase of equipment, property, vehicles, or other assets
106
- * 'liability' - taking on debt, loans received, credit facilities
107
- * 'equity' - owner investments, capital contributions, retained earnings transfers
108
- * 'transfer' - money moved between own accounts, internal transfers
109
- * 'investment' - securities purchases, investment account funding, portfolio additions
110
- * 'loan_repayment' - paying back borrowed money, loan principal payments
111
- * 'capital_injection' - owner or investor adding money to the business
112
- - Customer Name (Only if Type is 'income' and if no name is extracted write 'general income'. For all other types, extract relevant party name or write 'N/A')
113
- - City (extract from any address information provided or write 'N/A' if not available)
114
- - Destination_of_funds (categorize based on Type and description):
115
- * If 'expense': Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses
116
- * If 'income': 'income'
117
- * If 'asset': Equipment, Property, Vehicles, Technology, Furniture, Other assets
118
- * If 'liability': Bank loan, Credit facility, Supplier credit, Other liabilities
119
- * If 'equity': Owner investment, Retained earnings, Share capital, Other equity
120
- * If 'transfer': Internal transfer
121
- * If 'investment': Securities, Mutual funds, Fixed deposits, Other investments
122
- * If 'loan_repayment': Loan repayment
123
- * If 'capital_injection': Capital injection
124
- - ignore opening or closing balances.
125
-
126
- Return ONLY valid JSON with this structure:
127
- {{
128
- "transactions": [
129
- {{
130
- "Date": "string",
131
- "Description": "string",
132
- "Customer_name": "string",
133
- "City": "string",
134
- "Amount": number,
135
- "Type": "string",
136
- "Destination_of_funds": "string"
137
- }}
138
- ]
139
- }}
140
- important: Return an empty array if no transactions are in the text. Do not make up false data.
141
  """
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def repair_json_with_gemini(model, broken_json_string):
144
- """Makes a second API call to Gemini to fix a broken JSON string."""
145
- logging.info("Attempting to repair broken JSON with another Gemini call...")
146
- repair_prompt = f"""The following text is a JSON object that is syntactically incorrect.
147
- It might have missing commas, brackets, or other errors.
148
- Please fix the syntax to make it a valid JSON object.
149
- Return ONLY the corrected, raw JSON object and nothing else.
150
-
151
- Broken JSON:
152
- {broken_json_string}
153
- """
154
  try:
155
  resp = model.generate_content(repair_prompt)
156
- return resp.text
157
  except Exception as e:
158
- logging.error(f"Error during JSON repair call: {e}")
159
- raise ValueError("Failed to repair the JSON string.")
160
 
161
def call_gemini_with_retry_custom(model, text, prompt, retries=3, backoff_factor=2):
    """Send ``[prompt, text]`` to Gemini and return the parsed JSON reply.

    If the first reply cannot be parsed, one repair round-trip is made via
    ``repair_json_with_gemini`` and the repaired text is parsed instead.

    Args:
        model: Object exposing ``generate_content``.
        text: Content to analyse.
        prompt: Instruction prompt sent alongside ``text``.
        retries: Maximum number of attempts.
        backoff_factor: Base of the exponential wait (``backoff_factor ** attempt``
            seconds) applied after a rate-limit error.

    Raises:
        Exception: The last error is re-raised when it is not a rate-limit
            error, or when the attempts are exhausted.
    """
    for attempt in range(1, retries + 1):
        try:
            resp = model.generate_content([prompt, text])
            response_text = resp.text

            try:
                # First attempt to parse the original response
                return extract_json_from_response(response_text)
            except Exception:
                # If parsing fails, trigger the repair process
                logging.warning("Initial JSON parsing failed. Attempting repair.")
                repaired_text = repair_json_with_gemini(model, response_text)
                return extract_json_from_response(repaired_text)
        except Exception as e:
            msg = str(e)
            # Evaluate the rate-limit test on its own: `A or B and C` parses as
            # `A or (B and C)`, which let a '429' on the final attempt sleep
            # and then fall out of the loop, returning None instead of raising.
            rate_limited = '429' in msg or 'RateLimit' in msg
            if rate_limited and attempt < retries:
                wait = backoff_factor ** attempt
                logging.warning(f"Rate limit hit, retrying in {wait}s (attempt {attempt}/{retries})")
                time.sleep(wait)
            else:
                logging.error(f"Error processing with Gemini after retries: {msg}")
                raise
184
 
185
- def call_gemini_with_retry(model, text, retries=3, backoff_factor=2):
186
- """Call Gemini with retries, now with JSON repair logic."""
187
- # This function now simply calls the custom one with the default PROMPT
188
- return call_gemini_with_retry_custom(model, text, PROMPT, retries, backoff_factor)
189
 
190
def extract_json_from_response(response_text):
    """Pull a JSON object out of an LLM reply and return it parsed.

    A fenced json block is preferred; otherwise the span from the first '{'
    to the last '}' is used.

    Raises:
        ValueError: When no brace-delimited span exists, or when the
            selected span is not valid JSON.
    """
    fenced = re.search(r'```json\s*(\{.*?\})\s*```', response_text, re.DOTALL)
    if fenced is not None:
        json_string = fenced.group(1)
    else:
        # No markdown fence: fall back to the outermost brace pair.
        bare = re.search(r'{.*}', response_text, re.DOTALL)
        if bare is None:
            raise ValueError("No valid JSON object found in the LLM response")
        json_string = bare.group(0)

    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError as e:
        logging.error(f"Failed to parse extracted JSON. Error: {e}")
        logging.error(f"Problematic JSON string was: {json_string}")
        raise ValueError(f"Could not parse JSON from LLM response: {e}")
    return parsed
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  @app.route('/process-pdf', methods=['POST'])
212
  def process_pdf():
213
- """Handle PDF upload, process it in page-chunks with Gemini, and aggregate results."""
 
 
 
 
 
 
214
  try:
 
215
  if 'file' not in request.files:
216
  return jsonify({'error': 'No file uploaded'}), 400
217
  file = request.files['file']
218
- if file.filename == '' or not file.filename.lower().endswith('.pdf'):
219
- return jsonify({'error': 'A valid PDF file must be uploaded'}), 400
220
 
221
- # Save to temp file
222
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
223
  file.save(tmp.name)
224
- file_path = tmp.name
 
 
 
 
225
 
 
 
 
 
226
  try:
227
- model = configure_gemini(api_key)
228
- pages = read_pdf_pages(file_path)
229
- all_transactions = []
230
-
231
- for idx, page_text in enumerate(pages, start=1):
232
- if not page_text.strip():
233
- continue
234
- logging.info(f"Processing page {idx}/{len(pages)}")
235
- result = call_gemini_with_retry(model, page_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  txs = result.get('transactions', [])
237
  all_transactions.extend(txs)
238
 
239
- return jsonify({'transactions': all_transactions})
240
- finally:
241
- os.remove(file_path)
 
 
 
 
 
 
 
 
 
 
242
 
243
- except ValueError as ve:
244
- logging.warning(f"Client error: {ve}")
245
- return jsonify({'error': str(ve)}), 400
246
  except Exception as e:
247
- logging.error(f"Internal server error: {e}")
248
- return jsonify({'error': 'Internal server error'}), 500
 
 
 
 
 
 
 
249
 
250
  @app.route('/process-text', methods=['POST'])
251
  def process_text():
252
- """Handle text input and extract transactions using Gemini."""
253
  try:
254
- # Get JSON data from request
255
  data = request.get_json()
256
  if not data or 'text' not in data:
257
  return jsonify({'error': 'No text provided'}), 400
258
 
259
  text_input = data['text']
260
-
261
  if not text_input.strip():
262
- return jsonify({'error': 'Text input cannot be empty'}), 400
263
 
264
- # Configure Gemini model
265
  model = configure_gemini(api_key)
 
 
266
 
267
- # Generate prompt with current date
268
- text_prompt = get_text_prompt_with_date()
269
 
270
- # Process the text with Gemini
271
- logging.info("Processing text input for transaction extraction")
272
- result = call_gemini_with_retry_custom(model, text_input, text_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- transactions = result.get('transactions', [])
 
275
 
276
- return jsonify({'transactions': transactions})
 
277
 
278
- except ValueError as ve:
279
- logging.warning(f"Client error: {ve}")
280
- return jsonify({'error': str(ve)}), 400
281
  except Exception as e:
282
- logging.error(f"Internal server error: {e}")
283
- return jsonify({'error': 'Internal server error'}), 500
 
 
 
284
 
285
  @app.route('/transaction-types', methods=['GET'])
286
  def get_transaction_types():
287
  """Return available transaction types and their categories."""
 
288
  transaction_types = {
289
  "types": [
290
  {
@@ -353,67 +409,15 @@ def get_transaction_types():
353
  }
354
  return jsonify(transaction_types)
355
 
356
- @app.route('/process-image', methods=['POST'])
357
- def process_image():
358
- """Handle image upload, process it with Gemini Vision, and extract transactions."""
359
- try:
360
- if 'file' not in request.files:
361
- return jsonify({'error': 'No file uploaded'}), 400
362
-
363
- file = request.files['file']
364
- if file.filename == '':
365
- return jsonify({'error': 'No file selected'}), 400
366
-
367
- # Check if file is an image
368
- allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'}
369
- file_ext = os.path.splitext(file.filename)[1].lower()
370
- if file_ext not in allowed_extensions:
371
- return jsonify({'error': 'Invalid file type. Supported formats: JPG, JPEG, PNG, GIF, BMP, WEBP'}), 400
372
-
373
- # Save to temp file
374
- with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
375
- file.save(tmp.name)
376
- file_path = tmp.name
377
-
378
- try:
379
- model = configure_gemini(api_key)
380
-
381
- logging.info(f"Processing image file: {file.filename}")
382
-
383
- # Read image file as bytes
384
- import PIL.Image
385
- img = PIL.Image.open(file_path)
386
-
387
- # Generate content with the image and prompt
388
- response = model.generate_content([PROMPT, img])
389
-
390
- # Parse the response
391
- result = extract_json_from_response(response.text)
392
- transactions = result.get('transactions', [])
393
-
394
- return jsonify({'transactions': transactions})
395
-
396
- finally:
397
- # Clean up temp file
398
- if os.path.exists(file_path):
399
- os.remove(file_path)
400
-
401
- except ValueError as ve:
402
- logging.warning(f"Client error: {ve}")
403
- return jsonify({'error': str(ve)}), 400
404
- except Exception as e:
405
- logging.error(f"Internal server error: {e}")
406
- return jsonify({'error': 'Internal server error'}), 500
407
-
408
-
409
  @app.route('/health', methods=['GET'])
410
  def health_check():
411
- """Health check endpoint."""
412
  return jsonify({
413
  'status': 'healthy',
414
  'timestamp': datetime.now().isoformat(),
415
- 'version': '2.0.0'
 
416
  })
417
 
418
  if __name__ == '__main__':
 
419
  app.run(debug=True, host="0.0.0.0", port=7860)
 
5
  import tempfile
6
  import time
7
  from datetime import datetime
8
+ from io import BytesIO
9
+
10
+ # Third-party imports
11
  from flask import Flask, request, jsonify
12
  from flask_cors import CORS
13
  import pandas as pd
14
  import pypdf
15
  import google.generativeai as genai
16
+ from PIL import Image
17
+
18
+ # specific import for image fallback
19
# Setup logging
# Configure the root logger BEFORE anything logs. The module-level
# logging.warning()/info() helpers implicitly call basicConfig() when the root
# logger has no handler, after which an explicit basicConfig() is a no-op and
# the INFO level / format below would never be applied.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# specific import for image fallback
try:
    from pdf2image import convert_from_path
    PDF_IMAGE_SUPPORT = True
except ImportError:
    PDF_IMAGE_SUPPORT = False
    logging.warning("pdf2image not installed. Scanned/Encrypted PDF fallback will not work.")

app = Flask(__name__)
CORS(app)

# Get API key securely
api_key = os.getenv('Gemini')
if not api_key:
    # No fallback key exists: the app still starts, but only a warning is
    # emitted here and the missing key is passed on to configure_gemini().
    logging.warning("Gemini API key not found in environment variables.")
37
 
38
def configure_gemini(api_key):
    """Register ``api_key`` with the Gemini SDK and return a model handle.

    Returns:
        A ``genai.GenerativeModel`` bound to 'gemini-2.0-flash'.

    Raises:
        Exception: Any SDK error is logged and re-raised unchanged.
    """
    try:
        genai.configure(api_key=api_key)
        # 2.0 Flash was picked by the author for its vision and long-context
        # capabilities.
        gemini_model = genai.GenerativeModel('gemini-2.0-flash')
    except Exception as err:
        logging.error(f"Error configuring Gemini: {str(err)}")
        raise
    return gemini_model
47
 
48
+ # -------------------------------------------------------------------------
49
+ # PROMPTS
50
+ # -------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
51
 
52
+ # Enhanced Prompt for General Financial Documents (Statements, Invoices, Receipts)
53
+ # Addresses Point 1 (Rounding/Dates) & Point 3 (Document Types)
54
+ FINANCIAL_DOC_PROMPT = """Analyze this financial document (which could be a Bank Statement, Invoice, Receipt, or Transaction List).
55
+ Extract all relevant transactions/items in JSON format.
56
+
57
+ RULES:
58
+ 1. **Dates**: Extract the date printed on the document. Format as DD/MM/YYYY.
59
+ - If the year is missing in the row, use the document's context (e.g., header date).
60
+ - Do NOT use the current date (today) unless the document explicitly says "Today".
61
+ 2. **Amounts**: Extract the EXACT amount including decimals. DO NOT ROUND.
62
+ 3. **Ignore**: Opening/Closing balances, page numbers, or cumulative running totals.
63
+
64
+ FIELDS TO EXTRACT:
65
+ - Date: string (DD/MM/YYYY)
66
+ - Description: string (Full description of item/transaction)
67
+ - Amount: number (Float, exact value)
68
+ - Type: string (Categorize exactly as one of: 'income', 'expense', 'asset', 'liability', 'equity', 'transfer', 'investment', 'loan_repayment', 'capital_injection')
69
+ - Customer_name: string (If 'income', name of payer. If 'expense', name of payee/vendor. Else 'N/A')
70
+ - City: string (Extract from address if present, else 'N/A')
71
+ - Document_Type: string (Infer: 'statement', 'invoice', 'receipt', 'transaction_list')
72
+ - Destination_of_funds: string (Categorize based on description. e.g., 'Salaries', 'Fuel', 'Rentals', 'Equipment', etc.)
73
+
74
+ RETURN STRUCTURE:
 
 
 
 
 
 
 
 
 
 
75
  {
76
+ "transactions": [
77
+ {
78
+ "Date": "DD/MM/YYYY",
79
+ "Description": "Item Description",
80
+ "Customer_name": "Vendor or Payer",
81
+ "City": "City Name",
82
+ "Amount": 123.45,
83
+ "Type": "expense",
84
+ "Destination_of_funds": "Category",
85
+ "Document_Type": "invoice"
86
+ }
87
+ ]
88
  }
 
 
89
 
90
+ Return ONLY raw JSON. No markdown formatting.
91
+ """
92
+
93
def get_text_prompt_with_fallback_date():
    """Build the prompt used for raw text snippets.

    The general financial-document prompt is prefixed with today's date
    (DD/MM/YYYY) so the model has a fallback when the text carries no date
    of its own. This variant is meant for raw text only, not for PDFs.
    """
    today = datetime.now().strftime("%d/%m/%Y")
    header = f"IMPORTANT: Today's date is {today}."
    guidance = (
        f"If the text below does not specify a year or date, reasonable assume {today} "
        "context, but prefer explicit dates in text."
    )
    return f"{header}\n{guidance}\n\n{FINANCIAL_DOC_PROMPT}\n"
104
 
105
+ # -------------------------------------------------------------------------
106
+ # HELPER FUNCTIONS
107
+ # -------------------------------------------------------------------------
108
+
109
def extract_json_from_response(response_text):
    """Extract and parse the JSON payload from a Gemini reply.

    Markdown code fences are stripped first. The span from the first '{' to
    the last '}' is then tried (or the whole cleaned text when no braces are
    present). If that span is not valid JSON, a second pass decodes the first
    complete JSON value starting at the first '{', which tolerates trailing
    prose that happens to contain a closing brace.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: When nothing parses. The exception message is the
            candidate JSON string itself, which callers forward to the
            repair step.
    """
    # Remove markdown code blocks
    cleaned_text = re.sub(r'```json\s*', '', response_text)
    cleaned_text = re.sub(r'```\s*', '', cleaned_text)

    # Find JSON object; fall back to the whole text when there are no braces.
    match = re.search(r'(\{.*\})', cleaned_text, re.DOTALL)
    json_string = match.group(1) if match else cleaned_text

    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        pass

    # Second chance: the greedy span above over-captures when text after the
    # JSON contains a '}', so decode just the first complete value instead.
    start = cleaned_text.find('{')
    if start != -1:
        try:
            parsed, _end = json.JSONDecoder().raw_decode(cleaned_text, start)
            return parsed
        except json.JSONDecodeError:
            pass

    logging.warning("JSON parsing failed, attempting repair.")
    raise ValueError(json_string)  # Pass invalid string to caller for repair
128
+
129
def repair_json_with_gemini(model, broken_json_string):
    """Ask Gemini to repair malformed JSON and return the parsed result.

    This is best-effort: any failure (the model call or parsing the repaired
    text) is logged and an empty ``{"transactions": []}`` is returned rather
    than raising.
    """
    repair_prompt = (
        "Fix this broken JSON string. Return ONLY valid JSON.\n"
        f"Broken JSON: {broken_json_string}"
    )
    try:
        repaired = model.generate_content(repair_prompt)
        return extract_json_from_response(repaired.text)
    except Exception as err:
        logging.error(f"JSON repair failed: {err}")
        return {"transactions": []}  # Fail safe
139
 
140
def call_gemini_with_retry(model, content, prompt, retries=2):
    """Run ``[prompt, content]`` through Gemini and return the parsed JSON.

    Args:
        model: Object exposing ``generate_content``.
        content: Can be a String (text) or a PIL.Image object (vision).
        prompt: Instruction prompt sent alongside ``content``.
        retries: Number of additional attempts after the first one.

    Returns:
        The parsed JSON reply. Unparseable replies go through one repair
        call. ``{"transactions": []}`` is returned when every attempt was
        rate limited.

    Raises:
        Exception: A non-rate-limit error on the final attempt is re-raised.
    """
    for attempt in range(retries + 1):
        is_last_attempt = attempt == retries
        try:
            response = model.generate_content([prompt, content])
            # Read `.text` outside the JSON-parsing try below: the accessor can
            # itself raise (e.g. ValueError on a blocked/empty response), and
            # that message must not be sent to the repair step as "broken JSON".
            response_text = response.text

            try:
                return extract_json_from_response(response_text)
            except ValueError as ve:
                # Value error here contains the broken JSON string
                return repair_json_with_gemini(model, str(ve))

        except Exception as e:
            if "429" in str(e) or "ResourceExhausted" in str(e):
                # Linear backoff; sleeping after the final attempt is pointless.
                if not is_last_attempt:
                    time.sleep(2 * (attempt + 1))
                continue
            logging.error(f"Gemini Error: {e}")
            if is_last_attempt:
                raise

    # Only reached when the last attempt was rate limited.
    logging.warning("Gemini rate limit persisted after %d attempts; returning no transactions.", retries + 1)
    return {"transactions": []}
 
 
 
167
 
168
def is_file_empty(file_path):
    """Return True when the file at ``file_path`` holds zero bytes."""
    size_in_bytes = os.stat(file_path).st_size
    return size_in_bytes == 0
 
 
 
 
 
 
 
 
 
171
 
172
+ # -------------------------------------------------------------------------
173
+ # CORE LOGIC: PDF PROCESSING (HYBRID TEXT + VISION)
174
+ # -------------------------------------------------------------------------
175
+
176
def process_pdf_page_as_image(model, pdf_path, page_num):
    """Render one PDF page to an image and extract transactions via vision.

    Args:
        model: Gemini model handle passed through to ``call_gemini_with_retry``.
        pdf_path: Path of the PDF on disk.
        page_num: 1-based page number to render.

    Returns:
        The list under the reply's 'transactions' key, or ``[]`` when the
        page could not be rendered.

    Raises:
        ImportError: When pdf2image support is unavailable.
    """
    if not PDF_IMAGE_SUPPORT:
        raise ImportError("pdf2image/poppler not installed")

    # Render exactly one page per call so only a single image is held in RAM.
    rendered = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
    if rendered:
        page_image = rendered[0]
        extraction = call_gemini_with_retry(model, page_image, FINANCIAL_DOC_PROMPT)
        return extraction.get('transactions', [])
    return []
190
 
191
@app.route('/process-pdf', methods=['POST'])
def process_pdf():
    """Extract transactions from an uploaded PDF.

    Strategy:
    1. Reject missing or empty uploads with HTTP 400.
    2. For each page, try plain text extraction (fast/cheap).
    3. If a page has 50 or fewer characters of text (likely scanned) or its
       extraction fails, render that page and use the vision path.
    4. If pypdf cannot read the file at all, run every page through vision.

    Returns:
        JSON ``{'transactions': [...]}`` on success, or ``{'error': ...}``
        with status 400 (validation) or 500 (any other failure).
    """
    temp_path = None
    try:
        # 1. Validation
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        # Save Temp
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path before saving so the finally-block below still
            # removes the temp file when the save itself fails.
            temp_path = tmp.name
            file.save(temp_path)

        # Point 2: Empty File Check
        if is_file_empty(temp_path):
            return jsonify({'error': 'Uploaded file is empty'}), 400

        model = configure_gemini(api_key)
        all_transactions = []

        # Determine strategy: Try reading PDF structure first
        try:
            reader = pypdf.PdfReader(temp_path)
            num_pages = len(reader.pages)

            for i in range(num_pages):
                logging.info(f"Processing page {i+1}/{num_pages}")

                # Attempt Text Extraction
                try:
                    text_content = reader.pages[i].extract_text()
                except Exception:
                    text_content = ""  # Force fallback if extraction fails

                # If there are 50 or fewer characters, it's likely a scan or image-heavy.
                if text_content and len(text_content.strip()) > 50:
                    # Strategy A: Text Mode
                    logging.info("Text detected. Using Text Strategy.")
                    result = call_gemini_with_retry(model, text_content, FINANCIAL_DOC_PROMPT)
                    txs = result.get('transactions', [])
                else:
                    # Strategy B: Vision Fallback (Point 4)
                    logging.info("Low text/Encryption detected. Switching to Vision Strategy.")
                    if PDF_IMAGE_SUPPORT:
                        # pypdf pages are 0-indexed; the image helper expects
                        # a 1-based page number.
                        txs = process_pdf_page_as_image(model, temp_path, i + 1)
                    else:
                        logging.warning("Cannot process scanned PDF - pdf2image missing.")
                        txs = []

                all_transactions.extend(txs)

        except pypdf.errors.PdfReadError:
            # If pypdf fails completely (e.g., highly corrupted or weird encryption), try Vision on whole file
            logging.warning("pypdf failed to read file. Attempting full Vision fallback.")
            if not PDF_IMAGE_SUPPORT:
                raise ValueError("PDF is unreadable and Vision fallback is unavailable.")
            # Warning: Processing all pages as images might be slow
            for img in convert_from_path(temp_path):
                result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
                all_transactions.extend(result.get('transactions', []))

        return jsonify({'transactions': all_transactions})

    except Exception as e:
        # NOTE(review): the raw exception text is returned to the client;
        # confirm this is acceptable for this API.
        logging.error(f"Server Error: {e}")
        return jsonify({'error': str(e)}), 500
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
275
+
276
+ # -------------------------------------------------------------------------
277
+ # TEXT & IMAGE ENDPOINTS (UPDATED)
278
+ # -------------------------------------------------------------------------
279
 
280
@app.route('/process-text', methods=['POST'])
def process_text():
    """Extract transactions from raw text posted as JSON ``{"text": "..."}``.

    Returns:
        JSON ``{'transactions': [...]}`` on success; ``{'error': ...}`` with
        status 400 for a missing, malformed, non-string or blank payload, and
        status 500 for any processing failure.
    """
    try:
        # silent=True yields None for a malformed or non-JSON body instead of
        # raising, so bad requests get a 400 rather than the generic 500 below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'text' not in data:
            return jsonify({'error': 'No text provided'}), 400

        text_input = data['text']
        if not isinstance(text_input, str):
            return jsonify({'error': 'Text input must be a string'}), 400
        if not text_input.strip():
            return jsonify({'error': 'Text input cannot be empty'}), 400  # Point 2

        model = configure_gemini(api_key)
        # Use specific prompt with date fallback for raw text
        prompt = get_text_prompt_with_fallback_date()

        result = call_gemini_with_retry(model, text_input, prompt)
        return jsonify({'transactions': result.get('transactions', [])})

    except Exception as e:
        logging.error(f"Error: {e}")
        return jsonify({'error': str(e)}), 500
302
+
303
@app.route('/process-image', methods=['POST'])
def process_image():
    """Extract transactions from an uploaded image (receipts, invoice photos).

    Returns:
        JSON ``{'transactions': [...]}`` on success; ``{'error': ...}`` with
        status 400 for a missing or empty upload, and status 500 for any
        processing failure.
    """
    temp_path = None
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400
        file = request.files['file']

        # Point 2: Empty check - measure the upload, then rewind for saving.
        file.seek(0, os.SEEK_END)
        size = file.tell()
        file.seek(0)
        if size == 0:
            return jsonify({'error': 'File is empty'}), 400

        # A missing filename must not crash splitext(); fall back to no suffix.
        upload_suffix = os.path.splitext(file.filename or '')[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix) as tmp:
            # Record the path before saving so cleanup still runs if the save fails.
            temp_path = tmp.name
            file.save(temp_path)

        model = configure_gemini(api_key)

        # Load image with PIL; the context manager closes the underlying file
        # before the temp file is removed in the finally-block.
        with Image.open(temp_path) as img:
            # Use the General Financial Prompt
            result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)

        return jsonify({'transactions': result.get('transactions', [])})

    except Exception as e:
        logging.error(f"Error: {e}")
        return jsonify({'error': str(e)}), 500
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
339
 
340
  @app.route('/transaction-types', methods=['GET'])
341
  def get_transaction_types():
342
  """Return available transaction types and their categories."""
343
+ # Kept identical for backwards compatibility
344
  transaction_types = {
345
  "types": [
346
  {
 
409
  }
410
  return jsonify(transaction_types)
411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
@app.route('/health', methods=['GET'])
def health_check():
    """Report service status, current timestamp, version and vision support."""
    payload = {
        'status': 'healthy',
        'timestamp': datetime.now().isoformat(),
        'version': '2.1.0',
        'vision_support': PDF_IMAGE_SUPPORT,
    }
    return jsonify(payload)
420
 
421
if __name__ == '__main__':
    # Ensure this port matches your server configuration
    # Debug mode turns on Werkzeug's interactive debugger, which allows code
    # execution from the browser. Because the server binds to 0.0.0.0, it is
    # opt-in via the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.getenv('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)