rairo committed on
Commit
0237009
·
verified ·
1 Parent(s): e184be2

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +254 -259
main.py CHANGED
@@ -5,342 +5,287 @@ import re
5
  import tempfile
6
  import time
7
  from datetime import datetime
8
- from io import BytesIO
9
-
10
- # Third-party imports
11
  from flask import Flask, request, jsonify
12
  from flask_cors import CORS
13
  import pandas as pd
14
  import pypdf
15
  import google.generativeai as genai
16
- from PIL import Image
17
-
18
- # specific import for image fallback
19
- try:
20
- from pdf2image import convert_from_path
21
- PDF_IMAGE_SUPPORT = True
22
- except ImportError:
23
- PDF_IMAGE_SUPPORT = False
24
- logging.warning("pdf2image not installed. Scanned/Encrypted PDF fallback will not work.")
25
 
26
  # Setup logging
27
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
28
 
29
  app = Flask(__name__)
30
- CORS(app)
31
 
32
  # Get API key securely
33
  api_key = os.getenv('Gemini')
34
  if not api_key:
35
- # Fallback for local testing if env var not set, though env var is preferred
36
- logging.warning("Gemini API key not found in environment variables.")
37
 
38
  def configure_gemini(api_key):
39
  """Configure Gemini AI model."""
40
  try:
41
  genai.configure(api_key=api_key)
42
- # Using 2.0 Flash as it has superior vision and long-context capabilities
43
  return genai.GenerativeModel('gemini-2.0-flash')
44
  except Exception as e:
45
  logging.error(f"Error configuring Gemini: {str(e)}")
46
  raise
47
 
48
- # -------------------------------------------------------------------------
49
- # PROMPTS
50
- # -------------------------------------------------------------------------
51
-
52
- # Enhanced Prompt for General Financial Documents (Statements, Invoices, Receipts)
53
- # Addresses Point 1 (Rounding/Dates) & Point 3 (Document Types)
54
- FINANCIAL_DOC_PROMPT = """Analyze this financial document (which could be a Bank Statement, Invoice, Receipt, or Transaction List).
55
- Extract all relevant transactions/items in JSON format.
56
-
57
- RULES:
58
- 1. **Dates**: Extract the date printed on the document. Format as DD/MM/YYYY.
59
- - If the year is missing in the row, use the document's context (e.g., header date).
60
- - Do NOT use the current date (today) unless the document explicitly says "Today".
61
- 2. **Amounts**: Extract the EXACT amount including decimals. DO NOT ROUND.
62
- 3. **Ignore**: Opening/Closing balances, page numbers, or cumulative running totals.
63
-
64
- FIELDS TO EXTRACT:
65
- - Date: string (DD/MM/YYYY)
66
- - Description: string (Full description of item/transaction)
67
- - Amount: number (Float, exact value)
68
- - Type: string (Categorize exactly as one of: 'income', 'expense', 'asset', 'liability', 'equity', 'transfer', 'investment', 'loan_repayment', 'capital_injection')
69
- - Customer_name: string (If 'income', name of payer. If 'expense', name of payee/vendor. Else 'N/A')
70
- - City: string (Extract from address if present, else 'N/A')
71
- - Document_Type: string (Infer: 'statement', 'invoice', 'receipt', 'transaction_list')
72
- - Destination_of_funds: string (Categorize based on description. e.g., 'Salaries', 'Fuel', 'Rentals', 'Equipment', etc.)
73
 
74
- RETURN STRUCTURE:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  {
76
- "transactions": [
77
- {
78
- "Date": "DD/MM/YYYY",
79
- "Description": "Item Description",
80
- "Customer_name": "Vendor or Payer",
81
- "City": "City Name",
82
- "Amount": 123.45,
83
- "Type": "expense",
84
- "Destination_of_funds": "Category",
85
- "Document_Type": "invoice"
86
- }
87
- ]
88
  }
 
 
89
 
90
- Return ONLY raw JSON. No markdown formatting.
91
- """
92
-
93
- def get_text_prompt_with_fallback_date():
94
- """
95
- Generate prompt for raw text snippets where context might be missing.
96
- Only allows current date fallback for raw text, not PDFs.
97
- """
98
  current_date = datetime.now().strftime("%d/%m/%Y")
99
- return f"""IMPORTANT: Today's date is {current_date}.
100
- If the text below does not specify a year or date, reasonable assume {current_date} context, but prefer explicit dates in text.
101
-
102
- {FINANCIAL_DOC_PROMPT}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
 
105
- # -------------------------------------------------------------------------
106
- # HELPER FUNCTIONS
107
- # -------------------------------------------------------------------------
108
-
109
- def extract_json_from_response(response_text):
110
- """Extract valid JSON from Gemini's response, handling Markdown fences."""
111
- # Remove markdown code blocks
112
- cleaned_text = re.sub(r'```json\s*', '', response_text)
113
- cleaned_text = re.sub(r'```\s*', '', cleaned_text)
114
-
115
- # Find JSON object
116
- match = re.search(r'(\{.*\})', cleaned_text, re.DOTALL)
117
- if match:
118
- json_string = match.group(1)
119
- else:
120
- # Fallback: assume the whole text is JSON
121
- json_string = cleaned_text
122
-
123
- try:
124
- return json.loads(json_string)
125
- except json.JSONDecodeError:
126
- logging.warning("JSON parsing failed, attempting repair.")
127
- raise ValueError(json_string) # Pass invalid string to caller for repair
128
-
129
  def repair_json_with_gemini(model, broken_json_string):
130
- """Uses Gemini to fix broken JSON syntax."""
131
- repair_prompt = f"""Fix this broken JSON string. Return ONLY valid JSON.
132
- Broken JSON: {broken_json_string}"""
 
 
 
 
 
 
 
133
  try:
134
  resp = model.generate_content(repair_prompt)
135
- return extract_json_from_response(resp.text)
136
  except Exception as e:
137
- logging.error(f"JSON repair failed: {e}")
138
- return {"transactions": []} # Fail safe
139
 
140
- def call_gemini_with_retry(model, content, prompt, retries=2):
141
- """
142
- Generic runner for Gemini.
143
- Args:
144
- content: Can be a String (text) or a PIL.Image object (vision).
145
- """
146
- for attempt in range(retries + 1):
147
  try:
148
- # Gemini Python SDK handles [Prompt, Image] or [Prompt, Text] automatically
149
- response = model.generate_content([prompt, content])
150
-
151
  try:
152
- return extract_json_from_response(response.text)
153
- except ValueError as ve:
154
- # Value error here contains the broken JSON string
155
- broken_json = str(ve)
156
- return repair_json_with_gemini(model, broken_json)
157
-
 
158
  except Exception as e:
159
- if "429" in str(e) or "ResourceExhausted" in str(e):
160
- time.sleep(2 * (attempt + 1))
161
- continue
162
- logging.error(f"Gemini Error: {e}")
163
- if attempt == retries:
 
 
164
  raise
165
 
166
- return {"transactions": []}
167
-
168
- def is_file_empty(file_path):
169
- """Check if file is empty."""
170
- return os.path.getsize(file_path) == 0
171
-
172
- # -------------------------------------------------------------------------
173
- # CORE LOGIC: PDF PROCESSING (HYBRID TEXT + VISION)
174
- # -------------------------------------------------------------------------
175
 
176
- def process_pdf_page_as_image(model, pdf_path, page_num):
177
- """Point 4: Convert specific PDF page to image and process with Vision."""
178
- if not PDF_IMAGE_SUPPORT:
179
- raise ImportError("pdf2image/poppler not installed")
 
 
 
 
 
 
 
 
180
 
181
- # Convert specific page to image
182
- # first_page=page_num, last_page=page_num ensures we only convert 1 page at a time to save RAM
183
- images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
184
- if not images:
185
- return []
186
-
187
- # Process the image
188
- result = call_gemini_with_retry(model, images[0], FINANCIAL_DOC_PROMPT)
189
- return result.get('transactions', [])
190
 
191
  @app.route('/process-pdf', methods=['POST'])
192
  def process_pdf():
193
- """
194
- Smart PDF Processor:
195
- 1. Checks if empty.
196
- 2. Tries standard Text extraction (Fast/Cheap).
197
- 3. If Text fails (Encryption) or is empty (Scanned), falls back to Vision (Slow/Powerful).
198
- """
199
- temp_path = None
200
  try:
201
- # 1. Validation
202
  if 'file' not in request.files:
203
  return jsonify({'error': 'No file uploaded'}), 400
204
  file = request.files['file']
205
- if file.filename == '':
206
- return jsonify({'error': 'No file selected'}), 400
207
 
208
- # Save Temp
209
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
210
  file.save(tmp.name)
211
- temp_path = tmp.name
212
-
213
- # Point 2: Empty File Check
214
- if is_file_empty(temp_path):
215
- return jsonify({'error': 'Uploaded file is empty'}), 400
216
 
217
- model = configure_gemini(api_key)
218
- all_transactions = []
219
-
220
- # Determine strategy: Try reading PDF structure first
221
  try:
222
- reader = pypdf.PdfReader(temp_path)
223
- num_pages = len(reader.pages)
224
-
225
- for i in range(num_pages):
226
- logging.info(f"Processing page {i+1}/{num_pages}")
227
-
228
- # Attempt Text Extraction
229
- try:
230
- text_content = reader.pages[i].extract_text()
231
- except Exception:
232
- text_content = "" # Force fallback if extraction fails
233
-
234
- # LOGIC: Check if text is sufficient. If < 50 chars, it's likely a scan or image-heavy.
235
- if text_content and len(text_content.strip()) > 50:
236
- # Strategy A: Text Mode
237
- logging.info("Text detected. Using Text Strategy.")
238
- result = call_gemini_with_retry(model, text_content, FINANCIAL_DOC_PROMPT)
239
- else:
240
- # Strategy B: Vision Fallback (Point 4)
241
- logging.info("Low text/Encryption detected. Switching to Vision Strategy.")
242
- if PDF_IMAGE_SUPPORT:
243
- # Page numbers in pypdf are 0-indexed, pdf2image uses 1-based indexing often,
244
- # but convert_from_path handles slicing via first_page/last_page (1-based)
245
- txs = process_pdf_page_as_image(model, temp_path, i+1)
246
- all_transactions.extend(txs)
247
- continue # Skip the rest of loop
248
- else:
249
- logging.warning("Cannot process scanned PDF - pdf2image missing.")
250
- result = {"transactions": []}
251
-
252
  txs = result.get('transactions', [])
253
  all_transactions.extend(txs)
254
 
255
- except pypdf.errors.PdfReadError:
256
- # If pypdf fails completely (e.g., highly corrupted or weird encryption), try Vision on whole file
257
- logging.warning("pypdf failed to read file. Attempting full Vision fallback.")
258
- if PDF_IMAGE_SUPPORT:
259
- # Warning: Processing all pages as images might be slow
260
- images = convert_from_path(temp_path)
261
- for img in images:
262
- result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
263
- all_transactions.extend(result.get('transactions', []))
264
- else:
265
- raise ValueError("PDF is unreadable and Vision fallback is unavailable.")
266
-
267
- return jsonify({'transactions': all_transactions})
268
 
 
 
 
269
  except Exception as e:
270
- logging.error(f"Server Error: {e}")
271
- return jsonify({'error': str(e)}), 500
272
- finally:
273
- if temp_path and os.path.exists(temp_path):
274
- os.remove(temp_path)
275
-
276
- # -------------------------------------------------------------------------
277
- # TEXT & IMAGE ENDPOINTS (UPDATED)
278
- # -------------------------------------------------------------------------
279
 
280
  @app.route('/process-text', methods=['POST'])
281
  def process_text():
282
- """Handle raw text input."""
283
  try:
 
284
  data = request.get_json()
285
  if not data or 'text' not in data:
286
  return jsonify({'error': 'No text provided'}), 400
287
-
288
  text_input = data['text']
 
289
  if not text_input.strip():
290
- return jsonify({'error': 'Text input cannot be empty'}), 400 # Point 2
291
 
 
292
  model = configure_gemini(api_key)
293
- # Use specific prompt with date fallback for raw text
294
- prompt = get_text_prompt_with_fallback_date()
295
 
296
- result = call_gemini_with_retry(model, text_input, prompt)
297
- return jsonify({'transactions': result.get('transactions', [])})
298
-
299
- except Exception as e:
300
- logging.error(f"Error: {e}")
301
- return jsonify({'error': str(e)}), 500
302
-
303
- @app.route('/process-image', methods=['POST'])
304
- def process_image():
305
- """Handle raw image upload (Receipts, Photos of invoices)."""
306
- temp_path = None
307
- try:
308
- if 'file' not in request.files:
309
- return jsonify({'error': 'No file uploaded'}), 400
310
- file = request.files['file']
311
 
312
- # Point 2: Empty check
313
- file.seek(0, os.SEEK_END)
314
- size = file.tell()
315
- file.seek(0)
316
- if size == 0:
317
- return jsonify({'error': 'File is empty'}), 400
318
-
319
- with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as tmp:
320
- file.save(tmp.name)
321
- temp_path = tmp.name
322
-
323
- model = configure_gemini(api_key)
324
 
325
- # Load image with PIL
326
- img = Image.open(temp_path)
327
 
328
- # Use the General Financial Prompt
329
- result = call_gemini_with_retry(model, img, FINANCIAL_DOC_PROMPT)
330
 
331
- return jsonify({'transactions': result.get('transactions', [])})
332
-
 
333
  except Exception as e:
334
- logging.error(f"Error: {e}")
335
- return jsonify({'error': str(e)}), 500
336
- finally:
337
- if temp_path and os.path.exists(temp_path):
338
- os.remove(temp_path)
339
 
340
  @app.route('/transaction-types', methods=['GET'])
341
  def get_transaction_types():
342
  """Return available transaction types and their categories."""
343
- # Kept identical for backwards compatibility
344
  transaction_types = {
345
  "types": [
346
  {
@@ -409,15 +354,65 @@ def get_transaction_types():
409
  }
410
  return jsonify(transaction_types)
411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  @app.route('/health', methods=['GET'])
413
  def health_check():
 
414
  return jsonify({
415
  'status': 'healthy',
416
  'timestamp': datetime.now().isoformat(),
417
- 'version': '2.1.0',
418
- 'vision_support': PDF_IMAGE_SUPPORT
419
  })
420
 
421
  if __name__ == '__main__':
422
- # Ensure this port matches your server configuration
423
  app.run(debug=True, host="0.0.0.0", port=7860)
 
5
  import tempfile
6
  import time
7
  from datetime import datetime
 
 
 
8
  from flask import Flask, request, jsonify
9
  from flask_cors import CORS
10
  import pandas as pd
11
  import pypdf
12
  import google.generativeai as genai
13
+ import PIL.Image
 
 
 
 
 
 
 
 
14
 
# Application-wide logging at INFO level.
logging.basicConfig(level=logging.INFO)

# Flask application with CORS enabled on every route.
app = Flask(__name__)
CORS(app)

# The Gemini API key is read from the 'Gemini' environment variable;
# refuse to start when it is missing or empty.
api_key = os.environ.get('Gemini')
if not api_key:
    raise ValueError("Gemini API key not found in environment variables")
 
25
 
def configure_gemini(api_key):
    """Register the API key with the Gemini SDK and return a model handle.

    Args:
        api_key: Key passed to ``genai.configure``.

    Returns:
        A ``genai.GenerativeModel`` for 'gemini-2.0-flash'.

    Raises:
        Exception: Any error raised by the SDK is logged and re-raised.
    """
    try:
        genai.configure(api_key=api_key)
        gemini_model = genai.GenerativeModel('gemini-2.0-flash')
    except Exception as e:
        logging.error(f"Error configuring Gemini: {str(e)}")
        raise
    return gemini_model
34
 
def read_pdf_pages(file_path):
    """Return the extracted text of every page in a PDF, one string per page.

    Pages for which ``extract_text()`` yields nothing contribute an empty
    string, so the returned list has one entry per page.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        List of page texts in page order.

    Raises:
        Exception: Any error while opening or parsing is logged and re-raised.
    """
    try:
        with open(file_path, 'rb') as pdf_handle:
            pdf_reader = pypdf.PdfReader(pdf_handle)
            # Extraction must happen while the file handle is still open.
            page_texts = [pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages]
        return page_texts
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        raise
 
 
 
 
 
 
 
 
 
 
 
 
48
 
# Extraction prompt for bank-statement pages and images.
# The Amount field asks for the exact decimal value: the earlier wording
# ("just the integer value") contradicted the later instruction to extract
# the amount in full including decimals.
PROMPT = """Analyze this bank statement and extract transactions in JSON format with these fields:

Date (format DD/MM/YYYY)
Description
Amount (the exact numeric value including decimals, without currency symbols; do not round)
Type (categorize into one of the following based on the transaction nature):
'income' - money received from customers, sales, services rendered
'expense' - operational costs, purchases, payments made
'asset' - purchase of equipment, property, vehicles, or other assets
'liability' - taking on debt, loans received, credit facilities
'equity' - owner investments, capital contributions, retained earnings transfers
'transfer' - money moved between own accounts, internal transfers
'investment' - securities purchases, investment account funding, portfolio additions
'loan_repayment' - paying back borrowed money, loan principal payments
'capital_injection' - owner or investor adding money to the business
Customer Name (Only if Type is 'income' and if no name is extracted write 'general income'. For all other types, extract relevant party name or write 'N/A')
City (In address of bank statement)
Destination_of_funds (categorize based on Type and description):
If 'expense': Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses
If 'income': 'income'
If 'asset': Equipment, Property, Vehicles, Technology, Furniture, Other assets
If 'liability': Bank loan, Credit facility, Supplier credit, Other liabilities
If 'equity': Owner investment, Retained earnings, Share capital, Other equity
If 'transfer': Internal transfer
If 'investment': Securities, Mutual funds, Fixed deposits, Other investments
If 'loan_repayment': Loan repayment
If 'capital_injection': Capital injection
ignore opening or closing balances.
extract the amount in full including decimals.
Return ONLY the raw JSON object, without any surrounding text, explanations, or markdown fences like ```json.
Return ONLY valid JSON with this structure:
{
  "transactions": [
    {
      "Date": "string",
      "Description": "string",
      "Customer_name": "string",
      "City": "string",
      "Amount": number,
      "Type": "string",
      "Destination_of_funds": "string"
    }
  ]
}"""
93
 
def get_text_prompt_with_date():
    """Build the extraction prompt for free-text input, anchored to today's date.

    The current local date (DD/MM/YYYY) is embedded so the model can use it
    as the default for transactions that mention no date. The Amount field
    asks for the exact decimal value; the earlier wording ("just the integer
    value") told the model to drop cents.

    Returns:
        The complete prompt string.
    """
    current_date = datetime.now().strftime("%d/%m/%Y")
    return f"""IMPORTANT: Today's date is {current_date}. If the user does not specify a date for a transaction, use {current_date} as the default date.

Analyze the following natural language text and extract transactions in JSON format with these fields:

Date (format DD/MM/YYYY) - USE {current_date} IF NO DATE IS SPECIFIED
Description
Amount (the exact numeric value including decimals, without currency symbols; do not round)
Type (categorize into one of the following based on the transaction nature):
'income' - money received from customers, sales, services rendered
'expense' - operational costs, purchases, payments made
'asset' - purchase of equipment, property, vehicles, or other assets
'liability' - taking on debt, loans received, credit facilities
'equity' - owner investments, capital contributions, retained earnings transfers
'transfer' - money moved between own accounts, internal transfers
'investment' - securities purchases, investment account funding, portfolio additions
'loan_repayment' - paying back borrowed money, loan principal payments
'capital_injection' - owner or investor adding money to the business
Customer Name (Only if Type is 'income' and if no name is extracted write 'general income'. For all other types, extract relevant party name or write 'N/A')
City (extract from any address information provided or write 'N/A' if not available)
Destination_of_funds (categorize based on Type and description):
If 'expense': Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses
If 'income': 'income'
If 'asset': Equipment, Property, Vehicles, Technology, Furniture, Other assets
If 'liability': Bank loan, Credit facility, Supplier credit, Other liabilities
If 'equity': Owner investment, Retained earnings, Share capital, Other equity
If 'transfer': Internal transfer
If 'investment': Securities, Mutual funds, Fixed deposits, Other investments
If 'loan_repayment': Loan repayment
If 'capital_injection': Capital injection
ignore opening or closing balances.
Return ONLY valid JSON with this structure:
{{
  "transactions": [
    {{
      "Date": "string",
      "Description": "string",
      "Customer_name": "string",
      "City": "string",
      "Amount": number,
      "Type": "string",
      "Destination_of_funds": "string"
    }}
  ]
}}
important: Return an empty array if no transactions are in the text. Do not make up false data.
"""
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def repair_json_with_gemini(model, broken_json_string):
    """Ask Gemini, in a second API call, to fix a syntactically broken JSON string.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.
        broken_json_string: The malformed JSON text to repair.

    Returns:
        The raw text of the model's reply. It is not parsed here; the caller
        is expected to parse it.

    Raises:
        ValueError: If the repair request fails. The underlying error is
            chained as ``__cause__`` so it is not lost in tracebacks.
    """
    logging.info("Attempting to repair broken JSON with another Gemini call...")
    repair_prompt = f"""The following text is a JSON object that is syntactically incorrect.
It might have missing commas, brackets, or other errors.
Please fix the syntax to make it a valid JSON object.
Return ONLY the corrected, raw JSON object and nothing else.

Broken JSON:
{broken_json_string}
"""
    try:
        resp = model.generate_content(repair_prompt)
        return resp.text
    except Exception as e:
        logging.error(f"Error during JSON repair call: {e}")
        raise ValueError("Failed to repair the JSON string.") from e
161
 
def call_gemini_with_retry_custom(model, text, prompt, retries=3, backoff_factor=2):
    """Send ``prompt`` and ``text`` to Gemini and return the parsed JSON reply.

    If the reply cannot be parsed, one repair round-trip is attempted via
    ``repair_json_with_gemini`` before giving up. Rate-limit errors (message
    containing '429' or 'RateLimit') are retried with exponential backoff.

    Args:
        model: Object exposing ``generate_content``.
        text: Content sent alongside the prompt.
        prompt: Instruction text placed before ``text``.
        retries: Maximum number of attempts; values below 1 are treated as 1.
        backoff_factor: Base of the exponential wait, ``backoff_factor ** attempt``
            seconds.

    Returns:
        The value returned by ``extract_json_from_response``.

    Raises:
        Exception: The last error is re-raised when it is not a rate-limit
            error or when all attempts are used up.
    """
    # Always make at least one attempt so the function can never fall out of
    # the loop and implicitly return None.
    total_attempts = max(1, retries)
    for attempt in range(1, total_attempts + 1):
        try:
            resp = model.generate_content([prompt, text])
            response_text = resp.text

            try:
                # First attempt to parse the original response
                return extract_json_from_response(response_text)
            except Exception:
                # If parsing fails, trigger the repair process
                logging.warning("Initial JSON parsing failed. Attempting repair.")
                repaired_text = repair_json_with_gemini(model, response_text)
                return extract_json_from_response(repaired_text)
        except Exception as e:
            msg = str(e)
            # Evaluate the rate-limit test separately: `a or b and c` binds as
            # `a or (b and c)`, which previously let a '429' on the final
            # attempt sleep and then return None instead of raising.
            rate_limited = '429' in msg or 'RateLimit' in msg
            if rate_limited and attempt < total_attempts:
                wait = backoff_factor ** attempt
                logging.warning(f"Rate limit hit, retrying in {wait}s (attempt {attempt}/{total_attempts})")
                time.sleep(wait)
            else:
                logging.error(f"Error processing with Gemini after retries: {msg}")
                raise
185
 
def call_gemini_with_retry(model, text, retries=3, backoff_factor=2):
    """Run the retrying Gemini call using the module's default PROMPT.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``call_gemini_with_retry_custom`` and its result is returned as-is.
    """
    return call_gemini_with_retry_custom(
        model,
        text,
        PROMPT,
        retries,
        backoff_factor,
    )
 
 
 
 
 
190
 
def extract_json_from_response(response_text):
    """Parse the first JSON object embedded in an LLM response.

    Decoding starts at the first '{' and stops where that object ends, so
    markdown fences, a leading "json" tag, and any commentary before or
    after the object are ignored. The previous non-greedy regex cut fenced
    responses at the first '}', which broke every nested object.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no '{' (or is not a string), or if
            the object starting at the first '{' is not valid JSON.
    """
    if not isinstance(response_text, str):
        raise ValueError("No valid JSON object found in the LLM response")

    start = response_text.find('{')
    if start == -1:
        raise ValueError("No valid JSON object found in the LLM response")

    try:
        # raw_decode tolerates trailing content after the JSON document.
        parsed, _end = json.JSONDecoder().raw_decode(response_text, start)
        return parsed
    except json.JSONDecodeError as e:
        json_string = response_text[start:]
        logging.error(f"Failed to parse extracted JSON. Error: {e}")
        logging.error(f"Problematic JSON string was: {json_string}")
        raise ValueError(f"Could not parse JSON from LLM response: {e}") from e
 
 
211
 
@app.route('/process-pdf', methods=['POST'])
def process_pdf():
    """Handle a PDF upload, send each page's text to Gemini, and merge the results.

    Expects a multipart upload under the 'file' field with a '.pdf' name.
    Pages whose extracted text is empty are skipped.

    Returns:
        JSON ``{'transactions': [...]}`` on success, or ``{'error': ...}``
        with status 400 (bad upload / ValueError) or 500 (anything else).
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400
        file = request.files['file']
        # `not file.filename` also covers a missing (None) filename.
        if not file.filename or not file.filename.lower().endswith('.pdf'):
            return jsonify({'error': 'A valid PDF file must be uploaded'}), 400

        # Reserve a temp path and close the handle before writing to it:
        # saving to the name of a still-open temp file fails on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            file_path = tmp.name

        try:
            # Saving happens inside the try so the temp file is removed even
            # if the write itself fails.
            file.save(file_path)

            model = configure_gemini(api_key)
            pages = read_pdf_pages(file_path)
            all_transactions = []

            for idx, page_text in enumerate(pages, start=1):
                if not page_text.strip():
                    continue
                logging.info(f"Processing page {idx}/{len(pages)}")
                result = call_gemini_with_retry(model, page_text)
                txs = result.get('transactions', [])
                all_transactions.extend(txs)

            return jsonify({'transactions': all_transactions})
        finally:
            if os.path.exists(file_path):
                os.remove(file_path)

    except ValueError as ve:
        logging.warning(f"Client error: {ve}")
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        logging.error(f"Internal server error: {e}")
        return jsonify({'error': 'Internal server error'}), 500
 
 
 
 
 
 
 
250
 
@app.route('/process-text', methods=['POST'])
def process_text():
    """Handle free-text input and extract transactions from it with Gemini.

    Expects a JSON body of the form ``{"text": "<non-empty string>"}``.

    Returns:
        JSON ``{'transactions': [...]}`` on success, or ``{'error': ...}``
        with status 400 (bad input / ValueError) or 500 (anything else).
    """
    try:
        # silent=True yields None for a missing or malformed JSON body instead
        # of raising, so bad requests get a 400 rather than falling into the
        # generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'text' not in data:
            return jsonify({'error': 'No text provided'}), 400

        text_input = data['text']

        # Reject non-string values as well as blank strings.
        if not isinstance(text_input, str) or not text_input.strip():
            return jsonify({'error': 'Text input cannot be empty'}), 400

        # Configure Gemini model
        model = configure_gemini(api_key)

        # Generate prompt with current date
        text_prompt = get_text_prompt_with_date()

        # Process the text with Gemini
        logging.info("Processing text input for transaction extraction")
        result = call_gemini_with_retry_custom(model, text_input, text_prompt)

        transactions = result.get('transactions', [])

        return jsonify({'transactions': transactions})

    except ValueError as ve:
        logging.warning(f"Client error: {ve}")
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        logging.error(f"Internal server error: {e}")
        return jsonify({'error': 'Internal server error'}), 500
 
 
 
285
 
286
  @app.route('/transaction-types', methods=['GET'])
287
  def get_transaction_types():
288
  """Return available transaction types and their categories."""
 
289
  transaction_types = {
290
  "types": [
291
  {
 
354
  }
355
  return jsonify(transaction_types)
356
 
@app.route('/process-image', methods=['POST'])
def process_image():
    """Handle an image upload and extract transactions from it with Gemini Vision.

    Expects a multipart upload under the 'file' field whose name ends in one
    of the supported image extensions.

    Returns:
        JSON ``{'transactions': [...]}`` on success, or ``{'error': ...}``
        with status 400 (bad upload / ValueError) or 500 (anything else).
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400

        file = request.files['file']
        # `not file.filename` also covers a missing (None) filename.
        if not file.filename:
            return jsonify({'error': 'No file selected'}), 400

        # Check if file is an image
        allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'}
        file_ext = os.path.splitext(file.filename)[1].lower()
        if file_ext not in allowed_extensions:
            return jsonify({'error': 'Invalid file type. Supported formats: JPG, JPEG, PNG, GIF, BMP, WEBP'}), 400

        # Reserve a temp path and close the handle before writing to it:
        # saving to the name of a still-open temp file fails on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
            file_path = tmp.name

        try:
            # Saving happens inside the try so the temp file is removed even
            # if the write itself fails.
            file.save(file_path)

            model = configure_gemini(api_key)

            logging.info(f"Processing image file: {file.filename}")

            # The context manager closes the image's file handle before the
            # temp file is deleted in the finally block.
            with PIL.Image.open(file_path) as img:
                # Go through the shared helper so image requests get the same
                # rate-limit retries and JSON repair as the other endpoints.
                result = call_gemini_with_retry_custom(model, img, PROMPT)

            transactions = result.get('transactions', [])

            return jsonify({'transactions': transactions})

        finally:
            # Clean up temp file
            if os.path.exists(file_path):
                os.remove(file_path)

    except ValueError as ve:
        logging.warning(f"Client error: {ve}")
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        logging.error(f"Internal server error: {e}")
        return jsonify({'error': 'Internal server error'}), 500
407
+
@app.route('/health', methods=['GET'])
def health_check():
    """Report service liveness with the current timestamp and version."""
    status_payload = {
        'status': 'healthy',
        'timestamp': datetime.now().isoformat(),
        'version': '2.0.0',
    }
    return jsonify(status_payload)
416
 
if __name__ == '__main__':
    # The interactive debugger permits arbitrary code execution, so it must
    # not be enabled unconditionally on a server bound to all interfaces.
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)