rairo committed on
Commit
cde4684
·
verified ·
1 Parent(s): d858c29

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +51 -68
main.py CHANGED
@@ -1,94 +1,77 @@
1
- from flask import Flask, request, jsonify
2
- import re
3
- import pandas as pd
4
- import google.generativeai as genai
5
- import pypdf
6
  import json
 
7
  from datetime import datetime
8
- import os
 
 
9
  from flask_cors import CORS
 
10
 
11
  app = Flask(__name__)
12
  CORS(app) # Enable CORS for all routes
13
 
14
- api_key = os.environ['Gemini']
 
15
 
16
def configure_gemini(api_key, model_name='gemini-2.0-flash-exp'):
    """Configure the Gemini client and return a generative model handle.

    Args:
        api_key: Gemini API key, forwarded to ``genai.configure``.
        model_name: Name of the model to instantiate. Defaults to the model
            that used to be hard-coded here, so existing single-argument
            callers behave exactly as before.

    Returns:
        A ``genai.GenerativeModel`` bound to ``model_name``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)
19
-
20
def read_pdf(file_path):
    """Return the extracted text of every page in a PDF, newline-joined.

    Args:
        file_path: Path of the PDF file to open in binary mode.

    Returns:
        A single string with the text of each page separated by ``"\\n"``.
        Pages whose ``extract_text()`` result is falsy are skipped.
    """
    with open(file_path, 'rb') as pdf_file:
        extracted = (page.extract_text() for page in pypdf.PdfReader(pdf_file).pages)
        # Join while the file is still open: pages are read lazily.
        return "\n".join(chunk for chunk in extracted if chunk)
29
 
30
def process_with_gemini(model, text):
    """Ask the model to extract bank-statement transactions as JSON.

    Args:
        model: Object exposing ``generate_content``; callers pass the model
            returned by ``configure_gemini``.
        text: Statement text to analyse.

    Returns:
        The ``text`` attribute of the model's response, unparsed.
    """
    prompt_lines = (
        "Analyze this bank statement and extract transactions in JSON format with these fields:",
        "- Date (format DD/MM/YYYY)",
        "- Description",
        "- Amount (just the integer value)",
        "- Type (is 'income' if 'credit amount', else 'expense')",
        "- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')",
        "- City (In address of bank statement)",
        "",
        "Return ONLY valid JSON with this structure:",
        "{",
        '"transactions": [',
        "{",
        '"Date": "string",',
        '"Description": "string",',
        '"Customer_name": "string",',
        '"City": "string",',
        '"Amount": number,',
        '"Type": "string"',
        "}",
        "]",
        "}",
    )
    prompt = "\n".join(prompt_lines)

    reply = model.generate_content([prompt, text])
    return reply.text
55
 
56
@app.route('/process-pdf', methods=['POST'])
def process_pdf():
    """Handle a PDF upload and return the transactions extracted by Gemini.

    Expects a multipart form upload under the ``file`` field.

    Returns:
        A JSON response with the parsed model output on success;
        ``400`` with an ``error`` key when the upload is missing, unnamed or
        not a ``.pdf``; ``500`` with an ``error`` key when the model reply
        contains no JSON object or any other step raises.
    """
    # Local import keeps this block self-contained; only this handler needs it.
    import tempfile

    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400

        upload = request.files['file']
        if upload.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        # Case-insensitive so that e.g. "STATEMENT.PDF" is accepted too.
        if not upload.filename.lower().endswith('.pdf'):
            return jsonify({'error': 'File must be a PDF'}), 400

        # A unique path per request: a fixed "temp.pdf" would be overwritten
        # by concurrent uploads.
        fd, temp_path = tempfile.mkstemp(suffix='.pdf')
        os.close(fd)
        try:
            upload.save(temp_path)

            model = configure_gemini(api_key)
            pdf_text = read_pdf(temp_path)
            json_response = process_with_gemini(model, pdf_text)
        finally:
            # Always remove the temp file, including when processing fails.
            os.remove(temp_path)

        # Keep only the outermost {...} span of the model reply.
        json_start = json_response.find('{')
        json_end = json_response.rfind('}')
        if json_start == -1 or json_end < json_start:
            return jsonify({'error': 'Invalid response format', 'raw': json_response}), 500

        json_str = json_response[json_start:json_end + 1]
        json_str = json_str.replace('```json', '').replace('```', '')

        data = json.loads(json_str)
        return jsonify(data)

    except Exception as e:
        # Top-level boundary: report any unexpected failure as a 500.
        return jsonify({'error': str(e)}), 500
92
 
93
if __name__ == '__main__':
    # Flask's debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary code. Because the
    # server binds to all interfaces, debug is opt-in via FLASK_DEBUG instead
    # of being always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
 
 
1
+ import os
2
+ import io
 
 
 
3
  import json
4
+ import hashlib
5
  from datetime import datetime
6
+ from PIL import Image
7
+
8
+ from flask import Flask, request, jsonify
9
  from flask_cors import CORS
10
+ import google.generativeai as genai
11
 
12
  app = Flask(__name__)
13
  CORS(app) # Enable CORS for all routes
14
 
15
+ # Use the Gemini API key from the environment, or set it here for testing.
16
+ api_key = os.environ.get('Gemini', 'YOUR_GEMINI_API_KEY')
17
 
18
def configure_gemini(api_key, model_name='gemini-2.0-flash-thinking-exp'):
    """Configure the Gemini client and return a generative model handle.

    Args:
        api_key: Gemini API key, forwarded to ``genai.configure``.
        model_name: Name of the model to instantiate. Defaults to the model
            that used to be hard-coded here, so existing single-argument
            callers behave exactly as before.

    Returns:
        A ``genai.GenerativeModel`` bound to ``model_name``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)
 
 
 
 
 
 
 
 
 
 
21
 
22
def process_receipt(model, image):
    """Ask the model whether an image is a receipt and to extract its fields.

    Args:
        model: Object exposing ``generate_content``; callers pass the model
            returned by ``configure_gemini``.
        image: The image to analyse, sent to the model alongside the prompt.

    Returns:
        The ``text`` attribute of the model's response, unparsed.
    """
    requested_fields = (
        " - Total amount (as float)",
        " - List of items purchased (array of strings)",
        " - Date of transaction (DD/MM/YYYY format)",
        " - Receipt number (as string)",
    )
    prompt = "\n".join((
        "Analyze this image and determine if it's a receipt. If it is a receipt, extract:",
        *requested_fields,
        "Return JSON format with keys: is_receipt (boolean), total, items, date, receipt_number.",
        'If not a receipt, return {"is_receipt": false}',
    ))
    return model.generate_content([prompt, image]).text
34
 
35
def _extract_json_object(text):
    """Return the JSON object embedded in *text*, or None if there is none."""
    start = text.find('{')
    end = text.rfind('}')
    # `end < start` also covers a missing '}' (-1) and a '}' before the '{'.
    if start == -1 or end < start:
        return None

    candidate = text[start:end + 1]
    # Clean up any markdown fences left inside the extracted span.
    candidate = candidate.replace('```json', '').replace('```', '')
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        return None


@app.route('/process-receipt', methods=['POST'])
def process_receipt_endpoint():
    """Handle a receipt image upload and return the fields Gemini extracted.

    Expects a multipart form upload under the ``receipt`` field.

    Returns:
        A JSON response with the parsed model output plus ``file_hash`` (MD5
        hex digest of the uploaded bytes) and ``timestamp`` (ISO-8601 string
        from ``datetime.now()``) on success; ``400`` with an ``error`` key
        when the upload is missing, unnamed or not a readable image; ``500``
        with an ``error`` key when the model reply holds no valid JSON object
        or any other step raises.
    """
    try:
        if 'receipt' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400

        file = request.files['receipt']
        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        # Read file bytes and compute a hash (for duplicate checking or logging).
        image_bytes = file.read()
        file_hash = hashlib.md5(image_bytes).hexdigest()

        # A non-image upload is a client error, not a server failure.
        try:
            image = Image.open(io.BytesIO(image_bytes))
        except OSError:
            return jsonify({'error': 'Uploaded file is not a valid image'}), 400

        # Configure Gemini and process the receipt image.
        model = configure_gemini(api_key)
        result_text = process_receipt(model, image)

        data = _extract_json_object(result_text)
        if data is None:
            return jsonify({'error': 'Invalid response format', 'raw': result_text}), 500

        # Add metadata to the response.
        data['file_hash'] = file_hash
        data['timestamp'] = datetime.now().isoformat()

        return jsonify(data)
    except Exception as e:
        # Top-level boundary: report any unexpected failure as a 500.
        return jsonify({'error': str(e)}), 500
74
 
75
if __name__ == '__main__':
    # The server listens on all interfaces at port 7860.
    # Flask's debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary code, so it is opt-in
    # via FLASK_DEBUG instead of being always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)