rairo committed on
Commit
bcfae1d
·
verified ·
1 Parent(s): b3c2aa8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import re
3
+ import pandas as pd
4
+ import google.generativeai as genai
5
+ import pypdf
6
+ import json
7
+ from datetime import datetime
8
+ import os
9
+ from flask_cors import CORS
10
+
11
# Read the Gemini API key from the 'Gemini' environment variable; a missing
# variable raises KeyError while the module is being imported.
api_key = os.environ['Gemini']

# Flask application object that the route handlers below are registered on.
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes
15
+
16
def configure_gemini(api_key, model_name='gemini-2.0-flash-exp'):
    """Configure the Gemini SDK and return a generative model handle.

    Args:
        api_key: API key forwarded to ``genai.configure``.
        model_name: Name of the Gemini model to instantiate. Defaults to
            ``'gemini-2.0-flash-exp'``, the value this function previously
            hard-coded, so existing single-argument callers are unaffected.

    Returns:
        The ``genai.GenerativeModel`` created for ``model_name``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)
19
+
20
def read_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``.

    Each page's ``extract_text()`` result is collected in page order; pages
    that yield no text are skipped. The remaining page texts are joined with
    newline characters.
    """
    with open(file_path, 'rb') as handle:
        reader = pypdf.PdfReader(handle)
        # Extraction must finish while the file is still open.
        extracted = (page.extract_text() for page in reader.pages)
        non_empty = [chunk for chunk in extracted if chunk]
    return "\n".join(non_empty)
29
+
30
def process_with_gemini(model, text):
    """Ask the model to extract bank-statement transactions as JSON.

    Args:
        model: Object exposing ``generate_content``; it is called once with a
            two-item list holding the instruction prompt and ``text``.
        text: Bank-statement text to analyse.

    Returns:
        The ``text`` attribute of the model's response, unmodified.
    """
    instructions = """Analyze this bank statement and extract transactions in JSON format with these fields:
- Date (format DD/MM/YYYY)
- Description
- Amount (just the integer value)
- Type (is 'income' if 'credit amount', else 'expense')
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
- City (In address of bank statement)

Return ONLY valid JSON with this structure:
{
"transactions": [
{
"Date": "string",
"Description": "string",
"Customer_name": "string",
"City": "string",
"Amount": number,
"Type": "string"
}
]
}"""

    return model.generate_content([instructions, text]).text
55
+
56
@app.route('/process-pdf', methods=['POST'])
def process_pdf():
    """Extract transactions from an uploaded bank-statement PDF.

    Expects a multipart POST carrying the PDF in the ``file`` field. The PDF
    text is read with ``read_pdf``, sent to Gemini through
    ``process_with_gemini``, and the JSON object found in the model's reply is
    returned to the client.

    Returns:
        The parsed JSON on success. ``{'error': ...}`` with status 400 when
        the upload is missing, has no filename, or is not a ``.pdf`` file.
        ``{'error': ...}`` with status 500 when any later step raises.
    """
    # Imported here so the handler is self-contained with respect to the
    # module's import block.
    import tempfile

    temp_path = None
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file uploaded'}), 400

        file = request.files['file']
        # Covers both an empty and a missing (None) filename.
        if not file.filename:
            return jsonify({'error': 'No file selected'}), 400

        # Compare case-insensitively so "STATEMENT.PDF" is accepted too.
        if not file.filename.lower().endswith('.pdf'):
            return jsonify({'error': 'File must be a PDF'}), 400

        # Give each request its own temp file; a single shared "temp.pdf"
        # would be overwritten when two uploads are processed at once.
        fd, temp_path = tempfile.mkstemp(suffix='.pdf')
        os.close(fd)
        file.save(temp_path)

        # Configure Gemini and process PDF
        model = configure_gemini(api_key)
        pdf_text = read_pdf(temp_path)
        json_response = process_with_gemini(model, pdf_text)

        # Keep only the span from the first '{' to the last '}', then drop
        # any Markdown code-fence markers left inside it.
        json_str = json_response[json_response.find('{'):json_response.rfind('}') + 1]
        json_str = json_str.replace('```json', '').replace('```', '')

        # Parse JSON and return
        data = json.loads(json_str)
        return jsonify(data)

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Remove the temp file on every exit path, including failures.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; never mask the real response.
                pass
92
+
93
if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows executing arbitrary Python code from
    # the browser. Opt in for local development by setting FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)