blessedpug committed on
Commit
8e65d73
·
1 Parent(s): 1fac04f

added fraud check

Browse files
Files changed (4) hide show
  1. data.json +334 -0
  2. fraud.py +118 -0
  3. models.py +1 -0
  4. pipeline.py +18 -7
data.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "merchant": "TESCO SHREWSBURY CATTLE MARKET",
4
+ "date": "17/10/22",
5
+ "total_amount": 55.3,
6
+ "items": [
7
+ {
8
+ "description": "TAPE MEASURE",
9
+ "amount": 2.1
10
+ },
11
+ {
12
+ "description": "PROTEIN COOKIE",
13
+ "amount": 1.0
14
+ },
15
+ {
16
+ "description": "PROTEIN COOKIE",
17
+ "amount": 1.0
18
+ },
19
+ {
20
+ "description": "CONDOMS",
21
+ "amount": 7.0
22
+ },
23
+ {
24
+ "description": "PROCECCO",
25
+ "amount": 8.0
26
+ },
27
+ {
28
+ "description": "PROCECCO",
29
+ "amount": 8.0
30
+ },
31
+ {
32
+ "description": "SAUVIGNON BLNC",
33
+ "amount": 7.0
34
+ },
35
+ {
36
+ "description": "SAUVIGNON BLNC",
37
+ "amount": 7.0
38
+ },
39
+ {
40
+ "description": "TAPE MEASURE",
41
+ "amount": 2.1
42
+ },
43
+ {
44
+ "description": "SAUVIGNON BLNC",
45
+ "amount": 7.0
46
+ },
47
+ {
48
+ "description": "PROTEIN COOKIE",
49
+ "amount": 1.0
50
+ },
51
+ {
52
+ "description": "PROTEIN COOKIE",
53
+ "amount": 1.0
54
+ },
55
+ {
56
+ "description": "PROTEIN COOKIE",
57
+ "amount": 1.0
58
+ },
59
+ {
60
+ "description": "SAUVIGNON BLNC",
61
+ "amount": 7.0
62
+ },
63
+ {
64
+ "description": "KRONENBOURG",
65
+ "amount": 1.1
66
+ },
67
+ {
68
+ "description": "KRONENBOURG",
69
+ "amount": 1.0
70
+ }
71
+ ],
72
+ "fraud_check": false
73
+ },
74
+ {
75
+ "merchant": "TESCO",
76
+ "date": "17/10/22",
77
+ "total_amount": 55.3,
78
+ "items": [
79
+ {
80
+ "description": "TAPE MEASURE",
81
+ "amount": 2.1
82
+ },
83
+ {
84
+ "description": "PROTEIN COOKIE",
85
+ "amount": 1.8
86
+ },
87
+ {
88
+ "description": "PROTEIN COOKIE",
89
+ "amount": 1.8
90
+ },
91
+ {
92
+ "description": "CONDOMS",
93
+ "amount": 10.0
94
+ },
95
+ {
96
+ "description": "PROCECCO",
97
+ "amount": 7.0
98
+ },
99
+ {
100
+ "description": "PROCECCO",
101
+ "amount": 7.0
102
+ },
103
+ {
104
+ "description": "SAUVIGNON BLNC",
105
+ "amount": 7.0
106
+ },
107
+ {
108
+ "description": "SAUVIGNON BLNC",
109
+ "amount": 7.0
110
+ },
111
+ {
112
+ "description": "KRONENBOURG",
113
+ "amount": 4.6
114
+ },
115
+ {
116
+ "description": "TAPE MEASURE",
117
+ "amount": 2.1
118
+ },
119
+ {
120
+ "description": "PROTEIN COOKIE",
121
+ "amount": 1.8
122
+ },
123
+ {
124
+ "description": "PROTEIN COOKIE",
125
+ "amount": 1.8
126
+ },
127
+ {
128
+ "description": "CONDOMS",
129
+ "amount": 10.0
130
+ },
131
+ {
132
+ "description": "SAUVIGNON BLNC",
133
+ "amount": 7.0
134
+ },
135
+ {
136
+ "description": "SAUVIGNON BLNC",
137
+ "amount": 7.0
138
+ },
139
+ {
140
+ "description": "KRONENBOURG",
141
+ "amount": 4.6
142
+ }
143
+ ],
144
+ "fraud_check": false
145
+ },
146
+ {
147
+ "merchant": "TESCO SHREWSBURY CATTLE MARKET",
148
+ "date": "17/10/22",
149
+ "total_amount": 55.3,
150
+ "items": [
151
+ {
152
+ "description": "TAPE MEASURE",
153
+ "amount": 2.1
154
+ },
155
+ {
156
+ "description": "PROTEIN COOKIE",
157
+ "amount": 1.8
158
+ },
159
+ {
160
+ "description": "PROTEIN COOKIE",
161
+ "amount": 1.8
162
+ },
163
+ {
164
+ "description": "CONDOMS",
165
+ "amount": 10.0
166
+ },
167
+ {
168
+ "description": "PROCECCO",
169
+ "amount": 7.0
170
+ },
171
+ {
172
+ "description": "PROCECCO",
173
+ "amount": 7.0
174
+ },
175
+ {
176
+ "description": "SAUVIGNON BLNC",
177
+ "amount": 7.0
178
+ },
179
+ {
180
+ "description": "SAUVIGNON BLNC",
181
+ "amount": 7.0
182
+ },
183
+ {
184
+ "description": "KRONENBOURG",
185
+ "amount": 4.6
186
+ },
187
+ {
188
+ "description": "TAPE MEASURE",
189
+ "amount": 2.1
190
+ },
191
+ {
192
+ "description": "PROTEIN COOKIE",
193
+ "amount": 1.8
194
+ },
195
+ {
196
+ "description": "PROTEIN COOKIE",
197
+ "amount": 1.8
198
+ },
199
+ {
200
+ "description": "CONDOMS",
201
+ "amount": 10.0
202
+ },
203
+ {
204
+ "description": "SAUVIGNON BLNC",
205
+ "amount": 7.0
206
+ },
207
+ {
208
+ "description": "SAUVIGNON BLNC",
209
+ "amount": 7.0
210
+ },
211
+ {
212
+ "description": "KRONENBOURG",
213
+ "amount": 4.6
214
+ }
215
+ ],
216
+ "fraud_check": false
217
+ },
218
+ {
219
+ "fraud_check": false,
220
+ "merchant": "TESCO SHREWSBURY CATTLE MARKET",
221
+ "date": "17/10/22",
222
+ "total_amount": 55.3,
223
+ "items": [
224
+ {
225
+ "description": "TAPE MEASURE",
226
+ "amount": 2.1
227
+ },
228
+ {
229
+ "description": "PROTEIN COOKIE",
230
+ "amount": 1.8
231
+ },
232
+ {
233
+ "description": "PROTEIN COOKIE",
234
+ "amount": 1.8
235
+ },
236
+ {
237
+ "description": "CONDOMS",
238
+ "amount": 10.0
239
+ },
240
+ {
241
+ "description": "PROCECCO",
242
+ "amount": 7.0
243
+ },
244
+ {
245
+ "description": "PROCECCO",
246
+ "amount": 7.0
247
+ },
248
+ {
249
+ "description": "SAUVIGNON BLNC",
250
+ "amount": 7.0
251
+ },
252
+ {
253
+ "description": "SAUVIGNON BLNC",
254
+ "amount": 7.0
255
+ },
256
+ {
257
+ "description": "KRONENBOURG",
258
+ "amount": 4.6
259
+ }
260
+ ]
261
+ },
262
+ {
263
+ "fraud_check": false,
264
+ "merchant": "Tesco",
265
+ "date": "17/10/22",
266
+ "total_amount": 55.3,
267
+ "items": [
268
+ {
269
+ "description": "TAPE MEASURE",
270
+ "amount": 2.1
271
+ },
272
+ {
273
+ "description": "PROTEIN COOKIE",
274
+ "amount": 1.8
275
+ },
276
+ {
277
+ "description": "PROTEIN COOKIE",
278
+ "amount": 1.8
279
+ },
280
+ {
281
+ "description": "CONDOMS",
282
+ "amount": 10.0
283
+ },
284
+ {
285
+ "description": "PROCECCO",
286
+ "amount": 7.0
287
+ },
288
+ {
289
+ "description": "PROCECCO",
290
+ "amount": 7.0
291
+ },
292
+ {
293
+ "description": "SAUVIGNON BLNC",
294
+ "amount": 7.0
295
+ },
296
+ {
297
+ "description": "SAUVIGNON BLNC",
298
+ "amount": 7.0
299
+ },
300
+ {
301
+ "description": "KRONENBOURG",
302
+ "amount": 4.6
303
+ },
304
+ {
305
+ "description": "TAPE MEASURE",
306
+ "amount": 2.1
307
+ },
308
+ {
309
+ "description": "PROTEIN COOKIE",
310
+ "amount": 1.8
311
+ },
312
+ {
313
+ "description": "PROTEIN COOKIE",
314
+ "amount": 1.8
315
+ },
316
+ {
317
+ "description": "CONDOMS",
318
+ "amount": 10.0
319
+ },
320
+ {
321
+ "description": "SAUVIGNON BLNC",
322
+ "amount": 7.0
323
+ },
324
+ {
325
+ "description": "SAUVIGNON BLNC",
326
+ "amount": 7.0
327
+ },
328
+ {
329
+ "description": "KRONENBOURG",
330
+ "amount": 4.6
331
+ }
332
+ ]
333
+ }
334
+ ]
fraud.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ DATA_FILE = 'data.json'
5
+
6
def load_receipts(path=None):
    """Load all stored receipts from the JSON data file.

    Args:
        path: Optional file to read. Defaults to the module-level DATA_FILE.

    Returns:
        A list of receipt dicts. A file holding a single JSON object is
        returned as a one-element list. A missing, unreadable or malformed
        file, or a top-level value that is neither an object nor an array,
        yields an empty list. Array entries that are not JSON objects are
        dropped, because the duplicate check calls ``.get()`` on every entry.
    """
    if path is None:
        path = DATA_FILE

    # Open directly instead of checking existence first, so there is no
    # window between the check and the read.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print("Data file doesn't exist, returning empty list")
        return []
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading receipts: {e}")
        return []

    print(f"Loaded data type: {type(data)}")

    # Handle case where the file contains a single object instead of an array
    if isinstance(data, dict):
        print("Data is a single dict, wrapping in array")
        return [data]

    if isinstance(data, list):
        print(f"Data is a list with {len(data)} items")
        receipts = [entry for entry in data if isinstance(entry, dict)]
        skipped = len(data) - len(receipts)
        if skipped:
            print(f"Ignoring {skipped} non-object entries")
        return receipts

    print("Data is neither dict nor list, returning empty list")
    return []
30
+
31
def save_receipts(receipts, path=None):
    """Save receipts to the JSON data file without risking a half-written file.

    The data is serialized to a sibling temporary file and then moved over
    the destination with ``os.replace``. If serialization fails, the
    previous file contents are left untouched.

    Args:
        receipts: JSON-serializable list of receipt dicts.
        path: Optional destination file. Defaults to the module-level
            DATA_FILE.

    Raises:
        TypeError: If ``receipts`` contains values json cannot serialize.
        OSError: If the file cannot be written or replaced.
    """
    if path is None:
        path = DATA_FILE

    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(receipts, f, indent=4)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up the partial file left behind by a failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
35
+
36
def receipts_are_equal(receipt1, receipt2):
    """Check if two receipts are the same, comparing only basic fields.

    Only ``merchant``, ``date`` and ``total_amount`` are compared; all other
    keys are ignored.

    Args:
        receipt1: First receipt dict (may be None or empty).
        receipt2: Second receipt dict (may be None or empty).

    Returns:
        True when both receipts are non-empty and their merchant
        (case-insensitive, surrounding whitespace ignored), date, and total
        amount all match. Numeric totals match when they differ by less than
        half a cent, which absorbs float representation noise. Non-numeric
        totals are compared with ``==``.
    """
    print("\n=== COMPARING RECEIPTS ===")

    # Guard before any .get() call so None or empty receipts return False
    # instead of raising AttributeError.
    if not receipt1 or not receipt2:
        print("One or both receipts are empty")
        return False

    print(f"Receipt1 merchant: {receipt1.get('merchant')}")
    print(f"Receipt2 merchant: {receipt2.get('merchant')}")

    # `or ''` also covers a merchant key that is present but null.
    merchant1 = str(receipt1.get('merchant') or '').strip().lower()
    merchant2 = str(receipt2.get('merchant') or '').strip().lower()
    date1 = receipt1.get('date')
    date2 = receipt2.get('date')
    amount1 = receipt1.get('total_amount')
    amount2 = receipt2.get('total_amount')

    print(f"Comparing merchant (lowercase): '{merchant1}' vs '{merchant2}'")
    print(f"Comparing date: '{date1}' vs '{date2}'")
    print(f"Comparing total_amount: '{amount1}' vs '{amount2}'")

    if merchant1 != merchant2:
        print("Merchant field doesn't match")
        return False

    if date1 != date2:
        print("Date field doesn't match")
        return False

    def _is_number(value):
        # bool is a subclass of int; treat it as non-numeric here.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    if _is_number(amount1) and _is_number(amount2):
        amounts_match = abs(amount1 - amount2) < 0.005
    else:
        amounts_match = amount1 == amount2

    if not amounts_match:
        print("Total amount field doesn't match")
        return False

    print("All basic fields match - receipts are equal")
    return True
73
+
74
def is_duplicate(new_receipt, receipts):
    """Report whether any stored receipt matches new_receipt.

    Each entry of ``receipts`` is compared against ``new_receipt`` with
    ``receipts_are_equal``; the scan stops at the first match. Progress is
    printed for every comparison.
    """
    def _summary(receipt):
        # One-line "merchant - date - total" description for the debug output.
        return f"{receipt.get('merchant')} - {receipt.get('date')} - {receipt.get('total_amount')}"

    print("\n=== CHECKING FOR DUPLICATES ===")
    print(f"Checking new receipt against {len(receipts)} existing receipts")

    for position, stored in enumerate(receipts):
        print(f"\nChecking against receipt {position}:")
        print(f" Stored: {_summary(stored)}")
        print(f" New: {_summary(new_receipt)}")

        if not receipts_are_equal(stored, new_receipt):
            continue

        print(f"DUPLICATE FOUND at index {position}!")
        return True

    print("No duplicates found")
    return False
90
+
91
def process_receipt(new_receipt):
    """Run the duplicate-based fraud check on a receipt and store it if new.

    Args:
        new_receipt: Receipt as a dict, or a JSON string encoding one. A dict
            argument is modified in place (its ``fraud_check`` key is set).

    Returns:
        The receipt dict with ``fraud_check`` set to True when an equal
        receipt is already stored (nothing is saved), or to False when it is
        new (it is then appended to the stored receipts and saved).
        ``{"error": "Invalid receipt format"}`` is returned when the input is
        not valid JSON or does not describe a JSON object.
    """
    print("\n=== PROCESSING RECEIPT ===")
    # Ensure we're working with a dict, not a string
    if isinstance(new_receipt, str):
        try:
            new_receipt = json.loads(new_receipt)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass.
            return {"error": "Invalid receipt format"}

    # Valid JSON can still be a list, number, or null; the code below needs
    # a dict for .get() and item assignment.
    if not isinstance(new_receipt, dict):
        return {"error": "Invalid receipt format"}

    # NOTE(review): load_receipts() returns [] when the data file cannot be
    # read or parsed, so saving below would overwrite such a file.
    receipts = load_receipts()
    print(f"Loaded {len(receipts)} existing receipts")

    if is_duplicate(new_receipt, receipts):
        print("SETTING FRAUD CHECK TO TRUE")
        new_receipt['fraud_check'] = True
        # Do not save, just return
    else:
        print("SETTING FRAUD CHECK TO FALSE - SAVING RECEIPT")
        new_receipt['fraud_check'] = False
        receipts.append(new_receipt)
        save_receipts(receipts)

    return new_receipt
114
+
115
+ # ---- Usage ----
116
+ # new_receipt = { ... } # Your receipt JSON here
117
+ # result = process_receipt(new_receipt)
118
+ # print(result)
models.py CHANGED
@@ -6,6 +6,7 @@ class ReceiptItem(BaseModel):
6
  amount: float
7
 
8
  class ReceiptData(BaseModel):
 
9
  merchant: str
10
  date: str
11
  total_amount: float
 
6
  amount: float
7
 
8
  class ReceiptData(BaseModel):
9
+ fraud_check: Optional[bool] = False # Optional field for fraud detection
10
  merchant: str
11
  date: str
12
  total_amount: float
pipeline.py CHANGED
@@ -7,6 +7,7 @@ import base64
7
  import json
8
  from models import ReceiptData, ChildFeeForm
9
  from form_fill import fill_child_fee_pdf
 
10
 
11
  load_dotenv()
12
  openai.api_key = os.getenv("OPENAI_API_KEY", "").strip()
@@ -20,9 +21,10 @@ reciept_system_prompt = (
20
  " description: str\n"
21
  " amount: float\n\n"
22
  "class ReceiptData(BaseModel):\n"
23
- " merchant: str\n"
 
24
  " date: str\n"
25
- " total_amount: float\n"
26
  " items: Optional[List[ReceiptItem]] = None\n"
27
  "- Extract only the above given information.\n"
28
  "- If a value is missing, set it to null, \"\", or an empty list as appropriate.\n"
@@ -54,14 +56,14 @@ fee_bill_system_prompt = (
54
 
55
 
56
 
57
- def pil_to_bytes(pil_img, quality=60):
58
  buf = BytesIO()
59
  pil_img.save(buf, format='JPEG', quality=quality)
60
  buf.seek(0)
61
  return buf
62
 
63
 
64
- def preprocess_image(pil_img, max_size=512):
65
  return pil_img.resize((max_size, max_size), Image.LANCZOS)
66
 
67
 
@@ -95,10 +97,19 @@ def extract_info(pil_img):
95
  if raw_output.startswith("json"):
96
  raw_output = raw_output[4:].strip()
97
  data = json.loads(raw_output)
98
- print(data)
99
  validated = ReceiptData(**data)
100
- json_block = json.dumps(validated.dict(), indent=2, ensure_ascii=False)
101
- return f"```json\n{json_block}\n```"
 
 
 
 
 
 
 
 
 
102
  except Exception as e:
103
  return f"```json\n{json.dumps({'error': str(e), 'raw_output': raw_output}, indent=2)}\n```"
104
 
 
7
  import json
8
  from models import ReceiptData, ChildFeeForm
9
  from form_fill import fill_child_fee_pdf
10
+ from fraud import process_receipt
11
 
12
  load_dotenv()
13
  openai.api_key = os.getenv("OPENAI_API_KEY", "").strip()
 
21
  " description: str\n"
22
  " amount: float\n\n"
23
  "class ReceiptData(BaseModel):\n"
24
+ " fraud_check: Optional[bool] = False # Optional field for fraud detection, always set to false"
25
+ " merchant: str #Only extract the brand name, not the branch name - Only the brand\n"
26
  " date: str\n"
27
+ " total_amount: float\n #Try your hardest to find the accurate total amount\n"
28
  " items: Optional[List[ReceiptItem]] = None\n"
29
  "- Extract only the above given information.\n"
30
  "- If a value is missing, set it to null, \"\", or an empty list as appropriate.\n"
 
56
 
57
 
58
 
59
def pil_to_bytes(pil_img, quality=70):
    """Encode pil_img as JPEG into an in-memory buffer.

    The image is saved with the given JPEG ``quality`` and the buffer is
    rewound to position 0 before being returned, so it can be read from
    the start.
    """
    stream = BytesIO()
    pil_img.save(stream, format='JPEG', quality=quality)
    stream.seek(0)
    return stream
64
 
65
 
66
def preprocess_image(pil_img, max_size=812):
    """Resize pil_img to a max_size x max_size square with Lanczos resampling.

    NOTE(review): both dimensions are forced to ``max_size``, so the input's
    aspect ratio is not preserved. Confirm this is intended for tall
    receipt photos.
    """
    target_size = (max_size, max_size)
    return pil_img.resize(target_size, Image.LANCZOS)
68
 
69
 
 
97
  if raw_output.startswith("json"):
98
  raw_output = raw_output[4:].strip()
99
  data = json.loads(raw_output)
100
+ # print(data)
101
  validated = ReceiptData(**data)
102
+ # json_block = json.dumps(validated.dict(), indent=2, ensure_ascii=False)
103
+
104
+ validated_dict = validated.dict() # This is a Python dict, perfect for fraud check
105
+ print(validated_dict)
106
+ result = process_receipt(validated_dict) # This expects a dict!
107
+
108
+
109
+ result_json = json.dumps(result, indent=2, ensure_ascii=True) # For display
110
+ print(result_json)
111
+ return f"```json\n{result_json}\n```"
112
+
113
  except Exception as e:
114
  return f"```json\n{json.dumps({'error': str(e), 'raw_output': raw_output}, indent=2)}\n```"
115