RajanMalaviya committed on
Commit
7334b9a
·
verified ·
1 Parent(s): da26974

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -289
app.py CHANGED
@@ -1,305 +1,289 @@
1
- from fastapi import FastAPI, File, UploadFile, HTTPException
2
- import pytesseract
3
- import cv2
4
- import os
5
- from PIL import Image
6
- import json
7
- import unicodedata
8
- from pdf2image import convert_from_bytes
9
- from pypdf import PdfReader
10
- import numpy as np
11
- from typing import List
12
- import io
13
- import logging
14
- import time
15
- import asyncio
16
- import psutil
17
- import cachetools
18
- import hashlib
19
- from huggingface_hub import InferenceClient
20
 
21
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Set Tesseract path
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# Get Hugging Face token from environment variable
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    logger.error("HF_TOKEN environment variable not set")
    # This check runs at import time, outside any request, so there is no
    # HTTP response an HTTPException could be turned into. Abort startup
    # with an ordinary error instead.
    raise RuntimeError("HF_TOKEN environment variable not set")

# Initialize Hugging Face Inference Client
client = InferenceClient(token=hf_token)
logger.info("Hugging Face Inference Client initialized")

# In-memory caches (1-hour TTL, at most 100 entries each)
raw_text_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
structured_data_cache = cachetools.TTLCache(maxsize=100, ttl=3600)
 
43
 
44
def log_memory_usage():
    """Return the current process's resident memory as a log-ready string.

    Despite the name, nothing is logged here; the caller embeds the returned
    text (for example ``"Memory usage: 123.45 MB"``) in its own log message.
    """
    mem_info = psutil.Process().memory_info()
    return f"Memory usage: {mem_info.rss / 1024 / 1024:.2f} MB"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
def get_file_hash(file_bytes):
    """Return the hexadecimal MD5 digest of *file_bytes*.

    The digest is only used as a cache key, not for any security purpose.
    """
    return hashlib.md5(file_bytes).hexdigest()
53
 
54
def get_text_hash(raw_text):
    """Return the hexadecimal MD5 digest of *raw_text* encoded as UTF-8.

    The digest is only used as a cache key, not for any security purpose.
    """
    return hashlib.md5(raw_text.encode('utf-8')).hexdigest()
 
 
 
 
 
57
 
58
async def process_image(img_bytes, filename, idx):
    """OCR a single image (JPG/JPEG/PNG) and return its text.

    Args:
        img_bytes: Raw bytes of the encoded image.
        filename: Name of the uploaded file; used in log messages only.
        idx: Index of the image within the file; used in log messages only.

    Returns:
        The recognised text followed by a newline, or ``""`` if decoding or
        OCR fails (the failure is logged, not raised).
    """
    start_time = time.time()
    logger.info(f"Starting OCR for {filename} image {idx}, {log_memory_usage()}")
    try:
        # Force three channels first: RGBA, palette and greyscale images
        # would otherwise make the RGB->BGR conversion below fail.
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
        img = img.resize((600, 400))  # Smaller for speed
        img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
        img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
        custom_config = r'--oem 1 --psm 6 -l eng'  # English only for speed
        # Tesseract is a blocking call; run it in a worker thread so the
        # event loop can keep serving other requests.
        page_text = await asyncio.to_thread(
            pytesseract.image_to_string, img_pil, config=custom_config
        )
        logger.info(f"Completed OCR for {filename} image {idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
        return page_text + "\n"
    except Exception as e:
        logger.error(f"OCR failed for {filename} image {idx}: {str(e)}, {log_memory_usage()}")
        return ""
74
 
75
async def process_pdf_page(img, page_idx):
    """OCR a single rendered PDF page and return its text.

    Args:
        img: PIL image of the page.
        page_idx: Index of the page; used in log messages only.

    Returns:
        The recognised text followed by a newline, or ``""`` if OCR fails
        (the failure is logged, not raised).
    """
    start_time = time.time()
    logger.info(f"Starting OCR for PDF page {page_idx}, {log_memory_usage()}")
    try:
        # Force three channels so the RGB->BGR conversion below cannot fail
        # on greyscale or alpha page renders.
        img = img.convert("RGB").resize((600, 400))  # Smaller for speed
        img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
        img_pil = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
        custom_config = r'--oem 1 --psm 6 -l eng'  # English only for speed
        # Tesseract is a blocking call; run it in a worker thread so the
        # event loop can keep serving other requests.
        page_text = await asyncio.to_thread(
            pytesseract.image_to_string, img_pil, config=custom_config
        )
        logger.info(f"Completed OCR for PDF page {page_idx}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
        return page_text + "\n"
    except Exception as e:
        logger.error(f"OCR failed for PDF page {page_idx}: {str(e)}, {log_memory_usage()}")
        return ""
91
-
92
def _extract_json_object(llm_output):
    """Parse the JSON object embedded in an LLM reply.

    The span from the first ``{`` to the last ``}`` is parsed, so replies
    wrapped in prose or markdown fences are still accepted.

    Raises:
        ValueError: If the reply contains no ``{...}`` span, or the span is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    text = (llm_output or "").strip()
    json_start = text.find("{")
    json_end = text.rfind("}") + 1
    if json_start == -1 or json_end <= json_start:
        raise ValueError("API output is not valid JSON")
    return json.loads(text[json_start:json_end])


async def process_with_llm(filename: str, raw_text: str):
    """Turn raw invoice text into structured fields via the Hugging Face API.

    Results are cached by the hash of the full (untruncated) text. Each
    configured model is tried in order; a rate-limited call (HTTP 429) is
    retried once after a short backoff, any other failure moves on to the
    next model.

    Args:
        filename: Name of the source file; used in log messages only.
        raw_text: Text extracted from the document. Only the first 2000
            characters are sent to the model.

    Returns:
        The parsed JSON object from the model, or ``{"error": ...}`` when
        every model fails.
    """
    start_time = time.time()
    logger.info(f"Starting LLM API processing for {filename}, {log_memory_usage()}")

    # Check structured data cache. A single .get() avoids the KeyError that
    # "in" followed by "[]" can hit if the entry expires in between.
    text_hash = get_text_hash(raw_text)
    cached = structured_data_cache.get(text_hash)
    if cached is not None:
        logger.info(f"Structured data cache hit for {filename}, {log_memory_usage()}")
        return cached

    # Truncate text for API
    if len(raw_text) > 2000:
        raw_text = raw_text[:2000]
        logger.info(f"Truncated raw text for {filename} to 2000 characters, {log_memory_usage()}")

    # Define models to try
    models = [
        {"model": "google/gemma-2-9b-it", "provider": "hyperbolic"},
        {"model": "mistral/Mixtral-8x22B-Instruct-v0.1", "provider": "auto"},
    ]

    # The prompt does not depend on the model or the attempt; build it once.
    prompt = (
        "\n"
        "Extract key invoice fields as JSON from the raw text. Support English. "
        "Detect currency (e.g., USD, INR). Output only valid JSON, with no "
        "additional text, comments, or markdown.\n"
        f"Raw text: {raw_text}\n"
        "Output JSON:\n"
        "{\n"
        '"currency": "",\n'
        '"Name_Client": "",\n'
        '"Products": [],\n'
        '"Subtotal": "",\n'
        '"Tax": "",\n'
        '"total": "",\n'
        '"invoice date": "",\n'
        '"invoice number": ""\n'
        "}\n"
    )

    for model_info in models:
        model = model_info["model"]
        provider = model_info["provider"]
        logger.info(f"Attempting LLM API call with model {model} and provider {provider}")
        for attempt in range(2):  # Retry once
            try:
                # Call Hugging Face Inference API (blocking client, so run
                # it in a worker thread).
                response = await asyncio.to_thread(
                    client.chat_completion,
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=256,
                    temperature=0.7,
                    provider=provider,
                )
                structured_data = _extract_json_object(response.choices[0].message.content)
                structured_data_cache[text_hash] = structured_data
                logger.info(f"LLM API processing for {filename} with {model}, attempt {attempt+1}, took {time.time() - start_time:.2f} seconds, {log_memory_usage()}")
                return structured_data
            except Exception as e:
                # Not every exception carries a response, and the attribute
                # may be None, so read the status code defensively.
                status_code = getattr(getattr(e, "response", None), "status_code", None)
                if status_code == 429:  # Rate limit
                    logger.warning(f"Rate limit hit for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
                    if attempt == 1:
                        break
                    await asyncio.sleep(2 ** attempt)  # Exponential backoff
                else:
                    logger.warning(f"LLM API processing failed for {filename} with {model}, attempt {attempt+1}: {str(e)}, {log_memory_usage()}")
                    break

    # If all models fail
    error_msg = "All LLM API models failed. Check model availability, authentication, or rate limits."
    logger.error(f"{error_msg} for {filename}, {log_memory_usage()}")
    return {"error": error_msg}
175
-
176
def _record_failure(output_json, filename, message):
    """Append a per-file failure entry with *message* to the response payload."""
    output_json["data"].append({
        "filename": filename,
        "structured_data": {"error": message},
        "error": message
    })


@app.post("/ocr")
async def extract_and_structure(files: List[UploadFile] = File(...)):
    """Extract text from uploaded PDFs/images and structure it with the LLM.

    For each file: validate the extension, read the bytes, obtain raw text
    (cache, then embedded PDF text, then OCR), normalize it, and pass it to
    ``process_with_llm``. Failures are recorded per file and never abort the
    batch.

    Returns:
        A dict with ``success`` (False only when every file failed),
        ``message`` (summary counts) and ``data`` (one entry per file with
        ``filename``, ``structured_data`` and ``error``).
    """
    output_json = {
        "success": True,
        "message": "",
        "data": []
    }
    success_count = 0
    fail_count = 0

    logger.info(f"Starting processing for {len(files)} files, {log_memory_usage()}")

    for file in files:
        total_start_time = time.time()
        logger.info(f"Processing file: {file.filename}, {log_memory_usage()}")

        # Validate file format (an upload may arrive without a filename)
        valid_extensions = {'.pdf', '.jpg', '.jpeg', '.png'}
        file_ext = os.path.splitext((file.filename or "").lower())[1]
        if file_ext not in valid_extensions:
            fail_count += 1
            _record_failure(output_json, file.filename, f"Unsupported file format: {file_ext}")
            logger.error(f"Unsupported file format for {file.filename}: {file_ext}")
            continue

        # Read file into memory
        try:
            file_start_time = time.time()
            file_bytes = await file.read()
            file_stream = io.BytesIO(file_bytes)
            file_hash = get_file_hash(file_bytes)
            logger.info(f"Read file {file.filename}, took {time.time() - file_start_time:.2f} seconds, size: {len(file_bytes)/1024:.2f} KB, {log_memory_usage()}")
        except Exception as e:
            fail_count += 1
            _record_failure(output_json, file.filename, f"Failed to read file: {str(e)}")
            logger.error(f"Failed to read file {file.filename}: {str(e)}, {log_memory_usage()}")
            continue

        # Check raw text cache. A single .get() avoids the KeyError that
        # "in" followed by "[]" can hit if the entry expires in between.
        raw_text = raw_text_cache.get(file_hash)
        if raw_text is not None:
            logger.info(f"Raw text cache hit for {file.filename}, {log_memory_usage()}")
        else:
            raw_text = ""
            if file_ext == '.pdf':
                # Try extracting embedded text
                try:
                    extract_start_time = time.time()
                    reader = PdfReader(file_stream)
                    for page in reader.pages:
                        text = page.extract_text()
                        if text:
                            raw_text += text + "\n"
                    logger.info(f"Embedded text extraction for {file.filename}, took {time.time() - extract_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
                except Exception as e:
                    logger.warning(f"Embedded text extraction failed for {file.filename}: {str(e)}, {log_memory_usage()}")

                # If no embedded text, perform OCR
                if not raw_text.strip():
                    try:
                        convert_start_time = time.time()
                        images = convert_from_bytes(file_bytes, poppler_path="/usr/local/bin", dpi=100)
                        logger.info(f"PDF to images conversion for {file.filename}, {len(images)} pages, took {time.time() - convert_start_time:.2f} seconds, {log_memory_usage()}")

                        ocr_start_time = time.time()
                        page_texts = []
                        for i, img in enumerate(images):
                            page_text = await process_pdf_page(img, i)
                            page_texts.append(page_text)
                        raw_text = "".join(page_texts)
                        logger.info(f"Total OCR for {file.filename}, took {time.time() - ocr_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
                    except Exception as e:
                        fail_count += 1
                        _record_failure(output_json, file.filename, f"OCR failed: {str(e)}")
                        logger.error(f"OCR failed for {file.filename}: {str(e)}, {log_memory_usage()}")
                        continue
            else:  # JPG/JPEG/PNG
                try:
                    ocr_start_time = time.time()
                    raw_text = await process_image(file_bytes, file.filename, 0)
                    logger.info(f"Image OCR for {file.filename}, took {time.time() - ocr_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
                except Exception as e:
                    fail_count += 1
                    _record_failure(output_json, file.filename, f"Image OCR failed: {str(e)}")
                    logger.error(f"Image OCR failed for {file.filename}: {str(e)}, {log_memory_usage()}")
                    continue

            # Normalize text; the text is cached only if normalization succeeds
            try:
                normalize_start_time = time.time()
                raw_text = unicodedata.normalize('NFKC', raw_text)
                raw_text = raw_text.encode().decode('utf-8')
                raw_text_cache[file_hash] = raw_text
                logger.info(f"Text normalization for {file.filename}, took {time.time() - normalize_start_time:.2f} seconds, text length: {len(raw_text)}, {log_memory_usage()}")
            except Exception as e:
                # The original referenced an undefined name `filename` here,
                # which turned a handled warning into a NameError.
                logger.warning(f"Text normalization failed for {file.filename}: {str(e)}, {log_memory_usage()}")

        # Process with LLM API
        structured_data = await process_with_llm(file.filename, raw_text)

        # process_with_llm reports total failure as {"error": ...}; count
        # that as a failed file instead of a success with an empty error.
        llm_error = structured_data.get("error") if isinstance(structured_data, dict) else None
        if llm_error:
            fail_count += 1
        else:
            success_count += 1
        output_json["data"].append({
            "filename": file.filename,
            "structured_data": structured_data,
            "error": str(llm_error) if llm_error else ""
        })

        logger.info(f"Total processing for {file.filename}, took {time.time() - total_start_time:.2f} seconds, {log_memory_usage()}")

    output_json["message"] = f"Processed {len(files)} files. {success_count} succeeded, {fail_count} failed."
    if fail_count > 0 and success_count == 0:
        output_json["success"] = False

    logger.info(f"Completed processing for {len(files)} files, {success_count} succeeded, {fail_count} failed, {log_memory_usage()}")
    return output_json
 
1
import re
from datetime import date, datetime
from difflib import SequenceMatcher
from typing import Any, Dict, List, Optional

import uvicorn
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel, Field
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# FastAPI application; title, description and version appear in the
# generated OpenAPI documentation.
app = FastAPI(
    title="Transaction Reconciliation API",
    description="Reconcile bank and credit card transactions using fuzzy matching",
    version="1.0.0",
)
14
 
15
+ # Pydantic Models
16
class Transaction(BaseModel):
    """A single bank or credit-card transaction supplied by the client."""

    id: str
    # Kept as a string; ReconciliationService parses it with "%Y-%m-%d".
    date: str
    # Compared by absolute value during matching, so the sign is ignored.
    amount: float
    description: str
    # Free-form type label, compared case-insensitively (e.g. "debit").
    type: str
    reference_number: Optional[str] = None
23
 
24
class ReconciliationInput(BaseModel):
    """Request body: the two transaction lists to reconcile with each other."""

    bank_transactions: List[Transaction]
    credit_card_transactions: List[Transaction]
27
 
28
class MatchedTransaction(BaseModel):
    """A bank transaction paired with a credit-card transaction."""

    bank_id: str
    credit_card_id: str
    # Overall match score; validated to lie in [0, 1].
    match_score: float = Field(..., ge=0, le=1)
    # Human-readable, comma-separated list of the signals that agreed.
    match_reason: str
33
 
34
class UnmatchedTransaction(BaseModel):
    """A transaction for which no counterpart was found.

    Carries the same fields as ``Transaction``.
    """

    id: str
    date: str
    amount: float
    description: str
    type: str
    reference_number: Optional[str] = None
41
 
42
class ReconciliationOutput(BaseModel):
    """Response body: matched pairs plus the leftovers from each input list."""

    matched_transactions: List[MatchedTransaction]
    unmatched_bank_transactions: List[UnmatchedTransaction]
    unmatched_credit_card_transactions: List[UnmatchedTransaction]
46
 
47
class ReconciliationService:
    """Pair bank transactions with credit-card transactions by weighted score.

    Each candidate pair is scored from four signals: amount (weight 0.4),
    reference number (0.3), description similarity (0.2) and date proximity
    (0.1). A pair is reported as a match when the transaction types are
    compatible and the score reaches ``MATCH_THRESHOLD``. Every credit-card
    transaction is used in at most one match.
    """

    # Minimum total score for a pair to be considered a match.
    MATCH_THRESHOLD = 0.6

    # (bank type, credit-card type) pairs that count as compatible, in
    # addition to identical types.
    _TYPE_MAPPINGS = frozenset({
        ('debit', 'payment'),
        ('credit', 'receipt'),
        ('withdrawal', 'payment'),
        ('deposit', 'receipt'),
    })

    # Patterns that mark two descriptions as related when both contain them.
    # Compiled once instead of on every comparison. A pattern with a capture
    # group additionally requires the captured identifier to be equal.
    _COMMON_PATTERNS = (
        re.compile(r'uber'),
        re.compile(r'amazon|amzn'),
        re.compile(r'invoice\s*#?\s*(\d+)'),
        re.compile(r'payment.*invoice'),
        re.compile(r'trip\s*id\s*(\d+)'),
    )

    def __init__(self,
                 description_threshold: float = 0.7,
                 amount_tolerance: float = 0.01,
                 max_date_diff_days: int = 7):
        """Store the matching parameters.

        Args:
            description_threshold: Similarity at or above which descriptions
                are reported as matching in the match reason.
            amount_tolerance: Largest absolute difference between amounts
                that still counts as an exact amount match.
            max_date_diff_days: Largest date gap, in days, that still earns
                a date score.
        """
        self.description_threshold = description_threshold
        self.amount_tolerance = amount_tolerance
        self.max_date_diff_days = max_date_diff_days

    def fuzzy_match_description(self, desc1: str, desc2: str) -> float:
        """Return a similarity score in [0, 1] for two descriptions."""
        # Clean descriptions for better matching
        clean_desc1 = self._clean_description(desc1.lower())
        clean_desc2 = self._clean_description(desc2.lower())

        similarity = SequenceMatcher(None, clean_desc1, clean_desc2).ratio()

        # A shared well-known pattern lifts the score to at least 0.8.
        if self._check_common_patterns(clean_desc1, clean_desc2):
            similarity = max(similarity, 0.8)

        return similarity

    def _clean_description(self, description: str) -> str:
        """Replace punctuation with spaces and collapse runs of whitespace."""
        cleaned = re.sub(r'[^\w\s]', ' ', description)
        return re.sub(r'\s+', ' ', cleaned).strip()

    def _check_common_patterns(self, desc1: str, desc2: str) -> bool:
        """Return True if both descriptions share a common transaction pattern.

        For patterns that capture an identifier (invoice number, trip id) the
        identifiers must be equal; otherwise two different invoices would be
        treated as the same transaction.
        """
        for pattern in self._COMMON_PATTERNS:
            first = pattern.search(desc1)
            second = pattern.search(desc2)
            if not (first and second):
                continue
            if pattern.groups and first.group(1) != second.group(1):
                continue
            return True
        return False

    def calculate_date_difference(self, date1: str, date2: str) -> float:
        """Return the absolute gap in days between two ``YYYY-MM-DD`` dates.

        Returns ``float('inf')`` when either value cannot be parsed, so an
        unparseable date never falls inside the allowed window.
        """
        try:
            d1 = datetime.strptime(date1, "%Y-%m-%d").date()
            d2 = datetime.strptime(date2, "%Y-%m-%d").date()
        except (ValueError, TypeError):
            return float('inf')
        return abs((d1 - d2).days)

    def amounts_match(self, amount1: float, amount2: float) -> bool:
        """Return True if the absolute amounts differ by at most the tolerance."""
        return abs(abs(amount1) - abs(amount2)) <= self.amount_tolerance

    def types_match(self, bank_type: str, cc_type: str) -> bool:
        """Return True if the two transaction types are compatible.

        Types are compared case-insensitively; identical types always match.
        """
        bank_key = bank_type.lower()
        cc_key = cc_type.lower()
        return (bank_key, cc_key) in self._TYPE_MAPPINGS or bank_key == cc_key

    def calculate_match_score(self, bank_txn: "Transaction", cc_txn: "Transaction") -> tuple[float, str]:
        """Return ``(score, reason)`` for a candidate pair.

        ``score`` lies in [0, 1]; ``reason`` is a comma-separated list of the
        signals that agreed, or ``"partial match"`` if none did.
        """
        scores = []
        reasons = []

        # Amount matching (weight: 0.4)
        if self.amounts_match(bank_txn.amount, cc_txn.amount):
            scores.append(0.4)
            reasons.append("amounts match")
        else:
            larger = max(abs(bank_txn.amount), abs(cc_txn.amount))
            amount_diff = abs(abs(bank_txn.amount) - abs(cc_txn.amount))
            # `larger` is zero only when both amounts are zero; guard the
            # division so that case can never raise.
            amount_score = max(0, 0.4 * (1 - amount_diff / larger)) if larger else 0
            scores.append(amount_score)
            if amount_score > 0.2:
                reasons.append("amounts close")

        # Reference number matching (weight: 0.3)
        if (bank_txn.reference_number and cc_txn.reference_number and
                bank_txn.reference_number == cc_txn.reference_number):
            scores.append(0.3)
            reasons.append("reference numbers match")

        # Description matching (weight: 0.2)
        desc_score = self.fuzzy_match_description(bank_txn.description, cc_txn.description)
        scores.append(0.2 * desc_score)
        if desc_score >= self.description_threshold:
            reasons.append("descriptions match")

        # Date matching (weight: 0.1)
        date_diff = self.calculate_date_difference(bank_txn.date, cc_txn.date)
        if date_diff <= self.max_date_diff_days:
            if self.max_date_diff_days > 0:
                scores.append(0.1 * (1 - date_diff / self.max_date_diff_days))
            else:
                # A zero-day window admits only same-day pairs; give them
                # full credit instead of dividing by zero.
                scores.append(0.1)
            reasons.append("dates match" if date_diff <= 1 else "dates close")

        # Clamp so float rounding can never push the score above 1.
        total_score = min(1.0, sum(scores))
        reason = ", ".join(reasons) if reasons else "partial match"

        return total_score, reason

    def _pair_transactions(self, bank_txns, cc_txns):
        """Greedily pair each bank transaction with its best free counterpart.

        Bank transactions are processed in input order. A credit-card
        transaction that has already been paired is skipped, so it can never
        appear in two matches.

        Returns:
            A list of ``(bank_index, cc_index, score, reason)`` tuples.
        """
        pairs = []
        taken = set()
        for bank_idx, bank_txn in enumerate(bank_txns):
            best_idx = None
            best_score = 0.0
            best_reason = ""
            for cc_idx, cc_txn in enumerate(cc_txns):
                if cc_idx in taken:
                    continue
                # Check if types match first
                if not self.types_match(bank_txn.type, cc_txn.type):
                    continue
                score, reason = self.calculate_match_score(bank_txn, cc_txn)
                if score >= self.MATCH_THRESHOLD and score > best_score:
                    best_idx, best_score, best_reason = cc_idx, score, reason
            if best_idx is not None:
                taken.add(best_idx)
                pairs.append((bank_idx, best_idx, best_score, best_reason))
        return pairs

    @staticmethod
    def _to_unmatched(txn: "Transaction") -> "UnmatchedTransaction":
        """Copy a transaction's fields into an ``UnmatchedTransaction``."""
        return UnmatchedTransaction(
            id=txn.id,
            date=txn.date,
            amount=txn.amount,
            description=txn.description,
            type=txn.type,
            reference_number=txn.reference_number,
        )

    def reconcile(self, input_data: "ReconciliationInput") -> "ReconciliationOutput":
        """Reconcile the two transaction lists in *input_data*.

        Returns the matched pairs together with the transactions from each
        list that were not paired. Transactions are tracked by position, so
        duplicate entries are handled independently.
        """
        bank_txns = list(input_data.bank_transactions)
        cc_txns = list(input_data.credit_card_transactions)

        pairs = self._pair_transactions(bank_txns, cc_txns)
        matched_bank = {bank_idx for bank_idx, _, _, _ in pairs}
        matched_cc = {cc_idx for _, cc_idx, _, _ in pairs}

        matched_transactions = [
            MatchedTransaction(
                bank_id=bank_txns[bank_idx].id,
                credit_card_id=cc_txns[cc_idx].id,
                match_score=round(score, 2),
                match_reason=reason,
            )
            for bank_idx, cc_idx, score, reason in pairs
        ]

        return ReconciliationOutput(
            matched_transactions=matched_transactions,
            unmatched_bank_transactions=[
                self._to_unmatched(txn)
                for idx, txn in enumerate(bank_txns) if idx not in matched_bank
            ],
            unmatched_credit_card_transactions=[
                self._to_unmatched(txn)
                for idx, txn in enumerate(cc_txns) if idx not in matched_cc
            ],
        )
231
 
232
# Shared service instance with default matching parameters, used by the
# /reconcile endpoint.
reconciliation_service = ReconciliationService()
 
234
 
235
@app.get("/")
async def root():
    """Health check endpoint"""
    return {
        "message": "Transaction Reconciliation API is running",
        "status": "healthy",
        "version": "1.0.0"
    }
243
 
244
@app.post("/reconcile", response_model=ReconciliationOutput)
async def reconcile_transactions(input_data: ReconciliationInput):
    """
    Reconcile bank and credit card transactions

    This endpoint matches transactions based on:
    - Amount similarity (within tolerance)
    - Date proximity (within 7 days)
    - Description fuzzy matching (70% threshold)
    - Transaction type compatibility
    - Reference number exact matching
    """
    try:
        return reconciliation_service.reconcile(input_data)
    except Exception as e:
        # Top-level boundary: report any failure as a 500 and keep the
        # original exception chained for the server logs.
        raise HTTPException(status_code=500, detail=f"Reconciliation failed: {str(e)}") from e
 
261
 
262
@app.get("/health")
async def health_check():
    """Health check for deployment"""
    return {"status": "ok", "service": "Transaction Reconciliation API"}
266
+
267
@app.post("/reconcile/custom", response_model=ReconciliationOutput)
async def reconcile_with_custom_params(
    input_data: ReconciliationInput,
    # Query (not pydantic's Field) is what FastAPI expects for declaring
    # validated query-string parameters on a path operation.
    description_threshold: float = Query(0.7, ge=0, le=1, description="Fuzzy match threshold for descriptions"),
    amount_tolerance: float = Query(0.01, ge=0, description="Maximum allowed difference in amounts"),
    max_date_diff_days: int = Query(7, ge=0, description="Maximum allowed date difference in days"),
):
    """
    Reconcile transactions with custom matching parameters
    """
    try:
        # A fresh service per request so custom parameters never leak into
        # other requests.
        custom_service = ReconciliationService(
            description_threshold=description_threshold,
            amount_tolerance=amount_tolerance,
            max_date_diff_days=max_date_diff_days,
        )
        return custom_service.reconcile(input_data)
    except Exception as e:
        # Top-level boundary: report any failure as a 500 and keep the
        # original exception chained for the server logs.
        raise HTTPException(status_code=500, detail=f"Reconciliation failed: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
# Run a development server when the module is executed directly.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)