rajkhanke committed on
Commit
2b3d992
·
verified ·
1 Parent(s): a47da0f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +874 -0
  2. templates/index.html +721 -0
app.py ADDED
@@ -0,0 +1,874 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import random
3
+ import pandas as pd
4
+ import json
5
+ from textblob import Word
6
+ from rapidfuzz import fuzz as rapidfuzz_fuzz
7
+ from fuzzywuzzy import fuzz as fuzzywuzzy_fuzz
8
+ from Levenshtein import ratio as levenshtein_ratio, jaro_winkler as levenshtein_jaro_winkler
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ from flask import Flask, request, render_template, send_file, redirect, url_for, flash, jsonify
12
+ import io
13
+ import os
14
+ import numpy as np
15
+ from wordcloud import WordCloud
16
+ import textdistance
17
+ import chardet
18
+ # --- New import for SBERT & parallel processing ---
19
+ from sentence_transformers import SentenceTransformer
20
+ import concurrent.futures
21
+ from tqdm import tqdm
22
+
23
app = Flask(__name__)

# Module-level state written by the upload route and read by the download
# routes: the latest result table and untouched copies of both uploads.
# NOTE(review): being plain globals, these are shared by every client served
# by this process.
latest_results_df = None
original_df1 = None
original_df2 = None

# The session-signing key must not be committed to source control. It is taken
# from the FLASK_SECRET_KEY environment variable; when that is unset a random
# per-process key is generated, which invalidates sessions (and pending flash
# messages) on restart.
app.secret_key = os.environ.get("FLASK_SECRET_KEY") or os.urandom(24).hex()

# File to store persistent feedback mapping
FEEDBACK_FILE = "feedback_mapping.json"
34
+
35
+
36
+ #########################################
37
+ # Persistent Feedback Storage Functions
38
+ #########################################
39
def load_feedback_mapping():
    """Load the persisted feedback mapping from FEEDBACK_FILE.

    Returns:
        dict: The JSON object stored in FEEDBACK_FILE. An empty dict is
        returned when the file does not exist, cannot be parsed, or holds
        something other than a JSON object.
    """
    # Open directly instead of checking os.path.exists first, so the file
    # cannot disappear between the check and the read.
    try:
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
            mapping = json.load(f)
    except FileNotFoundError:
        return {}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors:
        # a corrupt file is treated as "no feedback yet".
        return {}
    # Callers index the result by invoice number, so anything but a dict
    # (e.g. a JSON list) is unusable and treated as empty.
    return mapping if isinstance(mapping, dict) else {}
49
+
50
+
51
def save_feedback_mapping(mapping):
    """Write *mapping* to FEEDBACK_FILE as JSON indented by four spaces.

    Any previous contents of the file are replaced.
    """
    with open(FEEDBACK_FILE, "w") as out_file:
        json.dump(mapping, out_file, indent=4)
55
+
56
+
57
def update_feedback_mapping(invoice1, invoice2):
    """Record that *invoice1* maps to *invoice2* and persist the mapping.

    The stored mapping is re-read, the entry is added (or overwritten), and
    the whole mapping is written back.
    """
    current = load_feedback_mapping()
    current.update({invoice1: invoice2})
    save_feedback_mapping(current)
62
+
63
+
64
+ #########################################
65
+ # SBERT Initialization and Helper Function
66
+ #########################################
67
# Sentence-BERT encoder, created once at import time and reused by
# generate_embeddings for every request.
model = SentenceTransformer('all-mpnet-base-v2')


def generate_embeddings(df, column_name):
    """Encode every value of ``df[column_name]`` with the module-level model.

    The column is converted to a list and passed to ``model.encode`` with
    ``normalize_embeddings=True``; the encoder's return value is handed back
    unchanged.
    """
    return model.encode(df[column_name].tolist(), normalize_embeddings=True)
74
+
75
+
76
+ #########################################
77
+ # Invoice Matching Functions (Part 1)
78
+ #########################################
79
def remove_year_patterns(s):
    """Strip year-like tokens from an invoice string.

    Missing values (as judged by ``pd.isna``) give ``""``. Otherwise the
    value is converted to ``str`` and four substitutions are applied in
    order: year ranges such as ``2019-20`` (optionally parenthesised), a
    number following ``,``/``;``, a number followed by ``,``/``;``, and a
    standalone four-digit year starting with 19 or 20. The result is
    whitespace-trimmed.
    """
    if pd.isna(s):
        return ""
    text = str(s)
    year_patterns = (
        r'\(?\b(?:19|20)?\d{2,4}\s*[-/]\s*(?:19|20)?\d{2,4}\b\)?',
        r'[,;]\s*\b(?:19|20)?\d{2,4}\b',
        r'\b(?:19|20)?\d{2,4}\b[,;]',
        r'\b(19|20)\d{2}\b',
    )
    for pattern in year_patterns:
        text = re.sub(pattern, '', text)
    return text.strip()
88
+
89
+
90
def remove_leading_and_adjacent_zeros(s):
    """Drop zero padding from a string.

    First removes runs of zeros that start at a word boundary and are
    followed by another digit, then removes any single zero that sits
    directly before a letter.
    """
    without_leading = re.sub(r'\b0+(?=\d)', '', s)
    return re.sub(r'0(?=[A-Za-z])', '', without_leading)
94
+
95
+
96
def remove_prefix_dash(s):
    """Remove a leading alphanumeric run together with the dash after it.

    Only a match anchored at the start of the string is removed; strings
    without such a prefix are returned unchanged.
    """
    leading_token = re.compile(r'^[A-Za-z0-9]+[-]')
    return leading_token.sub('', s)
98
+
99
+
100
def normalize_for_comparison(s):
    """Return a canonical lowercase form of an invoice value.

    Missing values give ``""``. Otherwise the value is stringified,
    lower-cased and trimmed; whitespace, ``-``, ``_``, ``,`` and ``/`` are
    removed; and a letter ``o`` directly next to a digit is replaced by
    ``0``.
    """
    if pd.isna(s):
        return ""
    lowered = str(s).lower().strip()
    compact = re.sub(r'[\s\-\_,/]+', '', lowered)
    return re.sub(r'(?<=\d)o|o(?=\d)', '0', compact)
107
+
108
+
109
def extract_invoice_parts(invoice):
    """Split an invoice into ``(prefix, numeric_core, suffix)``.

    Non-alphanumeric characters are discarded first. The remainder must be
    letters, then one run of digits, then letters; prefix and suffix may be
    empty strings. If the remainder does not have that shape the result is
    ``(None, None, None)``.
    """
    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', invoice)
    parsed = re.match(r'^([a-zA-Z]*)(\d+)([a-zA-Z]*)$', alnum_only)
    if parsed is None:
        return None, None, None
    prefix, numeric_core, suffix = parsed.groups()
    return prefix or "", numeric_core, suffix or ""
118
+
119
+
120
def robust_preprocess_invoice(invoice):
    """Reduce a raw invoice value to a compact ``prefix + number + suffix``.

    Steps: drop year patterns, lower-case, remove a "bill no" / "bill #"
    label, keep the ``-``/``/``-separated segment with the most digits,
    strip known invoice-label words from both ends, remove separators and
    zero padding, and finally normalise the numeric core through ``int``.

    Args:
        invoice: Raw invoice value; anything ``pd.isna`` reports as missing
            yields ``""``.

    Returns:
        str: The normalised invoice. When the cleaned segment is not of the
        form letters-digits-letters it is returned as cleaned, without the
        numeric normalisation.
    """
    if pd.isna(invoice):
        return ""
    invoice = str(invoice)
    invoice = remove_year_patterns(invoice)
    invoice = invoice.lower()
    invoice = re.sub(r'bill\s*(?:no\.?|#)\s*:?', '', invoice, flags=re.IGNORECASE)
    # NOTE(review): the label was just removed by the substitution above, so
    # this search can only match in pathological inputs. Kept to preserve
    # behaviour; confirm whether the search was meant to run first.
    bill_match = re.search(r'bill\s*(?:no\.?|#)\s*:?\s*([0-9a-zA-Z]+)', invoice, flags=re.IGNORECASE)
    if bill_match:
        best_seg = bill_match.group(1)
    else:
        segments = [seg.strip() for seg in re.split(r'[-/]', invoice) if seg.strip()]
        # The segment holding the most digits is taken as the invoice number.
        best_seg = max(segments, key=lambda seg: len(re.findall(r'\d', seg))) if segments else invoice
    best_seg = best_seg.replace("_", "")
    KNOWN_INVOICE_VARIANTS = [
        "inv", "invoice", "invoce", "in", "inve", "salesrefno",
        "ompl", "insc", "indbo", "kolbo", "thn", "invoiceno", "sales"
    ]
    # Strip longer labels first: in list order "inv" would fire before
    # "invoice"/"invoiceno" and leave fragments such as "oice123" behind.
    for variant in sorted(KNOWN_INVOICE_VARIANTS, key=len, reverse=True):
        label = re.escape(variant)
        best_seg = re.sub(r'^' + label, '', best_seg, flags=re.IGNORECASE)
        best_seg = re.sub(label + r'$', '', best_seg, flags=re.IGNORECASE)
    best_seg = re.sub(r'[\s\-\_,/]+', '', best_seg)
    best_seg = remove_leading_and_adjacent_zeros(best_seg)
    prefix, core, suffix = extract_invoice_parts(best_seg)
    if prefix is None:
        return best_seg
    if core:
        try:
            core = str(int(core))
        except ValueError:
            # Fallback when int() rejects the digit string.
            core = core.lstrip("0") or "0"
    return prefix + core + suffix
153
+
154
+
155
def extract_numeric_core(invoice):
    """Return the longest run of digits in *invoice*, or ``""`` if none.

    When several runs share the maximum length, the first one wins.
    """
    digit_runs = re.findall(r'\d+', invoice)
    if not digit_runs:
        return ""
    return max(digit_runs, key=len)
158
+
159
+
160
def determine_invoice_type(invoice):
    """Classify an invoice by which letter parts surround its number.

    Returns ``"other"`` when ``extract_invoice_parts`` cannot split the
    value; otherwise ``"core_only"``, ``"prefix_only"``, ``"suffix_only"``
    or ``"both"`` depending on whether the prefix and suffix are non-empty.
    """
    prefix, _core, suffix = extract_invoice_parts(invoice)
    if prefix is None:
        return "other"
    labels = {
        (False, False): "core_only",
        (True, False): "prefix_only",
        (False, True): "suffix_only",
        (True, True): "both",
    }
    return labels[(prefix != "", suffix != "")]
173
+
174
+
175
def check_boost_condition(s1, s2):
    """Tell whether two invoices share a numeric core but differ in affixes.

    Both values are preprocessed and split. The result is ``False`` when
    either cannot be split or the numeric cores differ. It is ``True`` when
    exactly one side carries a prefix or a suffix, or when one side's prefix
    scores above 90 (``rapidfuzz`` ratio) against the other side's suffix.
    """
    first = robust_preprocess_invoice(s1)
    second = robust_preprocess_invoice(s2)
    prefix_a, core_a, suffix_a = extract_invoice_parts(first)
    prefix_b, core_b, suffix_b = extract_invoice_parts(second)
    if prefix_a is None or prefix_b is None or core_a != core_b:
        return False

    one_sided = {"prefix_only", "suffix_only"}
    kind_a = determine_invoice_type(first)
    kind_b = determine_invoice_type(second)
    if kind_a == "core_only" and kind_b in one_sided:
        return True
    if kind_b == "core_only" and kind_a in one_sided:
        return True
    # Exactly one side has a prefix / exactly one side has a suffix.
    if bool(prefix_a) != bool(prefix_b):
        return True
    if bool(suffix_a) != bool(suffix_b):
        return True
    # A prefix on one side that closely resembles the suffix on the other.
    for prefix, suffix in ((prefix_a, suffix_b), (prefix_b, suffix_a)):
        if prefix and suffix and rapidfuzz_fuzz.ratio(prefix, suffix) > 90:
            return True
    return False
196
+
197
+
198
def levenshtein_sim(s1, s2):
    """Return ``rapidfuzz``'s ``fuzz.ratio`` score for the two strings."""
    score = rapidfuzz_fuzz.ratio(s1, s2)
    return score
200
+
201
+
202
def jaro_winkler_sim(s1, s2):
    """Return ``textdistance``'s normalised Jaro-Winkler similarity times 100."""
    normalized = textdistance.jaro_winkler.normalized_similarity(s1, s2)
    return normalized * 100
204
+
205
+
206
def rapidfuzz_sim(s1, s2):
    """Return ``rapidfuzz``'s ``fuzz.ratio`` score for the two strings.

    This is the same computation as ``levenshtein_sim``.
    """
    score = rapidfuzz_fuzz.ratio(s1, s2)
    return score
208
+
209
+
210
def fuzzbuzz_sim(s1, s2):
    """Return ``rapidfuzz``'s ``fuzz.token_set_ratio`` score for the two strings."""
    score = rapidfuzz_fuzz.token_set_ratio(s1, s2)
    return score
212
+
213
+
214
def hamming_sim(s1, s2):
    """Return the percentage of positions at which the two strings agree.

    Characters are compared position by position over the shorter string
    and the count is divided by the length of the longer one. Two empty
    strings score 100.
    """
    if not (s1 or s2):
        return 100
    longest = max(len(s1), len(s2))
    matches = 0
    for left, right in zip(s1, s2):
        if left == right:
            matches += 1
    return (matches / longest) * 100
220
+
221
+
222
def jaccard_sim(s1, s2):
    """Return the Jaccard similarity of the two strings' character sets, times 100.

    Two empty strings score 100.
    """
    chars_a = set(s1)
    chars_b = set(s2)
    union = chars_a | chars_b
    if not union:
        return 100
    return (len(chars_a & chars_b) / len(union)) * 100
227
+
228
+
229
def cosine_sim(s1, s2):
    """Return the cosine similarity of character n-gram TF-IDF vectors, times 100.

    The vectoriser uses character n-grams of length 2 to 4 fitted on just
    the two inputs. ``0.0`` is returned when either string is blank, when
    no features are produced, or when the vectoriser raises ``ValueError``.
    """
    if not (s1.strip() and s2.strip()):
        return 0.0
    ngram_vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(2, 4))
    try:
        matrix = ngram_vectorizer.fit_transform([s1, s2])
        if matrix.shape[1] == 0:
            return 0.0
        similarity = cosine_similarity(matrix[0:1], matrix[1:2])[0][0]
        return similarity * 100
    except ValueError:
        return 0.0
241
+
242
+
243
def custom_trailing_match(s1, s2):
    """Check whether *s2* is the tail of *s1*'s zero-compressed digits.

    Only applies when *s1* starts with ``p`` or ``jp`` (case-insensitive).
    All digits of *s1* are collected; if there are more than two, zeros
    between the first and last digit are dropped. The result is ``True``
    when that digit string ends with *s2*.

    Args:
        s1: Invoice carrying the p/jp prefix; converted with ``str``.
        s2: Candidate trailing value; converted with ``str``.

    Returns:
        bool: ``True`` on a trailing match. An empty *s2* never matches,
        because every string trivially ends with the empty string.
    """
    s1 = str(s1)
    s2 = str(s2)
    if not s2:
        return False
    if not s1.lower().startswith(("p", "jp")):
        return False
    digits = re.sub(r'\D', '', s1)
    if len(digits) <= 2:
        modified = digits
    else:
        # Keep the first and last digit; squeeze zeros out of the middle.
        modified = digits[0] + digits[1:-1].replace("0", "") + digits[-1]
    return modified.endswith(s2)
256
+
257
+
258
def combined_similarity(s1, s2):
    """Score how likely two invoice strings refer to the same invoice.

    Args:
        s1: First invoice string.
        s2: Second invoice string.

    Returns:
        ``100`` when the inputs are equal ignoring case and surrounding
        whitespace; ``95`` when ``custom_trailing_match`` accepts the
        preprocessed pair; otherwise the mean of seven similarity measures
        on the preprocessed strings, raised to at least 90 when the numeric
        cores agree and only prefixes/suffixes differ, and halved when the
        longest digit runs differ.
    """
    if s1.strip().lower() == s2.strip().lower():
        return 100

    s1_proc = robust_preprocess_invoice(s1)
    s2_proc = robust_preprocess_invoice(s2)

    if custom_trailing_match(s1_proc, s2_proc):
        return 95

    scores = [
        levenshtein_sim(s1_proc, s2_proc),
        jaro_winkler_sim(s1_proc, s2_proc),
        rapidfuzz_sim(s1_proc, s2_proc),
        fuzzbuzz_sim(s1_proc, s2_proc),
        hamming_sim(s1_proc, s2_proc),
        jaccard_sim(s1_proc, s2_proc),
        cosine_sim(s1_proc, s2_proc)
    ]
    avg_score = sum(scores) / len(scores)

    p1, core1, sfx1 = extract_invoice_parts(s1_proc)
    p2, core2, sfx2 = extract_invoice_parts(s2_proc)
    if core1 and core2 and core1 == core2:
        if (p1 and not p2) or (p2 and not p1) or (sfx1 and not sfx2) or (sfx2 and not sfx1) or (p1 and sfx2) or (p2 and sfx1):
            avg_score = max(avg_score, 90)

    # Reuse the module-level helper rather than a nested duplicate of it.
    num1 = extract_numeric_core(s1_proc)
    num2 = extract_numeric_core(s2_proc)
    try:
        numbers_differ = int(num1) != int(num2)
    except ValueError:
        # At least one side has no digits (empty string): compare as text.
        numbers_differ = num1 != num2
    if numbers_differ:
        avg_score *= 0.5

    if avg_score >= 100:
        # NOTE(review): a perfect average on non-identical input is replaced
        # by a random value in [90, 99], so repeated runs can report
        # different scores. Kept as-is; consider a fixed cap instead.
        avg_score = random.uniform(90, 99)

    return avg_score
302
+
303
+
304
def generate_review_status(score):
    """Return ``"No Review Needed"`` for scores above 50, else ``"Needs Review"``."""
    if score > 50:
        return "No Review Needed"
    return "Needs Review"
306
+
307
+
308
def generate_recommendation(score):
    """Map a score to a recommendation label.

    Exactly 100 is ``"Exact Match"``, 50 and above is ``"Partial Match"``,
    anything lower is ``"Unmatched"``.
    """
    if score == 100:
        return "Exact Match"
    return "Partial Match" if score >= 50 else "Unmatched"
315
+
316
+
317
def generate_reason(inv1, inv2, score):
    """Produce a human-readable explanation for a match result.

    The checks run in a fixed order and the first one that applies decides
    the message: trailing-match pattern, case-insensitive equality,
    structural comparison of prefix/number/suffix, then heuristics that
    depend on whether *score* is at least 50.

    Args:
        inv1: First invoice value; converted with ``str``.
        inv2: Second invoice value; converted with ``str``.
        score: Similarity score for the pair.

    Returns:
        str: One explanatory sentence.
    """
    inv1 = str(inv1)
    inv2 = str(inv2)
    if custom_trailing_match(inv1, inv2):
        return "Custom trailing-match pattern detected."
    if inv1.lower() == inv2.lower():
        return "Exact match of invoice numbers."
    p1, core1, sfx1 = extract_invoice_parts(normalize_for_comparison(inv1))
    p2, core2, sfx2 = extract_invoice_parts(normalize_for_comparison(inv2))
    if core1 is not None and core2 is not None:
        if core1 != core2:
            # Test for padding before reporting a plain mismatch: checked
            # after the mismatch return, this branch could never be reached.
            if core1.lstrip("0") == core2.lstrip("0"):
                return "Numeric padding mismatch (leading zeros removed)."
            return "Numeric core does not match."
        if p1 and p2 and p1 != p2:
            return "Different prefixes found, affecting similarity."
        if sfx1 and sfx2 and sfx1 != sfx2:
            return "Different suffixes detected, leading to mismatch."
        # Symmetric: either invoice may be the one carrying the extra part.
        if bool(p1) != bool(p2):
            return "Partial matching: one invoice has a prefix while the other does not."
        if bool(sfx1) != bool(sfx2):
            return "Partial matching: one invoice has a suffix while the other does not."
    if score >= 50:
        for prefix, suffix in ((p1, sfx2), (p2, sfx1)):
            if prefix and suffix and rapidfuzz_fuzz.ratio(prefix, suffix) > 90:
                return "Prefix in one invoice matches suffix in the other."
        if any(sep in inv1 or sep in inv2 for sep in [" ", "-", "_"]):
            return "Strong match; only minor formatting variations."
        if inv1 in inv2 or inv2 in inv1:
            return "One invoice is fully contained in the other."
        return "Invoices match with minimal differences."
    # Below 50: fall back to progressively weaker surface heuristics.
    if any(sep in inv1 or sep in inv2 for sep in [" ", "-", "_"]):
        return "Formatting issue due to spaces or separators."
    if rapidfuzz_fuzz.ratio(inv1, inv2) > 70:
        return "Minor spelling variation detected."
    if set(inv1) == set(inv2):
        return "Character positions swapped."
    if abs(len(inv1) - len(inv2)) <= 2:
        return "Possible OCR error or scanning issue."
    if any(ch.isdigit() for ch in inv1) and any(ch.isdigit() for ch in inv2) and core1 == core2:
        return "Identical numbers but extra text in one invoice."
    if any(sep in inv1 for sep in ["-", "/"]) or any(sep in inv2 for sep in ["-", "/"]):
        return "Different separator conventions used."
    if any(ch in inv1 for ch in ["#", "$", "&"]) or any(ch in inv2 for ch in ["#", "$", "&"]):
        return "Special characters found in one invoice but not the other."
    if len(set(inv1)) < len(inv1) or len(set(inv2)) < len(inv2):
        return "Duplicate characters found in one invoice."
    if len(inv1) > 10 or len(inv2) > 10:
        return "One invoice is significantly longer than the other."
    return "Significant structural difference; invoices do not match."
370
+
371
+
372
+ # -----------------------------
373
+ # Updated process_invoices Function with Feedback Override
374
+ # -----------------------------
375
def process_invoices(df1, df2):
    """Find the best match in *df2* for every invoice in *df1*.

    Invoices with a stored user correction (feedback mapping) are paired
    with that corrected invoice; all others are compared against every
    invoice in *df2* and the highest-scoring candidate is kept.

    Args:
        df1: DataFrame with a string ``InvoiceNumber`` column. The column is
            whitespace-stripped in place.
        df2: DataFrame with a string ``InvoiceNumber`` column. The column is
            whitespace-stripped in place.

    Returns:
        pandas.DataFrame: One row per invoice in *df1* with the columns
        ``invoice_number1``, ``invoice_number2``, ``similarity_score``,
        ``manual_review_status``, ``recommendation``, ``reason``,
        ``comments`` and ``editable``.
    """
    df1["InvoiceNumber"] = df1["InvoiceNumber"].str.strip()
    df2["InvoiceNumber"] = df2["InvoiceNumber"].str.strip()

    # Load the feedback mapping from the persistent file.
    feedback_mapping = load_feedback_mapping()
    candidates = df2["InvoiceNumber"].tolist()

    results = []
    for inv1 in df1["InvoiceNumber"]:
        if inv1 in feedback_mapping:
            # Use the user-selected corrected invoice, boosted by 60 points.
            corrected_invoice = feedback_mapping[inv1]
            score = combined_similarity(inv1, corrected_invoice) + 60
            results.append({
                "invoice_number1": inv1,
                "invoice_number2": corrected_invoice,
                # The boost can push the value past 100; clamp what is
                # reported while the labels still use the boosted score.
                "similarity_score": round(min(score, 100), 2),
                "manual_review_status": generate_review_status(score),
                "recommendation": generate_recommendation(score),
                "reason": generate_reason(inv1, corrected_invoice, score),
                "comments": "",
                "editable": False
            })
            continue

        best_score = -1
        best_candidate = None
        for inv2 in candidates:
            score = combined_similarity(inv1, inv2)
            if score > best_score:
                best_score = score
                best_candidate = inv2

        if best_candidate is None:
            # Nothing to compare against: emit an explicit unmatched row
            # instead of a None entry that would corrupt the result frame.
            results.append({
                "invoice_number1": inv1,
                "invoice_number2": "",
                "similarity_score": 0.0,
                "manual_review_status": generate_review_status(0),
                "recommendation": generate_recommendation(0),
                "reason": "No candidate invoices available for comparison.",
                "comments": "",
                "editable": True
            })
            continue

        # Build the record once for the winner rather than on every
        # improving candidate.
        results.append({
            "invoice_number1": inv1,
            "invoice_number2": best_candidate,
            "similarity_score": round(best_score - 2, 2),
            "manual_review_status": generate_review_status(best_score),
            "recommendation": generate_recommendation(best_score),
            "reason": generate_reason(inv1, best_candidate, best_score),
            "comments": "",
            "editable": best_score <= 60
        })

    return pd.DataFrame(results)
426
+
427
+
428
+ #########################################
429
+ # SBERT Exact Match Filtering
430
+ #########################################
431
def sbert_exact_match_filtering(df1, df2):
    """Separate invoice pairs whose SBERT embeddings have cosine similarity ~1.

    Returns a tuple ``(df_exact, df1_filtered, df2_filtered)``: the matched
    pairs annotated as exact matches, and both inputs with every matched
    invoice number removed and the index reset.
    """
    similarity = cosine_similarity(
        generate_embeddings(df1, 'InvoiceNumber'),
        generate_embeddings(df2, 'InvoiceNumber'),
    )
    tolerance = 1e-8
    hit_rows, hit_cols = np.where(np.isclose(similarity, 1.0, atol=tolerance))
    df_matches = pd.DataFrame({'df1_index': hit_rows, 'df2_index': hit_cols})

    # Translate positional hits back into invoice numbers.
    df_exact = pd.DataFrame({
        'InvoiceNumber_1': df_matches['df1_index'].apply(lambda idx: df1.iloc[idx]['InvoiceNumber']),
        'InvoiceNumber_2': df_matches['df2_index'].apply(lambda idx: df2.iloc[idx]['InvoiceNumber'])
    })

    already_matched_1 = df_exact['InvoiceNumber_1'].unique()
    already_matched_2 = df_exact['InvoiceNumber_2'].unique()
    keep_1 = ~df1['InvoiceNumber'].isin(already_matched_1)
    keep_2 = ~df2['InvoiceNumber'].isin(already_matched_2)
    df1_filtered = df1[keep_1].reset_index(drop=True)
    df2_filtered = df2[keep_2].reset_index(drop=True)

    annotations = (
        ('similarity_score', 100),
        ('manual_review_status', 'No Review Needed'),
        ('recommendation', 'Exact Match'),
        ('reason', 'Exact match via SBERT embeddings.'),
        ('comments', ''),
    )
    for column, value in annotations:
        df_exact[column] = value
    return df_exact, df1_filtered, df2_filtered
455
+
456
+
457
+ #########################################
458
+ # Functions to Generate Summary Statistics
459
+ #########################################
460
def get_stats(df):
    """Summarise a results table as a dictionary.

    Reads the ``recommendation``, ``manual_review_status`` and
    ``similarity_score`` columns and reports row counts per label, the list
    of scores, and their mean, minimum and maximum.
    """
    recommendation = df['recommendation']
    review_status = df['manual_review_status']
    scores = df['similarity_score']

    def count_of(series, label):
        return int((series == label).sum())

    return {
        'total_rows': len(df),
        'total_exact_match': count_of(recommendation, 'Exact Match'),
        'total_partial_match': count_of(recommendation, 'Partial Match'),
        'total_unmatched': count_of(recommendation, 'Unmatched'),
        'total_no_review_needed': count_of(review_status, 'No Review Needed'),
        'total_needs_review': count_of(review_status, 'Needs Review'),
        'similarity_scores': scores.tolist(),
        'average_similarity': float(scores.mean()),
        'min_similarity': float(scores.min()),
        'max_similarity': float(scores.max()),
    }
474
+
475
+
476
def generate_stats_excel_bytes(stats):
    """Render the stats dictionary as a two-column Excel workbook in memory.

    Each dictionary item becomes one row (``Metric``, ``Value``) on a sheet
    named ``Summary Stats``. The returned ``BytesIO`` is rewound to the
    start.
    """
    rows = list(stats.items())
    table = pd.DataFrame(rows, columns=["Metric", "Value"])
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        table.to_excel(writer, index=False, sheet_name='Summary Stats')
    buffer.seek(0)
    return buffer
484
+
485
+
486
def generate_stats_json_bytes(stats):
    """Serialise the stats dictionary to UTF-8 JSON inside a ``BytesIO``.

    The JSON is indented by four spaces.
    """
    payload = json.dumps(stats, indent=4)
    return io.BytesIO(payload.encode('utf-8'))
490
+
491
+
492
+ #########################################
493
+ # Flask Routes
494
+ #########################################
495
def _read_uploaded_table(upload):
    """Parse one uploaded CSV/XLS/XLSX file; return None for other extensions."""
    extension = upload.filename.split(".")[-1].lower()
    if extension == "csv":
        raw = upload.read()
        # chardet may report the encoding as None; fall back to UTF-8 then.
        encoding = chardet.detect(raw).get("encoding") or "utf-8"
        return pd.read_csv(io.StringIO(raw.decode(encoding, errors="replace")))
    if extension in ("xls", "xlsx"):
        upload.seek(0)
        return pd.read_excel(upload)
    return None


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload page and, on POST, run the matching pipeline.

    On POST both uploads are parsed, exact matches are separated with SBERT,
    the remaining invoices are matched with ``process_invoices``, and the
    shuffled combined result is stored in ``latest_results_df`` and passed
    to the template. Validation problems are reported with ``flash`` and a
    redirect back to this page.
    """
    global latest_results_df, original_df1, original_df2
    results = None
    unique_values = []  # Unique invoice numbers from dataset2 for the select box
    if request.method == "POST":
        uploads = (request.files.get("file1"), request.files.get("file2"))
        if not all(uploads):
            flash("Please upload both files.")
            return redirect(request.url)

        frames = []
        try:
            for position, upload in enumerate(uploads, start=1):
                frame = _read_uploaded_table(upload)
                if frame is None:
                    flash(f"File {position} format not supported.")
                    return redirect(request.url)
                frames.append(frame)
        except Exception as e:
            # Request boundary: any parser failure becomes a user message.
            flash("Error reading files: " + str(e))
            return redirect(request.url)

        for position, frame in enumerate(frames, start=1):
            if "InvoiceNumber" not in frame.columns:
                flash(f"File {position} must contain an 'InvoiceNumber' column.")
                return redirect(request.url)
            if frame.empty:
                flash(f"File {position} does not contain any rows.")
                return redirect(request.url)

        df1, df2 = frames
        df1["InvoiceNumber"] = df1["InvoiceNumber"].astype(str)
        df2["InvoiceNumber"] = df2["InvoiceNumber"].astype(str)

        original_df1 = df1.copy()
        original_df2 = df2.copy()

        # Prepare the unique invoice numbers from dataset2 for the edit select box.
        unique_values = sorted(df2["InvoiceNumber"].unique().tolist())

        # Run SBERT exact match filtering.
        df_exact, df1_filtered, df2_filtered = sbert_exact_match_filtering(df1, df2)

        # Run robust invoice matching on remaining invoices (with feedback override).
        df_final_matches = process_invoices(df1_filtered, df2_filtered)

        # Rename exact match columns for consistency.
        df_exact = df_exact.rename(columns={
            'InvoiceNumber_1': 'invoice_number1',
            'InvoiceNumber_2': 'invoice_number2'
        })

        # Concatenate exact matches with robust matches.
        df_concatenated = pd.concat([df_exact, df_final_matches], ignore_index=True)

        # Shuffle the rows randomly before storing and displaying
        latest_results_df = df_concatenated.sample(frac=1).reset_index(drop=True)
        results = latest_results_df.to_dict(orient="records")

    return render_template("index.html", results=results, unique_values=unique_values)
571
+
572
+
573
@app.route("/save_updates", methods=["POST"])
def save_updates():
    """Replace ``latest_results_df`` with the table posted as JSON.

    Responds with ``{"status": "success"}`` and HTTP 200, or with the error
    message and HTTP 500 if anything raises.
    """
    global latest_results_df
    try:
        posted_rows = request.get_json()
        latest_results_df = pd.DataFrame(posted_rows).copy()
        return jsonify({"status": "success"}), 200
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
583
+
584
+
585
@app.route("/save_feedback", methods=["POST"])
def save_feedback():
    """Persist a user correction posted as JSON.

    Reads ``invoice_number1`` and ``selected_invoice2`` from the request
    body. When a replacement invoice was selected the feedback mapping is
    updated; otherwise nothing is stored. Responds with a status message and
    HTTP 200, or the error text and HTTP 500 if anything raises.
    """
    try:
        payload = request.get_json()
        invoice1 = payload.get('invoice_number1')
        selected_invoice2 = payload.get('selected_invoice2')

        if not selected_invoice2:
            message = "No new invoice selected; no changes made."
        else:
            # A new invoice was chosen: store it in the persistent mapping.
            update_feedback_mapping(invoice1, selected_invoice2)
            message = "Feedback saved. Please re-run to train model on updates."

        return jsonify({"status": "success", "message": message}), 200

    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
603
+
604
+
605
def generate_csv_bytes(df):
    """Return *df* rendered as CSV (without the index) in a ``BytesIO``."""
    csv_text = df.to_csv(index=False)
    return io.BytesIO(csv_text.encode())
610
+
611
+
612
def generate_excel_bytes(df):
    """Build the formatted Excel report for a results table.

    The ``Report`` sheet holds a merged title row, the data table (the
    ``reason`` column spans two Excel columns), two small statistics cards,
    and below them a recommendation pie chart, a score histogram, a word
    cloud of reasons, a score line chart and a reasons bar chart. Infinite
    and missing values are blanked first. The returned ``BytesIO`` is
    rewound to the start.
    """
    df = df.replace([np.inf, -np.inf], np.nan).fillna("")
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        workbook = writer.book
        worksheet = workbook.add_worksheet("Report")

        # Map each DataFrame column to its first Excel column; "reason"
        # occupies two columns because it is written as a merged range.
        excel_col_mapping = {}
        next_free_col = 0
        for col in df.columns:
            excel_col_mapping[col] = next_free_col
            next_free_col += 2 if col.lower() == 'reason' else 1
        total_excel_columns = next_free_col

        highlight = {
            'bold': True,
            'bg_color': '#FFFF00',
            'font_color': 'black',
            'align': 'center',
            'valign': 'vcenter',
        }
        title_format = workbook.add_format({**highlight, 'font_size': 16})
        header_format = workbook.add_format({**highlight, 'border': 1})
        data_cell_format = workbook.add_format({
            'border': 1,
            'align': 'left',
            'valign': 'vcenter',
            'text_wrap': True
        })

        worksheet.merge_range(0, 0, 0, total_excel_columns - 1,
                              "Intelligent Partial Invoice Matching - Excel Report",
                              title_format)

        # Header row.
        start_data_row = 2
        for col in df.columns:
            first_col = excel_col_mapping[col]
            if col.lower() == 'reason':
                worksheet.merge_range(start_data_row, first_col, start_data_row, first_col + 1,
                                      col, header_format)
                worksheet.set_column(first_col, first_col + 1, 40)
            else:
                worksheet.write(start_data_row, first_col, col, header_format)
                worksheet.set_column(first_col, first_col, 20)

        # Data rows.
        for offset, record in enumerate(df.itertuples(index=False, name=None)):
            excel_row = start_data_row + 1 + offset
            for col_name, cell in zip(df.columns, record):
                first_col = excel_col_mapping[col_name]
                if col_name.lower() == 'reason':
                    worksheet.merge_range(excel_row, first_col,
                                          excel_row, first_col + 1,
                                          cell, data_cell_format)
                else:
                    worksheet.write(excel_row, first_col, cell, data_cell_format)

        last_data_row = start_data_row + 1 + len(df)
        stats_card_row = last_data_row + 3

        # Statistics cards; any failure computing them zeroes all four values.
        try:
            total_invoices = len(df)
            numeric_scores = df['similarity_score'].astype(float)
            avg_score = float(numeric_scores.mean())
            max_score = float(numeric_scores.max())
            min_score = float(numeric_scores.min())
        except Exception:
            total_invoices = avg_score = max_score = min_score = 0
        cards = (
            (0, (("Total Invoices", total_invoices),
                 ("Average Similarity", round(avg_score, 2)))),
            (3, (("Max Similarity", round(max_score, 2)),
                 ("Min Similarity", round(min_score, 2)))),
        )
        for label_col, entries in cards:
            for offset, (label, value) in enumerate(entries):
                worksheet.write(stats_card_row + offset, label_col, label, header_format)
                worksheet.write(stats_card_row + offset, label_col + 1, value, data_cell_format)

        chart_start_row = stats_card_row + 5
        chart_col = 3

        # Recommendation counts table and pie chart.
        recommendation_categories = ["Unmatched", "Exact Match", "Partial Match"]
        recommendation_counts = [
            int(df[df['recommendation'] == category].shape[0])
            for category in recommendation_categories
        ]
        rec_table_row = chart_start_row
        worksheet.write(rec_table_row, 0, "Recommendation", header_format)
        worksheet.write(rec_table_row, 1, "Count", header_format)
        for offset, (category, count) in enumerate(zip(recommendation_categories, recommendation_counts)):
            worksheet.write(rec_table_row + 1 + offset, 0, category, data_cell_format)
            worksheet.write(rec_table_row + 1 + offset, 1, count, data_cell_format)
        rec_last_row = rec_table_row + len(recommendation_categories)
        rec_pie_chart = workbook.add_chart({'type': 'pie'})
        rec_pie_chart.add_series({
            'name': 'Recommendation Distribution',
            'categories': ['Report', rec_table_row + 1, 0, rec_last_row, 0],
            'values': ['Report', rec_table_row + 1, 1, rec_last_row, 1],
        })
        rec_pie_chart.set_title({'name': 'Recommendation Distribution'})
        worksheet.insert_chart(chart_start_row, chart_col, rec_pie_chart, {'x_scale': 1.0, 'y_scale': 1.0})
        chart_start_row += 17

        # Histogram of similarity scores in ten-point buckets starting at 1.
        if 'similarity_score' in df.columns:
            scores = pd.to_numeric(df['similarity_score'], errors='coerce').dropna()
            bins = list(range(1, 102, 10))
            counts, bin_edges = np.histogram(scores, bins=bins)
            bin_labels = [f"{low}-{high - 1}" for low, high in zip(bins[:-1], bins[1:])]
            hist_table_row = chart_start_row - 3
            worksheet.write(hist_table_row, 0, "Score Range", header_format)
            worksheet.write(hist_table_row, 1, "Count", header_format)
            for offset, (label, count) in enumerate(zip(bin_labels, counts)):
                worksheet.write(hist_table_row + 1 + offset, 0, label, data_cell_format)
                worksheet.write(hist_table_row + 1 + offset, 1, count, data_cell_format)
            hist_last_row = hist_table_row + len(bin_labels)
            hist_chart = workbook.add_chart({'type': 'column'})
            hist_chart.add_series({
                'name': 'Similarity Score Distribution',
                'categories': ['Report', hist_table_row + 1, 0, hist_last_row, 0],
                'values': ['Report', hist_table_row + 1, 1, hist_last_row, 1],
            })
            hist_chart.set_title({'name': 'Histogram of Similarity Scores'})
            hist_chart.set_x_axis({'name': 'Score Range'})
            hist_chart.set_y_axis({'name': 'Count'})
            worksheet.insert_chart(chart_start_row, chart_col, hist_chart, {'x_scale': 1.2, 'y_scale': 1.2})
            chart_start_row += 20

        # Word cloud built from all reason texts.
        if 'reason' in df.columns:
            worksheet.write(chart_start_row - 2, chart_col, "Wordcloud for Reasons", header_format)
            text = " ".join(df['reason'].astype(str).tolist())
            cloud = WordCloud(width=400, height=200, background_color='white').generate(text)
            imgdata = io.BytesIO()
            cloud.to_image().save(imgdata, format='PNG')
            imgdata.seek(0)
            worksheet.insert_image(chart_start_row, chart_col, 'wordcloud.png',
                                   {'image_data': imgdata, 'x_scale': 1.0, 'y_scale': 1.0})
            chart_start_row += 25
        else:
            chart_start_row += 10

        # Line chart of the score column over the data rows.
        sim_index = excel_col_mapping.get('similarity_score', 0)
        line_chart = workbook.add_chart({'type': 'line'})
        line_chart.add_series({
            'name': 'Similarity Score Trend',
            'categories': ['Report', start_data_row + 1, 0, last_data_row - 1, 0],
            'values': ['Report', start_data_row + 1, sim_index, last_data_row - 1, sim_index],
        })
        line_chart.set_title({'name': 'Similarity Score Over Entries'})
        worksheet.insert_chart(chart_start_row, chart_col, line_chart, {'x_scale': 1.5, 'y_scale': 1.5})
        chart_start_row += 30

        # Reason frequency table and horizontal bar chart.
        if 'reason' in df.columns:
            reasons = df['reason'].value_counts().reset_index()
            reasons.columns = ['Reason', 'Count']
            hbar_table_row = chart_start_row
            worksheet.write(hbar_table_row, 0, "Reason", header_format)
            worksheet.write(hbar_table_row, 1, "Count", header_format)
            for offset, (reason_text, count) in enumerate(zip(reasons['Reason'], reasons['Count'])):
                worksheet.write(hbar_table_row + 1 + offset, 0, reason_text, data_cell_format)
                worksheet.write(hbar_table_row + 1 + offset, 1, count, data_cell_format)
            hbar_last_row = hbar_table_row + len(reasons)
            hbar_chart = workbook.add_chart({'type': 'bar'})
            hbar_chart.add_series({
                'name': 'Reasons Distribution',
                'categories': ['Report', hbar_table_row + 1, 0, hbar_last_row, 0],
                'values': ['Report', hbar_table_row + 1, 1, hbar_last_row, 1],
            })
            hbar_chart.set_title({'name': 'Reasons Distribution'})
            worksheet.insert_chart(chart_start_row, chart_col, hbar_chart, {'x_scale': 1.5, 'y_scale': 1.5})
            chart_start_row += 30
    output.seek(0)
    return output
781
+
782
+
783
@app.route("/download_csv")
def download_csv():
    """Send the matched invoice pairs as a two-column CSV download.

    Each result row contributes ``InvoiceNumber_1`` / ``InvoiceNumber_2``.
    A value is left empty when the invoice number is not present in the
    corresponding originally uploaded file. Redirects to the index page
    with a flash message when no results or uploads are available.
    """
    global latest_results_df, original_df1, original_df2
    # The originals are needed for the lookups below, so require them too.
    if latest_results_df is None or original_df1 is None or original_df2 is None:
        flash("No data available.")
        return redirect(url_for('index'))

    # Compare case-insensitively: the pipeline emits "Unmatched", which the
    # previous literal "UnMatched" never matched.
    allowed_recs = {"partial match", "unmatched", "exact match"}
    recommendation = latest_results_df['recommendation'].astype(str).str.casefold()
    filtered_matches = latest_results_df[recommendation.isin(allowed_recs)]
    keys_df = filtered_matches[['invoice_number1', 'invoice_number2']].reset_index(drop=True)

    # De-duplicate the lookup side so each left merge keeps exactly one row
    # per key and the two merged frames stay aligned row by row.
    known_first = original_df1[['InvoiceNumber']].drop_duplicates()
    known_second = original_df2[['InvoiceNumber']].drop_duplicates()
    df1_merged = pd.merge(
        keys_df,
        known_first,
        left_on='invoice_number1',
        right_on='InvoiceNumber',
        how='left'
    ).rename(columns={'InvoiceNumber': 'InvoiceNumber_1'})
    df2_merged = pd.merge(
        keys_df,
        known_second,
        left_on='invoice_number2',
        right_on='InvoiceNumber',
        how='left'
    ).rename(columns={'InvoiceNumber': 'InvoiceNumber_2'})

    final_df = pd.DataFrame({
        'InvoiceNumber_1': df1_merged['InvoiceNumber_1'],
        'InvoiceNumber_2': df2_merged['InvoiceNumber_2']
    })
    for col in final_df.select_dtypes(include=['object']).columns:
        final_df[col] = final_df[col].str.strip()
    final_df.reset_index(drop=True, inplace=True)
    return send_file(
        generate_csv_bytes(final_df),
        mimetype='text/csv',
        download_name='final_merged_invoices.csv',
        as_attachment=True
    )
821
+
822
+
823
@app.route("/download_excel")
def download_excel():
    """Send the latest results as the formatted Excel report.

    The ``editable`` and ``comments`` columns are left out of the report.
    Redirects to the index page with a flash message when there are no
    results yet.
    """
    global latest_results_df
    if latest_results_df is None:
        flash("No data available.")
        return redirect(url_for('index'))
    hidden_columns = [
        col for col in ["editable", "comments"] if col in latest_results_df.columns
    ]
    report_df = latest_results_df.drop(columns=hidden_columns)
    return send_file(
        generate_excel_bytes(report_df),
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        download_name='matched_invoices.xlsx',
        as_attachment=True
    )
839
+
840
+
841
+ # New endpoint: Download summary statistics as Excel
842
@app.route("/download_stats_excel")
def download_stats_excel():
    """Send the summary statistics of the latest results as an Excel file.

    Redirects to the index page with a flash message when there are no
    results yet.
    """
    global latest_results_df
    if latest_results_df is None:
        flash("No data available for stats.")
        return redirect(url_for('index'))
    workbook_bytes = generate_stats_excel_bytes(get_stats(latest_results_df))
    return send_file(
        workbook_bytes,
        mimetype='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        download_name='invoice_matching_stats.xlsx',
        as_attachment=True
    )
855
+
856
+
857
+ # New endpoint: Download summary statistics as JSON
858
@app.route("/download_stats_json")
def download_stats_json():
    """Send the summary statistics of the latest results as a JSON file.

    Redirects to the index page with a flash message when there are no
    results yet.
    """
    global latest_results_df
    if latest_results_df is None:
        flash("No data available for stats.")
        return redirect(url_for('index'))
    json_bytes = generate_stats_json_bytes(get_stats(latest_results_df))
    return send_file(
        json_bytes,
        mimetype='application/json',
        download_name='invoice_matching_stats.json',
        as_attachment=True
    )
871
+
872
+
873
if __name__ == "__main__":
    # Debug mode turns on the interactive debugger, which allows arbitrary
    # code execution from the browser, so it is opt-in through FLASK_DEBUG
    # rather than hard-coded on.
    app.run(debug=os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"})
templates/index.html ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Intelligent Partial Invoice Matching</title>
6
+ <!-- Bootstrap CSS -->
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css">
8
+ <!-- Font Awesome for icons -->
9
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
10
+ <!-- Google Fonts -->
11
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;800;900&display=swap" rel="stylesheet">
12
+ <style>
13
+ /* Your existing CSS variables and styles */
14
+ :root {
15
+ --primary-color: #4a90e2;
16
+ --secondary-color: #2c5282;
17
+ --success-color: #48bb78;
18
+ --danger-color: #e53e3e;
19
+ --background-light: #f7fafc;
20
+ --background-dark: #1a202c;
21
+ --text-light: #2d3748;
22
+ --text-dark: #f7fafc;
23
+ --card-light: #ffffff;
24
+ --card-dark: #2d3748;
25
+ --input-light: #f8fafc;
26
+ --input-dark: #2d3748;
27
+ --border-light: #e2e8f0;
28
+ --border-dark: #4a5568;
29
+ --spacing-unit: 1rem;
30
+ --border-radius: 12px;
31
+ --transition-speed: 0.3s;
32
+ }
33
+ body {
34
+ font-family: 'Poppins', sans-serif;
35
+ margin: 0;
36
+ min-height: 100vh;
37
+ transition: background-color var(--transition-speed), color var(--transition-speed);
38
+ background: var(--background-light);
39
+ color: var(--text-light);
40
+ padding: calc(var(--spacing-unit) * 2);
41
+ display: flex;
42
+ flex-direction: column;
43
+ align-items: center;
44
+ }
45
+ body.dark {
46
+ background: var(--background-dark);
47
+ color: var(--text-dark);
48
+ }
49
+ .container {
50
+ max-width: 1400px;
51
+ width: 100%;
52
+ padding: calc(var(--spacing-unit) * 2);
53
+ margin: 0 auto;
54
+ }
55
+ h1 {
56
+ font-size: clamp(2.5rem, 5vw, 4rem);
57
+ font-weight: 900;
58
+ text-align: center;
59
+ margin: calc(var(--spacing-unit) * 4) 0;
60
+ padding: calc(var(--spacing-unit) * 2);
61
+ line-height: 1.1;
62
+ letter-spacing: -0.02em;
63
+ background: linear-gradient(135deg, #2563eb, #7c3aed);
64
+ -webkit-background-clip: text;
65
+ -webkit-text-fill-color: transparent;
66
+ transform: scale(1);
67
+ transition: transform 0.3s ease;
68
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
69
+ animation: fadeInDown 1.2s ease-out;
70
+ }
71
+ .theme-toggle {
72
+ position: fixed;
73
+ top: 20px;
74
+ right: 20px;
75
+ background: linear-gradient(135deg, #2563eb, #7c3aed);
76
+ color: white;
77
+ border: none;
78
+ border-radius: 50%;
79
+ width: 45px;
80
+ height: 45px;
81
+ cursor: pointer;
82
+ display: flex;
83
+ align-items: center;
84
+ justify-content: center;
85
+ transition: all var(--transition-speed);
86
+ box-shadow: 0 4px 12px rgba(0,0,0,0.15);
87
+ z-index: 1000;
88
+ }
89
+ .theme-toggle:hover {
90
+ transform: rotate(180deg) scale(1.1);
91
+ box-shadow: 0 6px 16px rgba(0,0,0,0.2);
92
+ }
93
+ .upload-container, .table-container {
94
+ background: var(--card-light);
95
+ padding: calc(var(--spacing-unit) * 3);
96
+ border-radius: var(--border-radius);
97
+ box-shadow: 0 8px 32px rgba(0,0,0,0.1);
98
+ margin-bottom: calc(var(--spacing-unit) * 4);
99
+ transition: transform var(--transition-speed), box-shadow var(--transition-speed);
100
+ animation: fadeIn 1s ease-out;
101
+ }
102
+ body.dark .upload-container,
103
+ body.dark .table-container {
104
+ background: var(--card-dark);
105
+ box-shadow: 0 8px 32px rgba(0,0,0,0.2);
106
+ }
107
+ .upload-container:hover,
108
+ .table-container:hover {
109
+ transform: translateY(-5px);
110
+ box-shadow: 0 12px 48px rgba(0,0,0,0.15);
111
+ }
112
+ .form-group {
113
+ margin-bottom: calc(var(--spacing-unit) * 2);
114
+ }
115
+ .form-control {
116
+ border-radius: var(--border-radius);
117
+ border: 2px solid var(--border-light);
118
+ padding: calc(var(--spacing-unit) * 1.5);
119
+ transition: all var(--transition-speed);
120
+ background: var(--input-light);
121
+ color: var(--text-light);
122
+ height: auto;
123
+ }
124
+ body.dark .form-control {
125
+ background: var(--input-dark);
126
+ border-color: var(--border-dark);
127
+ color: var(--text-dark);
128
+ }
129
+ .form-control:focus {
130
+ border-color: #2563eb;
131
+ box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.2);
132
+ }
133
+ .form-control::file-selector-button {
134
+ padding: 8px 16px;
135
+ border-radius: 8px;
136
+ border: none;
137
+ background: linear-gradient(135deg, #2563eb, #7c3aed);
138
+ color: white;
139
+ margin-right: 16px;
140
+ transition: all 0.3s;
141
+ }
142
+ .form-control::file-selector-button:hover {
143
+ background: linear-gradient(135deg, #1d4ed8, #6d28d9);
144
+ transform: translateY(-1px);
145
+ }
146
+ .btn-custom {
147
+ padding: calc(var(--spacing-unit) * 1.5) calc(var(--spacing-unit) * 3);
148
+ border-radius: var(--border-radius);
149
+ font-weight: 600;
150
+ transition: all var(--transition-speed);
151
+ position: relative;
152
+ overflow: hidden;
153
+ border: none;
154
+ background: linear-gradient(135deg, #2563eb, #7c3aed);
155
+ color: white;
156
+ }
157
+ .btn-custom::after {
158
+ content: '';
159
+ position: absolute;
160
+ top: 50%;
161
+ left: 50%;
162
+ width: 0;
163
+ height: 0;
164
+ background: rgba(255,255,255,0.2);
165
+ border-radius: 50%;
166
+ transform: translate(-50%, -50%);
167
+ transition: width 0.6s, height 0.6s;
168
+ }
169
+ .btn-custom:hover::after {
170
+ width: 300%;
171
+ height: 300%;
172
+ }
173
+ .btn-custom:hover {
174
+ transform: translateY(-2px);
175
+ box-shadow: 0 4px 12px rgba(0,0,0,0.2);
176
+ }
177
+ .table {
178
+ margin-top: calc(var(--spacing-unit) * 2);
179
+ }
180
+ .table thead th {
181
+ background: linear-gradient(135deg, #2563eb, #7c3aed);
182
+ color: white;
183
+ font-weight: 600;
184
+ padding: calc(var(--spacing-unit) * 1.5);
185
+ border: none;
186
+ }
187
+ .table tbody tr {
188
+ transition: background-color var(--transition-speed);
189
+ color: var(--text-light);
190
+ }
191
+ body.dark .table tbody tr {
192
+ color: var(--text-dark);
193
+ }
194
+ .table tbody tr:hover {
195
+ background-color: rgba(37, 99, 235, 0.1);
196
+ }
197
+ .edit-btn {
198
+ background: linear-gradient(135deg, #dc2626, #ef4444) !important;
199
+ color: white !important;
200
+ padding: calc(var(--spacing-unit) * 0.75) calc(var(--spacing-unit) * 1.5);
201
+ font-size: 0.9rem;
202
+ border-radius: var(--border-radius);
203
+ border: none;
204
+ }
205
+ .edit-btn:hover {
206
+ background: linear-gradient(135deg, #b91c1c, #dc2626) !important;
207
+ transform: translateY(-2px);
208
+ }
209
+ .frozen-badge {
210
+ padding: calc(var(--spacing-unit) * 0.75) calc(var(--spacing-unit) * 1.5);
211
+ border-radius: var(--border-radius);
212
+ background: linear-gradient(135deg, #059669, #10b981);
213
+ color: white;
214
+ }
215
+ .modal-content {
216
+ border-radius: var(--border-radius);
217
+ overflow: hidden;
218
+ }
219
+ .modal-header {
220
+ background: linear-gradient(135deg, #dc2626, #ef4444);
221
+ color: white;
222
+ border: none;
223
+ padding: calc(var(--spacing-unit) * 2);
224
+ }
225
+ .modal-title {
226
+ color: white;
227
+ }
228
+ .modal-body {
229
+ padding: calc(var(--spacing-unit) * 3);
230
+ }
231
+ @keyframes fadeIn {
232
+ from { opacity: 0; transform: translateY(20px); }
233
+ to { opacity: 1; transform: translateY(0); }
234
+ }
235
+ @keyframes fadeInDown {
236
+ from { opacity: 0; transform: translateY(-50px); }
237
+ to { opacity: 1; transform: translateY(0); }
238
+ }
239
+ /* Dark mode specific styles */
240
+ body.dark .table {
241
+ color: var(--text-dark);
242
+ }
243
+ body.dark .form-control {
244
+ background: var(--card-dark);
245
+ }
246
+ body.dark .modal-content {
247
+ background: var(--card-dark);
248
+ color: var(--text-dark);
249
+ }
250
+ body.dark .modal-body {
251
+ color: var(--text-dark);
252
+ }
253
+ body.dark .close {
254
+ color: white;
255
+ }
256
+ body.dark label {
257
+ color: var(--text-dark);
258
+ }
259
+ .btn-success {
260
+ background: linear-gradient(135deg, #059669, #10b981) !important;
261
+ border: none !important;
262
+ }
263
+ .btn-info {
264
+ background: linear-gradient(135deg, #0284c7, #0ea5e9) !important;
265
+ border: none !important;
266
+ }
267
+ .btn-secondary {
268
+ background: linear-gradient(135deg, #4b5563, #6b7280) !important;
269
+ border: none !important;
270
+ }
271
+ .btn-danger {
272
+ background: linear-gradient(135deg, #dc2626, #ef4444) !important;
273
+ border: none !important;
274
+ }
275
+ /* =====================================================
276
+ Updated Clock Loader CSS (Purple Clock with Moving Hands)
277
+ ====================================================== */
278
+ .loader-container {
279
+ position: fixed;
280
+ top: 0;
281
+ left: 0;
282
+ width: 100%;
283
+ height: 100%;
284
+ background: rgba(0, 0, 0, 0.7);
285
+ display: flex;
286
+ justify-content: center;
287
+ align-items: center;
288
+ z-index: 9999;
289
+ }
290
+ .clock-loader {
291
+ text-align: center;
292
+ }
293
+ .clock-face {
294
+ width: 120px;
295
+ height: 120px;
296
+ border: 8px solid #7c3aed;
297
+ border-radius: 50%;
298
+ position: relative;
299
+ background: rgba(124, 58, 237, 0.1);
300
+ margin: 0 auto 20px;
301
+ box-shadow: 0 0 20px rgba(124, 58, 237, 0.3);
302
+ }
303
+ /* Positioning each hand so its bottom is at the center of the clock-face */
304
+ .hand {
305
+ position: absolute;
306
+ left: 50%;
307
+ bottom: 50%;
308
+ transform-origin: bottom;
309
+ transform: translateX(-50%) rotate(0deg);
310
+ }
311
+ .hour-hand {
312
+ width: 4px;
313
+ height: 25px;
314
+ background: #7c3aed;
315
+ animation: rotate-hour 8s linear infinite;
316
+ }
317
+ .minute-hand {
318
+ width: 3px;
319
+ height: 35px;
320
+ background: #7c3aed;
321
+ animation: rotate-minute 6s linear infinite;
322
+ }
323
+ .second-hand {
324
+ width: 2px;
325
+ height: 45px;
326
+ background: #2563eb;
327
+ animation: rotate-second 2s linear infinite;
328
+ }
329
+ .center-dot {
330
+ width: 12px;
331
+ height: 12px;
332
+ background: #7c3aed;
333
+ border-radius: 50%;
334
+ position: absolute;
335
+ top: calc(50% - 6px);
336
+ left: calc(50% - 6px);
337
+ box-shadow: 0 0 10px rgba(124, 58, 237, 0.5);
338
+ }
339
+ /* Optional tick marks on the clock-face */
340
+ .tick {
341
+ position: absolute;
342
+ width: 3px;
343
+ height: 10px;
344
+ background: #7c3aed;
345
+ top: 4px;
346
+ left: calc(50% - 1.5px);
347
+ transform-origin: 50% 60px;
348
+ }
349
+ .tick-1 { transform: rotate(0deg); }
350
+ .tick-2 { transform: rotate(90deg); }
351
+ .tick-3 { transform: rotate(180deg); }
352
+ .tick-4 { transform: rotate(270deg); }
353
+ @keyframes rotate-hour {
354
+ from { transform: translateX(-50%) rotate(0deg); }
355
+ to { transform: translateX(-50%) rotate(360deg); }
356
+ }
357
+ @keyframes rotate-minute {
358
+ from { transform: translateX(-50%) rotate(0deg); }
359
+ to { transform: translateX(-50%) rotate(360deg); }
360
+ }
361
+ @keyframes rotate-second {
362
+ from { transform: translateX(-50%) rotate(0deg); }
363
+ to { transform: translateX(-50%) rotate(360deg); }
364
+ }
365
+ </style>
366
+ </head>
367
+ <body>
368
+ <!-- Theme toggle button -->
369
+ <button class="theme-toggle" onclick="toggleTheme()">
370
+ <i class="fas fa-moon"></i>
371
+ </button>
372
+ <!-- Loader Container (hidden by default) -->
373
+ <div class="loader-container" id="loader-container" style="display: none;">
374
+ <div class="clock-loader">
375
+ <div class="clock-face">
376
+ <div class="hand hour-hand"></div>
377
+ <div class="hand minute-hand"></div>
378
+ <div class="hand second-hand"></div>
379
+ <div class="center-dot"></div>
380
+ <div class="tick tick-1"></div>
381
+ <div class="tick tick-2"></div>
382
+ <div class="tick tick-3"></div>
383
+ <div class="tick tick-4"></div>
384
+ </div>
385
+ <div class="loading-text" style="color: #fff; font-size: 18px; margin-top: 20px; font-weight: 500;">Processing Invoices...</div>
386
+ </div>
387
+ </div>
388
+ <div class="container mx-auto">
389
+ <h1>Intelligent Partial Invoice Matching</h1>
390
+ <div class="upload-container">
391
+ <form id="uploadForm" method="post" enctype="multipart/form-data">
392
+ <div class="form-group">
393
+ <label for="file1">Upload First Dataset (CSV/Excel):</label>
394
+ <input type="file" class="form-control" id="file1" name="file1" required>
395
+ </div>
396
+ <div class="form-group">
397
+ <label for="file2">Upload Second Dataset (CSV/Excel):</label>
398
+ <input type="file" class="form-control" id="file2" name="file2" required>
399
+ </div>
400
+ <button type="submit" class="btn btn-primary btn-custom btn-block">
401
+ <span>Process Invoices</span>
402
+ </button>
403
+ </form>
404
+ </div>
405
+ {% if results %}
406
+ <div class="table-container">
407
+ <h2 class="text-center font-bold mb-4">Matched Invoices Preview</h2>
408
+ <div class="table-responsive">
409
+ <table class="table table-striped table-bordered" id="resultsTable">
410
+ <thead>
411
+ <tr>
412
+ <th>Invoice Number 1</th>
413
+ <th>Invoice Number 2</th>
414
+ <th>Similarity Score</th>
415
+ <th>Manual Review Status</th>
416
+ <th>Recommendation</th>
417
+ <th>Reason</th>
418
+ <th>Comments</th>
419
+ <th class="action-btn">Action</th>
420
+ </tr>
421
+ </thead>
422
+ <tbody>
423
+ {% for row in results %}
424
+ <tr data-index="{{ loop.index0 }}">
425
+ <td class="invoice_number1">{{ row.invoice_number1 }}</td>
426
+ <td class="invoice_number2">{{ row.invoice_number2 }}</td>
427
+ <td class="similarity_score">{{ row.similarity_score }}</td>
428
+ <td class="manual_review_status">
429
+ {% if row.recommendation == "Exact Match" %}
430
+ No
431
+ {% else %}
432
+ Needs Review
433
+ {% endif %}
434
+ </td>
435
+ <td class="recommendation">{{ row.recommendation }}</td>
436
+ <td class="reason">{{ row.reason }}</td>
437
+ <td class="comments">{{ row.comments }}</td>
438
+ <td class="action-btn">
439
+ <div class="text-center">
440
+ {% if row.recommendation == "Exact Match" %}
441
+ <button class="btn btn-success btn-custom freeze-btn" disabled style="padding: 10px 20px;filter: brightness(1.3);">
442
+ <i class="fas fa-lock"></i> Freeze
443
+ </button>
444
+ {% else %}
445
+ <input type="checkbox" class="select-review-checkbox" data-index="{{ loop.index0 }}" style="width:30px; height:30px;">
446
+ {% endif %}
447
+ </div>
448
+ </td>
449
+ </tr>
450
+ {% endfor %}
451
+ </tbody>
452
+ </table>
453
+ </div>
454
+ </div>
455
+ <div class="row mb-4">
456
+ <div class="col-md-4 mb-3">
457
+ <a href="{{ url_for('download_csv') }}" class="btn btn-success btn-custom btn-block">
458
+ <i class="fas fa-file-csv"></i> Download CSV Report
459
+ </a>
460
+ </div>
461
+ <div class="col-md-4 mb-3">
462
+ <a href="{{ url_for('download_excel') }}" class="btn btn-info btn-custom btn-block">
463
+ <i class="fas fa-file-excel"></i> Download Excel Report
464
+ </a>
465
+ </div>
466
+ <div class="col-md-4 mb-3">
467
+ <!-- New Download Summary Stats Button -->
468
+ <button id="downloadStatsBtn" class="btn btn-secondary btn-custom btn-block">
469
+ <i class="fas fa-chart-bar"></i> Download Summary Stats
470
+ </button>
471
+ </div>
472
+ </div>
473
+ <div class="text-center mb-5">
474
+ <button id="saveUpdates" class="btn btn-primary btn-custom">
475
+ <i class="fas fa-save"></i> Save All Updates
476
+ </button>
477
+ </div>
478
+ {% endif %}
479
+ </div>
480
+ <!-- Edit Modal -->
481
+ <div class="modal fade" id="editModal" tabindex="-1" aria-labelledby="editModalLabel" aria-hidden="true">
482
+ <div class="modal-dialog modal-dialog-centered">
483
+ <div class="modal-content">
484
+ <div class="modal-header">
485
+ <h5 class="modal-title" id="editModalLabel">
486
+ <i class="fas fa-edit"></i> Edit Invoice Match
487
+ </h5>
488
+ <button type="button" class="close text-white" data-dismiss="modal" aria-label="Close">
489
+ <span aria-hidden="true">&times;</span>
490
+ </button>
491
+ </div>
492
+ <form id="editForm">
493
+ <div class="modal-body">
494
+ <!-- Non-editable fields -->
495
+ <div class="form-group">
496
+ <label>Invoice Number 1:</label>
497
+ <p id="modalInvoice1" class="font-weight-bold"></p>
498
+ </div>
499
+ <div class="form-group">
500
+ <label>Invoice Number 2 (Current):</label>
501
+ <p id="modalInvoice2" class="font-weight-bold"></p>
502
+ </div>
503
+ <div class="form-group">
504
+ <label>Similarity Score:</label>
505
+ <p id="modalScore" class="font-weight-bold"></p>
506
+ </div>
507
+ <!-- New select box for corrected invoice from dataset2 -->
508
+ <div class="form-group">
509
+ <label for="modalSelectInvoice2">Select Correct Invoice from Dataset 2:</label>
510
+ <select class="form-control" id="modalSelectInvoice2">
511
+ <option value="">-- Select --</option>
512
+ {% for inv in unique_values %}
513
+ <option value="{{ inv }}">{{ inv }}</option>
514
+ {% endfor %}
515
+ </select>
516
+ </div>
517
+ <!-- Editable fields -->
518
+ <div class="form-group">
519
+ <label for="modalReviewStatus">Manual Review Status:</label>
520
+ <select class="form-control" id="modalReviewStatus" required>
521
+ <option value="Needs Review">Needs Review</option>
522
+ <option value="No Review Needed">No Review Needed</option>
523
+ </select>
524
+ </div>
525
+ <div class="form-group">
526
+ <label for="modalRecommendation">Recommendation:</label>
527
+ <select class="form-control" id="modalRecommendation" required>
528
+ <option value="Unmatched">Unmatched</option>
529
+ <option value="Partial Match">Partial Match</option>
530
+ <option value="Exact Match">Exact Match</option>
531
+ </select>
532
+ </div>
533
+ <div class="form-group">
534
+ <label for="modalReason">Reason:</label>
535
+ <textarea class="form-control" id="modalReason" rows="3" required></textarea>
536
+ </div>
537
+ <div class="form-group">
538
+ <label for="modalComments">Comments:</label>
539
+ <textarea class="form-control" id="modalComments" rows="2"></textarea>
540
+ </div>
541
+ <!-- Hidden index -->
542
+ <input type="hidden" id="modalRowIndex">
543
+ </div>
544
+ <div class="modal-footer">
545
+ <button type="button" class="btn btn-secondary btn-custom" data-dismiss="modal">
546
+ <i class="fas fa-times"></i> Cancel
547
+ </button>
548
+ <button type="submit" class="btn btn-danger btn-custom">
549
+ <i class="fas fa-save"></i> Save Changes
550
+ </button>
551
+ </div>
552
+ </form>
553
+ </div>
554
+ </div>
555
+ </div>
556
+ <!-- jQuery, Popper.js, Bootstrap JS -->
557
+ <script src="https://code.jquery.com/jquery-3.5.1.min.js"></script>
558
+ <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js"></script>
559
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js"></script>
560
+ <script>
561
+ // Theme toggle functionality
562
/**
 * Flip the page between light and dark mode.
 *
 * Toggles the `dark` class on <body>, swaps the moon/sun icon inside the
 * `.theme-toggle` button, and stores the choice under the `theme` key in
 * localStorage.
 */
function toggleTheme() {
  const icon = document.querySelector('.theme-toggle i');
  // classList.toggle returns true when the class is present afterwards.
  const darkNow = document.body.classList.toggle('dark');
  icon.classList.remove(darkNow ? 'fa-moon' : 'fa-sun');
  icon.classList.add(darkNow ? 'fa-sun' : 'fa-moon');
  localStorage.setItem('theme', darkNow ? 'dark' : 'light');
}

// Re-apply a stored dark-mode choice as soon as this script runs.
const savedTheme = localStorage.getItem('theme');
if (savedTheme === 'dark') {
  document.body.classList.add('dark');
  document.querySelector('.theme-toggle i').classList.replace('fa-moon', 'fa-sun');
}
582
+
583
+ // New JS: Download Summary Stats (Excel & JSON) when button is clicked
584
// The stats button is rendered only inside the template's results section.
// On a page without results getElementById returns null, and calling
// addEventListener on null would throw and stop every statement after this
// one in the script from running, so the handler is attached only when the
// button exists.
const statsButton = document.getElementById('downloadStatsBtn');
if (statsButton) {
  statsButton.addEventListener('click', function() {
    // Start a download by clicking a temporary, hidden link to `url`.
    const triggerDownload = function(url) {
      const link = document.createElement('a');
      link.href = url;
      link.style.display = 'none';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    };

    // Excel stats first, then JSON stats (same order as before).
    triggerDownload("{{ url_for('download_stats_excel') }}");
    triggerDownload("{{ url_for('download_stats_json') }}");
  });
}
601
+
602
+ // Populate modal with row data when an edit button is clicked
603
// Delegated click handler: fill the edit modal from the table row whose
// position matches the clicked button's data-index.
$(document).on("click", ".edit-btn", function() {
  const index = $(this).data("index");
  const $row = $("#resultsTable tbody tr").eq(index);
  // Trimmed text of the row cell carrying the given class.
  const cellText = function(cls) {
    return $row.find("." + cls).text().trim();
  };

  // Remember which row is being edited for the submit handler.
  $("#modalRowIndex").val(index);

  // Read-only details.
  $("#modalInvoice1").text(cellText("invoice_number1"));
  $("#modalInvoice2").text(cellText("invoice_number2"));
  $("#modalScore").text(cellText("similarity_score"));

  // Editable fields start from the row's current values.
  $("#modalReviewStatus").val(cellText("manual_review_status"));
  $("#modalRecommendation").val(cellText("recommendation"));
  $("#modalReason").val(cellText("reason"));
  $("#modalComments").val(cellText("comments"));

  // The corrected-invoice picker always starts unselected.
  $("#modalSelectInvoice2").val("");
});
624
+
625
+ // Updated edit form submit handler: feedback is sent to the server, which returns recalculated values.
626
// Edit form submit: post the reviewer's feedback (first invoice number,
// chosen dataset-2 invoice, comments) as JSON to the save_feedback endpoint.
// The server's `message` is shown in an alert; the table row itself is not
// modified here. The modal is closed right away without waiting for the reply.
$("#editForm").on("submit", function(event) {
  event.preventDefault();

  const $row = $("#resultsTable tbody tr").eq($("#modalRowIndex").val());
  const feedback = {
    invoice_number1: $row.find(".invoice_number1").text().trim(),
    selected_invoice2: $("#modalSelectInvoice2").val(),
    comments: $("#modalComments").val()
  };

  $.ajax({
    url: "{{ url_for('save_feedback') }}",
    type: "POST",
    contentType: "application/json",
    data: JSON.stringify(feedback),
    success: function(response) {
      alert(response.message);
    },
    error: function(xhr, status, error) {
      alert("Error saving feedback: " + error);
    }
  });

  $("#editModal").modal("hide");
});
652
+
653
+ // When "Save All Updates" is clicked, update each row's action cell based on its type.
654
// "Save All Updates": refresh every row's action cell, then post the whole
// table as JSON to the save_updates endpoint.
//   - "Exact Match" rows, and rows whose review checkbox is ticked, are frozen
//     (status set to "No", action cell replaced by a disabled Freeze button).
//   - Rows with an unticked checkbox get an Edit button that opens the modal.
//   - Rows without a checkbox (already processed) keep their action cell.
$("#saveUpdates").on("click", function() {
  const FROZEN_HTML = '<div class="text-center"><button class="btn btn-success btn-custom freeze-btn" disabled style="padding: 10px 20px;filter: brightness(1.3);"><i class="fas fa-lock"></i> Freeze</button></div>';

  const updatedData = $("#resultsTable tbody tr").map(function() {
    const $row = $(this);
    const recommendation = $row.find(".recommendation").text().trim();
    const $actionCell = $row.find("td.action-btn");
    const $checkbox = $row.find("input.select-review-checkbox");

    if (recommendation === "Exact Match" || $checkbox.is(":checked")) {
      $row.find(".manual_review_status").text("No");
      $actionCell.html(FROZEN_HTML);
    } else if ($checkbox.length > 0) {
      $actionCell.html('<button class="btn btn-sm edit-btn btn-custom" data-index="' + $row.data("index") + '" data-toggle="modal" data-target="#editModal"><i class="fas fa-edit"></i> Edit</button>');
    }

    // Values are read after the cell update above so that the status and
    // the `editable` flag reflect what the row now shows.
    return {
      invoice_number1: $row.find(".invoice_number1").text().trim(),
      invoice_number2: $row.find(".invoice_number2").text().trim(),
      similarity_score: parseFloat($row.find(".similarity_score").text().trim()),
      manual_review_status: $row.find(".manual_review_status").text().trim(),
      recommendation: recommendation,
      reason: $row.find(".reason").text().trim(),
      comments: $row.find(".comments").text().trim(),
      editable: $row.find("button.edit-btn").length > 0
    };
  }).get();

  $.ajax({
    url: "{{ url_for('save_updates') }}",
    type: "POST",
    contentType: "application/json",
    data: JSON.stringify(updatedData),
    success: function(response) {
      alert("Updates taken successfully!");
    },
    error: function(xhr, status, error) {
      alert("Error saving updates: " + error);
    }
  });
});
703
+
704
+ // Hide loader when processing is complete (if the server sends processing_complete = true)
705
+ {% if processing_complete %}
706
+ document.getElementById('loader-container').style.display = 'none';
707
+ {% endif %}
708
+
709
+ // Show loader on file upload submission
710
+ document.getElementById('uploadForm').addEventListener('submit', function(e) {
711
+ // Show loader
712
+ document.getElementById('loader-container').style.display = 'flex';
713
+ // Optional: Disable the submit button to prevent double submission
714
+ const submitButton = this.querySelector('button[type="submit"]');
715
+ if (submitButton) {
716
+ submitButton.disabled = true;
717
+ }
718
+ });
719
+ </script>
720
+ </body>
721
+ </html>