ink85 committed on
Commit
a2be518
·
verified ·
1 Parent(s): f5780ec

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
 
3
  # ✅ Always use /tmp for Hugging Face cache in Spaces
@@ -91,23 +92,31 @@ def extract_entities(text):
91
  entities = {}
92
 
93
  # PAN format: AAAAA9999A
94
- pan_match = re.search(r"\b[A-Z]{5}\d{4}[A-Z]\b", text, re.IGNORECASE)
 
95
 
96
  # Account Number
97
- acc_match = re.search(r"account\s*number\s*[:\-]?\s*([A-Za-z0-9]+)", text, re.IGNORECASE)
 
98
 
99
  # Penalty (accepts 'penalty' or 'penalties')
100
- penalty_match = re.search(r"\bpenalt(?:y|ies)\s*[:\-]?\s*([\d,]+)", text, re.IGNORECASE)
 
101
 
102
  # Deactivation keywords
103
  deactivate_match = re.search(r"\bdeactivat(?:e|ed|ion)\b", text, re.IGNORECASE)
104
 
105
  if pan_match:
106
- entities["PAN"] = pan_match.group(0).upper()
 
 
107
  if acc_match:
108
- entities["Account_Number"] = acc_match.group(1)
 
109
  if penalty_match:
110
- entities["Penalty"] = penalty_match.group(1).replace(",", "")
 
 
111
  if deactivate_match:
112
  entities["Deactivate"] = deactivate_match.group(0).lower()
113
 
 
1
+
2
  import os
3
 
4
  # ✅ Always use /tmp for Hugging Face cache in Spaces
 
92
  entities = {}
93
 
94
  # PAN format: AAAAA9999A
95
+ # pan_match = re.search(r"\b[A-Z]{5}\d{4}[A-Z]\b", text, re.IGNORECASE)
96
+ pan_match = re.search(r"PAN (\w{5}\d{4}\w)", text)
97
 
98
  # Account Number
99
+ # acc_match = re.search(r"account\s*number\s*[:\-]?\s*([A-Za-z0-9]+)", text, re.IGNORECASE)
100
+ acc_match = re.search(r"Account Number (\w+)", text, re.IGNORECASE)
101
 
102
  # Penalty (accepts 'penalty' or 'penalties')
103
+ # penalty_match = re.search(r"\bpenalt(?:y|ies)\s*[:\-]?\s*([\d,]+)", text, re.IGNORECASE)
104
+ penalty_match = re.search(r"INR ([\d,]+)", text, re.IGNORECASE)
105
 
106
  # Deactivation keywords
107
  deactivate_match = re.search(r"\bdeactivat(?:e|ed|ion)\b", text, re.IGNORECASE)
108
 
109
  if pan_match:
110
+ #entities["PAN"] = pan_match.group(0).upper()
111
+ entities["PAN"] = pan_match.group(1) if pan_match else None
112
+
113
  if acc_match:
114
+ entities["Account_Number"] = acc_match.group(1) if acc_match else None
115
+
116
  if penalty_match:
117
+ #entities["Penalty"] = penalty_match.group(1).replace(",", "")
118
+ entities["Penalty"] = penalty_match.group(1) if penalty_match else None
119
+
120
  if deactivate_match:
121
  entities["Deactivate"] = deactivate_match.group(0).lower()
122