Spaces:
Sleeping
Sleeping
Update core/extractor.py
Browse files
- core/extractor.py +19 -0
core/extractor.py
CHANGED
|
@@ -147,6 +147,7 @@ class Extractor:
|
|
| 147 |
result['ID_number'] = _idnumber
|
| 148 |
result['Name'] = ''
|
| 149 |
result['Date_of_birth'] = ''
|
|
|
|
| 150 |
result['Gender'] = ''
|
| 151 |
result['Nationality'] = ''
|
| 152 |
result['Place_of_origin'] = ''
|
|
@@ -176,6 +177,24 @@ class Extractor:
|
|
| 176 |
result['Date_of_birth_box'] = DOB[1] if DOB else []
|
| 177 |
continue
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
if re.search(r'sinh|birth|bith', s) and (not result['Date_of_birth']):
|
| 180 |
if re.search(regex_dob, s):
|
| 181 |
DOB = _results[i]
|
|
|
|
| 147 |
result['ID_number'] = _idnumber
|
| 148 |
result['Name'] = ''
|
| 149 |
result['Date_of_birth'] = ''
|
| 150 |
+
result['Date_of_issue'] = ''
|
| 151 |
result['Gender'] = ''
|
| 152 |
result['Nationality'] = ''
|
| 153 |
result['Place_of_origin'] = ''
|
|
|
|
| 177 |
result['Date_of_birth_box'] = DOB[1] if DOB else []
|
| 178 |
continue
|
| 179 |
|
| 180 |
+
if re.search(r'month', s) and (not result['Date_of_issue']):
|
| 181 |
+
if re.search(regex_dob, s):
|
| 182 |
+
DOI = _results[i]
|
| 183 |
+
|
| 184 |
+
elif re.search(regex_dob, _results[i - 1][0]):
|
| 185 |
+
DOI = _results[i - 1]
|
| 186 |
+
|
| 187 |
+
elif re.search(regex_dob, _results[i + 1][0]):
|
| 188 |
+
DOI = _results[i + 1]
|
| 189 |
+
|
| 190 |
+
else:
|
| 191 |
+
DOI = []
|
| 192 |
+
|
| 193 |
+
result['Date_of_issue'] = (re.split(r':|\s+', DOI[0]))[-1].strip() if DOI else ''
|
| 194 |
+
result['Date_of_issue_box'] = DOI[1] if DOI else []
|
| 195 |
+
|
| 196 |
+
continue
|
| 197 |
+
|
| 198 |
if re.search(r'sinh|birth|bith', s) and (not result['Date_of_birth']):
|
| 199 |
if re.search(regex_dob, s):
|
| 200 |
DOB = _results[i]
|