anh-khoa-nguyen commited on
Commit
e155984
·
1 Parent(s): 591424d

Initial commit of CCCD OCR API

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/LandifyOCR.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.8 (LandifyOCR)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="10">
8
+ <item index="0" class="java.lang.String" itemvalue="psycopg2" />
9
+ <item index="1" class="java.lang.String" itemvalue="postgis" />
10
+ <item index="2" class="java.lang.String" itemvalue="mypy_extensions" />
11
+ <item index="3" class="java.lang.String" itemvalue="pathspec" />
12
+ <item index="4" class="java.lang.String" itemvalue="pyflakes" />
13
+ <item index="5" class="java.lang.String" itemvalue="mccabe" />
14
+ <item index="6" class="java.lang.String" itemvalue="black" />
15
+ <item index="7" class="java.lang.String" itemvalue="isort" />
16
+ <item index="8" class="java.lang.String" itemvalue="pycodestyle" />
17
+ <item index="9" class="java.lang.String" itemvalue="flake8" />
18
+ </list>
19
+ </value>
20
+ </option>
21
+ </inspection_tool>
22
+ </profile>
23
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.8 (LandifyOCR)" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (LandifyOCR)" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/LandifyOCR.iml" filepath="$PROJECT_DIR$/.idea/LandifyOCR.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
README.md CHANGED
@@ -1,11 +1,15 @@
1
  ---
2
- title: Landify Cccd Ocr
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: yellow
6
  sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: Vietnamese Citizen ID OCR
3
+ emoji: 💳
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ python_version: 3.8
8
+ app_port: 7860
9
  ---
10
 
11
+ # Vietnamese Citizen ID (CCCD) OCR API
12
+
13
+ Đây là một API sử dụng FastAPI để trích xuất thông tin từ ảnh Căn cước công dân Việt Nam.
14
+
15
+ Sử dụng endpoint `/docs` để xem tài liệu và thử nghiệm API.
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import datetime
3
+ import os
4
+ import re
5
+ import time
6
+ import uuid
7
+
8
+ import cv2
9
+ from fastapi import FastAPI, File, UploadFile, HTTPException
10
+ from pydantic import BaseModel
11
+ from typing import Optional
12
+
13
+ # Import lớp Extractor từ thư mục core
14
+ from core.extractor import Extractor
15
+
16
# --- Initialisation ---

# FastAPI application instance.
app = FastAPI(
    title="CCCD Extraction API",
    description="Một microservice để trích xuất thông tin từ Căn cước công dân Việt Nam.",
    version="1.0.0"
)

# Directory where uploaded images are staged before processing.
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Build the Extractor exactly once so the models load a single time and
# are reused by every request. If loading fails the app still starts,
# but the endpoint will answer 500 until the extractor is available.
idcard_extractor = None
try:
    idcard_extractor = Extractor()
    print("CCCD Extractor loaded successfully.")
except Exception as exc:
    print(f"Error loading CCCD Extractor: {exc}")
+
38
+
39
+ # --- Định nghĩa Model cho Request và Response ---
40
+
41
+ # Model cho request nếu gửi ảnh dạng base64
42
+ class ImageRequest(BaseModel):
43
+ image_base64: str
44
+
45
+
46
+ # Model cho response trả về
47
+ class ExtractionResponse(BaseModel):
48
+ ID_number: Optional[str] = None
49
+ Name: Optional[str] = None
50
+ Date_of_birth: Optional[str] = None
51
+ Gender: Optional[str] = None
52
+ Nationality: Optional[str] = None
53
+ Place_of_origin: Optional[str] = None
54
+ Place_of_residence: Optional[str] = None
55
+ elapsed: float
56
+
57
+
58
+ # --- Xây dựng API Endpoint ---
59
+
60
+ @app.get("/")
61
+ def read_root():
62
+ return {"message": "Welcome to the CCCD Extraction API. Use the /extract/ endpoint to process an image."}
63
+
64
+
65
+ @app.post("/extract/", response_model=ExtractionResponse, tags=["CCCD Extraction"])
66
+ async def extract_id_card_info(file: UploadFile = File(...)):
67
+ """
68
+ Nhận một file ảnh CCCD, trích xuất thông tin và trả về.
69
+ """
70
+ if not idcard_extractor:
71
+ raise HTTPException(status_code=500, detail="OCR Extractor is not available.")
72
+
73
+ # --- 1. Lưu file ảnh được upload ---
74
+ # Tạo tên file ngẫu nhiên và an toàn để tránh trùng lặp
75
+ file_extension = os.path.splitext(file.filename)[1]
76
+ random_filename = f"{uuid.uuid4()}{file_extension}"
77
+ file_path = os.path.join(UPLOAD_DIR, random_filename)
78
+
79
+ try:
80
+ # Đọc nội dung file và lưu lại
81
+ with open(file_path, "wb") as buffer:
82
+ buffer.write(await file.read())
83
+ except Exception as e:
84
+ raise HTTPException(status_code=500, detail=f"Could not save uploaded file: {e}")
85
+
86
+ # --- 2. Xử lý ảnh và trích xuất thông tin (logic từ Django view) ---
87
+ start_time = time.time()
88
+ try:
89
+ frame = cv2.imread(file_path)
90
+ if frame is None:
91
+ raise HTTPException(status_code=400, detail="Invalid image file.")
92
+
93
+ # Bước 1: Dùng PaddleOCR để phát hiện các vùng văn bản
94
+ annotations = idcard_extractor.Detection(frame)
95
+
96
+ info = {}
97
+
98
+ # Tìm số CCCD trước tiên
99
+ for box in annotations:
100
+ text_detected = box[1][0]
101
+ if re.search(r'\d{9,12}', text_detected):
102
+ # Tách số ra khỏi chuỗi nhiễu
103
+ id_number = re.search(r'\d{9,12}', text_detected).group(0)
104
+ info['ID_number'] = id_number
105
+ info['ID_number_box'] = box[0]
106
+ break
107
+
108
+ if 'ID_number' not in info:
109
+ raise HTTPException(status_code=400, detail="Could not detect an ID number in the image.")
110
+
111
+ # Bước 2: Dùng VietOCR để nhận dạng các trường thông tin còn lại
112
+ extracted_result = []
113
+ for box in annotations:
114
+ # Bỏ qua vùng chứa số ID đã xử lý
115
+ if re.search(r'\d{9,12}', box[1][0]):
116
+ continue
117
+
118
+ top_left = (int(box[0][0][0]), int(box[0][0][1]))
119
+ top_right = (int(box[0][1][0]), int(box[0][1][1]))
120
+ bottom_right = (int(box[0][2][0]), int(box[0][2][1]))
121
+ bottom_left = (int(box[0][3][0]), int(box[0][3][1]))
122
+
123
+ # Warp và nhận dạng
124
+ result_text, _ = idcard_extractor.WarpAndRec(frame, top_left, top_right, bottom_right, bottom_left)
125
+ extracted_result.append((result_text, box[0])) # Lưu cả text và bounding box
126
+
127
+ # Bước 3: Tổng hợp thông tin
128
+ final_info = idcard_extractor.GetInformationAndSave(extracted_result, info['ID_number'], info['ID_number_box'])
129
+
130
+ elapsed = time.time() - start_time
131
+ final_info["elapsed"] = round(elapsed, 2)
132
+
133
+ # Xóa file tạm sau khi xử lý xong
134
+ os.remove(file_path)
135
+
136
+ return final_info
137
+
138
+ except Exception as e:
139
+ # Nếu có lỗi, cũng xóa file tạm
140
+ if os.path.exists(file_path):
141
+ os.remove(file_path)
142
+ raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}")
core/__init__.py ADDED
File without changes
core/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (129 Bytes). View file
 
core/__pycache__/extractor.cpython-38.pyc ADDED
Binary file (6.91 kB). View file
 
core/extractor.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import cv2
5
+ import time
6
+ import threading
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ from PIL import Image
10
+ from paddleocr import PaddleOCR
11
+ from vietocr.tool.predictor import Predictor
12
+ from vietocr.tool.config import Cfg
13
+
14
# Absolute directory of this module; model weights are resolved against it.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Optional preloaded singletons shared by every Extractor instance.
ocr = None
detector = None
18
+
19
+
20
class Extractor:
    """Extracts structured fields from a Vietnamese Citizen ID (CCCD) image.

    Combines PaddleOCR (text-region detection) with VietOCR (text
    recognition) and heuristically maps the recognised lines onto the
    card's fields (name, date of birth, gender, nationality, origin,
    residence).
    """

    def __init__(self):
        # VietOCR recognition model configuration (CPU, bundled weights).
        self.config = Cfg.load_config_from_name('vgg_seq2seq')
        self.config['weights'] = os.path.join(CURRENT_DIR, "seq2seqocr.pth")
        self.config['cnn']['pretrained'] = False
        self.config['device'] = 'cpu'

        # Reuse the module-level singletons when they were preloaded.
        # BUG FIX: compare against None with `is`, not `==`.
        if ocr is None:
            self.ocr = PaddleOCR(lang='en')
        else:
            self.ocr = ocr
        if detector is None:
            self.detector = Predictor(self.config)
        else:
            self.detector = detector

    ####################################################################################################

    def Detection(self, frame):
        """Run PaddleOCR on *frame* and return the first page's annotations.

        Each annotation is ``[box_points, (text, confidence)]``.
        """
        annotations = self.ocr.ocr(frame, rec=True, cls=False)
        return annotations[0]

    ####################################################################################################

    def WarpAndSave(self, frame, fileName, top_left, top_right, bottom_right, bottom_left):
        """Perspective-warp the given quadrilateral to a rectangle and write it to *fileName*.

        Returns True after the image is written.
        """
        w, h, cn = frame.shape
        padding = 4.0
        padding = int(padding * w / 640)  # NOTE(review): computed but not applied in this method

        # All points are in [cols, rows] order.
        pt_A = top_left[0], top_left[1]
        pt_B = bottom_left[0], bottom_left[1]
        pt_C = bottom_right[0], bottom_right[1]
        pt_D = top_right[0], top_right[1]

        # Output size from the L2 lengths of opposite edges.
        width_AD = np.sqrt(((pt_A[0] - pt_D[0]) ** 2) + ((pt_A[1] - pt_D[1]) ** 2))
        width_BC = np.sqrt(((pt_B[0] - pt_C[0]) ** 2) + ((pt_B[1] - pt_C[1]) ** 2))
        maxWidth = max(int(width_AD), int(width_BC))

        height_AB = np.sqrt(((pt_A[0] - pt_B[0]) ** 2) + ((pt_A[1] - pt_B[1]) ** 2))
        height_CD = np.sqrt(((pt_C[0] - pt_D[0]) ** 2) + ((pt_C[1] - pt_D[1]) ** 2))
        maxHeight = max(int(height_AB), int(height_CD))

        input_pts = np.float32([pt_A, pt_B, pt_C, pt_D])
        output_pts = np.float32([[0, 0],
                                 [0, maxHeight - 1],
                                 [maxWidth - 1, maxHeight - 1],
                                 [maxWidth - 1, 0]])

        # Compute the perspective transform M and rectify the region.
        M = cv2.getPerspectiveTransform(input_pts, output_pts)

        matWarped = cv2.warpPerspective(frame, M, (maxWidth, maxHeight), flags=cv2.INTER_LINEAR)
        cv2.imwrite(fileName, matWarped)

        return True

    ####################################################################################################

    def WarpAndRec(self, frame, top_left, top_right, bottom_right, bottom_left):
        """Warp the padded quadrilateral to a rectangle and run VietOCR on it.

        Returns ``[recognised_text, box]`` where *box* lists the padded
        corners in A, D, C, B order.
        """
        w, h, cn = frame.shape
        padding = 4.0
        padding = int(padding * w / 640)  # pad proportionally to image width

        box = []
        # All points are in [cols, rows] order; padding expands the quad.
        pt_A = top_left[0] - padding, top_left[1] - padding
        pt_B = bottom_left[0] - padding, bottom_left[1] + padding
        pt_C = bottom_right[0] + padding, bottom_right[1] + padding
        pt_D = top_right[0] + padding, top_right[1] - padding

        # Output size from the L2 lengths of opposite edges.
        width_AD = np.sqrt(((pt_A[0] - pt_D[0]) ** 2) + ((pt_A[1] - pt_D[1]) ** 2))
        width_BC = np.sqrt(((pt_B[0] - pt_C[0]) ** 2) + ((pt_B[1] - pt_C[1]) ** 2))
        maxWidth = max(int(width_AD), int(width_BC))

        height_AB = np.sqrt(((pt_A[0] - pt_B[0]) ** 2) + ((pt_A[1] - pt_B[1]) ** 2))
        height_CD = np.sqrt(((pt_C[0] - pt_D[0]) ** 2) + ((pt_C[1] - pt_D[1]) ** 2))
        maxHeight = max(int(height_AB), int(height_CD))

        input_pts = np.float32([pt_A, pt_B, pt_C, pt_D])
        output_pts = np.float32([[0, 0],
                                 [0, maxHeight - 1],
                                 [maxWidth - 1, maxHeight - 1],
                                 [maxWidth - 1, 0]])

        # Compute the perspective transform M and rectify the region.
        M = cv2.getPerspectiveTransform(input_pts, output_pts)

        matWarped = cv2.warpPerspective(frame, M, (maxWidth, maxHeight), flags=cv2.INTER_LINEAR)

        # Recognise the rectified crop with VietOCR.
        s = self.detector.predict(Image.fromarray(matWarped))

        box.append(pt_A)
        box.append(pt_D)
        box.append(pt_C)
        box.append(pt_B)

        return [s, box]

    ####################################################################################################

    def GetInformationAndSave(self, _results, _idnumber, _idnumberbox):
        """Map recognised text lines onto CCCD fields and persist them as JSON.

        Args:
            _results: sequence of ``(text, box)`` pairs from recognition,
                in reading order.
            _idnumber: the already-extracted ID number string.
            _idnumberbox: bounding box of the ID number region.

        Returns:
            dict with the card fields plus a ``*_box`` entry per field.
            Side effect: writes ``extracted_infomation.json`` (name kept
            for backward compatibility) in the working directory.
        """
        print("---------------------------------")
        print(_results)

        result = {}
        result['ID_number'] = _idnumber
        result['Name'] = ''
        result['Date_of_birth'] = ''
        result['Gender'] = ''
        result['Nationality'] = ''
        result['Place_of_origin'] = ''
        result['Place_of_residence'] = ''
        result['ID_number_box'] = _idnumberbox

        # dd/mm fragment marks a date; the residence regex lists noise
        # tokens that disqualify a line from being an address.
        regex_dob = r'[0-9][0-9]/[0-9][0-9]'
        regex_residence = r'[0-9][0-9]/[0-9][0-9]/|[0-9]{4,10}|Date|Demo|Dis|Dec|Dale|fer|ting|gical|ping|exp|ver|pate|cond|trị|đến|không|Không|Có|Pat|ter|ity'

        for i, res in enumerate(_results):
            s = res[0]

            print(s)
            # "Họ và tên / Full name" label: the value is usually on the
            # next line (or the one after if the next contains digits).
            if re.search(r'tên|name', s):
                Name = _results[i + 1] if (not re.search(r'[0-9]', _results[i + 1][0])) else _results[i + 2]
                result['Name'] = Name[0].title()
                result['Name_box'] = Name[1] if Name[1] else []

                # Sometimes the DOB precedes the name label by two lines.
                if (result['Date_of_birth'] == ''):
                    DOB = _results[i - 2] if re.search(regex_dob, _results[i - 2][0]) else []
                    result['Date_of_birth'] = (re.split(r':|\s+', DOB[0]))[-1].strip() if DOB else ''
                    result['Date_of_birth_box'] = DOB[1] if DOB else []
                continue

            # "Ngày sinh / Date of birth" label: look on this line, then
            # the previous, then the next one for a dd/mm fragment.
            if re.search(r'sinh|birth|bith', s) and (not result['Date_of_birth']):
                if re.search(regex_dob, s):
                    DOB = _results[i]

                elif re.search(regex_dob, _results[i - 1][0]):
                    DOB = _results[i - 1]

                elif re.search(regex_dob, _results[i + 1][0]):
                    DOB = _results[i + 1]

                else:
                    DOB = []

                result['Date_of_birth'] = (re.split(r':|\s+', DOB[0]))[-1].strip() if DOB else ''
                result['Date_of_birth_box'] = DOB[1] if DOB else []

                # Nationality often follows the DOB line directly.
                if re.search(r"Việt Nam", _results[i + 1][0]):
                    result['Nationality'] = 'Việt Nam'
                    result['Nationality_box'] = _results[i + 1][1]

                continue

            # "Giới tính / Sex" label: value lives on the same line.
            if re.search(r'Giới|Sex', s):
                Gender = _results[i]
                result['Gender'] = 'Nữ' if re.search(r'Nữ|nữ', Gender[0]) else 'Nam'
                result['Gender_box'] = Gender[1] if Gender[1] else []

            # "Quốc tịch / Nationality" label: try same line, then the
            # next (if it is not a date), else the previous line.
            if re.search(r'Quốc|tịch|Nat', s):
                if (not re.search(r'ty|ing', re.split(r':|,|[.]|ty|tịch', s)[-1].strip()) and (
                        len(re.split(r':|,|[.]|ty|tịch', s)[-1].strip()) >= 3)):
                    Nationality = _results[i]

                elif not re.search(r'[0-9][0-9]/[0-9][0-9]/', _results[i + 1][0]):
                    Nationality = _results[i + 1]

                else:
                    Nationality = _results[i - 1]

                result['Nationality'] = re.split(r':|-|,|[.]|ty|[0-9]|tịch', Nationality[0])[-1].strip().title()
                result['Nationality_box'] = Nationality[1] if Nationality[1] else []

                # Drop short OCR-noise tokens, then normalise to 'Việt Nam'.
                for s in re.split(r'\s+', result['Nationality']):
                    if len(s) < 3:
                        result['Nationality'] = re.split(s, result['Nationality'])[-1].strip().title()
                if re.search(r'Nam', result['Nationality']):
                    result['Nationality'] = 'Việt Nam'

                continue

            # "Quê quán / Place of origin" label (plus common OCR typos).
            if re.search(r'Quê|origin|ongin|ngin|orging', s):
                PlaceOfOrigin = [_results[i], _results[i + 1]] if not re.search(r'[0-9]{4}', _results[i + 1][0]) else []
                if PlaceOfOrigin:
                    if len(re.split(r':|;|of|ging|gin|ggong', PlaceOfOrigin[0][0])[-1].strip()) > 2:
                        result['Place_of_origin'] = (
                                (re.split(r':|;|of|ging|gin|ggong', PlaceOfOrigin[0][0]))[-1].strip() + ', ' +
                                PlaceOfOrigin[1][0])
                    else:
                        result['Place_of_origin'] = PlaceOfOrigin[1][0]
                    result['Place_of_origin_box'] = PlaceOfOrigin[1][1]
                continue

            # "Nơi thường trú / Place of residence" label: the address may
            # span the label line plus one of the following lines.
            if re.search(r'Nơi|trú|residence', s):
                vals2 = "" if (i + 2 > len(_results) - 1) else _results[i + 2] if len(_results[i + 2][0]) > 5 else \
                    _results[-1]
                vals3 = "" if (i + 3 > len(_results) - 1) else _results[i + 3] if len(_results[i + 3][0]) > 5 else \
                    _results[-1]

                if ((re.split(r':|;|residence|ence|end', s))[-1].strip() != ''):

                    if (vals2 != '' and not re.search(regex_residence, vals2[0])):
                        PlaceOfResidence = [_results[i], vals2]
                    elif (vals3 != '' and not re.search(regex_residence, vals3[0])):
                        PlaceOfResidence = [_results[i], vals3]
                    elif not re.search(regex_residence, _results[-1][0]):
                        PlaceOfResidence = [_results[i], _results[-1]]
                    else:
                        PlaceOfResidence = [_results[-1], []]

                else:
                    PlaceOfResidence = [vals2, []] if (vals2 and not re.search(regex_residence, vals2[0])) else [
                        _results[-1], []]

                print('PlaceOfResidence: {}'.format(PlaceOfResidence))
                if PlaceOfResidence[1]:
                    result['Place_of_residence'] = re.split(r':|;|residence|sidencs|ence|end', PlaceOfResidence[0][0])[
                                                       -1].strip() + ' ' + str(PlaceOfResidence[1][0]).strip()
                    result['Place_of_residence_box'] = PlaceOfResidence[1][1]

                else:
                    result['Place_of_residence'] = PlaceOfResidence[0][0]
                    result['Place_of_residence_box'] = PlaceOfResidence[0][1] if PlaceOfResidence else []
                continue

            # Last line fallback: use it as the residence if still empty,
            # and blank out the boxes of any fields never found.
            elif (i == len(_results) - 1):
                if result['Place_of_residence'] == '':
                    if not re.search(regex_residence, _results[-1][0]):
                        PlaceOfResidence = _results[-1]
                    elif not re.search(regex_residence, _results[-2][0]):
                        PlaceOfResidence = _results[-2]
                    else:
                        PlaceOfResidence = []

                    result['Place_of_residence'] = PlaceOfResidence[0] if PlaceOfResidence else ''
                    result['Place_of_residence_box'] = PlaceOfResidence[1] if PlaceOfResidence else []
                if result['Gender'] == '':
                    result['Gender_box'] = []
                if result['Nationality'] == '':
                    result['Nationality_box'] = []
                if result['Name'] == '':
                    result['Name_box'] = []
                if result['Date_of_birth'] == '':
                    result['Date_of_birth_box'] = []

            else:
                continue

        # Persist the extraction (redundant f.close() inside `with` removed).
        with open('extracted_infomation.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(result, indent=4, ensure_ascii=False))

        return result
####################################################################################################

if __name__ == '__main__':
    # BUG FIX: the extractor was previously built at module level, so
    # merely importing core.extractor (as app.py does) triggered a second
    # full model load. Build it only when run as a standalone script.
    idcard_extractor = Extractor()

    img_path = './20211019_090832.jpg'
    frame = cv2.imread(img_path)
    if frame is None:
        raise SystemExit(f"Could not read demo image: {img_path}")

    # Detect text regions, then recognise each non-ID region.
    annotations = idcard_extractor.Detection(frame)
    extracted_result = []
    for box in annotations:
        top_left = (int(box[0][0][0]), int(box[0][0][1]))
        top_right = (int(box[0][1][0]), int(box[0][1][1]))
        bottom_right = (int(box[0][2][0]), int(box[0][2][1]))
        bottom_left = (int(box[0][3][0]), int(box[0][3][1]))
        text, quad = idcard_extractor.WarpAndRec(frame, top_left, top_right, bottom_right, bottom_left)
        extracted_result.append((text, quad))

    # BUG FIX: the old demo called GetInformationAndSave with a single
    # string argument, which raised TypeError (two required arguments
    # missing). Pass the recognised lines plus placeholder ID fields.
    info = idcard_extractor.GetInformationAndSave(extracted_result, '', [])
    print(info)
core/seq2seqocr.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0921503a41375a0584268e23ef3d414ea478a8fe8777865c7745d38f2d0bc5db
3
+ size 89575371
extracted_infomation.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ID_number": "077204004336",
3
+ "Name": "Nguyễn Anh Khoa",
4
+ "Date_of_birth": "04/09/2004",
5
+ "Gender": "Nam",
6
+ "Nationality": "",
7
+ "Place_of_origin": "Quảng Nam",
8
+ "Place_of_residence": "Bình Châu, Xuyên Mộc, Bà Rịa - Vũng Tàu",
9
+ "ID_number_box": [
10
+ [
11
+ 772.0,
12
+ 560.0
13
+ ],
14
+ [
15
+ 1358.0,
16
+ 560.0
17
+ ],
18
+ [
19
+ 1358.0,
20
+ 616.0
21
+ ],
22
+ [
23
+ 772.0,
24
+ 616.0
25
+ ]
26
+ ],
27
+ "Name_box": [
28
+ [
29
+ 586.0,
30
+ 708.0
31
+ ],
32
+ [
33
+ 1232.0,
34
+ 708.0
35
+ ],
36
+ [
37
+ 1232.0,
38
+ 770.0
39
+ ],
40
+ [
41
+ 586.0,
42
+ 770.0
43
+ ]
44
+ ],
45
+ "Date_of_birth_box": [
46
+ [
47
+ 1096.0,
48
+ 789.0
49
+ ],
50
+ [
51
+ 1388.0,
52
+ 789.0
53
+ ],
54
+ [
55
+ 1388.0,
56
+ 833.0
57
+ ],
58
+ [
59
+ 1096.0,
60
+ 833.0
61
+ ]
62
+ ],
63
+ "Gender_box": [
64
+ [
65
+ 586.0,
66
+ 854.0
67
+ ],
68
+ [
69
+ 1495.0,
70
+ 854.0
71
+ ],
72
+ [
73
+ 1495.0,
74
+ 902.0
75
+ ],
76
+ [
77
+ 586.0,
78
+ 902.0
79
+ ]
80
+ ],
81
+ "Nationality_box": [],
82
+ "Place_of_origin_box": [
83
+ [
84
+ 586.0,
85
+ 991.0
86
+ ],
87
+ [
88
+ 896.0,
89
+ 991.0
90
+ ],
91
+ [
92
+ 896.0,
93
+ 1041.0
94
+ ],
95
+ [
96
+ 586.0,
97
+ 1041.0
98
+ ]
99
+ ],
100
+ "Place_of_residence_box": [
101
+ [
102
+ 586.0,
103
+ 1122.0
104
+ ],
105
+ [
106
+ 1665.0,
107
+ 1120.0
108
+ ],
109
+ [
110
+ 1665.0,
111
+ 1176.0
112
+ ],
113
+ [
114
+ 586.0,
115
+ 1179.0
116
+ ]
117
+ ]
118
+ }
requirements.txt ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.5.2
3
+ astor==0.8.1
4
+ attrdict==2.0.1
5
+ babel==2.17.0
6
+ bce-python-sdk==0.9.45
7
+ beautifulsoup4==4.13.5
8
+ blinker==1.8.2
9
+ cachetools==5.5.2
10
+ certifi==2025.8.3
11
+ charset-normalizer==3.4.3
12
+ click==8.1.8
13
+ colorama==0.4.6
14
+ contourpy==1.1.1
15
+ cssselect==1.2.0
16
+ cssutils==2.11.1
17
+ cycler==0.12.1
18
+ Cython==3.1.3
19
+ decorator==5.2.1
20
+ einops==0.2.0
21
+ et_xmlfile==2.0.0
22
+ exceptiongroup==1.3.0
23
+ fastapi==0.116.1
24
+ filelock==3.16.1
25
+ fire==0.7.1
26
+ Flask==3.0.3
27
+ flask-babel==4.0.0
28
+ fonttools==4.57.0
29
+ fsspec==2025.3.0
30
+ future==1.0.0
31
+ gdown==4.4.0
32
+ h11==0.16.0
33
+ httpcore==1.0.9
34
+ httptools==0.6.4
35
+ httpx==0.28.1
36
+ idna==3.10
37
+ imageio==2.35.1
38
+ imgaug==0.4.0
39
+ importlib_metadata==8.5.0
40
+ itsdangerous==2.2.0
41
+ Jinja2==3.1.6
42
+ kiwisolver==1.4.7
43
+ lmdb==1.7.3
44
+ lxml==6.0.1
45
+ MarkupSafe==2.1.5
46
+ matplotlib==3.6.3
47
+ more-itertools==10.5.0
48
+ mpmath==1.3.0
49
+ networkx==3.1
50
+ numpy==1.19.3
51
+ opencv-python-headless==4.4.0.44
52
+ openpyxl==3.1.5
53
+ opt-einsum==3.3.0
54
+ packaging==25.0
55
+ paddleocr==2.7.3
56
+ paddlepaddle==2.6.1
57
+ pandas==1.4.4
58
+ pdf2docx==0.5.7
59
+ Pillow==9.5.0
60
+ prefetch-generator==1.0.1
61
+ premailer==3.10.0
62
+ protobuf==3.20.2
63
+ psutil==7.0.0
64
+ pyclipper==1.3.0.post6
65
+ pycryptodome==3.23.0
66
+ pydantic==2.10.6
67
+ pydantic_core==2.27.2
68
+ PyMuPDF==1.24.11
69
+ pyparsing==3.1.4
70
+ PySocks==1.7.1
71
+ python-dateutil==2.9.0.post0
72
+ python-docx==1.1.2
73
+ python-dotenv==1.0.1
74
+ python-multipart==0.0.20
75
+ pytz==2025.2
76
+ PyWavelets==1.4.1
77
+ PyYAML==6.0.2
78
+ rapidfuzz==3.9.7
79
+ rarfile==4.2
80
+ requests==2.32.4
81
+ scikit-image==0.19.3
82
+ scipy==1.9.3
83
+ shapely==2.0.7
84
+ six==1.17.0
85
+ sniffio==1.3.1
86
+ soupsieve==2.7
87
+ starlette==0.44.0
88
+ sympy==1.13.3
89
+ termcolor==2.4.0
90
+ tifffile==2023.7.10
91
+ torch==2.2.2
92
+ torchvision==0.17.2
93
+ tqdm==4.67.1
94
+ typing_extensions==4.13.2
95
+ urllib3==2.2.3
96
+ uvicorn==0.33.0
97
+ vietocr==0.3.11
98
+ visualdl==2.5.3
99
+ watchfiles==0.24.0
100
+ websockets==13.1
101
+ Werkzeug==3.0.6
102
+ zipp==3.20.2