Spaces:

ranbac
/

OCR

Sleeping

App Files Files Community

ranbac committed 28 days ago

Commit

c5240fd

verified ·

1 Parent(s): 1bd8107

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -13

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 import easyocr
@@ -7,7 +7,7 @@ from PIL import Image
 import io
 # 1. Khởi tạo FastAPI
-app = FastAPI(title="EasyOCR Tiếng Việt API")
 app.add_middleware(
     CORSMiddleware,
@@ -17,11 +17,17 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# 2. Khởi tạo mô hình EasyOCR (Chỉ chạy 1 lần)
-print("Đang tải mô hình EasyOCR (Tiếng Việt & Tiếng Anh) vào RAM...")
-# gpu=False: Khẳng định việc chạy trên CPU
-reader = easyocr.Reader(['vi', 'en'], gpu=False)
-print("Tải mô hình hoàn tất!")
 # -----------------------------------------------------
@@ -37,10 +43,13 @@ async def serve_frontend():
 # -----------------------------------------------------
-# ROUTE 2: Xử lý OCR
 # -----------------------------------------------------
 @app.post("/predict")
-async def predict_image(file: UploadFile = File(...)):
     if not file.content_type.startswith('image/'):
         raise HTTPException(status_code=400, detail="Vui lòng tải tệp hình ảnh.")
@@ -50,11 +59,15 @@ async def predict_image(file: UploadFile = File(...)):
         image = Image.open(io.BytesIO(contents)).convert('RGB')
         img_array = np.array(image)
-        # Đưa vào EasyOCR đọc chữ
-        # detail=0: Bỏ qua tọa độ, chỉ lấy thẳng danh sách các dòng chữ
-        results = reader.readtext(img_array, detail=0)
-        # Ghép các dòng chữ lại với nhau
         extracted_text = "\n".join(results)
         return {"text": extracted_text}

+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 import easyocr
 import io
 # 1. Khởi tạo FastAPI
+app = FastAPI(title="EasyOCR Đa Ngôn Ngữ API")
 app.add_middleware(
     CORSMiddleware,
     allow_headers=["*"],
 )
+# 2. Khởi tạo 2 mô hình EasyOCR chạy song song (Chỉ tải 1 lần)
+print("Đang tải các mô hình EasyOCR vào RAM (Sẽ mất thêm chút thời gian cho lần đầu)...")
+print("- Đang nạp mô hình: Tiếng Việt + Tiếng Anh...")
+reader_vi = easyocr.Reader(['vi', 'en'], gpu=False)
+print("- Đang nạp mô hình: Tiếng Trung (Giản thể) + Tiếng Anh...")
+reader_zh = easyocr.Reader(['ch_sim', 'en'], gpu=False)
+# Lưu ý: Nếu muốn đọc Tiếng Trung Phồn thể (Đài Loan, HK), bạn đổi 'ch_sim' thành 'ch_tra' nhé.
+print("Tải mô hình hoàn tất, sẵn sàng phục vụ!")
 # -----------------------------------------------------
 # -----------------------------------------------------
+# ROUTE 2: Xử lý OCR với cờ chọn ngôn ngữ
 # -----------------------------------------------------
 @app.post("/predict")
+async def predict_image(
+    file: UploadFile = File(...),
+    lang: str = Form("vi")  # Nhận biến ngôn ngữ từ Frontend (Mặc định là 'vi')
+):
     if not file.content_type.startswith('image/'):
         raise HTTPException(status_code=400, detail="Vui lòng tải tệp hình ảnh.")
         image = Image.open(io.BytesIO(contents)).convert('RGB')
         img_array = np.array(image)
+        # ĐIỀU HƯỚNG MÔ HÌNH DỰA VÀO LỰA CHỌN CỦA NGƯỜI DÙNG
+        if lang == "zh":
+            # Chạy cỗ máy Tiếng Trung
+            results = reader_zh.readtext(img_array, detail=0)
+        else:
+            # Chạy cỗ máy Tiếng Việt
+            results = reader_vi.readtext(img_array, detail=0)
+        # Ghép các dòng chữ lại
         extracted_text = "\n".join(results)
         return {"text": extracted_text}