Spaces:

ranbac
/

OCR

Sleeping

App Files Files Community

OCR / app.py

ranbac

Update app.py

c5240fd verified 24 days ago

raw

history blame contribute delete

2.73 kB

	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.responses import HTMLResponse
	from fastapi.middleware.cors import CORSMiddleware
	import easyocr
	import numpy as np
	from PIL import Image
	import io

	# 1. Create the FastAPI application.
	app = FastAPI(title="EasyOCR Đa Ngôn Ngữ API")

	# CORS: the API is open to every origin, method and header.
	# Credentials are disabled on purpose: a wildcard origin must not be combined
	# with credentialed requests (see the FastAPI CORS documentation), and none
	# of the routes visible in this file rely on cookies or authentication.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# 2. Build both EasyOCR readers up front. This runs at import time, so each
	#    model is loaded once and then shared by every request.
	print("Đang tải các mô hình EasyOCR vào RAM (Sẽ mất thêm chút thời gian cho lần đầu)...")

	# Reader for Vietnamese + English, CPU only.
	print("- Đang nạp mô hình: Tiếng Việt + Tiếng Anh...")
	reader_vi = easyocr.Reader(lang_list=["vi", "en"], gpu=False)

	# Reader for Simplified Chinese + English, CPU only.
	# Note: to read Traditional Chinese (Taiwan, HK) instead, replace 'ch_sim'
	# with 'ch_tra'.
	print("- Đang nạp mô hình: Tiếng Trung (Giản thể) + Tiếng Anh...")
	reader_zh = easyocr.Reader(lang_list=["ch_sim", "en"], gpu=False)

	print("Tải mô hình hoàn tất, sẵn sàng phục vụ!")


	# -----------------------------------------------------
	# ROUTE 1: Web front end
	# -----------------------------------------------------
	@app.get("/", response_class=HTMLResponse)
	async def serve_frontend():
	    """Serve the front-end page stored in ``index.html``.

	    The file is opened relative to the process working directory.

	    Returns:
	        The contents of ``index.html`` as an HTML response. If the file does
	        not exist, an HTML error page is returned with status 404 so that
	        clients can tell the failure apart from a successful page load.
	    """
	    try:
	        with open("index.html", "r", encoding="utf-8") as f:
	            return f.read()
	    except FileNotFoundError:
	        # Report the missing page as an error instead of a 200 OK.
	        return HTMLResponse(
	            content="<h1>Lỗi: Không tìm thấy file index.html.</h1>",
	            status_code=404,
	        )


	# -----------------------------------------------------
	# ROUTE 2: OCR with a language selection flag
	# -----------------------------------------------------
	@app.post("/predict")
	async def predict_image(
	    file: UploadFile = File(...),
	    lang: str = Form("vi"),  # Language flag sent by the front end (default 'vi')
	):
	    """Run OCR on an uploaded image and return the recognised text.

	    Args:
	        file: The uploaded file; its content type must start with ``image/``.
	        lang: ``"zh"`` selects the Simplified Chinese + English reader; any
	            other value falls back to the Vietnamese + English reader.

	    Returns:
	        A dict ``{"text": ...}`` whose value is the recognised lines joined
	        with newline characters.

	    Raises:
	        HTTPException: 400 when the content type is missing or not an image,
	            or when the payload cannot be decoded as an image; 500 for any
	            other failure while reading the upload or running OCR.
	    """
	    # content_type may be None when the client sends no Content-Type for the
	    # part; treat that as "not an image" instead of crashing on None.
	    if not (file.content_type or "").startswith("image/"):
	        raise HTTPException(status_code=400, detail="Vui lòng tải tệp hình ảnh.")

	    try:
	        # Read the whole upload into memory.
	        contents = await file.read()

	        try:
	            image = Image.open(io.BytesIO(contents)).convert("RGB")
	        except OSError as exc:
	            # Pillow signals unreadable or truncated image data with OSError
	            # (UnidentifiedImageError is a subclass): that is a client error.
	            raise HTTPException(
	                status_code=400, detail="Vui lòng tải tệp hình ảnh."
	            ) from exc
	        img_array = np.array(image)

	        # Pick the reader that matches the user's language choice.
	        reader = reader_zh if lang == "zh" else reader_vi

	        # NOTE(review): readtext is called synchronously inside this
	        # coroutine, so the event loop is busy for the duration of the OCR.
	        # Consider offloading to a worker thread if concurrency matters.
	        results = reader.readtext(img_array, detail=0)

	        # Join the recognised lines into a single string.
	        return {"text": "\n".join(results)}

	    except HTTPException:
	        # Keep the deliberate 4xx raised above from being rewrapped as a 500.
	        raise
	    except Exception as exc:
	        raise HTTPException(
	            status_code=500, detail=f"Lỗi hệ thống: {exc}"
	        ) from exc