Spaces:

hoangthiencm
/

ht-math-web-backend

Running

App Files Files Community

hoangthiencm committed on Dec 27, 2025

Commit

b1ca4dd

verified ·

1 Parent(s): e4765f4

Update app.py

Browse files

Files changed (1) hide show

app.py +271 -217

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 Backend API cho HT_MATH_WEB - Chạy trên Hugging Face Spaces (Docker Version)
-Phiên bản: 9.2 (Added /version endpoint for Load Balancer Check)
 Tác giả: Hoàng Tấn Thiên
 """
@@ -13,59 +13,53 @@ import tempfile
 import hashlib
 import secrets
 import uuid
-import sys
 from typing import List, Optional
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request, Header, Body
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
-from pydantic import BaseModel
 from PIL import Image
 import fitz  # PyMuPDF
 import google.generativeai as genai
-# --- INIT LOGGING ---
-print(">> [SYSTEM] Starting app.py v9.2 (With Version Check)...")
 # --- PANDOC IMPORT ---
 try:
     import pypandoc
-    print(f">> [INFO] Pandoc version: {pypandoc.get_pandoc_version()}")
 except ImportError:
-    print(">> [WARN] pypandoc module not found.")
 except OSError:
-    print(">> [WARN] pandoc binary not found.")
-# --- SUPABASE SETUP ---
 try:
     from supabase import create_client, Client
     SUPABASE_AVAILABLE = True
 except ImportError:
     SUPABASE_AVAILABLE = False
-    print(">> [WARN] supabase module not found.")
 # ===== CẤU HÌNH =====
 GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
 GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
 SUPABASE_URL = os.getenv("SUPABASE_URL", "")
 SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
-MAX_THREADS = int(os.getenv("MAX_THREADS", "5"))
-ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123") # Key mặc định nếu quên set env
 # Setup Supabase
 supabase = None
 if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
     try:
         supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
-        print(">> [INFO] Supabase connected.")
     except Exception as e:
-        print(f">> [ERR] Supabase connection failed: {e}")
-# ===== KHỞI TẠO APP =====
-app = FastAPI(title="HT_MATH_WEB API", version="9.2")
-# CORS Middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -74,245 +68,281 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# --- GLOBAL REQUEST LOGGING MIDDLEWARE ---
-@app.middleware("http")
-async def log_requests(request: Request, call_next):
-    # Log để debug: xem Frontend gọi URL nào
-    # print(f">> [REQ] {request.method} {request.url.path}") # Uncomment nếu cần debug chi tiết
-    response = await call_next(request)
-    if response.status_code == 404:
-        print(f">> [ERR] 404 Not Found at: {request.url.path} | Container PID: {os.getpid()}")
-    return response
-# Static Files
 os.makedirs("uploads", exist_ok=True)
 app.mount("/uploads", StaticFiles(directory="uploads"), name="uploads")
-# ===== MODELS & HELPERS =====
-class AdminActionModel(BaseModel):
-    email: str
-    admin_key: str
-def hash_password(password: str) -> str:
-    return hashlib.sha256(password.encode()).hexdigest()
-def verify_password(password: str, hashed: str) -> bool:
-    return hash_password(password) == hashed
 class ApiKeyManager:
     def __init__(self, keys: List[str]):
         self.api_keys = [k.strip() for k in keys if k.strip()]
         self.current_index = 0
     def get_next_key(self) -> Optional[str]:
         if not self.api_keys: return None
         key = self.api_keys[self.current_index]
         self.current_index = (self.current_index + 1) % len(self.api_keys)
         return key
     def get_key_count(self) -> int:
         return len(self.api_keys)
 key_manager = ApiKeyManager(GEMINI_API_KEYS)
-def clean_latex_formulas(text: str) -> str:
-    return re.sub(r'\$\s+(.*?)\s+\$', lambda m: f'${m.group(1).strip()}$', text)
 def stitch_text(text_a: str, text_b: str, min_overlap_chars: int = 20) -> str:
     if not text_a: return text_b
     if not text_b: return text_a
     a_lines = text_a.splitlines()
     b_lines = text_b.splitlines()
     scan_window = min(len(a_lines), len(b_lines), 30)
     best_overlap_idx = 0
     for i in range(scan_window, 0, -1):
         tail_a = "\n".join(a_lines[-i:]).strip()
         head_b = "\n".join(b_lines[:i]).strip()
         if len(tail_a) >= min_overlap_chars and tail_a == head_b:
             best_overlap_idx = i
-            break
     if best_overlap_idx > 0:
         return text_a + "\n" + "\n".join(b_lines[best_overlap_idx:])
     else:
         return text_a + "\n\n" + text_b
-# ===== AI LOGIC =====
-STRONG_PROMPT = r"""Role: Chuyên viên nhập liệu Toán học. Task: Số hóa ảnh thành Markdown/LaTeX. YÊU CẦU: Trích xuất KHÔNG BỎ SÓT. Giữ nguyên định dạng."""
-SAFE_PROMPT = r"""Role: Trợ lý khiếm thị. Task: Mô tả chi tiết nội dung văn bản và toán học."""
-async def process_image_with_gemini(image: Image.Image, model_id: str, prompt_mode: str, max_retries: int = 3) -> str:
-    current_prompt = STRONG_PROMPT if prompt_mode == "latex" else "Trích xuất văn bản."
-    safety_settings = [
-        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
-        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
-        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
-        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
-    ]
-    for attempt in range(max_retries):
-        try:
-            api_key = key_manager.get_next_key()
-            if not api_key: raise ValueError("No API Key")
-            genai.configure(api_key=api_key)
-            model = genai.GenerativeModel(model_id, generation_config={"temperature": 0.0, "top_p": 1.0, "max_output_tokens": 8192})
-            response = model.generate_content([current_prompt, image], safety_settings=safety_settings)
-            if response.candidates:
-                cand = response.candidates[0]
-                if cand.content and cand.content.parts:
-                    return response.text.strip()
-                reason = cand.finish_reason
-                print(f">> [AI] Blocked: {reason}")
-                if reason == 4 and current_prompt == STRONG_PROMPT:
-                    current_prompt = SAFE_PROMPT
-                    continue
-                if reason == 4: return "\n> *[Hidden: Copyright]*\n"
-                if reason == 3: return "\n> *[Hidden: Safety]*\n"
-        except Exception as e:
-            print(f">> [AI] Error: {e}")
-            if "429" in str(e): time.sleep(2); continue
-    return ""
-async def process_large_image(image: Image.Image, model: str, prompt_mode: str, semaphore: asyncio.Semaphore) -> str:
-    CHUNK_HEIGHT = 1536
-    if image.height <= CHUNK_HEIGHT:
-        async with semaphore: return await process_image_with_gemini(image, model, prompt_mode)
-    chunks = []
-    y = 0
-    while y < image.height:
-        bottom = min(y + CHUNK_HEIGHT, image.height)
-        chunks.append(image.crop((0, y, image.width, bottom)))
-        if bottom == image.height: break
-        y += (CHUNK_HEIGHT - 300)
-    tasks = [process_chunk_wrapper(c, i, model, prompt_mode, semaphore) for i, c in enumerate(chunks)]
-    results = await asyncio.gather(*tasks)
-    results.sort(key=lambda x: x[0])
-    final = results[0][1]
-    for i in range(1, len(results)): final = stitch_text(final, results[i][1])
-    return final
-async def process_chunk_wrapper(chunk, idx, model, mode, sem):
-    async with sem:
-        return idx, await process_image_with_gemini(chunk, model, mode)
-# ===== ROUTES =====
 @app.get("/")
 @app.get("/health")
 async def root():
-    return {"status": "ok", "version": "9.2", "routes": [r.path for r in app.routes]}
-@app.get("/version")
-async def version():
-    """Endpoint để Client kiểm tra phiên bản Container"""
-    return {
-        "version": "9.2",
-        "build_date": time.strftime("%Y-%m-%d %H:%M:%S"),
-        "pid": os.getpid()
-    }
-@app.get("/debug/sys-info")
-async def sys_info():
     return {
-        "version": "9.2",
-        "cwd": os.getcwd(),
-        "files_in_root": os.listdir("."),
-        "python": sys.version
     }
 @app.get("/api/models")
 async def get_models():
     return {"models": GEMINI_MODELS}
-# --- AUTH ROUTES ---
 @app.post("/api/register")
 async def register(email: str = Form(...), password: str = Form(...)):
-    if not supabase: raise HTTPException(500, "DB Error")
-    if supabase.table("users").select("email").eq("email", email).execute().data:
-        raise HTTPException(400, "Email exist")
-    supabase.table("users").insert({
-        "email": email,
-        "password": hash_password(password),
-        "status": "pending",
-        "created_at": time.strftime("%Y-%m-%d %H:%M:%S")
-    }).execute()
-    return {"success": True}
 @app.post("/api/login")
-async def login(email: str = Form(...), password: str = Form(...)):
-    if not supabase: raise HTTPException(500, "DB Error")
     res = supabase.table("users").select("*").eq("email", email).execute()
-    if not res.data: raise HTTPException(401, "Auth failed")
     user = res.data[0]
-    if not verify_password(password, user["password"]): raise HTTPException(401, "Auth failed")
-    if user.get("status") != "active": raise HTTPException(403, "Not active")
     token = secrets.token_urlsafe(32)
     try: supabase.table("sessions").delete().eq("email", email).execute()
     except: pass
-    supabase.table("sessions").insert({"email": email, "token": token}).execute()
     return {"success": True, "token": token, "email": email}
 @app.post("/api/check-session")
 async def check_session(email: str = Form(...), token: str = Form(...)):
-    if not supabase: raise HTTPException(500, "DB Error")
     res = supabase.table("sessions").select("token").eq("email", email).execute()
-    if not res.data or res.data[0]['token'] != token:
-        raise HTTPException(401, "Invalid")
     return {"status": "valid"}
 @app.post("/api/logout")
 async def logout(request: Request):
     try:
         data = await request.json()
-        if supabase: supabase.table("sessions").delete().eq("email", data.get("email")).execute()
     except: pass
     return {"status": "success"}
-# --- ADMIN ROUTES (NEW - FIX 404) ---
-@app.get("/api/admin/users")
-async def admin_get_users(key: str = Header(None)):
-    """Lấy danh sách Users (Yêu cầu Admin Key ở Header)"""
-    if key != ADMIN_SECRET_KEY:
-        raise HTTPException(status_code=401, detail="Invalid Admin Key")
-    if not supabase:
-        raise HTTPException(status_code=500, detail="DB Error")
-    # Lấy toàn bộ users, sắp xếp theo ngày tạo mới nhất
-    res = supabase.table("users").select("*").order("created_at", desc=True).execute()
-    return {"users": res.data}
-@app.post("/api/admin/approve")
-async def admin_approve_user(data: AdminActionModel):
-    """Duyệt user (Active)"""
-    if data.admin_key != ADMIN_SECRET_KEY:
-        raise HTTPException(status_code=401, detail="Invalid Admin Key")
-    if not supabase:
-        raise HTTPException(status_code=500, detail="DB Error")
-    supabase.table("users").update({"status": "active"}).eq("email", data.email).execute()
-    return {"success": True, "message": f"User {data.email} approved"}
-@app.post("/api/admin/delete")
-async def admin_delete_user(data: AdminActionModel):
-    """Xóa user"""
-    if data.admin_key != ADMIN_SECRET_KEY:
-        raise HTTPException(status_code=401, detail="Invalid Admin Key")
-    if not supabase:
-        raise HTTPException(status_code=500, detail="DB Error")
-    # Xóa cả session trước để tránh lỗi khóa ngoại (nếu có)
-    supabase.table("sessions").delete().eq("email", data.email).execute()
-    supabase.table("users").delete().eq("email", data.email).execute()
-    return {"success": True, "message": f"User {data.email} deleted"}
-# --- CONVERT & EXPORT ---
 @app.post("/api/convert")
 async def convert_file(
@@ -321,56 +351,80 @@ async def convert_file(
     model: str = Form("gemini-2.5-flash"),
     mode: str = Form("latex")
 ):
-    print(f">> [CONVERT] Start: {file.filename} | Model: {model}")
-    if key_manager.get_key_count() == 0: raise HTTPException(500, "No API Key")
     try:
-        content = await file.read()
-        ext = os.path.splitext(file.filename)[1].lower()
-        sem = asyncio.Semaphore(MAX_THREADS)
         results = []
-        if ext == ".pdf":
-            doc = fitz.open(stream=content, filetype="pdf")
-            tasks = []
-            for i, page in enumerate(doc):
                 pix = page.get_pixmap(dpi=300)
                 img = Image.open(io.BytesIO(pix.tobytes("png")))
-                tasks.append(process_large_image(img, model, mode, sem))
-            raw_results = await asyncio.gather(*tasks)
-            results = list(raw_results)
             doc.close()
-        elif ext in [".png", ".jpg", ".jpeg", ".bmp"]:
-            img = Image.open(io.BytesIO(content))
-            results.append(await process_large_image(img, model, mode, sem))
-        else:
-            raise HTTPException(400, "Format not supported")
-        return {"success": True, "result": clean_latex_formulas("\n\n".join(results))}
     except Exception as e:
-        import traceback; traceback.print_exc()
-        raise HTTPException(500, str(e))
 @app.post("/api/export-docx")
 async def export_docx(markdown_text: str = Form(...)):
     try:
-        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
-            path = tmp.name
-        pypandoc.convert_text(markdown_text, 'docx', 'markdown', outputfile=path, extra_args=['--standalone'])
-        return FileResponse(path, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", filename="Result.docx")
     except Exception as e:
-        raise HTTPException(500, str(e))
-# ===== STARTUP PRINT =====
-@app.on_event("startup")
-async def list_routes():
-    print("\n" + "="*40)
-    print(">> REGISTERED ROUTES (v9.2):")
-    for route in app.routes:
-        print(f"   {route.methods} {route.path}")
-    print("="*40 + "\n")
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 """
 Backend API cho HT_MATH_WEB - Chạy trên Hugging Face Spaces (Docker Version)
+Phiên bản: 8.0 (Content-Based Stitching OCR - Overlap Algorithm)
 Tác giả: Hoàng Tấn Thiên
 """
 import hashlib
 import secrets
 import uuid
+import math
 from typing import List, Optional
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
 from PIL import Image
 import fitz  # PyMuPDF
 import google.generativeai as genai
 # --- PANDOC IMPORT ---
 try:
     import pypandoc
+    print(f"INFO: Pandoc version detected: {pypandoc.get_pandoc_version()}")
 except ImportError:
+    print("CRITICAL WARNING: pypandoc module not found.")
 except OSError:
+    print("CRITICAL WARNING: pandoc binary not found in system path.")
+# --- SUPABASE ---
 try:
     from supabase import create_client, Client
     SUPABASE_AVAILABLE = True
 except ImportError:
     SUPABASE_AVAILABLE = False
+    Client = None
+    create_client = None
 # ===== CẤU HÌNH =====
 GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
 GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
 SUPABASE_URL = os.getenv("SUPABASE_URL", "")
 SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
+MAX_THREADS = int(os.getenv("MAX_THREADS", "5")) # Tăng thread để xử lý các mảnh cắt song song
+ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
 # Setup Supabase
 supabase = None
 if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
     try:
         supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
     except Exception as e:
+        print(f"Warning: Không thể kết nối Supabase: {e}")
+app = FastAPI(title="HT_MATH_WEB API", version="8.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# --- SETUP STATIC FILES ---
 os.makedirs("uploads", exist_ok=True)
 app.mount("/uploads", StaticFiles(directory="uploads"), name="uploads")
@app.exception_handler(404)
async def not_found_handler(request, exc):
    """Return a JSON 404 body naming the missing path and listing known routes.

    Args:
        request: The incoming request; only ``request.url.path`` is read.
        exc: The exception that triggered this handler (unused).

    Returns:
        A ``JSONResponse`` with status code 404.
    """
    # Hand-maintained list of the endpoints this module registers. The
    # previous list omitted /health, /api/register and /api/logout, which
    # misled clients debugging a wrong URL.
    available_routes = [
        "/",
        "/health",
        "/api/models",
        "/api/convert",
        "/api/export-docx",
        "/api/register",
        "/api/login",
        "/api/check-session",
        "/api/logout",
        "/api/upload-image",
    ]
    return JSONResponse(
        status_code=404,
        content={
            "detail": f"Route not found: {request.url.path}",
            "available_routes": available_routes,
        },
    )
+# ===== KEY MANAGER & RATE LIMIT =====
 class ApiKeyManager:
     """Round-robin dispenser over a fixed list of API keys."""

     def __init__(self, keys: List[str]):
         """Keep the non-blank entries of ``keys``, trimmed of surrounding whitespace."""
         trimmed = (raw.strip() for raw in keys)
         self.api_keys = [candidate for candidate in trimmed if candidate]
         self.current_index = 0

     def get_next_key(self) -> Optional[str]:
         """Return the key under the cursor and advance the cursor, wrapping around.

         Returns ``None`` when no keys were loaded.
         """
         total = len(self.api_keys)
         if total == 0:
             return None
         chosen = self.api_keys[self.current_index]
         self.current_index = (self.current_index + 1) % total
         return chosen

     def get_key_count(self) -> int:
         """Return the number of usable keys."""
         return len(self.api_keys)
 key_manager = ApiKeyManager(GEMINI_API_KEYS)
# Last request timestamp (time.time()) per client IP.
ip_rate_limits = {}
# Minimum number of seconds expected between two requests from the same IP.
RATE_LIMIT_DURATION = 7


def check_rate_limit(request: Request):
    """Record the caller's request time and log when requests arrive too fast.

    This is a soft limiter: a request that arrives less than
    ``RATE_LIMIT_DURATION`` seconds after the previous one from the same IP
    is only reported with ``print``; it is never rejected.

    Args:
        request: The incoming request. The first entry of the
            ``X-Forwarded-For`` header is used as the client IP when present,
            otherwise ``request.client.host``.
    """
    forwarded = request.headers.get("X-Forwarded-For")
    if forwarded:
        client_ip = forwarded.split(",")[0].strip()
    else:
        # request.client can be None when the server cannot determine the
        # peer address; fall back to a shared bucket instead of crashing.
        client_ip = request.client.host if request.client else "unknown"

    now = time.time()

    # Drop entries that can no longer trigger the warning so the table does
    # not grow without bound on a long-running process.
    expired = [ip for ip, seen in ip_rate_limits.items() if now - seen >= RATE_LIMIT_DURATION]
    for ip in expired:
        del ip_rate_limits[ip]

    last_seen = ip_rate_limits.get(client_ip)
    if last_seen is not None and now - last_seen < RATE_LIMIT_DURATION:
        print(f"[RateLimit] IP {client_ip} requesting too fast.")
    ip_rate_limits[client_ip] = now
+# ===== PROMPTS =====
+DIRECT_GEMINI_PROMPT_TEXT_ONLY = r"""**TRÍCH XUẤT VĂN BẢN THUẦN TÚY**
+⚠️ YÊU CẦU BẮT BUỘC:
+- PHẢI trích xuất TOÀN BỘ nội dung xuất hiện trong ảnh/PDF
+- KHÔNG được bỏ sót bất kỳ câu hỏi, ví dụ, bài tập nào
+- KỂ CẢ các câu nhỏ, câu phụ, chữ mờ, chữ sát lề
+- Nếu không chắc, VẪN PHẢI ghi lại nội dung nhìn thấy
+⚠️ NHIỆM VỤ:
+1. Trích xuất toàn bộ văn bản, giữ nguyên định dạng gốc.
+2. KHÔNG sử dụng LaTeX ($...$), giữ nguyên biểu thức toán dạng text (ví dụ: x^2 + 1 = 0).
+3. Đánh dấu tiêu đề bằng Markdown **đậm**.
+4. KHÔNG thêm lời giải thích.
+"""
+DIRECT_GEMINI_PROMPT_LATEX = r"""Bạn là công cụ trích xuất văn bản từ ảnh/PDF. NHIỆM VỤ: Chuyển đổi nội dung trong ảnh sang Markdown với công thức LaTeX.
+⚠️ YÊU CẦU CỐT LÕI - KHÔNG ĐƯỢC BỎ SÓT:
+- Đọc kỹ từng pixel, trích xuất TOÀN BỘ nội dung từ trên xuống dưới.
+- KHÔNG bỏ qua bất kỳ bài tập, hình vẽ, hoặc ghi chú nhỏ nào.
+- Nếu ảnh bị cắt ngang chữ, hãy cố gắng đoán và hoàn thiện từ đó dựa trên ngữ cảnh.
+⚠️ QUY TẮC LATEX (BẮT BUỘC):
+1. Mọi công thức toán PHẢI bọc trong dấu $. Ví dụ: $x^2$, $\frac{1}{2}$.
+2. KHÔNG dùng \[...\] hoặc \(...\).
+3. Luôn có khoảng trắng trước dấu $: "Cho hàm số $f(x)$..." (Đúng).
+⚠️ ĐỊNH DẠNG:
+- Giữ nguyên cấu trúc dòng, đoạn.
+- Tiêu đề in đậm: **Câu 1:**, **Bài tập:**.
+- Bảng biểu giữ nguyên Markdown Table.
+CHỈ TRẢ VỀ MARKDOWN. KHÔNG GIẢI THÍCH THÊM.
+"""
+# ===== STITCHING ALGORITHM (QUAN TRỌNG) =====
 def stitch_text(text_a: str, text_b: str, min_overlap_chars: int = 20, max_scan_lines: int = 30) -> str:
     """Join two OCR fragments, removing the lines they have in common.

     The last ``i`` lines of ``text_a`` are compared with the first ``i`` lines
     of ``text_b`` for ``i`` from the scan window down to 1; the first (largest)
     exact match wins and the duplicated head of ``text_b`` is dropped.

     Args:
         text_a: Text of the earlier fragment.
         text_b: Text of the later fragment.
         min_overlap_chars: Minimum length (after stripping) a matching block
             must have to count as an overlap.
         max_scan_lines: Upper bound on how many lines are compared, so long
             texts are not scanned in full.

     Returns:
         The stitched text. Without a detected overlap the fragments are
         joined with a blank line. If either fragment is empty, the other is
         returned unchanged.
     """
     if not text_a:
         return text_b
     if not text_b:
         return text_a

     a_lines = text_a.splitlines()
     b_lines = text_b.splitlines()
     scan_window = min(len(a_lines), len(b_lines), max_scan_lines)

     # Largest candidate first, so the first hit is the biggest overlap.
     for size in range(scan_window, 0, -1):
         # strip() ignores differences in surrounding whitespace only.
         tail_a = "\n".join(a_lines[-size:]).strip()
         head_b = "\n".join(b_lines[:size]).strip()
         if len(tail_a) >= min_overlap_chars and tail_a == head_b:
             remainder = b_lines[size:]
             if not remainder:
                 # text_b is entirely contained in the tail of text_a; do not
                 # append a dangling newline.
                 return text_a
             return text_a + "\n" + "\n".join(remainder)

     # No overlap found: plain join separated by a blank line.
     return text_a + "\n\n" + text_b
+# ===== HELPER FUNCTIONS =====
def clean_latex_formulas(text: str) -> str:
    """Trim whitespace padding just inside inline ``$ ... $`` math delimiters.

    ``"$ x^2 $"`` becomes ``"$x^2$"``. Dollar signs are paired left to right,
    so the closing ``$`` of one formula is never mistaken for the opening
    ``$`` of the next: ``"$a$ and $b$"`` is left untouched (the previous
    regex turned it into ``"$a$and$b$"``).

    A pair is rewritten only when its content is padded on both sides and is
    not empty, and the content itself may not span lines. Empty pairs such as
    the ``$$`` display delimiters are therefore left as they are.

    Args:
        text: Markdown text that may contain inline LaTeX.

    Returns:
        The text with padded inline formulas tightened.
    """
    def _tighten(match):
        lead, body, trail = match.groups()
        if lead and trail and body:
            return f"${body}$"
        return match.group(0)

    # lead / trail capture the padding; the body may not contain '$' or a
    # newline, which also keeps a stray '$' from pairing across lines.
    return re.sub(r'\$(\s*)([^$\n]*?)(\s*)\$', _tighten, text)
def hash_password(password: str) -> str:
    """Return the hex SHA-256 digest of ``password`` (encoded as UTF-8).

    NOTE(review): this is an unsalted, fast hash. It is kept unchanged because
    stored hashes depend on it; consider migrating to a salted password KDF.
    """
    return hashlib.sha256(password.encode()).hexdigest()


def verify_password(password: str, hashed: str) -> bool:
    """Check ``password`` against a digest produced by ``hash_password``.

    Args:
        password: The candidate plain-text password.
        hashed: The stored hex digest.

    Returns:
        ``True`` when the digests match. ``False`` otherwise, including when
        ``hashed`` is not a string (e.g. a missing value from the database).
    """
    import hmac

    if not isinstance(hashed, str):
        return False
    # Constant-time comparison; bytes are used so non-ASCII input cannot
    # raise inside compare_digest.
    return hmac.compare_digest(hash_password(password).encode(), hashed.encode())
+# ===== API ENDPOINTS =====
 @app.get("/")
 @app.get("/health")
 async def root():
     """Health check: report service status, loaded key count and Pandoc version.

     Returns:
         A dict with ``status``, ``service``, ``keys_loaded`` and
         ``pandoc_version`` (``"Not Found"`` when the version cannot be read).
     """
     try:
         pandoc_status = pypandoc.get_pandoc_version()
     except Exception:
         # Covers a missing pandoc binary and a failed pypandoc import (the
         # name is then undefined), without also swallowing
         # KeyboardInterrupt / SystemExit the way a bare except would.
         pandoc_status = "Not Found"
     return {
         "status": "ok",
         "service": "HT_MATH_WEB API v8.0 (Stitching OCR)",
         "keys_loaded": key_manager.get_key_count(),
         "pandoc_version": pandoc_status
     }
 @app.get("/api/models")
 async def get_models():
     """Return the configured model identifiers under the ``models`` key."""
     payload = {"models": GEMINI_MODELS}
     return payload
+# --- AUTH API ---
 @app.post("/api/register")
 async def register(email: str = Form(...), password: str = Form(...)):
     """Create a new account in ``pending`` status.

     Args:
         email: Address to register; must not already exist in ``users``.
         password: Plain-text password; stored as its ``hash_password`` digest.

     Returns:
         A success payload with a confirmation message.

     Raises:
         HTTPException: 500 when the database client is not configured,
             400 when the email is already registered.
     """
     if not supabase:
         raise HTTPException(status_code=500, detail="DB Error")

     existing = supabase.table("users").select("email").eq("email", email).execute()
     if existing.data:
         raise HTTPException(status_code=400, detail="Email tồn tại")

     new_user = {
         "email": email,
         "password": hash_password(password),
         "status": "pending",
         "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
     }
     supabase.table("users").insert(new_user).execute()
     return {"success": True, "message": "Đăng ký thành công, chờ duyệt."}
 @app.post("/api/login")
 async def login(request: Request, email: str = Form(...), password: str = Form(...)):
     """Authenticate a user and start a fresh session.

     Existing sessions for the email are deleted before the new one is
     inserted, so at most one session per account is kept.

     Args:
         request: The incoming request (not used by the handler body).
         email: Account email.
         password: Plain-text password, checked with ``verify_password``.

     Returns:
         A dict with ``success``, the new session ``token`` and ``email``.

     Raises:
         HTTPException: 500 when the database client is not configured,
             401 on unknown email or wrong password, 403 when the account
             status is not ``active``.
     """
     if not supabase:
         raise HTTPException(status_code=500, detail="DB Error")

     res = supabase.table("users").select("*").eq("email", email).execute()
     if not res.data:
         raise HTTPException(status_code=401, detail="Sai email/pass")

     user = res.data[0]
     if not verify_password(password, user["password"]):
         raise HTTPException(status_code=401, detail="Sai email/pass")
     if user.get("status") != "active":
         raise HTTPException(status_code=403, detail="Tài khoản chưa kích hoạt")

     token = secrets.token_urlsafe(32)
     try:
         supabase.table("sessions").delete().eq("email", email).execute()
     except Exception as e:
         # Best effort: a failed cleanup must not block the login, but it
         # should be visible in the logs instead of being silently dropped.
         print(f"Warning: could not clear old sessions for {email}: {e}")

     supabase.table("sessions").insert({"email": email, "token": token, "last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).execute()
     return {"success": True, "token": token, "email": email}
 @app.post("/api/check-session")
 async def check_session(email: str = Form(...), token: str = Form(...)):
     """Validate a session token and refresh its ``last_seen`` timestamp.

     Args:
         email: Account email the session belongs to.
         token: Session token presented by the client.

     Returns:
         ``{"status": "valid"}`` when the token matches the stored one.

     Raises:
         HTTPException: 500 when the database client is not configured,
             401 when no session exists or the token does not match.
     """
     if not supabase:
         raise HTTPException(status_code=500, detail="DB Error")

     res = supabase.table("sessions").select("token").eq("email", email).execute()
     stored_token = res.data[0]['token'] if res.data else None
     # Constant-time comparison so response timing does not leak how much of
     # the token matched; bytes avoid a TypeError on non-ASCII input.
     token_matches = isinstance(stored_token, str) and secrets.compare_digest(
         stored_token.encode(), token.encode()
     )
     if not token_matches:
         raise HTTPException(status_code=401, detail="Session expired")

     supabase.table("sessions").update({"last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).eq("email", email).execute()
     return {"status": "valid"}
 @app.post("/api/logout")
 async def logout(request: Request):
     """Delete the caller's sessions; always reports success.

     The JSON body is expected to carry an ``email`` field. Any failure
     (invalid JSON, missing email, database error) is logged and otherwise
     ignored so that logging out never fails from the client's point of view.

     Args:
         request: The incoming request whose JSON body is read.

     Returns:
         ``{"status": "success"}``.
     """
     try:
         data = await request.json()
         email = data.get("email")
         if email and supabase:
             supabase.table("sessions").delete().eq("email", email).execute()
     except Exception as e:
         # Deliberately best effort, but no longer silent, and no longer
         # catching KeyboardInterrupt / SystemExit like a bare except.
         print(f"Warning: logout cleanup failed: {e}")
     return {"status": "success"}
@app.post("/api/upload-image")
async def upload_image(file: UploadFile = File(...)):
    """Store an uploaded image under ``uploads/`` and return its relative path.

    The file is saved under a random name; only the extension is taken from
    the client-supplied filename, and it must be a known image extension
    because the ``uploads`` directory is served publicly as static files.

    Args:
        file: The uploaded file. A missing filename or extension defaults
            to ``.png``.

    Returns:
        ``{"url": "uploads/<random>.<ext>"}``.

    Raises:
        HTTPException: 400 for a non-image extension, 500 when the file
            cannot be read or written.
    """
    allowed_exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"}

    # file.filename may be None; the extension is untrusted input.
    file_ext = os.path.splitext(file.filename or "")[1].lower() or ".png"
    if file_ext not in allowed_exts:
        # Raised outside the try block so it is not rewritten into a 500.
        raise HTTPException(status_code=400, detail="Định dạng file không hỗ trợ")

    file_name = f"{uuid.uuid4().hex}{file_ext}"
    file_path = f"uploads/{file_name}"
    try:
        payload = await file.read()
        with open(file_path, "wb") as f:
            f.write(payload)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"url": file_path}
+# --- CORE CONVERT LOGIC ---
async def process_image_with_gemini(image: Image.Image, model_id: str, prompt: str, max_retries: int = 3) -> str:
    """Send one image (or image chunk) to Gemini and return the extracted text.

    Each attempt takes the next key from ``key_manager``. After an error whose
    message contains ``"429"`` the coroutine waits two seconds before retrying;
    other errors are retried immediately. The error of the last attempt is
    re-raised.

    Args:
        image: The image to transcribe.
        model_id: Name of the Gemini model to use.
        prompt: Instruction text sent together with the image.
        max_retries: Maximum number of attempts.

    Returns:
        The stripped response text, or ``""`` when every attempt returned an
        empty text without raising.

    Raises:
        Exception: Whatever the final attempt raised (including the
            ``ValueError`` for a missing API key).
    """
    generation_config = {"temperature": 0.0, "top_p": 1.0, "max_output_tokens": 8192}

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            api_key = key_manager.get_next_key()
            if not api_key:
                raise ValueError("No API Key")
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(model_id, generation_config=generation_config)
            # NOTE(review): generate_content is a synchronous call, so it
            # still occupies the event loop while the request is in flight.
            response = model.generate_content([prompt, image])
            if response.text:
                return response.text.strip()
        except Exception as e:
            if is_last_attempt:
                raise
            if "429" in str(e):
                # Back off without blocking the event loop; time.sleep()
                # here would stall every other request being served.
                await asyncio.sleep(2)
    return ""
async def process_large_image(image: Image.Image, model: str, prompt: str, semaphore: asyncio.Semaphore) -> str:
    """OCR a tall image by splitting it into overlapping strips and stitching the text.

    Images no taller than ``CHUNK_HEIGHT`` are sent as a single request.
    Taller images are cut into full-width strips of ``CHUNK_HEIGHT`` pixels,
    each starting ``CHUNK_HEIGHT - OVERLAP_HEIGHT`` pixels below the previous
    one. The strips are submitted together, each request waiting on
    ``semaphore``, and their texts are merged top to bottom with
    ``stitch_text``.

    Args:
        image: The page or picture to transcribe.
        model: Gemini model name passed through to the OCR call.
        prompt: Prompt passed through to the OCR call.
        semaphore: Limits how many OCR calls are in flight at once.

    Returns:
        The transcribed text of the whole image.
    """
    CHUNK_HEIGHT = 2048   # strip height in pixels
    OVERLAP_HEIGHT = 512  # pixels shared by consecutive strips

    img_w, img_h = image.size

    async def _ocr(piece):
        # Every request, whole image or strip, goes through the shared limiter.
        async with semaphore:
            return await process_image_with_gemini(piece, model, prompt)

    if img_h <= CHUNK_HEIGHT:
        return await _ocr(image)

    # Cut top to bottom; the final strip is clipped to the image's lower edge.
    stride = CHUNK_HEIGHT - OVERLAP_HEIGHT
    pieces = []
    top = 0
    while True:
        lower = min(top + CHUNK_HEIGHT, img_h)
        pieces.append(image.crop((0, top, img_w, lower)))
        if lower == img_h:
            break
        top += stride

    print(f"[Split] Image height {img_h}px -> {len(pieces)} chunks with overlap.")

    # gather() returns results in submission order, i.e. top to bottom.
    texts = await asyncio.gather(*[_ocr(piece) for piece in pieces])

    merged = texts[0]
    for following in texts[1:]:
        merged = stitch_text(merged, following, min_overlap_chars=20)
    return merged
 @app.post("/api/convert")
 async def convert_file(
     model: str = Form("gemini-2.5-flash"),
     mode: str = Form("latex")
 ):
+    check_rate_limit(request)
+    if key_manager.get_key_count() == 0:
+        raise HTTPException(status_code=500, detail="Chưa cấu hình API Key")
+    prompt = DIRECT_GEMINI_PROMPT_LATEX if mode == "latex" else DIRECT_GEMINI_PROMPT_TEXT_ONLY
     try:
+        file_content = await file.read()
+        file_ext = os.path.splitext(file.filename)[1].lower()
+        # Global semaphore để kiểm soát tổng số request đồng thời lên Gemini
+        # Tránh lỗi 429 Quota Exceeded khi cắt quá nhiều mảnh
+        global_semaphore = asyncio.Semaphore(MAX_THREADS)
         results = []
+        if file_ext == ".pdf":
+            doc = fitz.open(stream=file_content, filetype="pdf")
+            # Hàm xử lý từng trang (có thể cắt nhỏ bên trong nếu trang dài)
+            async def process_page_wrapper(page, idx):
+                # Render ảnh chất lượng cao
                 pix = page.get_pixmap(dpi=300)
                 img = Image.open(io.BytesIO(pix.tobytes("png")))
+                # Gọi hàm xử lý ảnh lớn (tự động cắt/ghép nếu cần)
+                text = await process_large_image(img, model, prompt, global_semaphore)
+                return idx, text
+            tasks = [process_page_wrapper(doc[i], i) for i in range(len(doc))]
+            page_results = await asyncio.gather(*tasks)
+            results = [text for _, text in sorted(page_results, key=lambda x: x[0])]
             doc.close()
+        elif file_ext in [".png", ".jpg", ".jpeg", ".bmp"]:
+            img = Image.open(io.BytesIO(file_content))
+            # Xử lý ảnh upload (tự động cắt/ghép nếu dài)
+            text = await process_large_image(img, model, prompt, global_semaphore)
+            results.append(text)
+        else:
+             raise HTTPException(status_code=400, detail="Định dạng file không hỗ trợ")
+        final_text = "\n\n".join(results)
+        return {"success": True, "result": clean_latex_formulas(final_text)}
     except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))
+# --- WORD EXPORT API (PANDOC NATIVE) ---
 @app.post("/api/export-docx")
 async def export_docx(markdown_text: str = Form(...)):
     """Convert Markdown to a Word document with Pandoc and return it as a download.

     The document is written to a temporary file, which is removed after the
     response has been sent, or immediately if the conversion fails.

     Args:
         markdown_text: Markdown source to convert.

     Returns:
         A ``FileResponse`` streaming the generated ``.docx``.

     Raises:
         HTTPException: 500 when the conversion or response setup fails.
     """
     from fastapi import BackgroundTasks

     output_filename = None
     try:
         # Only the unique path is needed; Pandoc writes the content itself.
         with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp_file:
             output_filename = tmp_file.name
         pypandoc.convert_text(
             markdown_text,
             to='docx',
             format='markdown',
             outputfile=output_filename,
             extra_args=['--standalone']
         )
         # Delete the temp file once the response has been sent; previously
         # every export left a file behind.
         cleanup = BackgroundTasks()
         cleanup.add_task(os.remove, output_filename)
         return FileResponse(
             output_filename,
             media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             filename="Ket_qua_HT_MATH_Pandoc.docx",
             background=cleanup
         )
     except Exception as e:
         import traceback
         traceback.print_exc()
         if output_filename:
             try:
                 os.remove(output_filename)
             except OSError:
                 pass  # already gone or never created
         raise HTTPException(status_code=500, detail=f"Lỗi xuất Word: {str(e)}")
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))