Spaces:

hoangthiencm
/

ht-math-web-backend

Running

App Files Community

hoangthiencm committed on Feb 5

Commit

86333ac

verified ·

1 Parent(s): e88455c

Update app.py

Browse files

Files changed (1) hide show

app.py +841 -294

app.py CHANGED Viewed

@@ -1,88 +1,85 @@
 """
-Backend API cho HT_MATH_WEB - Phiên bản Firebase (Firestore) + BYOK (Bring Your Own Key)
-Chạy trên Hugging Face Spaces (Docker Version)
 Tác giả: Hoàng Tấn Thiên
 """
 import os
 import io
-import time
-import asyncio
-import re
-import tempfile
-import hashlib
-import secrets
-import uuid
 import json
 import base64
-import random
 from typing import List, Optional
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse, FileResponse
-from fastapi.staticfiles import StaticFiles
-from PIL import Image
 import fitz  # PyMuPDF
 import google.generativeai as genai
-# --- PANDOC IMPORT ---
 try:
     import pypandoc
-    print(f"INFO: Pandoc version detected: {pypandoc.get_pandoc_version()}")
 except ImportError:
-    print("CRITICAL WARNING: pypandoc module not found.")
-except OSError:
-    print("CRITICAL WARNING: pandoc binary not found in system path.")
-# --- TESSERACT IMPORT ---
 try:
-    import pytesseract
-    print("INFO: Tesseract OCR module loaded.")
 except ImportError:
-    print("WARNING: pytesseract not found. Fallback OCR will not work.")
-    pytesseract = None
-# --- FIREBASE ADMIN SDK ---
-import firebase_admin
-from firebase_admin import credentials, firestore
 # ===== CẤU HÌNH =====
 GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
-MAX_THREADS = int(os.getenv("MAX_THREADS", "5"))
-# --- KẾT NỐI FIREBASE ---
-db = None
-try:
-    if not firebase_admin._apps:
-        cred = None
-        firebase_env = os.getenv("FIREBASE_CREDENTIALS")
-        if firebase_env:
-            try:
-                json_info = json.loads(base64.b64decode(firebase_env))
-            except:
-                json_info = json.loads(firebase_env)
-            cred = credentials.Certificate(json_info)
-            print("INFO: Loaded Firebase credentials from Environment.")
-        elif os.path.exists("firebase_key.json"):
-            cred = credentials.Certificate("firebase_key.json")
-            print("INFO: Loaded Firebase credentials from 'firebase_key.json'.")
-        if cred:
-            firebase_admin.initialize_app(cred)
-            db = firestore.client()
-            print("SUCCESS: Connected to Firebase Firestore.")
-        else:
-            print("WARNING: No Firebase credentials found. Database features will fail.")
-    else:
-        db = firestore.client()
-except Exception as e:
-    print(f"ERROR: Firebase Init Failed: {e}")
-app = FastAPI(title="HT_MATH_WEB API (Firebase + BYOK)", version="10.2")
 app.add_middleware(
     CORSMiddleware,
@@ -92,46 +89,175 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# --- SETUP STATIC FILES ---
-os.makedirs("uploads", exist_ok=True)
-app.mount("/uploads", StaticFiles(directory="uploads"), name="uploads")
-# ===== KEY HELPER (BYOK) =====
-def get_random_key_from_request(api_keys_str: str) -> Optional[str]:
-    """Lấy 1 key ngẫu nhiên từ chuỗi key user gửi lên"""
-    if not api_keys_str:
-        return None
-    keys = [k.strip() for k in re.split(r'[,\n]', api_keys_str) if k.strip()]
-    if not keys:
-        return None
-    return random.choice(keys)
 ip_rate_limits = {}
-RATE_LIMIT_DURATION = 2
 def check_rate_limit(request: Request):
     forwarded = request.headers.get("X-Forwarded-For")
-    client_ip = forwarded.split(",")[0].strip() if forwarded else request.client.host
     now = time.time()
     if client_ip in ip_rate_limits:
         elapsed = now - ip_rate_limits[client_ip]
         if elapsed < RATE_LIMIT_DURATION:
             print(f"[RateLimit] IP {client_ip} requesting too fast.")
     ip_rate_limits[client_ip] = now
-# ===== PROMPTS =====
-DIRECT_GEMINI_PROMPT_TEXT_ONLY = r"""Đóng vai một CÔNG CỤ OCR CHUYÊN DỤNG cho văn bản hành chính Việt Nam.
-NHIỆM VỤ: Trích xuất nguyên văn (Verbatim) nội dung trong ảnh.
-QUY TẮC: KHÔNG thêm lời dẫn, KHÔNG nhận xét. Giữ nguyên văn cấu trúc."""
-DIRECT_GEMINI_PROMPT_LATEX = r"""Đóng vai công cụ số hóa tài liệu Toán học chính xác tuyệt đối.
-NHIỆM VỤ: Chuyển đổi ảnh sang Markdown + LaTeX.
-QUY TẮC: Công thức toán nằm trong `$`. KHÔNG thêm lời dẫn. Chỉ trả về Markdown."""
 # ===== HELPER FUNCTIONS =====
 def clean_latex_formulas(text: str) -> str:
-    text = re.sub(r'\$\s+(.*?)\s+\$', lambda m: f'${m.group(1).strip()}$', text)
-    return text
 def hash_password(password: str) -> str:
     return hashlib.sha256(password.encode()).hexdigest()
@@ -139,277 +265,698 @@ def hash_password(password: str) -> str:
 def verify_password(password: str, hashed: str) -> bool:
     return hash_password(password) == hashed
-def safe_get_text(response) -> str:
-    if not response.candidates: return ""
-    candidate = response.candidates[0]
-    if candidate.finish_reason == 4: return "[BLOCKED_BY_COPYRIGHT]"
-    parts = candidate.content.parts
-    texts = [p.text for p in parts if hasattr(p, "text")]
-    return "\n".join(texts)
-def stitch_text(text_a: str, text_b: str, min_overlap_chars: int = 20) -> str:
-    if not text_a: return text_b
-    if not text_b: return text_a
-    a_lines = text_a.splitlines()
-    b_lines = text_b.splitlines()
-    scan_window = min(len(a_lines), len(b_lines), 30)
-    best_overlap_idx = 0
-    for i in range(scan_window, 0, -1):
-        tail_a = "\n".join(a_lines[-i:]).strip()
-        head_b = "\n".join(b_lines[:i]).strip()
-        if len(tail_a) >= min_overlap_chars and tail_a == head_b:
-            best_overlap_idx = i
-            break
-    if best_overlap_idx > 0:
-        return text_a + "\n" + "\n".join(b_lines[best_overlap_idx:])
-    else:
-        return text_a + "\n\n" + text_b
-async def fallback_ocr_tesseract(image: Image.Image) -> str:
-    if pytesseract is None: return "**[Lỗi] Gemini từ chối (Bản quyền) & No Tesseract.**"
-    try:
-        loop = asyncio.get_running_loop()
-        text = await loop.run_in_executor(None, lambda: pytesseract.image_to_string(image, lang='vie+eng'))
-        return f"**[OCR Fallback]**\n\n{text}"
-    except Exception as e: return "**[Lỗi OCR Fallback]**"
 # ===== API ENDPOINTS =====
 @app.get("/")
 @app.get("/health")
-@app.head("/")       # FIX: Cho phép UptimeRobot ping HEAD
-@app.head("/health") # FIX: Cho phép UptimeRobot ping HEAD
 async def root():
     return {
         "status": "ok",
-        "service": "HT_MATH_WEB (Firebase Backend + BYOK)",
-        "database": "Firebase Firestore" if db else "Disconnected",
-        "mode": "Bring Your Own Key"
     }
 @app.get("/api/models")
 async def get_models():
     return {"models": GEMINI_MODELS}
-# --- AUTH API (FIREBASE) ---
 @app.post("/api/register")
 async def register(email: str = Form(...), password: str = Form(...)):
-    if not db: raise HTTPException(status_code=503, detail="Database chưa cấu hình")
-    users_ref = db.collection('users')
-    query = users_ref.where('email', '==', email).stream()
-    if any(query):
-        raise HTTPException(status_code=400, detail="Email này đã tồn tại.")
-    new_user = {
         "email": email,
         "password": hash_password(password),
-        "status": "pending",
-        "role": "user",
-        "created_at": firestore.SERVER_TIMESTAMP
     }
-    db.collection('users').add(new_user)
-    return {"success": True, "message": "Đăng ký thành công! Vui lòng chờ Admin duyệt."}
 @app.post("/api/login")
 async def login(request: Request, email: str = Form(...), password: str = Form(...)):
-    if not db: raise HTTPException(status_code=503, detail="Database chưa cấu hình")
-    users_ref = db.collection('users')
-    query = users_ref.where('email', '==', email).limit(1).stream()
-    user_doc = None
-    for doc in query:
-        user_doc = doc
-        break
-    if not user_doc:
-        raise HTTPException(status_code=401, detail="Email hoặc mật khẩu không đúng")
-    user_data = user_doc.to_dict()
-    if not verify_password(password, user_data.get("password", "")):
-        raise HTTPException(status_code=401, detail="Email hoặc mật khẩu không đúng")
-    if user_data.get("status") != "active":
-        raise HTTPException(status_code=403, detail="Tài khoản chưa được kích hoạt.")
-    token = secrets.token_urlsafe(32)
-    sessions_ref = db.collection('sessions')
-    old_sessions = sessions_ref.where('email', '==', email).stream()
-    for s in old_sessions:
-        s.reference.delete()
-    session_data = {
-        "email": email,
-        "token": token,
-        "last_seen": firestore.SERVER_TIMESTAMP
-    }
-    db.collection('sessions').add(session_data)
     return {"success": True, "token": token, "email": email}
 @app.post("/api/check-session")
 async def check_session(email: str = Form(...), token: str = Form(...)):
-    if not db: raise HTTPException(status_code=503, detail="Database Err")
-    sessions_ref = db.collection('sessions')
-    query = sessions_ref.where('email', '==', email).where('token', '==', token).limit(1).stream()
-    valid = False
-    for doc in query:
-        valid = True
-        doc.reference.update({"last_seen": firestore.SERVER_TIMESTAMP})
-        break
-    if not valid: raise HTTPException(status_code=401, detail="Session expired")
-    return {"status": "valid"}
 @app.post("/api/logout")
 async def logout(request: Request):
-    if not db: return {"status": "error"}
     try:
         data = await request.json()
         email = data.get("email")
-        if email:
-            sessions = db.collection('sessions').where('email', '==', email).stream()
-            for s in sessions: s.reference.delete()
-    except: pass
-    return {"status": "success"}
-# --- ADMIN API ---
-@app.get("/admin/users")
-async def admin_get_users(request: Request):
-    admin_key = request.headers.get("key")
-    ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
-    if admin_key != ADMIN_SECRET_KEY: raise HTTPException(status_code=401)
-    if not db: return {"users": []}
-    users = []
-    docs = db.collection('users').stream()
-    for doc in docs:
-        u = doc.to_dict()
-        created = u.get("created_at")
-        if created: u["created_at"] = created.strftime("%Y-%m-%d %H:%M:%S")
-        users.append(u)
-    return {"users": users}
-@app.post("/admin/approve")
-async def admin_approve(request: Request):
-    data = await request.json()
-    email = data.get("email")
-    admin_key = data.get("admin_key")
-    ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
-    if admin_key != ADMIN_SECRET_KEY: raise HTTPException(status_code=401)
-    docs = db.collection('users').where('email', '==', email).stream()
-    for doc in docs:
-        doc.reference.update({"status": "active"})
-        return {"success": True}
-    raise HTTPException(status_code=404)
-@app.post("/admin/delete")
-async def admin_delete(request: Request):
-    data = await request.json()
-    email = data.get("email")
-    admin_key = data.get("admin_key")
-    ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
-    if admin_key != ADMIN_SECRET_KEY: raise HTTPException(status_code=401)
-    docs = db.collection('users').where('email', '==', email).stream()
-    for doc in docs: doc.reference.delete()
-    sessions = db.collection('sessions').where('email', '==', email).stream()
-    for s in sessions: s.reference.delete()
-    return {"success": True}
-@app.post("/api/upload-image")
-async def upload_image(file: UploadFile = File(...)):
-    try:
-        file_ext = os.path.splitext(file.filename)[1] or ".png"
-        file_name = f"{uuid.uuid4().hex}{file_ext}"
-        file_path = f"uploads/{file_name}"
-        with open(file_path, "wb") as f: f.write(await file.read())
-        return {"url": file_path}
-    except Exception as e: raise HTTPException(status_code=500, detail=str(e))
-# --- CORE CONVERT LOGIC (BYOK) ---
-async def process_image_with_gemini(image: Image.Image, model_id: str, prompt: str, user_api_keys: str) -> str:
-    """Xử lý ảnh dùng key của user"""
-    try:
-        api_key = get_random_key_from_request(user_api_keys)
-        if not api_key:
-            return "**[Lỗi] Bạn chưa nhập API Key. Vui lòng nhập key trong phần cấu hình.**"
-        genai.configure(api_key=api_key)
-        model = genai.GenerativeModel(model_id)
-        response = model.generate_content([prompt, image])
-        text = safe_get_text(response)
-        if text == "[BLOCKED_BY_COPYRIGHT]":
-            return await fallback_ocr_tesseract(image)
-        return text.strip() if text else ""
-    except Exception as e:
-        print(f"Gemini Error (User Key): {e}")
-        if "403" in str(e) or "400" in str(e) or "API_KEY_INVALID" in str(e):
-             return "**[Lỗi API Key] Key của bạn không hợp lệ hoặc đã hết hạn ngạch (Quota).**"
-        if "429" in str(e):
-             return "**[Lỗi Quota] Key của bạn đang bị giới hạn tốc độ (Rate Limit).**"
-        return await fallback_ocr_tesseract(image)
-async def process_large_image(image: Image.Image, model: str, prompt: str, semaphore: asyncio.Semaphore, user_api_keys: str) -> str:
-    width, height = image.size
-    if height <= 2000:
-        async with semaphore: return await process_image_with_gemini(image, model, prompt, user_api_keys)
-    top = image.crop((0, 0, width, height // 2 + 200))
-    bottom = image.crop((0, height // 2 - 200, width, height))
-    async with semaphore:
-        t1 = await process_image_with_gemini(top, model, prompt, user_api_keys)
-        t2 = await process_image_with_gemini(bottom, model, prompt, user_api_keys)
-    return stitch_text(t1, t2)
 @app.post("/api/convert")
 async def convert_file(
     request: Request,
     file: UploadFile = File(...),
     model: str = Form("gemini-2.5-flash"),
-    mode: str = Form("latex"),
-    api_keys: str = Form(...)
 ):
     check_rate_limit(request)
-    if not api_keys or not api_keys.strip():
-        raise HTTPException(status_code=400, detail="Vui lòng cung cấp API Key (User Key)")
     prompt = DIRECT_GEMINI_PROMPT_LATEX if mode == "latex" else DIRECT_GEMINI_PROMPT_TEXT_ONLY
     try:
         file_content = await file.read()
         file_ext = os.path.splitext(file.filename)[1].lower()
-        sem = asyncio.Semaphore(MAX_THREADS)
         results = []
         if file_ext == ".pdf":
             doc = fitz.open(stream=file_content, filetype="pdf")
-            for i in range(len(doc)):
-                pix = doc[i].get_pixmap(dpi=200)
-                img = Image.open(io.BytesIO(pix.tobytes("png")))
-                results.append(await process_large_image(img, model, prompt, sem, api_keys))
-        else:
-            img = Image.open(io.BytesIO(file_content))
-            results.append(await process_large_image(img, model, prompt, sem, api_keys))
-        return {"success": True, "result": clean_latex_formulas("\n\n".join(results))}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/api/export-docx")
-async def export_docx(markdown_text: str = Form(...)):
     try:
-        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
-            pypandoc.convert_text(markdown_text, to='docx', format='markdown', outputfile=tmp.name, extra_args=['--standalone'])
-            return FileResponse(tmp.name, filename="Result.docx")
-    except: raise HTTPException(status_code=500, detail="Export Error")
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 """
+Backend API cho HT_MATH_WEB - Chạy trên Hugging Face Spaces
+Phiên bản: 6.7 (Real Word Equation Support)
 Tác giả: Hoàng Tấn Thiên
 """
 import os
 import io
 import json
 import base64
+import tempfile
+import time
+import re
+import asyncio
+import xml.etree.ElementTree as ET
 from typing import List, Optional
+from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, Form, Request
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
+from pydantic import BaseModel
 import fitz  # PyMuPDF
+from PIL import Image
 import google.generativeai as genai
+# Thư viện Word & Math
+from docx import Document
+from docx.shared import Cm, Pt, RGBColor
+from docx.enum.text import WD_LINE_SPACING
+from docx.oxml import OxmlElement
+from docx.oxml.ns import qn
+# Thư viện Excel
+from openpyxl import Workbook
+from openpyxl.styles import Font, Alignment, Border, Side
+# Import thư viện chuyển đổi LaTeX sang MathML
+# Ưu tiên pypandoc (mạnh hơn), fallback về latex2mathml
+latex_to_mathml = None
+pypandoc_available = False
 try:
     import pypandoc
+    pypandoc_available = True
+    print("Info: pypandoc available - using for LaTeX to MathML conversion")
 except ImportError:
+    try:
+        from latex2mathml.converter import convert as latex_to_mathml
+        print("Info: latex2mathml available - using for LaTeX to MathML conversion")
+    except ImportError:
+        print("Warning: Neither pypandoc nor latex2mathml found. Word export might fail for equations.")
+        latex_to_mathml = None
 try:
+    from supabase import create_client, Client
+    SUPABASE_AVAILABLE = True
 except ImportError:
+    SUPABASE_AVAILABLE = False
+    Client = None
+    create_client = None
+import hashlib
+import secrets
 # ===== CẤU HÌNH =====
+GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
 GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
+SUPABASE_URL = os.getenv("SUPABASE_URL", "")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
+MAX_THREADS = int(os.getenv("MAX_THREADS", "3"))
+ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
+# Setup Supabase
+supabase = None
+if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
+    try:
+        supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
+    except Exception as e:
+        print(f"Warning: Không thể kết nối Supabase: {e}")
+app = FastAPI(title="HT_MATH_WEB API", version="6.7")
 app.add_middleware(
     CORSMiddleware,
     allow_headers=["*"],
 )
+@app.exception_handler(404)
+async def not_found_handler(request, exc):
+    return JSONResponse(
+        status_code=404,
+        content={
+            "detail": f"Route not found: {request.url.path}",
+            "available_routes": ["/", "/api/models", "/api/convert", "/api/export-docx", "/api/login", "/api/check-session"]
+        }
+    )
+# ===== RATE LIMITING (Backend) =====
 ip_rate_limits = {}
+RATE_LIMIT_DURATION = 7  # giây
 def check_rate_limit(request: Request):
+    # Nếu muốn tắt rate limit để tránh lỗi trên Cloud
+    # return
     forwarded = request.headers.get("X-Forwarded-For")
+    if forwarded:
+        client_ip = forwarded.split(",")[0].strip()
+    else:
+        client_ip = request.client.host
     now = time.time()
     if client_ip in ip_rate_limits:
         elapsed = now - ip_rate_limits[client_ip]
         if elapsed < RATE_LIMIT_DURATION:
             print(f"[RateLimit] IP {client_ip} requesting too fast.")
     ip_rate_limits[client_ip] = now
+# ===== PROMPTS (Strict Mode) =====
+DIRECT_GEMINI_PROMPT_TEXT_ONLY = r"""**TRÍCH XUẤT VĂN BẢN THUẦN TÚY**
+Bạn là một chuyên gia trong việc trích xuất nội dung văn bản từ ảnh và PDF.
+NHIỆM VỤ CỦA BẠN:
+1.  **Trích xuất toàn bộ văn bản** từ hình ảnh/PDF được cung cấp.
+2.  **Giữ nguyên định dạng văn bản gốc**, bao gồm các ký tự toán học, mà **KHÔNG** chuyển đổi chúng sang LaTeX. Ví dụ, biểu thức `x^2 + y^2 = r^2` phải được giữ nguyên, không đổi thành `$x^2 + y^2 = r^2$`.
+3.  **Chuyển đổi sang định dạng Markdown** cơ bản cho tiêu đề và bảng biểu.
+4.  Các đề mục, tiêu đề, Định nghĩa, Ví dụ, Bài tập, trắc nghiệm, Lưu ý đều in đậm (dùng markdown `**text**`).
+5.  Không trích xuất Header, Footer, hoặc số trang.
+6.  Giữ nguyên cấu trúc đoạn văn, bảng biểu, và danh sách so với file gốc.
+QUY TẮC QUAN TRỌNG:
+- **KHÔNG SỬ DỤNG LATEX**. Mọi công thức toán học phải được giữ ở dạng văn bản thuần túy như trong tài liệu gốc.
+PHẢN HỒI:
+- Chỉ trả về văn bản Markdown đã được trích xuất.
+- KHÔNG đưa ra bất kỳ giải thích nào hoặc tự ý thêm vào nội dung.
+"""
+DIRECT_GEMINI_PROMPT_LATEX = r"""Bạn là công cụ trích xuất văn bản từ ảnh/PDF. NHIỆM VỤ: Chuyển đổi nội dung trong ảnh sang Markdown với công thức LaTeX.
+⚠️ QUY TẮC BẮT BUỘC - TUÂN THỦ 100%:
+1. ĐẦU RA: CHỈ là Markdown thuần túy - KHÔNG có giải thích, comment, hoặc text thừa. KHÔNG thêm "Wait, Câu X:" hay bất kỳ comment nào.
+2. CÔNG THỨC TOÁN HỌC - TẤT CẢ phải bọc trong $...$ (KHÔNG có space trong $...$):
+   ✅ ĐÚNG: $f(x)=2x-13$, $M(x)=0$, $a=2$, $b=-13$, $x=0$
+   ❌ SAI: f(x)=2x-13, Cho M(x)=0, a=2; b=-13, với x=0
+   - Biến đơn: $x$, $y$, $z$, $a$, $b$, $c$
+   - Số: $2$, $3$, $-13$, $0$
+   - Phương trình: $f(x)=2x-13$, $M(x)=0$
+   - Biểu thức: $a=2$, $b=a-15$, $x=0$
+3. KHOẢNG TRẮNG: Luôn có space TRƯỚC $ mở (trừ đầu dòng):
+   ✅ ĐÚNG: Vậy $f(x)=2x-13$. Cho $M(x)=0$. Thay $b=a-15$ vào. Đa thức $M(x)$ có nghiệm $x=0$.
+   ❌ SAI: Vậy$f(x)=2x-13$. Cho$M(x)=0$. Thay$b=a-15$vào.
+4. CÔNG THỨC ĐẶC BIỆT - QUY TẮC CHUẨN:
+   - Phân số: $\frac{a}{b}$, $\frac{1}{2}$, $\frac{x+1}{x-1}$
+   - Số mũ: $x^{2}$, $a^{n}$, $2^{3}$, $x^{n+1}$
+   - Căn bậc 2: $\sqrt{x}$, $\sqrt{2}$, $\sqrt{a+b}$
+   - Căn bậc n: $\sqrt[n]{x}$, $\sqrt[3]{8}$
+   - Tích phân: $\int_{0}^{2} f(x)dx$, $\int\limits_{0}^{2} y^{2}dx$
+   - Tổng: $\sum_{i=1}^{n} a_i$
+   - Giới hạn: $\lim_{x \to 0} f(x)$
+   - Logarit: $\log x$, $\ln x$, $\log_{2} x$
+   - Lượng giác: $\sin x$, $\cos x$, $\tan x$
+5. HÌNH HỌC - QUY TẮC:
+   - Điểm: $A$, $B$, $C$, $M$, $N$
+   - Đoạn thẳng: $AB$, $CD$, $MN$, $BC$
+   - Tam giác: $\Delta ABC$, $\Delta$ (không dùng $\Triangle$)
+   - Góc: $\widehat{ABC}$, $\angle ABC$
+   - Song song: $AB // CD$ (không dùng $AB \parallel CD$)
+   - Vuông góc: $AB \perp CD$
+   - Bằng nhau: $AB = CD$ (không dùng $\cong$)
+6. ĐƠN VỊ - QUY TẮC:
+   - Diện tích: $cm^{2}$, $m^{2}$, $km^{2}$
+   - Thể tích: $cm^{3}$, $m^{3}$
+   - Độ: $90^{0}$, $45^{0}$, $180^{0}$
+   - Phần trăm: $50\%$, $100\%$
+7. SỐ THẬP PHÂN - QUY TẮC:
+   - Dùng dấu phẩy: $1,3$, $2,5$, $0,75$
+   - KHÔNG dùng dấu chấm: không viết $1.3$
+8. TẬP HỢP - QUY TẮC:
+   - Tập hợp: $A = \{1, 2, 3\}$, $B = \{x \in N | x > 5\}$
+   - Thuộc: $x \in A$, $2 \in A$
+   - Không thuộc: $x \notin A$, $5 \notin A$
+   - Tập con: $A \subset B$
+9. BẤT ĐẲNG THỨC - QUY TẮC:
+   - Lớn hơn: $a > b$, $x \geq 5$
+   - Nhỏ hơn: $a < b$, $x \leq 10$
+   - Khoảng: $[a; b]$, $(a; b)$, $[a; b)$, $(a; b]$
+10. GIỮ NGUYÊN CẤU TRÚC:
+    - Giữ nguyên bố cục, ngắt dòng như ảnh gốc
+    - Tiêu đề in đậm: **Bài 7:**, **Dạng 4:**, **PHẦN I.**
+    - Bảng: giữ nguyên số hàng/cột, dùng | để phân cách
+11. BẢNG - QUY TẮC QUAN TRỌNG ‼️:
+    ⚠️ ĐẾM CHÍNH XÁC SỐ CỘT VÀ HÀNG:
+    - TRƯỚC KHI VIẾT BẢNG: Đếm kỹ số cột từ trái sang phải trong ảnh gốc
+    - KIỂM TRA KỸ: Đảm bảo TẤT CẢ các cột đều được nhận diện, KHÔNG BỎ SÓT
+    - Nếu ảnh có 11 cột thì bảng Markdown PHẢI có 11 cột
+    - Nếu ảnh có 20 hàng thì bảng Markdown PHẢI có 20 hàng
+    - KHÔNG gộp hoặc bỏ qua cột/hàng nào
+    - KHÔNG chuyển hàng thành cột hoặc ngược lại
+    📋 FORMAT BẢNG CHUẨN:
+    | Cột 1 | Cột 2 | Cột 3 | ... | Cột N |
+    |-------|-------|-------|-----|-------|
+    | Data1 | Data2 | Data3 | ... | DataN |
+    ✅ VÍ DỤ ĐÚNG (11 cột):
+    | Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 | Col8 | Col9 | Col10 | Col11 |
+    |------|------|------|------|------|------|------|------|------|-------|-------|
+    | A    | B    | C    | D    | E    | F    | G    | H    | I    | J     | K     |
+    ❌ SAI (thiếu cột):
+    | Col1 | Col2 | Col3 | Col4 | Col5 | Col6 | Col7 |
+    |------|------|------|------|------|------|------|
+    | A    | B    | C    | D    | E    | F    | G    |
+⚠️ LƯU Ý CUỐI CÙNG:
+- KHÔNG thêm comment, giải thích, hoặc text thừa
+- KHÔNG tự ý thay đổi nội dung
+- CHỈ trả về Markdown thuần túy
+- Bỏ qua Header/Footer/số trang
+- ĐẾM KỸ số cột/hàng trong bảng - KHÔNG ĐƯỢC BỎ SÓT
+BẮT ĐẦU TRÍCH XUẤT NGAY - CHỈ TRẢ VỀ MARKDOWN, KHÔNG CÓ GIẢI THÍCH.
+"""
+# ===== KEY MANAGER =====
+class ApiKeyManager:
+    def __init__(self, keys: List[str]):
+        self.api_keys = [k.strip() for k in keys if k.strip()]
+        self.current_index = 0
+    def get_next_key(self) -> Optional[str]:
+        if not self.api_keys: return None
+        key = self.api_keys[self.current_index]
+        self.current_index = (self.current_index + 1) % len(self.api_keys)
+        return key
+    def get_key_count(self) -> int:
+        return len(self.api_keys)
+key_manager = ApiKeyManager(GEMINI_API_KEYS)
 # ===== HELPER FUNCTIONS =====
 def clean_latex_formulas(text: str) -> str:
+    # Xóa các khoảng trắng thừa quanh dấu $
+    return re.sub(r'\$(.*?)\$', lambda m: f'${m.group(1).strip()}$', text)
 def hash_password(password: str) -> str:
     return hashlib.sha256(password.encode()).hexdigest()
 def verify_password(password: str, hashed: str) -> bool:
     return hash_password(password) == hashed
 # ===== API ENDPOINTS =====
 @app.get("/")
 @app.get("/health")
 async def root():
     return {
         "status": "ok",
+        "service": "HT_MATH_WEB API v6.7",
+        "keys_loaded": key_manager.get_key_count(),
+        "models_available": GEMINI_MODELS
     }
 @app.get("/api/models")
 async def get_models():
     return {"models": GEMINI_MODELS}
+# --- AUTH API (Token Only) ---
 @app.post("/api/register")
 async def register(email: str = Form(...), password: str = Form(...)):
+    if not supabase: raise HTTPException(status_code=500, detail="DB Error")
+    res = supabase.table("users").select("email").eq("email", email).execute()
+    if res.data: raise HTTPException(status_code=400, detail="Email tồn tại")
+    user_data = {
         "email": email,
         "password": hash_password(password),
+        "status": "pending",
+        "created_at": time.strftime("%Y-%m-%d %H:%M:%S")
     }
+    supabase.table("users").insert(user_data).execute()
+    return {"success": True, "message": "Đăng ký thành công, chờ duyệt."}
 @app.post("/api/login")
 async def login(request: Request, email: str = Form(...), password: str = Form(...)):
+    if not supabase: raise HTTPException(status_code=500, detail="DB Error")
+    res = supabase.table("users").select("*").eq("email", email).execute()
+    if not res.data: raise HTTPException(status_code=401, detail="Sai email/pass")
+    user = res.data[0]
+    if not verify_password(password, user["password"]):
+        raise HTTPException(status_code=401, detail="Sai email/pass")
+    if user.get("status") != "active":
+        raise HTTPException(status_code=403, detail="Tài khoản chưa kích hoạt")
+    token = secrets.token_urlsafe(32)
+    try: supabase.table("sessions").delete().eq("email", email).execute()
+    except Exception as e: print(f"Lỗi xóa session cũ: {e}")
+    try:
+        supabase.table("sessions").insert({
+            "email": email,
+            "token": token,
+            "last_seen": time.strftime("%Y-%m-%d %H:%M:%S")
+        }).execute()
+    except Exception as e:
+        print(f"Lỗi tạo session: {e}")
+        raise HTTPException(status_code=500, detail="Lỗi tạo phiên làm việc")
     return {"success": True, "token": token, "email": email}
 @app.post("/api/check-session")
 async def check_session(email: str = Form(...), token: str = Form(...)):
+    if not supabase: raise HTTPException(status_code=500, detail="DB Error")
+    try:
+        res = supabase.table("sessions").select("token").eq("email", email).execute()
+        if not res.data:
+            raise HTTPException(status_code=401, detail="Session expired")
+        server_token = res.data[0]['token']
+        if token != server_token:
+            print(f"[AUTH] Token mismatch for {email}. Old session invalid.")
+            raise HTTPException(status_code=401, detail="Logged in elsewhere")
+        supabase.table("sessions").update({
+            "last_seen": time.strftime("%Y-%m-%d %H:%M:%S")
+        }).eq("email", email).execute()
+        return {"status": "valid"}
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"Error checking session: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/api/logout")
 async def logout(request: Request):
     try:
         data = await request.json()
         email = data.get("email")
+        if email and supabase:
+            supabase.table("sessions").delete().eq("email", email).execute()
+        return {"status": "success"}
+    except:
+        return {"status": "success"}
+# --- CONVERT API ---
+async def process_image_with_gemini(image: Image.Image, model_id: str, prompt: str, max_retries: int = 3) -> str:
+    for attempt in range(max_retries):
+        try:
+            api_key = key_manager.get_next_key()
+            if not api_key: raise ValueError("No API Key")
+            genai.configure(api_key=api_key)
+            generation_config = {
+                "temperature": 0.0,
+                "top_p": 1.0,
+                "max_output_tokens": 8192,
+            }
+            safety_settings = [
+                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
+                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
+            ]
+            model = genai.GenerativeModel(model_name=model_id, generation_config=generation_config, safety_settings=safety_settings)
+            response = model.generate_content([prompt, image])
+            if response.text:
+                text = response.text.strip()
+                # Loại bỏ các comment/giải thích không cần thiết từ AI
+                lines = text.split('\n')
+                cleaned_lines = []
+                for line in lines:
+                    # Bỏ qua các dòng comment như "Wait, Câu X:" hoặc giải thích
+                    stripped = line.strip()
+                    if stripped.startswith('*Wait') or (stripped.startswith('Wait,') and ':' in stripped):
+                        continue
+                    # Bỏ qua dòng chỉ có comment markdown
+                    if stripped in ['*', '**', '***', '****']:
+                        continue
+                    cleaned_lines.append(line)
+                cleaned_text = '\n'.join(cleaned_lines).strip()
+                # Loại bỏ các pattern comment thường gặp
+                cleaned_text = re.sub(r'\*Wait,.*?:\*', '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
+                cleaned_text = re.sub(r'Wait,.*?:', '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
+                # Loại bỏ các dòng trống thừa
+                cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text)
+                return cleaned_text
+            else:
+                raise ValueError("Empty response")
+        except Exception as e:
+            if "429" in str(e) or "Quota" in str(e):
+                if attempt < max_retries - 1:
+                    time.sleep(2)
+                    continue
+            print(f"Gemini Error (Attempt {attempt}): {e}")
+            raise HTTPException(status_code=500, detail=str(e))
+    raise HTTPException(status_code=500, detail="Failed after retries")
 @app.post("/api/convert")
 async def convert_file(
     request: Request,
     file: UploadFile = File(...),
     model: str = Form("gemini-2.5-flash"),
+    mode: str = Form("latex")
 ):
     check_rate_limit(request)
+    if key_manager.get_key_count() == 0:
+        raise HTTPException(status_code=500, detail="Chưa cấu hình API Key")
     prompt = DIRECT_GEMINI_PROMPT_LATEX if mode == "latex" else DIRECT_GEMINI_PROMPT_TEXT_ONLY
     try:
         file_content = await file.read()
         file_ext = os.path.splitext(file.filename)[1].lower()
         results = []
         if file_ext == ".pdf":
             doc = fitz.open(stream=file_content, filetype="pdf")
+            page_count = len(doc)
+            # Tạo semaphore để giới hạn số lượng concurrent requests
+            semaphore = asyncio.Semaphore(min(MAX_THREADS, page_count))
+            async def process_page_async(page, page_num, model, prompt):
+                """Xử lý một trang PDF với semaphore để giới hạn concurrent requests"""
+                async with semaphore:
+                    pix = page.get_pixmap(dpi=300)
+                    img = Image.open(io.BytesIO(pix.tobytes("png")))
+                    text = await process_image_with_gemini(img, model, prompt)
+                    return page_num, text
+            # Tạo tasks cho tất cả các trang
+            tasks = []
+            for page_num in range(page_count):
+                page = doc[page_num]
+                tasks.append(process_page_async(page, page_num, model, prompt))
+            # Chạy tất cả tasks song song (giới hạn bởi semaphore)
+            page_results = await asyncio.gather(*tasks)
+            # Sắp xếp kết quả theo thứ tự trang
+            page_results.sort(key=lambda x: x[0])
+            results = [text for _, text in page_results]
+            doc.close()
+        elif file_ext in [".png", ".jpg", ".jpeg", ".bmp"]:
+            img = Image.open(io.BytesIO(file_content))
+            text = await process_image_with_gemini(img, model, prompt)
+            results.append(text)
+        else:
+            raise HTTPException(status_code=400, detail="Định dạng file không hỗ trợ")
+        final_text = "\n\n".join(results)
+        return {"success": True, "result": clean_latex_formulas(final_text)}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+# --- WORD EXPORT API (REAL EQUATION SUPPORT) ---
def convert_latex_to_mathml(latex):
    """Convert a LaTeX formula to a MathML string, or return None.

    pypandoc is tried first when ``pypandoc_available`` is set; if it raises
    or yields only whitespace, ``latex_to_mathml`` is tried next (when that
    name is truthy). Blank or missing input returns None without calling
    either converter. Converter errors are printed, never raised.
    """
    if not (latex and latex.strip()):
        return None

    # First choice: pypandoc.
    # NOTE(review): "mathml" may not be an output format pandoc accepts; if it
    # is rejected, this branch always lands in the except below — confirm.
    if pypandoc_available:
        try:
            pandoc_output = pypandoc.convert_text(
                f"$${latex}$$",
                "mathml",
                format="latex",
                extra_args=["--mathml"]
            )
            trimmed = pandoc_output.strip() if pandoc_output else ""
            if trimmed:
                return trimmed
        except Exception as e:
            print(f"pypandoc conversion error: {e}, falling back to latex2mathml")

    # Second choice: latex2mathml.
    if not latex_to_mathml:
        return None
    try:
        fallback_output = latex_to_mathml(latex)
        trimmed = fallback_output.strip() if fallback_output else ""
        if trimmed:
            return trimmed
    except Exception as e:
        print(f"latex2mathml conversion error: {e}")
    return None
def insert_equation(paragraph, latex):
    """Append a LaTeX formula to a Word paragraph as a native equation (OMML).

    The formula is converted to MathML with ``convert_latex_to_mathml``, the
    MathML tree is translated to Office Math elements, and the result is
    appended to the paragraph. Supported MathML elements: ``mi``, ``mn``,
    ``mo``, ``mtext``, ``mfrac``, ``msup``, ``msub`` and ``msqrt``. ``mrow``
    and any other container contribute their children in order.

    Args:
        paragraph: python-docx paragraph that receives the equation.
        latex: LaTeX source without the surrounding ``$``. Blank or missing
            input is ignored.

    Returns:
        None. If any step fails, the error is printed and the formula is
        added as italic ``$...$`` text in Cambria Math 14 pt instead.
    """
    if not latex or not latex.strip():
        return
    try:
        mathml = convert_latex_to_mathml(latex)
        if not mathml or not mathml.strip():
            raise ValueError("MathML is empty")
        try:
            root = ET.fromstring(mathml)
        except ET.ParseError as e:
            print(f"MathML parse error: {e}, mathml: {mathml[:100]}")
            raise

        def local_name(elem):
            """Return the element tag without its ``{namespace}`` prefix."""
            tag = elem.tag
            return tag.split("}")[-1] if tag.startswith("{") else tag

        def convert_children(elem):
            """Convert every child of *elem* and return one flat node list."""
            nodes = []
            for child in elem:
                nodes.extend(convert(child))
            return nodes

        def wrap(tag_name, nodes):
            """Create an OMML element *tag_name* containing *nodes*."""
            holder = OxmlElement(tag_name)
            for node in nodes:
                holder.append(node)
            return holder

        def argument(tag_name, elem, index):
            """Wrap the converted child *index* of *elem* (empty if absent)."""
            nodes = convert(elem[index]) if len(elem) > index else []
            return wrap(tag_name, nodes)

        def convert(elem):
            """Translate one MathML element into a list of OMML elements.

            A list is returned so that grouping elements can be flattened:
            an ``m:r`` run may hold text only, never other runs or
            structures such as fractions.
            """
            tag = local_name(elem)
            # Identifiers, numbers, operators and text become a text run.
            if tag in ("mi", "mn", "mo", "mtext"):
                math_run = OxmlElement("m:r")
                math_text = OxmlElement("m:t")
                text_content = (elem.text or "").strip()
                math_text.text = text_content
                if text_content:
                    math_text.set(qn("xml:space"), "preserve")
                math_run.append(math_text)
                return [math_run]
            if tag == "mfrac":
                frac = OxmlElement("m:f")
                frac.append(argument("m:num", elem, 0))
                frac.append(argument("m:den", elem, 1))
                return [frac]
            if tag == "msup":
                sup = OxmlElement("m:sSup")
                sup.append(argument("m:e", elem, 0))
                sup.append(argument("m:sup", elem, 1))
                return [sup]
            if tag == "msub":
                sub = OxmlElement("m:sSub")
                sub.append(argument("m:e", elem, 0))
                sub.append(argument("m:sub", elem, 1))
                return [sub]
            if tag == "msqrt":
                # A square root is a radical with a hidden, empty degree;
                # the m:deg child is written even though nothing is shown.
                rad = OxmlElement("m:rad")
                rad_pr = OxmlElement("m:radPr")
                deg_hide = OxmlElement("m:degHide")
                deg_hide.set(qn("m:val"), "1")
                rad_pr.append(deg_hide)
                rad.append(rad_pr)
                rad.append(OxmlElement("m:deg"))
                # All children of msqrt form the radicand.
                rad.append(wrap("m:e", convert_children(elem)))
                return [rad]
            # mrow, math and unknown containers: keep the children, drop the
            # wrapper. Unknown leaf elements contribute nothing.
            return convert_children(elem)

        nodes = convert(root)
        if not nodes:
            reason = (
                "No valid MathML children"
                if "math" in root.tag
                else "MathML conversion failed"
            )
            raise ValueError(reason)

        # OxmlElement is expected to declare the m: namespace for these tags,
        # so no xmlns attribute is set by hand (python-docx) — see gr_refs.
        omath = wrap("m:oMath", nodes)
        omath_para = wrap("m:oMathPara", [omath])
        # Math is a sibling of text runs inside the paragraph, not a child of
        # a run, so it is appended to the paragraph element itself.
        # NOTE(review): oMathPara is the display-math wrapper; a bare m:oMath
        # may suit inline $...$ formulas better — confirm in Word.
        paragraph._p.append(omath_para)
    except Exception as e:
        print(f"Equation fallback for '{latex[:50]}...': {e}")
        import traceback
        traceback.print_exc()
        # Fallback: plain text, not a real equation, but still readable.
        run = paragraph.add_run("$" + latex + "$")
        run.font.name = "Cambria Math"
        run.font.size = Pt(14)
        run.italic = True  # Italic to set it apart from body text.
 @app.post("/api/export-docx")
+async def export_docx(
+    markdown_text: str = Form(...)
+):
+    try:
+        doc = Document()
+        # 1. Page Setup
+        section = doc.sections[0]
+        section.page_height = Cm(29.7)
+        section.page_width = Cm(21.0)
+        section.left_margin = Cm(1.5)
+        section.right_margin = Cm(1.5)
+        section.top_margin = Cm(1.5)
+        section.bottom_margin = Cm(1.5)
+        # 2. Font & Style Default
+        style = doc.styles['Normal']
+        font = style.font
+        font.name = 'Times New Roman'
+        font.size = Pt(14)
+        font.color.rgb = RGBColor(0, 0, 0)
+        doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), 'Times New Roman')
+        # 3. Process Content
+        lines = markdown_text.split("\n")
+        # Biến để theo dõi bảng đang xử lý
+        current_table = None
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            stripped_line = line.strip()
+            # Table Handling - Xử lý bảng với header row
+            if stripped_line.startswith("|"):
+                cells = [c.strip() for c in stripped_line.split("|") if c.strip()]
+                # Bỏ qua dòng separator (---)
+                if "---" in stripped_line:
+                    i += 1
+                    continue
+                if len(cells) > 0:
+                    # Nếu chưa có bảng, tạo bảng mới với header row
+                    if current_table is None:
+                        current_table = doc.add_table(rows=1, cols=len(cells))
+                        current_table.style = 'Table Grid'
+                        # Header row
+                        hdr_cells = current_table.rows[0].cells
+                        for j, cell_text in enumerate(cells):
+                            if j < len(hdr_cells):
+                                hdr_cells[j].text = cell_text
+                                # Format header
+                                for paragraph in hdr_cells[j].paragraphs:
+                                    paragraph.paragraph_format.line_spacing = 1.0
+                                    for run in paragraph.runs:
+                                        run.font.name = 'Times New Roman'
+                                        run.font.size = Pt(14)
+                                        run.bold = True  # Header in đậm
+                    else:
+                        # Thêm row mới vào bảng hiện tại
+                        row = current_table.add_row().cells
+                        for j, cell_text in enumerate(cells):
+                            if j < len(row):
+                                row[j].text = cell_text
+                                for paragraph in row[j].paragraphs:
+                                    paragraph.paragraph_format.line_spacing = 1.0
+                                    for run in paragraph.runs:
+                                        run.font.name = 'Times New Roman'
+                                        run.font.size = Pt(14)
+                i += 1
+                continue
+            # Reset bảng khi gặp dòng trống hoặc không phải bảng
+            if current_table is not None:
+                current_table = None
+            # Bỏ qua dòng trắng
+            if not stripped_line:
+                i += 1
+                continue
+            # Paragraph Handling
+            p = doc.add_paragraph()
+            p.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE # Giãn dòng đơn
+            p.paragraph_format.line_spacing = 1.0 # Đảm bảo 1.0
+            p.paragraph_format.space_after = Pt(0)
+            parts = re.split(r'(\$.*?\$)', line)
+            for part in parts:
+                if not part: continue
+                # --- XỬ LÝ EQUATION (QUAN TRỌNG) ---
+                if part.startswith("$") and part.endswith("$"):
+                    latex = part.strip("$") # Lấy nội dung LaTeX bỏ dấu $
+                    insert_equation(p, latex)
+                else:
+                    # Text thường
+                    sub_parts = re.split(r'(\*\*.*?\*\*)', part)
+                    for sub in sub_parts:
+                        clean_sub = sub.replace("**", "")
+                        if not clean_sub: continue
+                        run = p.add_run(clean_sub)
+                        run.font.name = 'Times New Roman'
+                        run.font.size = Pt(14)
+                        if sub.startswith("**") and sub.endswith("**"):
+                            run.bold = True
+            i += 1
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
+        doc.save(tmp.name)
+        tmp.close()
+        return FileResponse(
+            tmp.name,
+            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            filename="ket_qua_HT_MATH.docx"
+        )
+    except Exception as e:
+        import traceback
+        error_detail = str(e)
+        traceback.print_exc()
+        print(f"Error Export Docx: {error_detail}")
+        raise HTTPException(status_code=500, detail=f"Lỗi xuất Word: {error_detail}")
+# --- EXCEL EXPORT API ---
@app.post("/api/export-excel")
async def export_excel(
    markdown_text: str = Form(...)
):
    """Export Markdown text as an Excel (.xlsx) download.

    - Markdown table rows (lines starting with ``|``) become bordered cells,
      one sheet row per table row; the first row of each table is bold.
    - Any other non-blank line goes into column A; the whole cell is bold if
      the line contains a ``**bold**`` span.
    - LaTeX formulas (``$...$`` and ``$$...$$``) are removed entirely.

    Args:
        markdown_text: Markdown source submitted as a form field.

    Returns:
        A response carrying the .xlsx bytes as an attachment named
        ``Ket_qua_HT_MATH.xlsx``.

    Raises:
        HTTPException: 500 with the error text when the export fails.
    """
    from fastapi.responses import Response

    def split_table_row(row_text):
        """Split a Markdown table row into stripped cells, keeping empty ones."""
        inner = row_text.strip()
        if inner.startswith("|"):
            inner = inner[1:]
        if inner.endswith("|"):
            inner = inner[:-1]
        return [cell.strip() for cell in inner.split("|")]

    try:
        wb = Workbook()
        ws = wb.active
        ws.title = "HT MATH WEB"
        # Remove LaTeX. $$...$$ is matched first so no stray "$$" is left.
        cleaned_text = re.sub(
            r'\$\$.+?\$\$|\$[^$]+\$', '', markdown_text, flags=re.DOTALL
        )
        # Collapse runs of spaces/tabs only. Newlines must survive, because
        # the text is split into lines below; collapsing them would turn the
        # whole document into a single line and a single cell/row.
        cleaned_text = re.sub(r'[^\S\n]+', ' ', cleaned_text)
        current_row = 1
        table_start_row = None  # Sheet row of the current table's header.
        thin_border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        for line in cleaned_text.split("\n"):
            stripped_line = line.strip()
            if stripped_line.startswith("|"):
                cells = split_table_row(stripped_line)
                is_separator = "---" in stripped_line and all(
                    not cell.strip("-: ") for cell in cells
                )
                # Skip delimiter rows and rows without any content.
                if is_separator or not any(cells):
                    continue
                if table_start_row is None:
                    table_start_row = current_row
                is_header = current_row == table_start_row
                # Empty cells are kept so later columns stay aligned.
                for col_idx, cell_text in enumerate(cells, start=1):
                    cell = ws.cell(row=current_row, column=col_idx)
                    cell.value = cell_text
                    cell.border = thin_border
                    cell.alignment = Alignment(horizontal='left', vertical='center')
                    cell.font = Font(name='Times New Roman', size=12, bold=is_header)
                current_row += 1
                continue
            # Any non-table line (including a blank one) ends the table, so
            # the next table gets its own bold header row.
            table_start_row = None
            if not stripped_line:
                continue
            # Plain line: strip the ** markers from bold spans.
            parts = re.split(r'(\*\*.*?\*\*)', stripped_line)
            has_bold = False
            pieces = []
            for part in parts:
                if part.startswith("**") and part.endswith("**"):
                    has_bold = True
                    pieces.append(part.replace("**", ""))
                else:
                    pieces.append(part)
            combined_text = "".join(pieces).strip()
            if combined_text:
                cell = ws.cell(row=current_row, column=1)
                cell.value = combined_text
                make_bold = has_bold or stripped_line.startswith("**")
                cell.font = Font(name='Times New Roman', size=12, bold=make_bold)
                cell.alignment = Alignment(horizontal='left', vertical='center', wrap_text=True)
                current_row += 1
        # Fit each column to its longest value, capped at 50.
        for column_cells in ws.columns:
            max_length = max(
                (len(str(c.value)) for c in column_cells if c.value is not None),
                default=0,
            )
            column = column_cells[0].column_letter
            ws.column_dimensions[column].width = min(max_length + 2, 50)
        # Build the file in memory: a temp file created with delete=False is
        # never removed and would accumulate on disk with every export.
        buffer = io.BytesIO()
        wb.save(buffer)
        return Response(
            content=buffer.getvalue(),
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"Content-Disposition": 'attachment; filename="Ket_qua_HT_MATH.xlsx"'},
        )
    except Exception as e:
        import traceback
        error_detail = str(e)
        traceback.print_exc()
        print(f"Error Export Excel: {error_detail}")
        raise HTTPException(status_code=500, detail=f"Lỗi xuất Excel: {error_detail}")
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")), log_level="info")