# ht-math-web-backend / server.py
# hoangthiencm's picture
# Update server.py
# 4fbda48 verified
"""
Backend API HỢP NHẤT (Web + Desktop) - HT MATH V6
Chạy trên Hugging Face Spaces (Docker Version)
"""
import os
import io
import time
import asyncio
import re
import tempfile
import hashlib
import secrets
import uuid
import math
import base64
import random
from typing import List, Optional
# --- THƯ VIỆN CHÍNH ---
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request, Body
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel # Thêm Pydantic cho Desktop App
# --- THƯ VIỆN XỬ LÝ ẢNH & AI ---
from PIL import Image
import fitz # PyMuPDF
import google.generativeai as genai
# --- PANDOC IMPORT ---
try:
import pypandoc
print(f"INFO: Pandoc version detected: {pypandoc.get_pandoc_version()}")
except ImportError:
print("CRITICAL WARNING: pypandoc module not found.")
except OSError:
print("CRITICAL WARNING: pandoc binary not found in system path.")
# --- SUPABASE ---
try:
from supabase import create_client, Client
SUPABASE_AVAILABLE = True
except ImportError:
SUPABASE_AVAILABLE = False
Client = None
create_client = None
# ===== CONFIGURATION =====
# All settings are read from environment variables.
# GEMINI_API_KEYS: comma-separated list of keys; blank entries are dropped.
GEMINI_API_KEYS = [
    raw_key.strip()
    for raw_key in os.getenv("GEMINI_API_KEYS", "").split(",")
    if raw_key.strip()
]
# GEMINI_MODELS: comma-separated model names, with a built-in default list.
GEMINI_MODELS = [
    raw_name.strip()
    for raw_name in os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
    if raw_name.strip()
]
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
# Supabase client: stays None unless the package imported and both the URL
# and the key are configured; a failed connection is logged, not raised.
supabase: Optional[Client] = None
if all((SUPABASE_AVAILABLE, SUPABASE_URL, SUPABASE_KEY)):
    try:
        supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
    except Exception as connect_error:
        print(f"ERROR: Failed to connect to Supabase: {connect_error}")
    else:
        print("INFO: Supabase connected successfully.")
# FastAPI application object shared by the Web and Desktop endpoints.
app = FastAPI(title="HT MATH UNIFIED SERVER")

# CORS is left fully open (any origin, method and header, credentials
# allowed) so that both the Web front end and the Desktop app can call in.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
# --- HELPER FUNCTIONS ---
def get_random_api_key():
    """Return one of the configured Gemini API keys, chosen at random.

    Raises:
        HTTPException: 500 when ``GEMINI_API_KEYS`` is empty.
    """
    if GEMINI_API_KEYS:
        return secrets.choice(GEMINI_API_KEYS)
    raise HTTPException(status_code=500, detail="Server chưa cấu hình GEMINI_API_KEYS")
def clean_latex_formulas(text):
    r"""Normalize LaTeX math delimiters to dollar-sign form.

    ``\(`` and ``\)`` become ``$``; ``\[`` and ``\]`` become ``$$``.

    A delimiter only counts when its backslash is not itself escaped, so a
    LaTeX line break with optional spacing such as ``\\[2pt]`` is left
    untouched instead of being turned into ``\$$2pt]``.

    Args:
        text: Markdown/LaTeX text to normalize.

    Returns:
        The text with inline and display math delimiters rewritten.
    """
    def _to_dollars(match):
        # group(1) holds any complete ``\\`` pairs in front of the delimiter;
        # they are kept verbatim. group(2) is the bracket character itself.
        dollars = "$" if match.group(2) in "()" else "$$"
        return match.group(1) + dollars

    # (?<!\\)      : the run of backslashes starts here
    # ((?:\\\\)*)  : an even number of backslashes (escaped pairs)
    # \\([()\[\]]) : the single backslash that introduces the delimiter
    return re.sub(r'(?<!\\)((?:\\\\)*)\\([()\[\]])', _to_dollars, text)
# ==============================================================================
# PHẦN 1: API DÀNH RIÊNG CHO DESKTOP APP (HT MATH V6 CLIENT)
# ==============================================================================
class DesktopGenerateRequest(BaseModel):
    """JSON request body of the desktop ``/api/generate`` endpoint."""

    # Text prompt sent to the model (required).
    prompt: str
    # Requested model name; the endpoint substitutes the first configured
    # model when this one is not in GEMINI_MODELS.
    model: Optional[str] = "gemini-1.5-flash"
    # Optional image as a Base64 string (a ``data:...;base64,`` prefix is
    # tolerated by the endpoint).
    image: Optional[str] = None
@app.get("/api/models")
async def get_models_desktop():
    """Return the model names the Desktop app loads into its ComboBox."""
    # Fall back to one default entry so the client never gets an empty list.
    available_models = GEMINI_MODELS or ["gemini-1.5-flash"]
    return {"models": available_models}
@app.post("/api/generate")
async def generate_content_desktop(req: DesktopGenerateRequest):
    """Run a Gemini request on behalf of the Desktop app.

    Unlike the Web endpoint (multipart upload), this endpoint receives JSON
    whose optional ``image`` field is a Base64 string.

    Args:
        req: Prompt, requested model name and optional Base64 image.

    Returns:
        ``{"result": <generated text>}``.

    Raises:
        HTTPException: 400 when the image cannot be decoded; 500 when no API
            key is configured, Gemini returns no text, or any other error
            occurs.
    """
    try:
        # 1. Pick a random key (simple load balancing across keys).
        api_key = get_random_api_key()
        genai.configure(api_key=api_key)

        # 2. Pick the model: fall back to the first supported model when the
        #    client asks for one that is not in the configured list.
        model_name = req.model
        if GEMINI_MODELS and model_name not in GEMINI_MODELS:
            model_name = GEMINI_MODELS[0]
        model = genai.GenerativeModel(model_name)

        # 3. Build the content parts: prompt first, then the optional image.
        content_parts = [req.prompt]
        if req.image:
            try:
                # Drop a leading "data:image/...;base64," prefix if present.
                encoded = req.image.split(",", 1)[1] if "," in req.image else req.image
                image_bytes = base64.b64decode(encoded)
                content_parts.append(Image.open(io.BytesIO(image_bytes)))
            except Exception as e:
                raise HTTPException(status_code=400, detail=f"Lỗi xử lý ảnh base64: {str(e)}") from e

        # 4. Call Gemini with the async API so the event loop is not blocked
        #    while waiting for the response.
        response = await model.generate_content_async(content_parts)
        if response.text:
            return {"result": response.text}
        raise HTTPException(status_code=500, detail="Gemini không trả về nội dung text.")
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must keep their status
        # code and detail instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        print(f"Error Desktop API: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
# ==============================================================================
# PHẦN 2: API DÀNH CHO WEB APP (HT MATH WEB V6)
# ==============================================================================
# --- AUTHENTICATION (WEB) ---
@app.post("/api/auth/register")
async def register(request: Request):
    """Register a Web user via Supabase Auth and insert a row into ``users``.

    Reads ``email``, ``password`` and ``full_name`` from the JSON body.

    Returns:
        A dict with ``success`` and a confirmation ``message``.

    Raises:
        HTTPException: 503 when the Supabase client is unavailable; 400 for
            missing credentials, a sign-up that returns no user, or an
            already-registered email; 500 for any other failure.
    """
    if not supabase:
        raise HTTPException(status_code=503, detail="Database service unavailable")
    data = await request.json()
    email = data.get("email")
    password = data.get("password")
    full_name = data.get("full_name")
    if not email or not password:
        raise HTTPException(status_code=400, detail="Vui lòng nhập Email và Mật khẩu")
    try:
        # 1. Create the Auth user.
        auth_res = supabase.auth.sign_up({
            "email": email,
            "password": password,
            "options": {"data": {"full_name": full_name}}
        })
        if not auth_res.user:
            raise HTTPException(status_code=400, detail="Đăng ký thất bại (Auth)")
        # 2. Store the profile in the public ``users`` table.
        user_data = {
            "id": auth_res.user.id,
            "email": email,
            "full_name": full_name,
            "role": "user",
            "created_at": "now()"
        }
        supabase.table("users").insert(user_data).execute()
        return {"success": True, "message": "Đăng ký thành công! Vui lòng kiểm tra email xác nhận."}
    except HTTPException:
        # Keep the deliberate 400 above; without this clause the broad
        # handler below would turn it into a 500.
        raise
    except Exception as e:
        print(f"Register Error: {str(e)}")
        # Map the known Supabase error text to a friendly 400.
        msg = str(e)
        if "User already registered" in msg:
            raise HTTPException(status_code=400, detail="Email này đã được đăng ký.") from e
        raise HTTPException(status_code=500, detail=f"Lỗi đăng ký: {msg}") from e
@app.post("/api/auth/login")
async def login(request: Request):
    """Sign a Web user in with email and password via Supabase Auth.

    Reads ``email`` and ``password`` from the JSON body. On success the
    user's ``role`` and ``full_name`` are looked up in the ``users`` table,
    defaulting to ``"user"`` and ``""`` when no row is found.

    Returns:
        A dict with ``success``, ``access_token`` and a ``user`` summary.

    Raises:
        HTTPException: 503 when the Supabase client is unavailable; 401 when
            sign-in yields no user or any error occurs.
    """
    if not supabase:
        raise HTTPException(status_code=503, detail="Database service unavailable")
    data = await request.json()
    email = data.get("email")
    password = data.get("password")
    try:
        res = supabase.auth.sign_in_with_password({"email": email, "password": password})
        if res.user:
            # Fetch role information from the ``users`` table.
            user_info = supabase.table("users").select("*").eq("id", res.user.id).execute()
            role = "user"
            full_name = ""
            if user_info.data:
                role = user_info.data[0].get("role", "user")
                full_name = user_info.data[0].get("full_name", "")
            return {
                "success": True,
                "access_token": res.session.access_token,
                "user": {
                    "id": res.user.id,
                    "email": res.user.email,
                    "role": role,
                    "full_name": full_name
                }
            }
        raise HTTPException(status_code=401, detail="Email hoặc mật khẩu không đúng")
    except HTTPException:
        # Re-raise as is: passing it through str() below would change the
        # detail to "401: Email hoặc mật khẩu không đúng".
        raise
    except Exception as e:
        raise HTTPException(status_code=401, detail=str(e)) from e
# --- IMAGE PROCESSING UTILS (WEB) ---
async def process_large_image(image: Image.Image, model_name: str, prompt: str, semaphore: asyncio.Semaphore) -> str:
    """Run Gemini Vision on an image, slicing tall images into overlapping strips.

    Images shorter than 2000 px are sent as a single request. Taller images
    are cut into horizontal strips of up to 1500 px, each overlapping the
    previous one by 300 px; the strips are processed concurrently and their
    texts joined with newlines in top-to-bottom order.

    Args:
        image: Source image.
        model_name: Gemini model name passed to ``call_gemini_vision``.
        prompt: Prompt sent with every strip.
        semaphore: Limits how many Gemini calls run at the same time. It is
            applied to every call, including each strip of a tall image.

    Returns:
        The recognized text (strip results joined by ``"\\n"``).
    """
    width, height = image.size

    async def _limited_call(part: Image.Image) -> str:
        # Every Gemini request goes through the semaphore so the caller's
        # concurrency limit also covers the segmented path.
        async with semaphore:
            return await call_gemini_vision(part, model_name, prompt)

    # Small image: one direct request.
    if height < 2000:
        return await _limited_call(image)

    # Slicing configuration.
    segment_height = 1500
    overlap = 300
    segments = []
    for top in range(0, height, segment_height - overlap):
        bottom = min(top + segment_height, height)
        segments.append(image.crop((0, top, width, bottom)))
        # Stop once a strip reaches the bottom edge; a further step would
        # only produce a strip that is fully contained in this one.
        if bottom >= height:
            break

    # gather() preserves input order, so the texts stay top-to-bottom.
    results = await asyncio.gather(*(_limited_call(seg) for seg in segments))
    return "\n".join(results)  # simple concatenation; overlaps are not de-duplicated
async def call_gemini_vision(image: Image.Image, model_name: str, prompt: str) -> str:
    """Send ``prompt`` plus ``image`` to Gemini and return the reply text.

    Best-effort wrapper: any exception is printed and an empty string is
    returned instead of being raised. An empty reply also yields ``""``.
    """
    try:
        genai.configure(api_key=get_random_api_key())
        vision_model = genai.GenerativeModel(model_name)
        reply = await vision_model.generate_content_async([prompt, image])
        return reply.text or ""
    except Exception as err:
        print(f"Gemini Error: {err}")
        return ""
# --- MAIN API: PROCESS IMAGE (WEB) ---
@app.post("/api/process-image")
async def process_image_web(
    file: UploadFile = File(...),
    model: str = Form("gemini-1.5-pro"),
    prompt: str = Form("Hãy chuyển đổi nội dung trong ảnh thành định dạng Markdown LaTeX.")
):
    """Convert an uploaded image to Markdown/LaTeX text with Gemini Vision.

    Args:
        file: Uploaded image (multipart form field).
        model: Gemini model name.
        prompt: Instruction sent along with the image.

    Returns:
        ``{"success": True, "result": <text with normalized LaTeX delimiters>}``.

    Raises:
        HTTPException: 400 when the upload cannot be opened as an image;
            500 for any other failure.
    """
    try:
        contents = await file.read()
        try:
            image = Image.open(io.BytesIO(contents))
        except Exception as e:
            # A bad upload is a client error, matching the 400 the Desktop
            # endpoint returns for an undecodable Base64 image.
            raise HTTPException(status_code=400, detail=f"Lỗi xử lý ảnh: {str(e)}") from e
        # Cap concurrent Gemini calls for this request to avoid rate limits.
        # The semaphore is created per request, so the cap is not shared
        # across requests.
        request_semaphore = asyncio.Semaphore(5)
        result_text = await process_large_image(image, model, prompt, request_semaphore)
        return {"success": True, "result": clean_latex_formulas(result_text)}
    except HTTPException:
        # Preserve deliberate status codes instead of rewrapping them as 500.
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
# --- WORD EXPORT API (PANDOC NATIVE) ---
@app.post("/api/export-docx")
async def export_docx(markdown_text: str = Form(...)):
    """Convert Markdown text to a .docx file with pandoc and return it.

    The document is written to a temporary file, which is removed after the
    response has been sent, or immediately if the conversion fails.

    Args:
        markdown_text: Markdown source (form field).

    Returns:
        A ``FileResponse`` that downloads as ``Ket_qua_HT_MATH.docx``.

    Raises:
        HTTPException: 500 when the conversion fails.
    """
    # Starlette ships with FastAPI; FileResponse accepts a background task
    # that runs once the response body has been sent.
    from starlette.background import BackgroundTask

    def _remove_quietly(path):
        # Best-effort cleanup: a file that is already gone is not an error.
        try:
            os.remove(path)
        except OSError:
            pass

    output_filename = None
    try:
        # delete=False: only the path is needed here; pandoc writes the file.
        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp_file:
            output_filename = tmp_file.name
        # Convert with pypandoc.
        pypandoc.convert_text(
            markdown_text,
            to='docx',
            format='markdown',
            outputfile=output_filename,
            extra_args=['--standalone']
        )
        return FileResponse(
            output_filename,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="Ket_qua_HT_MATH.docx",
            # Delete the temp file after it has been streamed to the client.
            background=BackgroundTask(_remove_quietly, output_filename),
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        # Do not leave the temp file behind when the conversion fails.
        if output_filename is not None:
            _remove_quietly(output_filename)
        raise HTTPException(status_code=500, detail=f"Lỗi xuất Word: {str(e)}") from e
# --- TEST ENDPOINT ---
@app.get("/")
def home():
    """Return a small status payload for smoke-testing the server."""
    # NOTE: this only checks that the name ``pypandoc`` exists at module
    # level, i.e. that the import succeeded; it does not probe the binary.
    pandoc_state = "missing"
    if 'pypandoc' in globals():
        pandoc_state = "detected"
    status_payload = {
        "server": "HT MATH UNIFIED (Web + Desktop)",
        "status": "online",
        "pandoc": pandoc_state,
        "desktop_api_ready": True
    }
    return status_payload
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")