hoangthiencm committed on
Commit
87b1a34
·
verified ·
1 Parent(s): 474d92e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -179
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  Backend API cho HT_MATH_WEB - Chạy trên Hugging Face Spaces (Docker Version)
3
- Phiên bản: 8.3 (Debug Mode: Route Logging & Dynamic 404 Handler)
4
  Tác giả: Hoàng Tấn Thiên
5
  """
6
 
@@ -16,7 +16,7 @@ import uuid
16
  import math
17
  from typing import List, Optional
18
 
19
- from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.responses import JSONResponse, FileResponse
22
  from fastapi.staticfiles import StaticFiles
@@ -33,7 +33,7 @@ except ImportError:
33
  except OSError:
34
  print("CRITICAL WARNING: pandoc binary not found in system path.")
35
 
36
- # --- SUPABASE ---
37
  try:
38
  from supabase import create_client, Client
39
  SUPABASE_AVAILABLE = True
@@ -47,10 +47,10 @@ GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
47
  GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
48
  SUPABASE_URL = os.getenv("SUPABASE_URL", "")
49
  SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
50
- MAX_THREADS = int(os.getenv("MAX_THREADS", "5"))
51
  ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
52
 
53
- # Setup Supabase
54
  supabase = None
55
  if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
56
  try:
@@ -58,8 +58,10 @@ if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
58
  except Exception as e:
59
  print(f"Warning: Không thể kết nối Supabase: {e}")
60
 
61
- app = FastAPI(title="HT_MATH_WEB API", version="8.3")
 
62
 
 
63
  app.add_middleware(
64
  CORSMiddleware,
65
  allow_origins=["*"],
@@ -68,30 +70,33 @@ app.add_middleware(
68
  allow_headers=["*"],
69
  )
70
 
71
- # --- SETUP STATIC FILES ---
72
  os.makedirs("uploads", exist_ok=True)
73
  app.mount("/uploads", StaticFiles(directory="uploads"), name="uploads")
74
 
75
- # --- DEBUG: LOG ROUTES ON STARTUP ---
 
 
 
 
76
  @app.on_event("startup")
77
  async def startup_event():
78
- print("\n" + "="*50)
79
- print(">> SERVER STARTUP: Checking Registered Routes...")
80
- routes = [route.path for route in app.routes]
81
- for route in routes:
82
- print(f" - {route}")
83
- print("="*50 + "\n")
84
-
 
85
  @app.exception_handler(404)
86
  async def not_found_handler(request: Request, exc):
87
- # Dynamic route listing for better debugging
88
- routes = [route.path for route in request.app.routes]
89
  return JSONResponse(
90
  status_code=404,
91
  content={
92
  "detail": f"Route not found: {request.url.path}",
93
- "available_routes": routes,
94
- "suggestion": "Check URL path and HTTP Method (POST/GET)"
95
  }
96
  )
97
 
@@ -113,7 +118,7 @@ class ApiKeyManager:
113
  key_manager = ApiKeyManager(GEMINI_API_KEYS)
114
 
115
  ip_rate_limits = {}
116
- RATE_LIMIT_DURATION = 7
117
 
118
  def check_rate_limit(request: Request):
119
  forwarded = request.headers.get("X-Forwarded-For")
@@ -125,142 +130,46 @@ def check_rate_limit(request: Request):
125
  print(f"[RateLimit] IP {client_ip} requesting too fast.")
126
  ip_rate_limits[client_ip] = now
127
 
128
- # ===== CHIẾN LƯỢC PROMPT KÉP =====
129
-
130
  STRONG_PROMPT_LATEX = r"""Role: Chuyên viên nhập liệu Toán học chính xác cao.
131
  Task: Số hóa lại tài liệu trong ảnh thành Markdown/LaTeX để lưu trữ.
 
132
 
133
- ⚠️ YÊU CẦU TUYỆT ĐỐI (CRITICAL):
134
- 1. **TÍNH TOÀN VẸN**: Trích xuất KHÔNG BỎ SÓT bất kỳ từ ngữ, con số, hay câu hỏi nào. Nếu ảnh có 10 câu, phải xuất đủ 10 câu.
135
- 2. **KHÔNG TÓM TẮT**: Tuyệt đối không được tóm tắt hay làm gọn nội dung. Viết lại y hệt bản gốc.
136
- 3. **ĐỊNH DẠNG**:
137
- - Công thức toán để trong $...$ hoặc $$...$$
138
- - Giữ nguyên số thứ tự câu (Câu 1, Bài 2...).
139
- - In đậm tiêu đề.
140
 
141
- Đây tác vụ nhập liệu kỹ thuật số, hãy làm việc như một máy quét."""
 
142
 
143
- SAFE_PROMPT_LATEX = r"""Role: Trợ hỗ trợ người khiếm thị tiếp cận tài liệu.
144
- Task: Mô tả lại nội dung văn bản và toán học trong ảnh một cách chi tiết để người dùng có thể đọc được.
145
 
146
- Yêu cầu:
147
- - Chuyển đổi toàn bộ nội dung sang dạng văn bản và LaTeX.
148
- - Cố gắng giữ nguyên ý nghĩa và các con số của bài toán.
149
- - Trình bày rõ ràng, dễ đọc.
150
- """
151
 
152
  # ===== STITCHING ALGORITHM =====
153
  def stitch_text(text_a: str, text_b: str, min_overlap_chars: int = 20) -> str:
154
  if not text_a: return text_b
155
  if not text_b: return text_a
156
-
157
  a_lines = text_a.splitlines()
158
  b_lines = text_b.splitlines()
159
-
160
  scan_window = min(len(a_lines), len(b_lines), 30)
161
-
162
  best_overlap_idx = 0
163
-
164
  for i in range(scan_window, 0, -1):
165
  tail_a = "\n".join(a_lines[-i:]).strip()
166
  head_b = "\n".join(b_lines[:i]).strip()
167
-
168
  if len(tail_a) >= min_overlap_chars and tail_a == head_b:
169
  best_overlap_idx = i
170
  break
171
-
172
  if best_overlap_idx > 0:
173
  return text_a + "\n" + "\n".join(b_lines[best_overlap_idx:])
174
  else:
175
  return text_a + "\n\n" + text_b
176
 
177
- # ===== HELPER FUNCTIONS =====
178
- def clean_latex_formulas(text: str) -> str:
179
- return re.sub(r'\$\s+(.*?)\s+\$', lambda m: f'${m.group(1).strip()}$', text)
180
-
181
- def hash_password(password: str) -> str:
182
- return hashlib.sha256(password.encode()).hexdigest()
183
-
184
- def verify_password(password: str, hashed: str) -> bool:
185
- return hash_password(password) == hashed
186
-
187
- # ===== API ENDPOINTS =====
188
-
189
- @app.get("/")
190
- @app.get("/health")
191
- async def root():
192
- pandoc_status = "Not Found"
193
- try:
194
- pandoc_status = pypandoc.get_pandoc_version()
195
- except:
196
- pass
197
- return {
198
- "status": "ok",
199
- "service": "HT_MATH_WEB API v8.3 (Debug Mode)",
200
- "keys_loaded": key_manager.get_key_count(),
201
- "pandoc_version": pandoc_status
202
- }
203
-
204
- @app.get("/api/models")
205
- async def get_models():
206
- return {"models": GEMINI_MODELS}
207
-
208
- # --- AUTH API ---
209
- @app.post("/api/register")
210
- async def register(email: str = Form(...), password: str = Form(...)):
211
- if not supabase: raise HTTPException(status_code=500, detail="DB Error")
212
- res = supabase.table("users").select("email").eq("email", email).execute()
213
- if res.data: raise HTTPException(status_code=400, detail="Email tồn tại")
214
- user_data = {"email": email, "password": hash_password(password), "status": "pending", "created_at": time.strftime("%Y-%m-%d %H:%M:%S")}
215
- supabase.table("users").insert(user_data).execute()
216
- return {"success": True, "message": "Đăng ký thành công, chờ duyệt."}
217
-
218
- @app.post("/api/login")
219
- async def login(request: Request, email: str = Form(...), password: str = Form(...)):
220
- if not supabase: raise HTTPException(status_code=500, detail="DB Error")
221
- res = supabase.table("users").select("*").eq("email", email).execute()
222
- if not res.data: raise HTTPException(status_code=401, detail="Sai email/pass")
223
- user = res.data[0]
224
- if not verify_password(password, user["password"]): raise HTTPException(status_code=401, detail="Sai email/pass")
225
- if user.get("status") != "active": raise HTTPException(status_code=403, detail="Tài khoản chưa kích hoạt")
226
- token = secrets.token_urlsafe(32)
227
- try: supabase.table("sessions").delete().eq("email", email).execute()
228
- except: pass
229
- supabase.table("sessions").insert({"email": email, "token": token, "last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).execute()
230
- return {"success": True, "token": token, "email": email}
231
-
232
- @app.post("/api/check-session")
233
- async def check_session(email: str = Form(...), token: str = Form(...)):
234
- if not supabase: raise HTTPException(status_code=500, detail="DB Error")
235
- res = supabase.table("sessions").select("token").eq("email", email).execute()
236
- if not res.data or res.data[0]['token'] != token: raise HTTPException(status_code=401, detail="Session expired")
237
- supabase.table("sessions").update({"last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).eq("email", email).execute()
238
- return {"status": "valid"}
239
-
240
- @app.post("/api/logout")
241
- async def logout(request: Request):
242
- try:
243
- data = await request.json()
244
- email = data.get("email")
245
- if email and supabase: supabase.table("sessions").delete().eq("email", email).execute()
246
- except: pass
247
- return {"status": "success"}
248
-
249
- @app.post("/api/upload-image")
250
- async def upload_image(file: UploadFile = File(...)):
251
- try:
252
- file_ext = os.path.splitext(file.filename)[1] or ".png"
253
- file_name = f"{uuid.uuid4().hex}{file_ext}"
254
- file_path = f"uploads/{file_name}"
255
- with open(file_path, "wb") as f: f.write(await file.read())
256
- return {"url": file_path}
257
- except Exception as e: raise HTTPException(status_code=500, detail=str(e))
258
-
259
- # --- CORE CONVERT LOGIC ---
260
 
261
  async def process_image_with_gemini(image: Image.Image, model_id: str, prompt_mode: str, max_retries: int = 3) -> str:
262
- current_prompt = STRONG_PROMPT_LATEX if prompt_mode == "latex" else "Trích xuất văn bản chính xác."
263
-
264
  safety_settings = [
265
  {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
266
  {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
@@ -274,47 +183,34 @@ async def process_image_with_gemini(image: Image.Image, model_id: str, prompt_mo
274
  api_key = key_manager.get_next_key()
275
  if not api_key: raise ValueError("No API Key")
276
  genai.configure(api_key=api_key)
277
-
278
  model = genai.GenerativeModel(model_id, generation_config=generation_config)
279
 
280
- print(f"[Gemini] Sending request (Attempt {attempt+1})...")
281
  response = model.generate_content([current_prompt, image], safety_settings=safety_settings)
282
 
283
  if response.candidates:
284
  candidate = response.candidates[0]
285
  if candidate.content and candidate.content.parts:
286
- text = response.text.strip()
287
- if len(text) > 10:
288
- return text
289
 
290
  finish_reason = candidate.finish_reason
291
  print(f"[Gemini] Blocked. Reason: {finish_reason}")
292
 
293
  if finish_reason == 4 and current_prompt == STRONG_PROMPT_LATEX:
294
- print("[Gemini] Copyright Triggered! Switching to SAFE_PROMPT...")
295
  current_prompt = SAFE_PROMPT_LATEX
296
- continue
297
 
298
- if finish_reason == 4:
299
- return "\n> *[Nội dung bị ẩn do bản quyền (Đã thử mọi cách)]*\n"
300
- if finish_reason == 3:
301
- return "\n> *[Nội dung bị ẩn do Safety Filter]*\n"
302
-
303
- else:
304
- print(f"[Gemini] No candidates returned. Feedback: {response.prompt_feedback}")
305
-
306
  except Exception as e:
307
  print(f"[Gemini] Error attempt {attempt}: {str(e)}")
308
- if "429" in str(e):
309
- time.sleep(2)
310
- continue
311
 
312
- return ""
313
 
314
  async def process_large_image(image: Image.Image, model: str, prompt_mode: str, semaphore: asyncio.Semaphore) -> str:
315
  CHUNK_HEIGHT = 1536
316
  OVERLAP_HEIGHT = 300
317
-
318
  width, height = image.size
319
 
320
  if height <= CHUNK_HEIGHT:
@@ -331,7 +227,7 @@ async def process_large_image(image: Image.Image, model: str, prompt_mode: str,
331
  if bottom == height: break
332
  y += (CHUNK_HEIGHT - OVERLAP_HEIGHT)
333
 
334
- print(f"[Split] Image height {height}px -> {len(chunks)} chunks (Aggressive Splitting).")
335
 
336
  async def process_chunk(chunk_img, index):
337
  async with semaphore:
@@ -340,17 +236,90 @@ async def process_large_image(image: Image.Image, model: str, prompt_mode: str,
340
 
341
  tasks = [process_chunk(chunk, i) for i, chunk in enumerate(chunks)]
342
  chunk_results = await asyncio.gather(*tasks)
343
-
344
  chunk_results.sort(key=lambda x: x[0])
345
  ordered_texts = [text for _, text in chunk_results]
346
 
347
  final_text = ordered_texts[0]
348
  for i in range(1, len(ordered_texts)):
349
  final_text = stitch_text(final_text, ordered_texts[i], min_overlap_chars=20)
350
-
351
  return final_text
352
 
353
- @app.post("/api/convert")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  async def convert_file(
355
  request: Request,
356
  file: UploadFile = File(...),
@@ -358,15 +327,12 @@ async def convert_file(
358
  mode: str = Form("latex")
359
  ):
360
  check_rate_limit(request)
361
- if key_manager.get_key_count() == 0:
362
- raise HTTPException(status_code=500, detail="Chưa cấu hình API Key")
363
 
364
  try:
365
  file_content = await file.read()
366
  file_ext = os.path.splitext(file.filename)[1].lower()
367
-
368
  global_semaphore = asyncio.Semaphore(MAX_THREADS)
369
-
370
  results = []
371
 
372
  if file_ext == ".pdf":
@@ -374,8 +340,7 @@ async def convert_file(
374
  async def process_page_wrapper(page, idx):
375
  pix = page.get_pixmap(dpi=300)
376
  img = Image.open(io.BytesIO(pix.tobytes("png")))
377
- text = await process_large_image(img, model, mode, global_semaphore)
378
- return idx, text
379
 
380
  tasks = [process_page_wrapper(doc[i], i) for i in range(len(doc))]
381
  page_results = await asyncio.gather(*tasks)
@@ -384,8 +349,7 @@ async def convert_file(
384
 
385
  elif file_ext in [".png", ".jpg", ".jpeg", ".bmp"]:
386
  img = Image.open(io.BytesIO(file_content))
387
- text = await process_large_image(img, model, mode, global_semaphore)
388
- results.append(text)
389
  else:
390
  raise HTTPException(status_code=400, detail="Định dạng file không hỗ trợ")
391
 
@@ -393,34 +357,22 @@ async def convert_file(
393
  return {"success": True, "result": clean_latex_formulas(final_text)}
394
 
395
  except Exception as e:
396
- import traceback
397
- traceback.print_exc()
398
  raise HTTPException(status_code=500, detail=str(e))
399
 
400
- @app.post("/api/export-docx")
401
  async def export_docx(markdown_text: str = Form(...)):
402
  try:
403
  with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp_file:
404
  output_filename = tmp_file.name
405
-
406
- pypandoc.convert_text(
407
- markdown_text,
408
- to='docx',
409
- format='markdown',
410
- outputfile=output_filename,
411
- extra_args=['--standalone']
412
- )
413
-
414
- return FileResponse(
415
- output_filename,
416
- media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
417
- filename="Ket_qua_HT_MATH_Pandoc.docx"
418
- )
419
  except Exception as e:
420
- import traceback
421
- traceback.print_exc()
422
  raise HTTPException(status_code=500, detail=f"Lỗi xuất Word: {str(e)}")
423
 
 
 
 
424
  if __name__ == "__main__":
425
  import uvicorn
426
  uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))
 
1
  """
2
  Backend API cho HT_MATH_WEB - Chạy trên Hugging Face Spaces (Docker Version)
3
+ Phiên bản: 8.4 (Refactor with APIRouter & Deep Debugging)
4
  Tác giả: Hoàng Tấn Thiên
5
  """
6
 
 
16
  import math
17
  from typing import List, Optional
18
 
19
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request, APIRouter
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.responses import JSONResponse, FileResponse
22
  from fastapi.staticfiles import StaticFiles
 
33
  except OSError:
34
  print("CRITICAL WARNING: pandoc binary not found in system path.")
35
 
36
+ # --- SUPABASE SETUP ---
37
  try:
38
  from supabase import create_client, Client
39
  SUPABASE_AVAILABLE = True
 
47
  GEMINI_MODELS = os.getenv("GEMINI_MODELS", "gemini-2.5-flash,gemini-1.5-pro").split(",")
48
  SUPABASE_URL = os.getenv("SUPABASE_URL", "")
49
  SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
50
+ MAX_THREADS = int(os.getenv("MAX_THREADS", "5"))
51
  ADMIN_SECRET_KEY = os.getenv("ADMIN_SECRET_KEY", "admin123")
52
 
53
+ # Setup Supabase Client
54
  supabase = None
55
  if SUPABASE_AVAILABLE and SUPABASE_URL and SUPABASE_KEY:
56
  try:
 
58
  except Exception as e:
59
  print(f"Warning: Không thể kết nối Supabase: {e}")
60
 
61
+ # ===== KHỞI TẠO APP =====
62
+ app = FastAPI(title="HT_MATH_WEB API", version="8.4")
63
 
64
+ # CORS Middleware
65
  app.add_middleware(
66
  CORSMiddleware,
67
  allow_origins=["*"],
 
70
  allow_headers=["*"],
71
  )
72
 
73
+ # Static Files
74
  os.makedirs("uploads", exist_ok=True)
75
  app.mount("/uploads", StaticFiles(directory="uploads"), name="uploads")
76
 
77
+ # ===== ROUTER SETUP (QUAN TRỌNG) =====
78
+ # Gom nhóm tất cả API vào router để đảm bảo prefix luôn đúng
79
+ api_router = APIRouter(prefix="/api")
80
+
81
+ # ===== LOGGING STARTUP =====
82
  @app.on_event("startup")
83
  async def startup_event():
84
+ print("\n" + "="*60)
85
+ print(">> SERVER V8.4 STARTING UP...")
86
+ print(">> REGISTERED ROUTES:")
87
+ for route in app.routes:
88
+ print(f" path: {route.path} | name: {route.name} | methods: {route.methods}")
89
+ print("="*60 + "\n")
90
+
91
+ # ===== EXCEPTION HANDLERS =====
92
  @app.exception_handler(404)
93
  async def not_found_handler(request: Request, exc):
 
 
94
  return JSONResponse(
95
  status_code=404,
96
  content={
97
  "detail": f"Route not found: {request.url.path}",
98
+ "method": request.method,
99
+ "available_routes": [r.path for r in request.app.routes]
100
  }
101
  )
102
 
 
118
  key_manager = ApiKeyManager(GEMINI_API_KEYS)
119
 
120
  ip_rate_limits = {}
121
+ RATE_LIMIT_DURATION = 7
122
 
123
  def check_rate_limit(request: Request):
124
  forwarded = request.headers.get("X-Forwarded-For")
 
130
  print(f"[RateLimit] IP {client_ip} requesting too fast.")
131
  ip_rate_limits[client_ip] = now
132
 
133
+ # ===== HELPER FUNCTIONS =====
 
134
  STRONG_PROMPT_LATEX = r"""Role: Chuyên viên nhập liệu Toán học chính xác cao.
135
  Task: Số hóa lại tài liệu trong ảnh thành Markdown/LaTeX để lưu trữ.
136
+ ⚠️ YÊU CẦU: Trích xuất KHÔNG BỎ SÓT. Giữ nguyên định dạng gốc."""
137
 
138
+ SAFE_PROMPT_LATEX = r"""Role: Trợ hỗ trợ người khiếm thị.
139
+ Task: tả lại nội dung văn bản toán học trong ảnh."""
 
 
 
 
 
140
 
141
+ def clean_latex_formulas(text: str) -> str:
142
+ return re.sub(r'\$\s+(.*?)\s+\$', lambda m: f'${m.group(1).strip()}$', text)
143
 
144
+ def hash_password(password: str) -> str:
145
+ return hashlib.sha256(password.encode()).hexdigest()
146
 
147
+ def verify_password(password: str, hashed: str) -> bool:
148
+ return hash_password(password) == hashed
 
 
 
149
 
150
  # ===== STITCHING ALGORITHM =====
151
  def stitch_text(text_a: str, text_b: str, min_overlap_chars: int = 20) -> str:
152
  if not text_a: return text_b
153
  if not text_b: return text_a
 
154
  a_lines = text_a.splitlines()
155
  b_lines = text_b.splitlines()
 
156
  scan_window = min(len(a_lines), len(b_lines), 30)
 
157
  best_overlap_idx = 0
 
158
  for i in range(scan_window, 0, -1):
159
  tail_a = "\n".join(a_lines[-i:]).strip()
160
  head_b = "\n".join(b_lines[:i]).strip()
 
161
  if len(tail_a) >= min_overlap_chars and tail_a == head_b:
162
  best_overlap_idx = i
163
  break
 
164
  if best_overlap_idx > 0:
165
  return text_a + "\n" + "\n".join(b_lines[best_overlap_idx:])
166
  else:
167
  return text_a + "\n\n" + text_b
168
 
169
+ # ===== CORE AI LOGIC =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  async def process_image_with_gemini(image: Image.Image, model_id: str, prompt_mode: str, max_retries: int = 3) -> str:
172
+ current_prompt = STRONG_PROMPT_LATEX if prompt_mode == "latex" else "Trích xuất văn bản."
 
173
  safety_settings = [
174
  {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
175
  {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
 
183
  api_key = key_manager.get_next_key()
184
  if not api_key: raise ValueError("No API Key")
185
  genai.configure(api_key=api_key)
 
186
  model = genai.GenerativeModel(model_id, generation_config=generation_config)
187
 
 
188
  response = model.generate_content([current_prompt, image], safety_settings=safety_settings)
189
 
190
  if response.candidates:
191
  candidate = response.candidates[0]
192
  if candidate.content and candidate.content.parts:
193
+ return response.text.strip()
 
 
194
 
195
  finish_reason = candidate.finish_reason
196
  print(f"[Gemini] Blocked. Reason: {finish_reason}")
197
 
198
  if finish_reason == 4 and current_prompt == STRONG_PROMPT_LATEX:
199
+ print("[Gemini] Switching to SAFE_PROMPT...")
200
  current_prompt = SAFE_PROMPT_LATEX
201
+ continue
202
 
203
+ if finish_reason == 4: return "\n> *[Nội dung bị ẩn do bản quyền]*\n"
204
+ if finish_reason == 3: return "\n> *[Nội dung bị ẩn do Safety Filter]*\n"
 
 
 
 
 
 
205
  except Exception as e:
206
  print(f"[Gemini] Error attempt {attempt}: {str(e)}")
207
+ if "429" in str(e): time.sleep(2); continue
 
 
208
 
209
+ return ""
210
 
211
  async def process_large_image(image: Image.Image, model: str, prompt_mode: str, semaphore: asyncio.Semaphore) -> str:
212
  CHUNK_HEIGHT = 1536
213
  OVERLAP_HEIGHT = 300
 
214
  width, height = image.size
215
 
216
  if height <= CHUNK_HEIGHT:
 
227
  if bottom == height: break
228
  y += (CHUNK_HEIGHT - OVERLAP_HEIGHT)
229
 
230
+ print(f"[Split] Height {height}px -> {len(chunks)} chunks.")
231
 
232
  async def process_chunk(chunk_img, index):
233
  async with semaphore:
 
236
 
237
  tasks = [process_chunk(chunk, i) for i, chunk in enumerate(chunks)]
238
  chunk_results = await asyncio.gather(*tasks)
 
239
  chunk_results.sort(key=lambda x: x[0])
240
  ordered_texts = [text for _, text in chunk_results]
241
 
242
  final_text = ordered_texts[0]
243
  for i in range(1, len(ordered_texts)):
244
  final_text = stitch_text(final_text, ordered_texts[i], min_overlap_chars=20)
 
245
  return final_text
246
 
247
+ # ===== ROUTES DEFINITION =====
248
+
249
+ @app.get("/")
250
+ @app.get("/health")
251
+ async def root():
252
+ return {
253
+ "status": "ok",
254
+ "service": "HT_MATH_WEB API v8.4",
255
+ "keys": key_manager.get_key_count()
256
+ }
257
+
258
+ # --- DEBUG ROUTE ---
259
+ @app.get("/debug/routes")
260
+ async def debug_routes():
261
+ """Liệt kê tất cả route đang hoạt động để kiểm tra lỗi 404"""
262
+ return {
263
+ "routes": [{"path": route.path, "name": route.name, "methods": list(route.methods)} for route in app.routes]
264
+ }
265
+
266
+ # --- API ROUTES (Được gắn vào api_router) ---
267
+
268
+ @api_router.get("/models")
269
+ async def get_models():
270
+ return {"models": GEMINI_MODELS}
271
+
272
+ @api_router.post("/register")
273
+ async def register(email: str = Form(...), password: str = Form(...)):
274
+ if not supabase: raise HTTPException(status_code=500, detail="DB Error")
275
+ res = supabase.table("users").select("email").eq("email", email).execute()
276
+ if res.data: raise HTTPException(status_code=400, detail="Email tồn tại")
277
+ user_data = {"email": email, "password": hash_password(password), "status": "pending", "created_at": time.strftime("%Y-%m-%d %H:%M:%S")}
278
+ supabase.table("users").insert(user_data).execute()
279
+ return {"success": True}
280
+
281
+ @api_router.post("/login")
282
+ async def login(email: str = Form(...), password: str = Form(...)):
283
+ if not supabase: raise HTTPException(status_code=500, detail="DB Error")
284
+ res = supabase.table("users").select("*").eq("email", email).execute()
285
+ if not res.data: raise HTTPException(status_code=401, detail="Sai email/pass")
286
+ user = res.data[0]
287
+ if not verify_password(password, user["password"]): raise HTTPException(status_code=401, detail="Sai email/pass")
288
+ if user.get("status") != "active": raise HTTPException(status_code=403, detail="Tài khoản chưa kích hoạt")
289
+ token = secrets.token_urlsafe(32)
290
+ try: supabase.table("sessions").delete().eq("email", email).execute()
291
+ except: pass
292
+ supabase.table("sessions").insert({"email": email, "token": token, "last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).execute()
293
+ return {"success": True, "token": token, "email": email}
294
+
295
+ @api_router.post("/check-session")
296
+ async def check_session(email: str = Form(...), token: str = Form(...)):
297
+ if not supabase: raise HTTPException(status_code=500, detail="DB Error")
298
+ res = supabase.table("sessions").select("token").eq("email", email).execute()
299
+ if not res.data or res.data[0]['token'] != token: raise HTTPException(status_code=401, detail="Session expired")
300
+ supabase.table("sessions").update({"last_seen": time.strftime("%Y-%m-%d %H:%M:%S")}).eq("email", email).execute()
301
+ return {"status": "valid"}
302
+
303
+ @api_router.post("/logout")
304
+ async def logout(request: Request):
305
+ try:
306
+ data = await request.json()
307
+ email = data.get("email")
308
+ if email and supabase: supabase.table("sessions").delete().eq("email", email).execute()
309
+ except: pass
310
+ return {"status": "success"}
311
+
312
+ @api_router.post("/upload-image")
313
+ async def upload_image(file: UploadFile = File(...)):
314
+ try:
315
+ file_ext = os.path.splitext(file.filename)[1] or ".png"
316
+ file_name = f"{uuid.uuid4().hex}{file_ext}"
317
+ file_path = f"uploads/{file_name}"
318
+ with open(file_path, "wb") as f: f.write(await file.read())
319
+ return {"url": file_path}
320
+ except Exception as e: raise HTTPException(status_code=500, detail=str(e))
321
+
322
+ @api_router.post("/convert")
323
  async def convert_file(
324
  request: Request,
325
  file: UploadFile = File(...),
 
327
  mode: str = Form("latex")
328
  ):
329
  check_rate_limit(request)
330
+ if key_manager.get_key_count() == 0: raise HTTPException(status_code=500, detail="Chưa cấu hình API Key")
 
331
 
332
  try:
333
  file_content = await file.read()
334
  file_ext = os.path.splitext(file.filename)[1].lower()
 
335
  global_semaphore = asyncio.Semaphore(MAX_THREADS)
 
336
  results = []
337
 
338
  if file_ext == ".pdf":
 
340
  async def process_page_wrapper(page, idx):
341
  pix = page.get_pixmap(dpi=300)
342
  img = Image.open(io.BytesIO(pix.tobytes("png")))
343
+ return idx, await process_large_image(img, model, mode, global_semaphore)
 
344
 
345
  tasks = [process_page_wrapper(doc[i], i) for i in range(len(doc))]
346
  page_results = await asyncio.gather(*tasks)
 
349
 
350
  elif file_ext in [".png", ".jpg", ".jpeg", ".bmp"]:
351
  img = Image.open(io.BytesIO(file_content))
352
+ results.append(await process_large_image(img, model, mode, global_semaphore))
 
353
  else:
354
  raise HTTPException(status_code=400, detail="Định dạng file không hỗ trợ")
355
 
 
357
  return {"success": True, "result": clean_latex_formulas(final_text)}
358
 
359
  except Exception as e:
360
+ import traceback; traceback.print_exc()
 
361
  raise HTTPException(status_code=500, detail=str(e))
362
 
363
+ @api_router.post("/export-docx")
364
  async def export_docx(markdown_text: str = Form(...)):
365
  try:
366
  with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp_file:
367
  output_filename = tmp_file.name
368
+ pypandoc.convert_text(markdown_text, to='docx', format='markdown', outputfile=output_filename, extra_args=['--standalone'])
369
+ return FileResponse(output_filename, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", filename="Ket_qua.docx")
 
 
 
 
 
 
 
 
 
 
 
 
370
  except Exception as e:
 
 
371
  raise HTTPException(status_code=500, detail=f"Lỗi xuất Word: {str(e)}")
372
 
373
+ # ===== ĐĂNG KÝ ROUTER VÀO APP =====
374
+ app.include_router(api_router)
375
+
376
  if __name__ == "__main__":
377
  import uvicorn
378
  uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))