Spaces:
Running
Running
Nguyễn Quốc Vỹ committed on
Commit ·
98f87e1
1
Parent(s): 4b842c3
Chuyển từ dữ liệu tạm sang runtime để dữ liệu không bị mất
Browse files- backend/auth.py +3 -0
- backend/db.py +13 -4
- backend/db_sync.py +113 -0
- scripts/bootstrap_space_data.py +9 -1
backend/auth.py
CHANGED
|
@@ -11,6 +11,7 @@ import sqlite3
|
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
from backend.admin_config import is_bootstrap_admin_email
|
| 13 |
from backend.runtime_paths import DB_PATH
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def _get_connection():
|
|
@@ -80,6 +81,7 @@ def register_user(email: str, password: str, display_name: str = None) -> dict:
|
|
| 80 |
"role": role,
|
| 81 |
}
|
| 82 |
print(f"[AUTH] ✅ Đăng ký thành công: {email}")
|
|
|
|
| 83 |
return {"success": True, "message": "Đăng ký thành công!", "user": user}
|
| 84 |
|
| 85 |
except Exception as e:
|
|
@@ -209,6 +211,7 @@ def reset_password(email: str, token: str, new_password: str) -> dict:
|
|
| 209 |
conn.commit()
|
| 210 |
|
| 211 |
print(f"[AUTH] ✅ Password reset for: {email}")
|
|
|
|
| 212 |
return {"success": True, "message": "Đặt lại mật khẩu thành công!"}
|
| 213 |
|
| 214 |
finally:
|
|
|
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
from backend.admin_config import is_bootstrap_admin_email
|
| 13 |
from backend.runtime_paths import DB_PATH
|
| 14 |
+
from backend.db_sync import schedule_sync as _schedule_sync
|
| 15 |
|
| 16 |
|
| 17 |
def _get_connection():
|
|
|
|
| 81 |
"role": role,
|
| 82 |
}
|
| 83 |
print(f"[AUTH] ✅ Đăng ký thành công: {email}")
|
| 84 |
+
_schedule_sync()
|
| 85 |
return {"success": True, "message": "Đăng ký thành công!", "user": user}
|
| 86 |
|
| 87 |
except Exception as e:
|
|
|
|
| 211 |
conn.commit()
|
| 212 |
|
| 213 |
print(f"[AUTH] ✅ Password reset for: {email}")
|
| 214 |
+
_schedule_sync()
|
| 215 |
return {"success": True, "message": "Đặt lại mật khẩu thành công!"}
|
| 216 |
|
| 217 |
finally:
|
backend/db.py
CHANGED
|
@@ -9,6 +9,7 @@ import time
|
|
| 9 |
from datetime import datetime
|
| 10 |
from backend.admin_config import get_admin_emails
|
| 11 |
from backend.runtime_paths import DB_PATH
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def _get_connection():
|
|
@@ -249,6 +250,7 @@ def save_conversation(ma_cuoc_tro_chuyen: str, tieu_de: str = "Cuộc trò chuy
|
|
| 249 |
conn.commit()
|
| 250 |
finally:
|
| 251 |
conn.close()
|
|
|
|
| 252 |
|
| 253 |
|
| 254 |
def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
|
|
@@ -268,9 +270,11 @@ def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
|
|
| 268 |
),
|
| 269 |
)
|
| 270 |
conn.commit()
|
| 271 |
-
|
| 272 |
finally:
|
| 273 |
conn.close()
|
|
|
|
|
|
|
| 274 |
|
| 275 |
|
| 276 |
def save_message_sources(ma_tin_nhan: int, sources: list):
|
|
@@ -457,6 +461,7 @@ def delete_conversation(ma_cuoc_tro_chuyen: str):
|
|
| 457 |
print(f"[DB] 🗑️ Deleted conversation: {ma_cuoc_tro_chuyen}")
|
| 458 |
finally:
|
| 459 |
conn.close()
|
|
|
|
| 460 |
|
| 461 |
|
| 462 |
# ======================== TÀI LIỆU (GỘP user + admin) ========================
|
|
@@ -532,6 +537,7 @@ def save_phan_hoi(ma_tin_nhan: int, loai: str, ma_nguoi_dung: str = None,
|
|
| 532 |
conn.commit()
|
| 533 |
finally:
|
| 534 |
conn.close()
|
|
|
|
| 535 |
|
| 536 |
|
| 537 |
def load_phan_hoi_by_tin_nhan(ma_tin_nhan: int) -> list:
|
|
@@ -601,9 +607,10 @@ def update_user_role(ma_nguoi_dung: str, vai_tro: str) -> bool:
|
|
| 601 |
(vai_tro, ma_nguoi_dung),
|
| 602 |
)
|
| 603 |
conn.commit()
|
| 604 |
-
return True
|
| 605 |
finally:
|
| 606 |
conn.close()
|
|
|
|
|
|
|
| 607 |
|
| 608 |
|
| 609 |
def lock_user(ma_nguoi_dung: str) -> bool:
|
|
@@ -615,9 +622,10 @@ def lock_user(ma_nguoi_dung: str) -> bool:
|
|
| 615 |
(ma_nguoi_dung,),
|
| 616 |
)
|
| 617 |
conn.commit()
|
| 618 |
-
return True
|
| 619 |
finally:
|
| 620 |
conn.close()
|
|
|
|
|
|
|
| 621 |
|
| 622 |
|
| 623 |
def unlock_user(ma_nguoi_dung: str) -> bool:
|
|
@@ -629,9 +637,10 @@ def unlock_user(ma_nguoi_dung: str) -> bool:
|
|
| 629 |
(ma_nguoi_dung,),
|
| 630 |
)
|
| 631 |
conn.commit()
|
| 632 |
-
return True
|
| 633 |
finally:
|
| 634 |
conn.close()
|
|
|
|
|
|
|
| 635 |
|
| 636 |
|
| 637 |
# ======================== ADMIN: LỊCH SỬ HỎI ĐÁP TOÀN HỆ THỐNG ========================
|
|
|
|
| 9 |
from datetime import datetime
|
| 10 |
from backend.admin_config import get_admin_emails
|
| 11 |
from backend.runtime_paths import DB_PATH
|
| 12 |
+
from backend.db_sync import schedule_sync as _schedule_sync
|
| 13 |
|
| 14 |
|
| 15 |
def _get_connection():
|
|
|
|
| 250 |
conn.commit()
|
| 251 |
finally:
|
| 252 |
conn.close()
|
| 253 |
+
_schedule_sync()
|
| 254 |
|
| 255 |
|
| 256 |
def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
|
|
|
|
| 270 |
),
|
| 271 |
)
|
| 272 |
conn.commit()
|
| 273 |
+
lastrowid = cursor.lastrowid
|
| 274 |
finally:
|
| 275 |
conn.close()
|
| 276 |
+
_schedule_sync()
|
| 277 |
+
return lastrowid
|
| 278 |
|
| 279 |
|
| 280 |
def save_message_sources(ma_tin_nhan: int, sources: list):
|
|
|
|
| 461 |
print(f"[DB] 🗑️ Deleted conversation: {ma_cuoc_tro_chuyen}")
|
| 462 |
finally:
|
| 463 |
conn.close()
|
| 464 |
+
_schedule_sync()
|
| 465 |
|
| 466 |
|
| 467 |
# ======================== TÀI LIỆU (GỘP user + admin) ========================
|
|
|
|
| 537 |
conn.commit()
|
| 538 |
finally:
|
| 539 |
conn.close()
|
| 540 |
+
_schedule_sync()
|
| 541 |
|
| 542 |
|
| 543 |
def load_phan_hoi_by_tin_nhan(ma_tin_nhan: int) -> list:
|
|
|
|
| 607 |
(vai_tro, ma_nguoi_dung),
|
| 608 |
)
|
| 609 |
conn.commit()
|
|
|
|
| 610 |
finally:
|
| 611 |
conn.close()
|
| 612 |
+
_schedule_sync()
|
| 613 |
+
return True
|
| 614 |
|
| 615 |
|
| 616 |
def lock_user(ma_nguoi_dung: str) -> bool:
|
|
|
|
| 622 |
(ma_nguoi_dung,),
|
| 623 |
)
|
| 624 |
conn.commit()
|
|
|
|
| 625 |
finally:
|
| 626 |
conn.close()
|
| 627 |
+
_schedule_sync()
|
| 628 |
+
return True
|
| 629 |
|
| 630 |
|
| 631 |
def unlock_user(ma_nguoi_dung: str) -> bool:
|
|
|
|
| 637 |
(ma_nguoi_dung,),
|
| 638 |
)
|
| 639 |
conn.commit()
|
|
|
|
| 640 |
finally:
|
| 641 |
conn.close()
|
| 642 |
+
_schedule_sync()
|
| 643 |
+
return True
|
| 644 |
|
| 645 |
|
| 646 |
# ======================== ADMIN: LỊCH SỬ HỎI ĐÁP TOÀN HỆ THỐNG ========================
|
backend/db_sync.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Đồng bộ chatbot.db lên Hugging Face Dataset repo để dữ liệu bền vững.
|
| 3 |
+
|
| 4 |
+
Chỉ hoạt động khi chạy trên HF Space và có cấu hình:
|
| 5 |
+
- HF_DATASET_REPO
|
| 6 |
+
- HF_TOKEN
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import shutil
|
| 13 |
+
import sqlite3
|
| 14 |
+
import threading
|
| 15 |
+
import time
|
| 16 |
+
|
| 17 |
+
_lock = threading.Lock()
|
| 18 |
+
_last_sync: float = 0
|
| 19 |
+
_MIN_INTERVAL = 30 # tối thiểu 30 giây giữa 2 lần sync
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _get_config() -> tuple[str, str, str] | None:
    """Read the dataset-sync settings from the environment.

    Environment variables consulted:
      - ``HF_DATASET_REPO``: target dataset repo id (required).
      - ``HF_TOKEN``, falling back to ``HUGGINGFACE_TOKEN``: access token
        (required).
      - ``HF_DATASET_REVISION``: branch/revision, defaults to ``"main"``.

    All values are stripped of surrounding whitespace, so a secret pasted
    with a trailing newline still works and a blank ``HF_TOKEN`` does not
    mask a valid ``HUGGINGFACE_TOKEN``.

    Returns:
        ``(repo, token, revision)`` when both repo and token are set,
        otherwise ``None`` (sync is disabled).
    """
    repo = os.getenv("HF_DATASET_REPO", "").strip()
    token = (
        os.getenv("HF_TOKEN", "").strip()
        or os.getenv("HUGGINGFACE_TOKEN", "").strip()
    )
    if not repo or not token:
        return None
    revision = os.getenv("HF_DATASET_REVISION", "main").strip() or "main"
    return repo, token, revision
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _safe_copy_db(db_path: str, dest: str) -> bool:
    """Write a consistent snapshot of the SQLite database at *db_path* to *dest*.

    The snapshot is taken with SQLite's online backup API rather than a raw
    file copy: the backup reads through the WAL and holds the proper locks,
    so the copy is consistent even while other connections are writing.

    Args:
        db_path: Path of the live SQLite database.
        dest: Path of the snapshot file; any existing file is replaced.

    Returns:
        ``True`` when the snapshot was written, ``False`` when the source is
        missing or the backup failed (the error is printed, not raised).
    """
    # sqlite3.connect() would silently create an empty database for a
    # missing path, so reject that case up front.
    if not os.path.isfile(db_path):
        print(f"[DB_SYNC] Copy failed: source database not found: {db_path}")
        return False

    try:
        # Start from a clean destination so a stale or truncated file left
        # by an earlier failed run cannot break the backup.
        try:
            os.remove(dest)
        except FileNotFoundError:
            pass

        source = sqlite3.connect(db_path)
        try:
            target = sqlite3.connect(dest)
            try:
                source.backup(target)
            finally:
                target.close()
        finally:
            source.close()
        return True
    except (sqlite3.Error, OSError) as e:
        print(f"[DB_SYNC] Copy failed: {e}")
        return False
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _do_upload(db_path: str, repo: str, token: str, revision: str) -> bool:
    """Upload a snapshot of the database to the dataset repo as ``chatbot.db``.

    A temporary copy (``<db_path>.sync_copy``) is made with ``_safe_copy_db``
    and uploaded in place of the live file. The temporary copy is removed
    whether the upload succeeds or fails.

    Args:
        db_path: Path of the live SQLite database.
        repo: Dataset repo id passed to ``upload_file`` as ``repo_id``.
        token: Access token used to build the ``HfApi`` client.
        revision: Target revision passed to ``upload_file``.

    Returns:
        ``True`` on a successful upload; ``False`` when the copy or the
        upload failed (the error is printed, not raised).
    """
    tmp_path = db_path + ".sync_copy"
    try:
        # Imported lazily so this module still loads when huggingface_hub is
        # not installed; the ImportError is reported as an upload failure.
        from huggingface_hub import HfApi

        api = HfApi(token=token)

        if not _safe_copy_db(db_path, tmp_path):
            return False

        api.upload_file(
            path_or_fileobj=tmp_path,
            path_in_repo="chatbot.db",
            repo_id=repo,
            repo_type="dataset",
            revision=revision,
            commit_message="Auto-sync chatbot.db",
        )

        print(f"[DB_SYNC] Uploaded chatbot.db to {repo}")
        return True

    except Exception as e:
        # Best-effort background task: report and carry on rather than
        # letting a sync failure propagate out of the worker thread.
        print(f"[DB_SYNC] Upload failed: {e}")
        return False

    finally:
        # Always drop the snapshot, including after a failed copy or upload,
        # so full database copies do not pile up next to the live file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# True while a background worker is queued (waiting for the rate-limit
# window to open); guarded by _queue_lock. Kept separate from _lock, which
# is held for the whole upload, so callers never block on a running upload.
_sync_queued = False
_queue_lock = threading.Lock()


def schedule_sync(db_path: str | None = None) -> None:
    """Schedule a background upload of the database to the dataset repo.

    Call this after every important write. The call returns immediately and
    does nothing when sync is not configured or the database file is missing.

    Uploads are rate-limited to one per ``_MIN_INTERVAL`` seconds. A request
    that arrives inside the window is deferred, not dropped: a single worker
    waits until the window opens and then uploads, so the last write before
    a quiet period still reaches the dataset repo. Requests made while a
    worker is already queued are coalesced into that worker.

    Args:
        db_path: Database file to upload. Defaults to
            ``backend.runtime_paths.DB_PATH``.
    """
    global _sync_queued

    config = _get_config()
    if not config:
        return

    if db_path is None:
        from backend.runtime_paths import DB_PATH

        db_path = DB_PATH

    if not os.path.exists(db_path):
        return

    repo, token, revision = config

    with _queue_lock:
        if _sync_queued:
            # A queued worker has not taken its snapshot yet, so it will
            # include this write as well.
            return
        _sync_queued = True

    def _sync() -> None:
        global _last_sync, _sync_queued
        try:
            delay = _MIN_INTERVAL - (time.time() - _last_sync)
            if delay > 0:
                time.sleep(delay)
        finally:
            # From here on the snapshot is about to be taken; later writes
            # must queue a fresh worker.
            pass
        with _lock:
            with _queue_lock:
                _sync_queued = False
            _last_sync = time.time()
            _do_upload(db_path, repo, token, revision)

    try:
        threading.Thread(target=_sync, daemon=True).start()
    except RuntimeError as e:
        # The thread could not be started: clear the flag so a later call
        # can try again instead of being coalesced into a worker that
        # does not exist.
        with _queue_lock:
            _sync_queued = False
        print(f"[DB_SYNC] Could not start sync thread: {e}")
|
scripts/bootstrap_space_data.py
CHANGED
|
@@ -59,6 +59,13 @@ def _copy_file_if_missing(src: str, dst: str) -> None:
|
|
| 59 |
shutil.copy2(src, dst)
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def _vector_dir_ready() -> bool:
|
| 63 |
return os.path.isdir(VECTOR_DIR) and any(Path(VECTOR_DIR).iterdir())
|
| 64 |
|
|
@@ -131,7 +138,8 @@ def bootstrap_space_data(force: bool = False) -> bool:
|
|
| 131 |
_copy_tree(pdf_src, PDF_DIR)
|
| 132 |
|
| 133 |
db_src = os.path.join(snapshot_dir, "chatbot.db")
|
| 134 |
-
|
|
|
|
| 135 |
|
| 136 |
meta = {
|
| 137 |
"repo_id": repo_id,
|
|
|
|
| 59 |
shutil.copy2(src, dst)
|
| 60 |
|
| 61 |
|
| 62 |
+
def _copy_file_always(src: str, dst: str) -> None:
    """Copy *src* to *dst*, overwriting *dst* if it already exists.

    Does nothing when *src* does not exist. Missing parent directories of
    *dst* are created first.

    Args:
        src: Path of the file to copy.
        dst: Destination path; may be a bare file name in the current
            working directory.
    """
    if not os.path.exists(src):
        return

    parent = os.path.dirname(dst)
    # dirname is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    shutil.copy2(src, dst)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
def _vector_dir_ready() -> bool:
|
| 70 |
return os.path.isdir(VECTOR_DIR) and any(Path(VECTOR_DIR).iterdir())
|
| 71 |
|
|
|
|
| 138 |
_copy_tree(pdf_src, PDF_DIR)
|
| 139 |
|
| 140 |
db_src = os.path.join(snapshot_dir, "chatbot.db")
|
| 141 |
+
_copy_file_always(db_src, DB_PATH)
|
| 142 |
+
print(f"[BOOTSTRAP] Đã tải chatbot.db mới nhất từ dataset repo")
|
| 143 |
|
| 144 |
meta = {
|
| 145 |
"repo_id": repo_id,
|