Nguyễn Quốc Vỹ committed on
Commit
98f87e1
·
1 Parent(s): 4b842c3

Chuyển từ dữ liệu tạm sang runtime để dữ liệu không bị mất

Browse files
backend/auth.py CHANGED
@@ -11,6 +11,7 @@ import sqlite3
11
  from datetime import datetime, timedelta
12
  from backend.admin_config import is_bootstrap_admin_email
13
  from backend.runtime_paths import DB_PATH
 
14
 
15
 
16
  def _get_connection():
@@ -80,6 +81,7 @@ def register_user(email: str, password: str, display_name: str = None) -> dict:
80
  "role": role,
81
  }
82
  print(f"[AUTH] ✅ Đăng ký thành công: {email}")
 
83
  return {"success": True, "message": "Đăng ký thành công!", "user": user}
84
 
85
  except Exception as e:
@@ -209,6 +211,7 @@ def reset_password(email: str, token: str, new_password: str) -> dict:
209
  conn.commit()
210
 
211
  print(f"[AUTH] ✅ Password reset for: {email}")
 
212
  return {"success": True, "message": "Đặt lại mật khẩu thành công!"}
213
 
214
  finally:
 
11
  from datetime import datetime, timedelta
12
  from backend.admin_config import is_bootstrap_admin_email
13
  from backend.runtime_paths import DB_PATH
14
+ from backend.db_sync import schedule_sync as _schedule_sync
15
 
16
 
17
  def _get_connection():
 
81
  "role": role,
82
  }
83
  print(f"[AUTH] ✅ Đăng ký thành công: {email}")
84
+ _schedule_sync()
85
  return {"success": True, "message": "Đăng ký thành công!", "user": user}
86
 
87
  except Exception as e:
 
211
  conn.commit()
212
 
213
  print(f"[AUTH] ✅ Password reset for: {email}")
214
+ _schedule_sync()
215
  return {"success": True, "message": "Đặt lại mật khẩu thành công!"}
216
 
217
  finally:
backend/db.py CHANGED
@@ -9,6 +9,7 @@ import time
9
  from datetime import datetime
10
  from backend.admin_config import get_admin_emails
11
  from backend.runtime_paths import DB_PATH
 
12
 
13
 
14
  def _get_connection():
@@ -249,6 +250,7 @@ def save_conversation(ma_cuoc_tro_chuyen: str, tieu_de: str = "Cuộc trò chuy
249
  conn.commit()
250
  finally:
251
  conn.close()
 
252
 
253
 
254
  def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
@@ -268,9 +270,11 @@ def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
268
  ),
269
  )
270
  conn.commit()
271
- return cursor.lastrowid
272
  finally:
273
  conn.close()
 
 
274
 
275
 
276
  def save_message_sources(ma_tin_nhan: int, sources: list):
@@ -457,6 +461,7 @@ def delete_conversation(ma_cuoc_tro_chuyen: str):
457
  print(f"[DB] 🗑️ Deleted conversation: {ma_cuoc_tro_chuyen}")
458
  finally:
459
  conn.close()
 
460
 
461
 
462
  # ======================== TÀI LIỆU (GỘP user + admin) ========================
@@ -532,6 +537,7 @@ def save_phan_hoi(ma_tin_nhan: int, loai: str, ma_nguoi_dung: str = None,
532
  conn.commit()
533
  finally:
534
  conn.close()
 
535
 
536
 
537
  def load_phan_hoi_by_tin_nhan(ma_tin_nhan: int) -> list:
@@ -601,9 +607,10 @@ def update_user_role(ma_nguoi_dung: str, vai_tro: str) -> bool:
601
  (vai_tro, ma_nguoi_dung),
602
  )
603
  conn.commit()
604
- return True
605
  finally:
606
  conn.close()
 
 
607
 
608
 
609
  def lock_user(ma_nguoi_dung: str) -> bool:
@@ -615,9 +622,10 @@ def lock_user(ma_nguoi_dung: str) -> bool:
615
  (ma_nguoi_dung,),
616
  )
617
  conn.commit()
618
- return True
619
  finally:
620
  conn.close()
 
 
621
 
622
 
623
  def unlock_user(ma_nguoi_dung: str) -> bool:
@@ -629,9 +637,10 @@ def unlock_user(ma_nguoi_dung: str) -> bool:
629
  (ma_nguoi_dung,),
630
  )
631
  conn.commit()
632
- return True
633
  finally:
634
  conn.close()
 
 
635
 
636
 
637
  # ======================== ADMIN: LỊCH SỬ HỎI ĐÁP TOÀN HỆ THỐNG ========================
 
9
  from datetime import datetime
10
  from backend.admin_config import get_admin_emails
11
  from backend.runtime_paths import DB_PATH
12
+ from backend.db_sync import schedule_sync as _schedule_sync
13
 
14
 
15
  def _get_connection():
 
250
  conn.commit()
251
  finally:
252
  conn.close()
253
+ _schedule_sync()
254
 
255
 
256
  def save_message(ma_cuoc_tro_chuyen: str, vai_tro: str, noi_dung: str,
 
270
  ),
271
  )
272
  conn.commit()
273
+ lastrowid = cursor.lastrowid
274
  finally:
275
  conn.close()
276
+ _schedule_sync()
277
+ return lastrowid
278
 
279
 
280
  def save_message_sources(ma_tin_nhan: int, sources: list):
 
461
  print(f"[DB] 🗑️ Deleted conversation: {ma_cuoc_tro_chuyen}")
462
  finally:
463
  conn.close()
464
+ _schedule_sync()
465
 
466
 
467
  # ======================== TÀI LIỆU (GỘP user + admin) ========================
 
537
  conn.commit()
538
  finally:
539
  conn.close()
540
+ _schedule_sync()
541
 
542
 
543
  def load_phan_hoi_by_tin_nhan(ma_tin_nhan: int) -> list:
 
607
  (vai_tro, ma_nguoi_dung),
608
  )
609
  conn.commit()
 
610
  finally:
611
  conn.close()
612
+ _schedule_sync()
613
+ return True
614
 
615
 
616
  def lock_user(ma_nguoi_dung: str) -> bool:
 
622
  (ma_nguoi_dung,),
623
  )
624
  conn.commit()
 
625
  finally:
626
  conn.close()
627
+ _schedule_sync()
628
+ return True
629
 
630
 
631
  def unlock_user(ma_nguoi_dung: str) -> bool:
 
637
  (ma_nguoi_dung,),
638
  )
639
  conn.commit()
 
640
  finally:
641
  conn.close()
642
+ _schedule_sync()
643
+ return True
644
 
645
 
646
  # ======================== ADMIN: LỊCH SỬ HỎI ĐÁP TOÀN HỆ THỐNG ========================
backend/db_sync.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Đồng bộ chatbot.db lên Hugging Face Dataset repo để dữ liệu bền vững.
3
+
4
+ Chỉ hoạt động khi chạy trên HF Space và có cấu hình:
5
+ - HF_DATASET_REPO
6
+ - HF_TOKEN
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import shutil
13
+ import sqlite3
14
+ import threading
15
+ import time
16
+
17
+ _lock = threading.Lock()
18
+ _last_sync: float = 0
19
+ _MIN_INTERVAL = 30 # tối thiểu 30 giây giữa 2 lần sync
20
+
21
+
22
+ def _get_config() -> tuple[str, str, str] | None:
23
+ repo = os.getenv("HF_DATASET_REPO", "").strip()
24
+ token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
25
+ if not repo or not token:
26
+ return None
27
+ revision = os.getenv("HF_DATASET_REVISION", "main").strip() or "main"
28
+ return repo, token, revision
29
+
30
+
31
+ def _safe_copy_db(db_path: str, dest: str) -> bool:
32
+ """Tạo bản sao sạch của SQLite (checkpoint WAL trước khi copy)."""
33
+ try:
34
+ conn = sqlite3.connect(db_path)
35
+ conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
36
+ conn.close()
37
+ except Exception:
38
+ pass
39
+
40
+ try:
41
+ shutil.copy2(db_path, dest)
42
+ return True
43
+ except Exception as e:
44
+ print(f"[DB_SYNC] Copy failed: {e}")
45
+ return False
46
+
47
+
48
+ def _do_upload(db_path: str, repo: str, token: str, revision: str) -> bool:
49
+ """Upload chatbot.db lên dataset repo."""
50
+ try:
51
+ from huggingface_hub import HfApi
52
+ api = HfApi(token=token)
53
+
54
+ tmp_path = db_path + ".sync_copy"
55
+ if not _safe_copy_db(db_path, tmp_path):
56
+ return False
57
+
58
+ api.upload_file(
59
+ path_or_fileobj=tmp_path,
60
+ path_in_repo="chatbot.db",
61
+ repo_id=repo,
62
+ repo_type="dataset",
63
+ revision=revision,
64
+ commit_message="Auto-sync chatbot.db",
65
+ )
66
+
67
+ try:
68
+ os.remove(tmp_path)
69
+ except OSError:
70
+ pass
71
+
72
+ print(f"[DB_SYNC] Uploaded chatbot.db to {repo}")
73
+ return True
74
+
75
+ except Exception as e:
76
+ print(f"[DB_SYNC] Upload failed: {e}")
77
+ return False
78
+
79
+
80
+ def schedule_sync(db_path: str | None = None) -> None:
81
+ """
82
+ Lên lịch đồng bộ DB lên dataset repo (chạy background, có rate-limit).
83
+ Gọi hàm này sau mỗi lần ghi quan trọng vào DB.
84
+ """
85
+ global _last_sync
86
+
87
+ config = _get_config()
88
+ if not config:
89
+ return
90
+
91
+ now = time.time()
92
+ if now - _last_sync < _MIN_INTERVAL:
93
+ return
94
+
95
+ if db_path is None:
96
+ from backend.runtime_paths import DB_PATH
97
+ db_path = DB_PATH
98
+
99
+ if not os.path.exists(db_path):
100
+ return
101
+
102
+ repo, token, revision = config
103
+
104
+ def _sync():
105
+ global _last_sync
106
+ with _lock:
107
+ if time.time() - _last_sync < _MIN_INTERVAL:
108
+ return
109
+ _last_sync = time.time()
110
+ _do_upload(db_path, repo, token, revision)
111
+
112
+ thread = threading.Thread(target=_sync, daemon=True)
113
+ thread.start()
scripts/bootstrap_space_data.py CHANGED
@@ -59,6 +59,13 @@ def _copy_file_if_missing(src: str, dst: str) -> None:
59
  shutil.copy2(src, dst)
60
 
61
 
 
 
 
 
 
 
 
62
  def _vector_dir_ready() -> bool:
63
  return os.path.isdir(VECTOR_DIR) and any(Path(VECTOR_DIR).iterdir())
64
 
@@ -131,7 +138,8 @@ def bootstrap_space_data(force: bool = False) -> bool:
131
  _copy_tree(pdf_src, PDF_DIR)
132
 
133
  db_src = os.path.join(snapshot_dir, "chatbot.db")
134
- _copy_file_if_missing(db_src, DB_PATH)
 
135
 
136
  meta = {
137
  "repo_id": repo_id,
 
59
  shutil.copy2(src, dst)
60
 
61
 
62
+ def _copy_file_always(src: str, dst: str) -> None:
63
+ """Luôn copy file từ src sang dst (ghi đè nếu đã tồn tại)."""
64
+ if os.path.exists(src):
65
+ os.makedirs(os.path.dirname(dst), exist_ok=True)
66
+ shutil.copy2(src, dst)
67
+
68
+
69
  def _vector_dir_ready() -> bool:
70
  return os.path.isdir(VECTOR_DIR) and any(Path(VECTOR_DIR).iterdir())
71
 
 
138
  _copy_tree(pdf_src, PDF_DIR)
139
 
140
  db_src = os.path.join(snapshot_dir, "chatbot.db")
141
+ _copy_file_always(db_src, DB_PATH)
142
+ print(f"[BOOTSTRAP] Đã tải chatbot.db mới nhất từ dataset repo")
143
 
144
  meta = {
145
  "repo_id": repo_id,