Dyraa18 committed on
Commit
4fd0dc6
·
verified ·
1 Parent(s): 187181f
Files changed (1) hide show
  1. app.py +171 -90
app.py CHANGED
@@ -1,10 +1,8 @@
1
- # app.py (HF Spaces CPU-Optimized)
2
- # RAG sekolah super hemat CPU:
3
- # - Default model: 3B instruct (GGUF) + ctx 1024
4
- # - Retrieval cepat: FAISS top-12 → pilih kalimat pakai lexical overlap (tanpa encode per-kalimat)
5
- # - Encoder dipakai HANYA untuk query & FAISS (1x per request)
6
- # - Jawaban final lewat <final>...</final>, stop di </final>, retry kalau kosong/ellipsis
7
- # - Admin + Auth Postgres tetap sama
8
 
9
  import os, json, re, time, logging
10
  from functools import lru_cache, wraps
@@ -29,8 +27,9 @@ load_dotenv()
29
  # ========= ENV & LOGGING =========
30
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
31
  os.environ.setdefault("OMP_NUM_THREADS", "1")
 
32
  try:
33
- torch.set_num_threads(int(os.environ.get("NUM_THREADS", "3"))) # 3 thread cukup di CPU Spaces
34
  torch.set_num_interop_threads(1)
35
  except Exception:
36
  pass
@@ -38,24 +37,27 @@ except Exception:
38
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
39
  log = logging.getLogger("rag-app")
40
 
41
- # ========= IMPORT EKSTERNAL (wrapper & guardrail) =========
42
- from Guardrail import validate_input # -> bool
 
 
 
43
  from Model import load_model, generate # -> llama.cpp wrapper
44
 
45
- # ========= PATH ROOT =========
46
  BASE_DIR = Path(__file__).resolve().parent
47
 
48
- # ========= KONFIG MODEL & RAG (di-tune untuk CPU) =========
49
- GGUF_DEFAULT = "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf" # kecil & cepat; upload ke /models
50
- MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", GGUF_DEFAULT))
51
- CTX_WINDOW = int(os.environ.get("CTX_WINDOW", 1024))
52
- N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", 0))
53
- N_THREADS = int(os.environ.get("NUM_THREADS", 3))
54
 
 
55
  ENCODER_NAME = os.environ.get("ENCODER_NAME", "intfloat/multilingual-e5-large")
56
  ENCODER_DEVICE = torch.device("cpu")
57
 
58
- # Dataset sudah ada di Space → path RELATIF (samakan dengan struktur kamu)
59
  SUBJECTS: Dict[str, Dict[str, str]] = {
60
  "ipas": {
61
  "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
@@ -80,11 +82,11 @@ SUBJECTS: Dict[str, Dict[str, str]] = {
80
  }
81
  }
82
 
83
- # Threshold & parameter cepat
84
- TOP_K_FAISS = int(os.environ.get("TOP_K_FAISS", 12))
85
- TOP_K_FINAL = int(os.environ.get("TOP_K_FINAL", 6))
86
- MIN_COSINE = float(os.environ.get("MIN_COSINE", 0.80)) # lebih longgar biar jarang fallback
87
- MIN_LEXICAL = float(os.environ.get("MIN_LEXICAL", 0.10))
88
  FALLBACK_TEXT = os.environ.get("FALLBACK_TEXT", "maap pengetahuan tidak ada dalam database")
89
  GUARDRAIL_BLOCK_TEXT = os.environ.get("GUARDRAIL_BLOCK_TEXT", "maap, pertanyaan ditolak oleh guardrail")
90
  ENABLE_PROFILING = os.environ.get("ENABLE_PROFILING", "false").lower() == "true"
@@ -95,6 +97,7 @@ app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
95
 
96
  from werkzeug.middleware.proxy_fix import ProxyFix
97
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
 
98
  app.config.update(
99
  SESSION_COOKIE_NAME="session",
100
  SESSION_COOKIE_SAMESITE="None",
@@ -104,7 +107,7 @@ app.config.update(
104
  PREFERRED_URL_SCHEME="https",
105
  )
106
 
107
- # ========= GLOBALS =========
108
  ENCODER_TOKENIZER = None
109
  ENCODER_MODEL = None
110
  LLM = None
@@ -115,7 +118,7 @@ class SubjectAssets:
115
  texts: List[str]
116
  embs: np.ndarray
117
 
118
- # ========= TEKS UTIL =========
119
  STOPWORDS_ID = {
120
  "yang","dan","atau","pada","di","ke","dari","itu","ini","adalah","dengan",
121
  "untuk","serta","sebagai","oleh","dalam","akan","kamu","apa","karena",
@@ -123,12 +126,8 @@ STOPWORDS_ID = {
123
  }
124
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
125
 
@lru_cache(maxsize=4096)
def _tok_cached(word: str) -> str:
    """Return ``word`` lower-cased.

    Results are memoised (up to 4096 distinct words) so repeated tokens are
    not lower-cased again on every request.
    """
    return word.lower()
129
-
def tok_id(text: str) -> List[str]:
    """Tokenise ``text`` into lower-cased alphabetic words without stopwords.

    Words are whatever ``TOKEN_RE`` matches; each is lower-cased through the
    memoised ``_tok_cached`` helper and dropped when it is in ``STOPWORDS_ID``.
    ``None`` or an empty string yields an empty list.
    """
    return [
        tw
        for w in TOKEN_RE.findall(text or "")
        if (tw := _tok_cached(w)) not in STOPWORDS_ID
    ]
132
 
133
  def lexical_overlap(query: str, sent: str) -> float:
134
  q = set(tok_id(query)); s = set(tok_id(sent))
@@ -150,7 +149,7 @@ META_PREFIX_RE = re.compile(r"^\s*(?:" + r"|".join(META_PREFIX_PATTERNS) + r")\s
150
 
151
  def clean_prefix(t: str) -> str:
152
  t = (t or "").strip()
153
- for _ in range(3):
154
  t2 = META_PREFIX_RE.sub("", t).lstrip()
155
  if t2 == t:
156
  break
@@ -166,8 +165,7 @@ def strip_meta_sentence(s: str) -> str:
166
 
167
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
168
 
169
- def split_sentences_fast(text: str) -> List[str]:
170
- # tanpa encoding per-kalimat
171
  outs = []
172
  for p in SENT_SPLIT_RE.split(text or ""):
173
  s = clean_prefix((p or "").strip())
@@ -179,12 +177,12 @@ def split_sentences_fast(text: str) -> List[str]:
179
  continue
180
  if INSTRUCTION_RE.search(s):
181
  continue
182
- if len(s) < 12:
183
  continue
184
  outs.append(s)
185
  return outs
186
 
187
- # ========= MODEL WARMUP =========
188
 
189
  def warmup_models():
190
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
@@ -193,13 +191,13 @@ def warmup_models():
193
  ENCODER_TOKENIZER = AutoTokenizer.from_pretrained(ENCODER_NAME)
194
  ENCODER_MODEL = AutoModel.from_pretrained(ENCODER_NAME).to(ENCODER_DEVICE).eval()
195
  if LLM is None:
196
- log.info(f"[INIT] Load LLM: {MODEL_PATH} | ctx={CTX_WINDOW} | threads={N_THREADS}")
197
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
198
 
199
- # ========= ASSETS =========
200
 
201
  @lru_cache(maxsize=8)
202
- def load_subject_assets(subject_key: str) -> "SubjectAssets":
203
  if subject_key not in SUBJECTS:
204
  raise ValueError(f"Unknown subject: {subject_key}")
205
  cfg = SUBJECTS[subject_key]
@@ -212,28 +210,25 @@ def load_subject_assets(subject_key: str) -> "SubjectAssets":
212
  raise FileNotFoundError(cfg["embeddings"])
213
  index = faiss.read_index(cfg["index"])
214
  with open(cfg["chunks"], "r", encoding="utf-8") as f:
215
- texts = [it.get("text", "") for it in json.load(f)]
216
- embs = np.load(cfg["embeddings"]) # (N, dim)
217
  if index.ntotal != len(embs):
218
  raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
219
  return SubjectAssets(index=index, texts=texts, embs=embs)
220
 
221
- # ========= ENCODER =========
222
 
@torch.inference_mode()
@lru_cache(maxsize=1024)
def encode_query_exact(text: str) -> np.ndarray:
    """Embed ``text`` with the globally loaded encoder and return a NumPy array.

    The tokenizer output is moved to ``ENCODER_DEVICE``, run through
    ``ENCODER_MODEL``, and ``last_hidden_state`` is averaged over dim 1.
    Results are memoised per input string (up to 1024 entries).

    NOTE(review): the cache hands the same ndarray object to every caller for
    a given ``text``; callers must treat it as read-only.
    NOTE(review): presumably returns shape (1, dim), since callers pass it
    straight to ``index.search`` or flatten it — confirm.
    """
    toks = ENCODER_TOKENIZER(
        text, padding=True, truncation=True, return_tensors="pt"
    ).to(ENCODER_DEVICE)
    out = ENCODER_MODEL(**toks)
    # Plain (unmasked) mean over dim 1 of the final hidden states.
    vec = out.last_hidden_state.mean(dim=1)
    return vec.cpu().numpy()
230
 
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are flattened to 1-D first, so a (1, dim) row and a (dim,)
    vector can be compared directly. A small epsilon in the denominator keeps
    the result finite (0.0) when either vector is all zeros.
    """
    a = np.asarray(a).reshape(-1)
    b = np.asarray(b).reshape(-1)
    denom = (np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12
    return float(np.dot(a, b) / denom)
235
-
236
- # ========= RETRIEVAL CEPAT =========
237
 
238
  def best_cosine_from_faiss(query: str, subject_key: str) -> float:
239
  assets = load_subject_assets(subject_key)
@@ -246,54 +241,56 @@ def best_cosine_from_faiss(query: str, subject_key: str) -> float:
246
  best = max(best, cosine_sim(qv, assets.embs[i]))
247
  return best
248
 
def retrieve_top_chunks(query: str, subject_key: str) -> List[str]:
    """Return up to ``TOP_K_FINAL`` chunk texts nearest to ``query``.

    The query is embedded once, the subject's FAISS index is searched for
    ``TOP_K_FAISS`` neighbours, and the hits are kept in the order the index
    returned them.
    """
    assets = load_subject_assets(subject_key)
    q = encode_query_exact(query)
    _, idx = assets.index.search(q, TOP_K_FAISS)
    # Drop ids outside the chunk list (negative or >= len(texts)).
    idxs = [i for i in idx[0] if 0 <= i < len(assets.texts)]
    return [assets.texts[i] for i in idxs[:TOP_K_FINAL]]
 
 
 
 
 
 
 
255
 
def pick_best_sentences_fast(query: str, chunks: List[str], top_k: int = 4) -> List[str]:
    """Pick the ``top_k`` sentences from ``chunks`` that best match ``query``.

    No per-sentence encoding is done: each sentence is scored by its lexical
    overlap with the query plus a small bonus for a reasonable length.
    Sentences whose overlap is below ``MIN_LEXICAL`` are discarded.
    """
    cands: List[Tuple[float, str]] = []
    for ch in chunks:
        for s in split_sentences_fast(ch):
            ovl = lexical_overlap(query, s)
            if ovl < MIN_LEXICAL:
                continue
            # Small bonus when the sentence has a reasonable length (50-220 chars).
            length = len(s)
            len_bonus = 0.05 if 50 <= length <= 220 else 0.0
            score = ovl + len_bonus
            cands.append((score, s))
    cands.sort(key=lambda x: x[0], reverse=True)
    return [s for _, s in cands[:top_k]]
271
 
272
- # ========= PROMPT =========
273
-
def build_prompt(user_query: str, sentences: List[str]) -> str:
    """Assemble the LLM prompt from the selected source sentences.

    The prompt consists of the system rules, a one-shot format example, the
    source sentences as a bulleted list (each passed through
    ``clean_prefix``), the user question, and a final instruction to answer
    inside ``<final>...</final>``.
    """
    block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
    system = (
        "Kamu asisten RAG.\n"
        f"- Jika tidak ada kalimat yang relevan, tulis persis: {FALLBACK_TEXT}\n"
        "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku (≥ 6 kata).\n"
        "- Tanpa frasa meta (berdasarkan/menurut/merujuk/mengacu/bersumber).\n"
        "- Tulis jawaban final di dalam tag <final>Jawaban.</final> dan jangan menulis apa pun setelah </final>."
    )
    fewshot = (
        "Contoh format: \n"
        "KALIMAT SUMBER:\n- Air memuai saat dipanaskan.\n"
        "PERTANYAAN: Apa yang terjadi pada air saat dipanaskan?\n"
        "<final>Air akan memuai ketika dipanaskan.</final>\n"
    )
    return (
        f"{system}\n\n{fewshot}\n"
        f"KALIMAT SUMBER:\n{block}\n\n"
        f"PERTANYAAN: {user_query}\n"
        f"TULIS JAWABAN DI DALAM <final>...</final> SAJA:"
    )
295
 
296
- @lru_cache(maxsize=1024)
297
  def validate_input_cached(q: str) -> bool:
298
  try:
299
  return validate_input(q)
@@ -378,9 +375,11 @@ def auth_login():
378
  request.form.get("identity") or request.form.get("email") or request.form.get("username") or ""
379
  ).strip().lower()
380
  pw_input = (request.form.get("password") or "").strip()
 
381
  if not identity or not pw_input:
382
  flash("Mohon isi email/username dan password.", "error")
383
  return render_template("login.html"), 400
 
384
  s = db()
385
  try:
386
  user = (
@@ -392,15 +391,18 @@ def auth_login():
392
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
393
  finally:
394
  s.close()
 
395
  if not ok:
396
  flash("Identitas atau password salah.", "error")
397
  return render_template("login.html"), 401
 
398
  session["logged_in"] = True
399
  session["user_id"] = user.id
400
  session["username"] = user.username
401
  session["is_admin"] = bool(user.is_admin)
402
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
403
  return redirect(url_for("subjects"))
 
404
  return render_template("login.html")
405
 
406
  @app.route("/whoami")
@@ -419,6 +421,7 @@ def auth_register():
419
  email = (request.form.get("email") or "").strip().lower()
420
  pw = (request.form.get("password") or "").strip()
421
  confirm = (request.form.get("confirm") or "").strip()
 
422
  if not username or not email or not pw:
423
  flash("Semua field wajib diisi.", "error")
424
  return render_template("register.html"), 400
@@ -428,6 +431,7 @@ def auth_register():
428
  if pw != confirm:
429
  flash("Konfirmasi password tidak cocok.", "error")
430
  return render_template("register.html"), 400
 
431
  s = db()
432
  try:
433
  existed = (
@@ -442,8 +446,10 @@ def auth_register():
442
  s.add(u); s.commit()
443
  finally:
444
  s.close()
 
445
  flash("Registrasi berhasil. Silakan login.", "success")
446
  return redirect(url_for("auth_login"))
 
447
  return render_template("register.html")
448
 
449
  @app.route("/auth/logout")
@@ -468,6 +474,7 @@ def chat_subject(subject_key: str):
468
  return redirect(url_for("subjects"))
469
  session["subject_selected"] = subject_key
470
  label = SUBJECTS[subject_key]["label"]
 
471
  s = db()
472
  try:
473
  uid = session.get("user_id")
@@ -480,6 +487,7 @@ def chat_subject(subject_key: str):
480
  history = [{"role": r.role, "message": r.message} for r in rows]
481
  finally:
482
  s.close()
 
483
  return render_template("chat.html", subject=subject_key, subject_label=label, history=history)
484
 
485
  @app.route("/health")
@@ -490,7 +498,6 @@ def health():
490
  "llm_loaded": LLM is not None,
491
  "model_path": MODEL_PATH,
492
  "ctx_window": CTX_WINDOW,
493
- "threads": N_THREADS,
494
  })
495
 
496
  @app.route("/ask/<subject_key>", methods=["POST"])
@@ -498,6 +505,8 @@ def health():
498
  def ask(subject_key: str):
499
  if subject_key not in SUBJECTS:
500
  return jsonify({"ok": False, "error": "invalid subject"}), 400
 
 
501
  warmup_models()
502
  t0 = time.perf_counter()
503
 
@@ -505,6 +514,7 @@ def ask(subject_key: str):
505
  query = (data.get("message") or "").strip()
506
  if not query:
507
  return jsonify({"ok": False, "error": "empty query"}), 400
 
508
  if not validate_input_cached(query):
509
  return jsonify({"ok": True, "answer": GUARDRAIL_BLOCK_TEXT})
510
 
@@ -519,18 +529,18 @@ def ask(subject_key: str):
519
  if best < MIN_COSINE:
520
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
521
 
522
- chunks = retrieve_top_chunks(query, subject_key)
523
  if not chunks:
524
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
525
 
526
- sentences = pick_best_sentences_fast(query, chunks, top_k=4)
527
  if not sentences:
528
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
529
 
530
  prompt = build_prompt(query, sentences)
531
 
532
  try:
533
- # PASS-1: deterministik & singkat
534
  raw_answer = generate(
535
  LLM,
536
  prompt,
@@ -542,19 +552,38 @@ def ask(subject_key: str):
542
  raw_answer = raw_answer.strip()
543
  log.info(f"[LLM] Raw answer repr (pass1): {repr(raw_answer)}")
544
 
 
545
  text = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
546
  text = re.sub(r"</?think\b[^>]*>", "", text, flags=re.IGNORECASE).strip()
547
  m_final = re.search(r"<final>\s*(.+)$", text, flags=re.IGNORECASE | re.DOTALL)
548
  cleaned = (m_final.group(1).strip() if m_final else re.sub(r"<[^>]+>", "", text).strip())
549
 
def _is_bad(s: str) -> bool:
    """Return True when ``s`` is not an acceptable one-sentence answer.

    An answer is rejected when it has fewer than 3 alphabetic characters, is
    only a dot/ellipsis placeholder, or has fewer than 6 whitespace-separated
    words.
    """
    s2 = s.strip()
    if len(re.sub(r"[^A-Za-zÀ-ÖØ-öø-ÿ]+", "", s2)) < 3:
        return True
    if s2 in {"...", ".", "..", "…"}:
        return True
    return len(s2.split()) < 6
553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  if _is_bad(cleaned):
555
  prompt_retry = (
556
  prompt
557
- + "\n\nULANGI DENGAN TAAT FORMAT: Tulis satu kalimat faktual tanpa placeholder/ellipsis, minimal 6 kata, mulai huruf kapital dan akhiri titik. Tulis hanya di dalam <final>...</final>."
 
 
558
  )
559
  raw_answer2 = generate(
560
  LLM,
@@ -566,6 +595,7 @@ def ask(subject_key: str):
566
  ) or ""
567
  raw_answer2 = raw_answer2.strip()
568
  log.info(f"[LLM] Raw answer repr (pass2): {repr(raw_answer2)}")
 
569
  text2 = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer2, flags=re.DOTALL | re.IGNORECASE).strip()
570
  text2 = re.sub(r"</?think\b[^>]*>", "", text2, flags=re.IGNORECASE).strip()
571
  m_final2 = re.search(r"<final>\s*(.+)$", text2, flags=re.IGNORECASE | re.DOTALL)
@@ -578,12 +608,12 @@ def ask(subject_key: str):
578
  log.exception(f"[LLM] generate error: {e}")
579
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
580
 
581
- # Ambil 1 kalimat pertama saja
582
  m = re.search(r"(.+?[.!?])(\s|$)", answer)
583
  answer = (m.group(1) if m else answer).strip()
584
  answer = strip_meta_sentence(answer)
585
 
586
- # Simpan history
587
  try:
588
  s = db()
589
  uid = session.get("user_id")
@@ -612,7 +642,8 @@ def ask(subject_key: str):
612
 
613
  return jsonify({"ok": True, "answer": answer})
614
 
615
- # ===== Admin =====
 
616
  @app.route("/admin")
617
  @admin_required
618
  def admin_dashboard():
@@ -624,7 +655,13 @@ def admin_dashboard():
624
  total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
625
  finally:
626
  s.close()
627
- return render_template("admin_dashboard.html", total_users=total_users, total_active=total_active, total_admins=total_admins, total_msgs=total_msgs)
 
 
 
 
 
 
628
 
629
  @app.route("/admin/users")
630
  @admin_required
@@ -632,17 +669,34 @@ def admin_users():
632
  q = (request.args.get("q") or "").strip().lower()
633
  page = max(int(request.args.get("page", 1)), 1)
634
  per_page = min(max(int(request.args.get("per_page", 20)), 5), 100)
 
635
  s = db()
636
  try:
637
  base = s.query(User)
638
  if q:
639
- base = base.filter(or_(func.lower(User.username).like(f"%{q}%"), func.lower(User.email).like(f"%{q}%")))
 
 
 
 
 
640
  total = base.count()
641
- users = base.order_by(User.id.asc()).offset((page - 1) * per_page).limit(per_page).all()
 
 
 
 
 
642
  user_ids = [u.id for u in users] or [-1]
643
- counts = dict(s.query(ChatHistory.user_id, func.count(ChatHistory.id)).filter(ChatHistory.user_id.in_(user_ids)).group_by(ChatHistory.user_id).all())
 
 
 
 
 
644
  finally:
645
  s.close()
 
646
  return render_template("admin_users.html", users=users, counts=counts, q=q, page=page, per_page=per_page, total=total)
647
 
648
  @app.route("/admin/history")
@@ -652,23 +706,37 @@ def admin_history():
652
  username = (request.args.get("username") or "").strip().lower()
653
  subject = (request.args.get("subject") or "").strip().lower()
654
  role = (request.args.get("role") or "").strip().lower()
 
655
  page = max(int(request.args.get("page", 1)), 1)
656
  per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
 
657
  s = db()
658
  try:
659
  base = (s.query(ChatHistory, User).join(User, User.id == ChatHistory.user_id))
660
  if q:
661
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
662
  if username:
663
- base = base.filter(or_(func.lower(User.username) == username, func.lower(User.email) == username))
 
 
 
 
 
664
  if subject:
665
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
666
  if role in ("user", "bot"):
667
  base = base.filter(ChatHistory.role == role)
 
668
  total = base.count()
669
- rows = base.order_by(ChatHistory.id.desc()).offset((page - 1) * per_page).limit(per_page).all()
 
 
 
 
 
670
  finally:
671
  s.close()
 
672
  items = [{
673
  "id": r.ChatHistory.id,
674
  "username": r.User.username,
@@ -678,7 +746,20 @@ def admin_history():
678
  "message": r.ChatHistory.message,
679
  "timestamp": r.ChatHistory.timestamp,
680
  } for r in rows]
681
- return render_template("admin_history.html", items=items, subjects=SUBJECTS, q=q, username=username, subject=subject, role=role, page=page, per_page=per_page, total=total)
 
 
 
 
 
 
 
 
 
 
 
 
 
682
 
def _is_last_admin(s: Session) -> bool:
    """Return True when at most one user with ``is_admin`` set exists.

    Counts ``User`` rows whose ``is_admin`` is true using the open session
    ``s``; a missing count is treated as 0.
    """
    admin_count = (
        s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0
    )
    return admin_count <= 1
 
1
+ # app.py
2
+ # Flask RAG app (HF Spaces / CPU) — fixed finalization protocol for R1-style models
3
+ # - Forces model to write answer inside <final>...</final> and stops at </final>
4
+ # - Safer cleaning of <think> blocks
5
+ # - Same routes, admin pages, and Postgres auth as before
 
 
6
 
7
  import os, json, re, time, logging
8
  from functools import lru_cache, wraps
 
27
  # ========= ENV & LOGGING =========
28
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
29
  os.environ.setdefault("OMP_NUM_THREADS", "1")
30
+ # keep CPU footprint low in HF Spaces
31
  try:
32
+ torch.set_num_threads(int(os.environ.get("NUM_THREADS", "4")))
33
  torch.set_num_interop_threads(1)
34
  except Exception:
35
  pass
 
37
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
38
  log = logging.getLogger("rag-app")
39
 
40
+ # ========= IMPORT EKSTERNAL =========
41
+ # Expect file Guardrail.py with validate_input(text:str)->bool
42
+ # Expect file Model.py with load_model(gguf_path, n_ctx, n_gpu_layers, n_threads) and
43
+ # generate(llm, prompt, max_tokens, temperature, top_p, stop:list[str]) -> str
44
+ from Guardrail import validate_input # -> bool (lazy in file)
45
  from Model import load_model, generate # -> llama.cpp wrapper
46
 
47
+ # ========= PATH ROOT PROYEK =========
48
  BASE_DIR = Path(__file__).resolve().parent
49
 
50
+ # ========= KONFIGURASI RAG =========
51
+ MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"))
52
+ CTX_WINDOW = int(os.environ.get("CTX_WINDOW", 2048)) # 2048 cukup untuk RAG singkat
53
+ N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", 0)) # HF Spaces CPU only
54
+ N_THREADS = int(os.environ.get("NUM_THREADS", 4))
 
55
 
56
+ # ganti ke encoder lain jika perlu (m-e5-large cukup bagus untuk multilingual)
57
  ENCODER_NAME = os.environ.get("ENCODER_NAME", "intfloat/multilingual-e5-large")
58
  ENCODER_DEVICE = torch.device("cpu")
59
 
60
+ # Dataset sudah ada di Space → path RELATIF
61
  SUBJECTS: Dict[str, Dict[str, str]] = {
62
  "ipas": {
63
  "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
 
82
  }
83
  }
84
 
85
+ # Threshold dan fallback
86
+ TOP_K_FAISS = int(os.environ.get("TOP_K_FAISS", 24))
87
+ TOP_K_FINAL = int(os.environ.get("TOP_K_FINAL", 10))
88
+ MIN_COSINE = float(os.environ.get("MIN_COSINE", 0.84))
89
+ MIN_HYBRID = float(os.environ.get("MIN_HYBRID", 0.15))
90
  FALLBACK_TEXT = os.environ.get("FALLBACK_TEXT", "maap pengetahuan tidak ada dalam database")
91
  GUARDRAIL_BLOCK_TEXT = os.environ.get("GUARDRAIL_BLOCK_TEXT", "maap, pertanyaan ditolak oleh guardrail")
92
  ENABLE_PROFILING = os.environ.get("ENABLE_PROFILING", "false").lower() == "true"
 
97
 
98
  from werkzeug.middleware.proxy_fix import ProxyFix
99
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
100
+ # supaya session tersimpan di browser saat lewat proxy/HTTPS (HF Spaces)
101
  app.config.update(
102
  SESSION_COOKIE_NAME="session",
103
  SESSION_COOKIE_SAMESITE="None",
 
107
  PREFERRED_URL_SCHEME="https",
108
  )
109
 
110
+ # ========= GLOBAL MODEL =========
111
  ENCODER_TOKENIZER = None
112
  ENCODER_MODEL = None
113
  LLM = None
 
118
  texts: List[str]
119
  embs: np.ndarray
120
 
121
+ # ========= TEKS UTILITAS =========
122
  STOPWORDS_ID = {
123
  "yang","dan","atau","pada","di","ke","dari","itu","ini","adalah","dengan",
124
  "untuk","serta","sebagai","oleh","dalam","akan","kamu","apa","karena",
 
126
  }
127
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
128
 
 
 
 
 
def tok_id(text: str) -> List[str]:
    """Tokenise ``text`` into lower-cased alphabetic words without stopwords.

    Words are whatever ``TOKEN_RE`` matches; each is lower-cased once and
    dropped when it is in ``STOPWORDS_ID``. ``None`` or an empty string
    yields an empty list.
    """
    tokens: List[str] = []
    for raw in TOKEN_RE.findall(text or ""):
        word = raw.lower()  # lower-case once, reuse for both the test and the result
        if word not in STOPWORDS_ID:
            tokens.append(word)
    return tokens
131
 
132
  def lexical_overlap(query: str, sent: str) -> float:
133
  q = set(tok_id(query)); s = set(tok_id(sent))
 
149
 
150
  def clean_prefix(t: str) -> str:
151
  t = (t or "").strip()
152
+ for _ in range(5):
153
  t2 = META_PREFIX_RE.sub("", t).lstrip()
154
  if t2 == t:
155
  break
 
165
 
166
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
167
 
168
+ def split_sentences(text: str) -> List[str]:
 
169
  outs = []
170
  for p in SENT_SPLIT_RE.split(text or ""):
171
  s = clean_prefix((p or "").strip())
 
177
  continue
178
  if INSTRUCTION_RE.search(s):
179
  continue
180
+ if len(s.strip()) < 10:
181
  continue
182
  outs.append(s)
183
  return outs
184
 
185
+ # ========= MODEL WARMUP (LAZY) =========
186
 
187
  def warmup_models():
188
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
 
191
  ENCODER_TOKENIZER = AutoTokenizer.from_pretrained(ENCODER_NAME)
192
  ENCODER_MODEL = AutoModel.from_pretrained(ENCODER_NAME).to(ENCODER_DEVICE).eval()
193
  if LLM is None:
194
+ log.info(f"[INIT] Load LLM: {MODEL_PATH}")
195
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
196
 
197
+ # ========= LOAD ASSETS PER-MAPEL =========
198
 
199
  @lru_cache(maxsize=8)
200
+ def load_subject_assets(subject_key: str) -> SubjectAssets:
201
  if subject_key not in SUBJECTS:
202
  raise ValueError(f"Unknown subject: {subject_key}")
203
  cfg = SUBJECTS[subject_key]
 
210
  raise FileNotFoundError(cfg["embeddings"])
211
  index = faiss.read_index(cfg["index"])
212
  with open(cfg["chunks"], "r", encoding="utf-8") as f:
213
+ texts = [it["text"] for it in json.load(f)]
214
+ embs = np.load(cfg["embeddings"]) # shape: (N, dim)
215
  if index.ntotal != len(embs):
216
  raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
217
  return SubjectAssets(index=index, texts=texts, embs=embs)
218
 
219
+ # ========= ENCODER & RETRIEVAL =========
220
 
@torch.inference_mode()
def encode_query_exact(text: str) -> np.ndarray:
    """Embed ``text`` with the globally loaded encoder and return a NumPy array.

    The tokenizer output is moved to ``ENCODER_DEVICE``, run through
    ``ENCODER_MODEL``, and ``last_hidden_state`` is averaged over dim 1.

    NOTE(review): presumably returns shape (1, dim), since callers pass it
    straight to ``index.search`` or flatten it — confirm.
    """
    toks = ENCODER_TOKENIZER(
        text, padding=True, truncation=True, return_tensors="pt"
    ).to(ENCODER_DEVICE)
    out = ENCODER_MODEL(**toks)
    # Simple (unmasked) mean pooling over dim 1 of the final hidden states.
    vec = out.last_hidden_state.mean(dim=1)
    return vec.cpu().numpy()
228
 
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of ``a`` and ``b`` as a Python float.

    Both inputs are flattened to 1-D first. The epsilon added to the product
    of norms keeps the result finite (0.0) when either vector is all zeros.
    """
    a = np.asarray(a).reshape(-1)
    b = np.asarray(b).reshape(-1)
    return float(np.dot(a, b) / ((np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12))
 
 
 
232
 
233
  def best_cosine_from_faiss(query: str, subject_key: str) -> float:
234
  assets = load_subject_assets(subject_key)
 
241
  best = max(best, cosine_sim(qv, assets.embs[i]))
242
  return best
243
 
def retrieve_rerank_cosine(query: str, subject_key: str) -> List[str]:
    """Return up to ``TOP_K_FINAL`` chunk texts, re-ranked by cosine similarity.

    The query is embedded once and the subject's FAISS index is searched for
    ``TOP_K_FAISS`` candidates. The candidates are then re-scored against the
    stored embeddings with ``cosine_sim`` and sorted best-first. Returns an
    empty list when the index yields no usable ids.
    """
    assets = load_subject_assets(subject_key)
    q = encode_query_exact(query)
    # Only the ids are needed; the scores are recomputed below.
    _, idx = assets.index.search(q, TOP_K_FAISS)
    # Drop ids outside the chunk list (negative or >= len(texts)).
    idxs = [i for i in idx[0] if 0 <= i < len(assets.texts)]
    if not idxs:
        return []
    qv = q.reshape(-1)
    scores = [cosine_sim(qv, assets.embs[i]) for i in idxs]
    pairs = sorted(zip(scores, idxs), reverse=True)
    top_texts = [assets.texts[i] for _, i in pairs[:TOP_K_FINAL]]
    log.info(f"[RETRIEVE] subject={subject_key} | top={len(top_texts)}")
    return top_texts
257
 
def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[str]:
    """Pick the ``top_k`` sentences from ``chunks`` that best match ``query``.

    Each sentence gets a hybrid score of 0.7 * cosine similarity to the query
    embedding plus 0.3 * lexical overlap, minus 0.1 when the sentence is
    shorter than 50 characters. Sentences scoring below ``MIN_HYBRID`` are
    dropped. Returns an empty list when ``chunks`` is empty.

    NOTE(review): every candidate sentence is run through the encoder, so the
    cost grows with the number of sentences across all chunks.
    """
    if not chunks:
        return []
    qv = encode_query_exact(query).reshape(-1)
    cands: List[Tuple[float, str]] = []
    for ch in chunks:
        for s in split_sentences(ch):
            sv = encode_query_exact(s).reshape(-1)
            cos = cosine_sim(qv, sv)
            ovl = lexical_overlap(query, s)
            penalty = 0.1 if len(s) < 50 else 0.0
            score = 0.7 * cos + 0.3 * ovl - penalty
            if score >= MIN_HYBRID:
                cands.append((score, s))
    cands.sort(key=lambda x: x[0], reverse=True)
    return [s for _, s in cands[:top_k]]
274
 
 
 
def build_prompt(user_query: str, sentences: List[str]) -> str:
    """Assemble the LLM prompt from the selected source sentences.

    The prompt consists of the system rules, the source sentences as a
    bulleted list (each passed through ``clean_prefix``), the user question,
    and a final instruction to answer inside ``<final>...</final>``.
    """
    block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
    system = (
        "Kamu asisten RAG.\n"
        "- Jawab HANYA berdasarkan daftar kalimat fakta di bawah.\n"
        f"- Jika tidak ada kalimat yang relevan, tulis persis: {FALLBACK_TEXT}\n"
        "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku.\n"
        "- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', 'mengacu', atau 'bersumber'.\n"
        "- Tulis jawaban final di dalam tag <final>... seperti: <final>Jawaban satu kalimat.</final>\n"
        "- Jangan menulis apa pun setelah </final>."
    )
    return (
        f"{system}\n\n"
        f"KALIMAT SUMBER:\n{block}\n\n"
        f"PERTANYAAN: {user_query}\n"
        f"TULIS JAWABAN DI DALAM <final>...</final> SAJA:"
    )
292
 
293
+ @lru_cache(maxsize=512)
294
  def validate_input_cached(q: str) -> bool:
295
  try:
296
  return validate_input(q)
 
375
  request.form.get("identity") or request.form.get("email") or request.form.get("username") or ""
376
  ).strip().lower()
377
  pw_input = (request.form.get("password") or "").strip()
378
+
379
  if not identity or not pw_input:
380
  flash("Mohon isi email/username dan password.", "error")
381
  return render_template("login.html"), 400
382
+
383
  s = db()
384
  try:
385
  user = (
 
391
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
392
  finally:
393
  s.close()
394
+
395
  if not ok:
396
  flash("Identitas atau password salah.", "error")
397
  return render_template("login.html"), 401
398
+
399
  session["logged_in"] = True
400
  session["user_id"] = user.id
401
  session["username"] = user.username
402
  session["is_admin"] = bool(user.is_admin)
403
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
404
  return redirect(url_for("subjects"))
405
+
406
  return render_template("login.html")
407
 
408
  @app.route("/whoami")
 
421
  email = (request.form.get("email") or "").strip().lower()
422
  pw = (request.form.get("password") or "").strip()
423
  confirm = (request.form.get("confirm") or "").strip()
424
+
425
  if not username or not email or not pw:
426
  flash("Semua field wajib diisi.", "error")
427
  return render_template("register.html"), 400
 
431
  if pw != confirm:
432
  flash("Konfirmasi password tidak cocok.", "error")
433
  return render_template("register.html"), 400
434
+
435
  s = db()
436
  try:
437
  existed = (
 
446
  s.add(u); s.commit()
447
  finally:
448
  s.close()
449
+
450
  flash("Registrasi berhasil. Silakan login.", "success")
451
  return redirect(url_for("auth_login"))
452
+
453
  return render_template("register.html")
454
 
455
  @app.route("/auth/logout")
 
474
  return redirect(url_for("subjects"))
475
  session["subject_selected"] = subject_key
476
  label = SUBJECTS[subject_key]["label"]
477
+
478
  s = db()
479
  try:
480
  uid = session.get("user_id")
 
487
  history = [{"role": r.role, "message": r.message} for r in rows]
488
  finally:
489
  s.close()
490
+
491
  return render_template("chat.html", subject=subject_key, subject_label=label, history=history)
492
 
493
  @app.route("/health")
 
498
  "llm_loaded": LLM is not None,
499
  "model_path": MODEL_PATH,
500
  "ctx_window": CTX_WINDOW,
 
501
  })
502
 
503
  @app.route("/ask/<subject_key>", methods=["POST"])
 
505
  def ask(subject_key: str):
506
  if subject_key not in SUBJECTS:
507
  return jsonify({"ok": False, "error": "invalid subject"}), 400
508
+
509
+ # pastikan model siap saat request (lazy)
510
  warmup_models()
511
  t0 = time.perf_counter()
512
 
 
514
  query = (data.get("message") or "").strip()
515
  if not query:
516
  return jsonify({"ok": False, "error": "empty query"}), 400
517
+
518
  if not validate_input_cached(query):
519
  return jsonify({"ok": True, "answer": GUARDRAIL_BLOCK_TEXT})
520
 
 
529
  if best < MIN_COSINE:
530
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
531
 
532
+ chunks = retrieve_rerank_cosine(query, subject_key)
533
  if not chunks:
534
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
535
 
536
+ sentences = pick_best_sentences(query, chunks, top_k=5)
537
  if not sentences:
538
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
539
 
540
  prompt = build_prompt(query, sentences)
541
 
542
  try:
543
+ # === 1st pass (deterministik) ===
544
  raw_answer = generate(
545
  LLM,
546
  prompt,
 
552
  raw_answer = raw_answer.strip()
553
  log.info(f"[LLM] Raw answer repr (pass1): {repr(raw_answer)}")
554
 
555
+ # Bersihkan blok <think> dan ambil isi <final>
556
  text = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
557
  text = re.sub(r"</?think\b[^>]*>", "", text, flags=re.IGNORECASE).strip()
558
  m_final = re.search(r"<final>\s*(.+)$", text, flags=re.IGNORECASE | re.DOTALL)
559
  cleaned = (m_final.group(1).strip() if m_final else re.sub(r"<[^>]+>", "", text).strip())
560
 
def _alpha_tokens(s: str) -> List[str]:
    """Return the runs of alphabetic (Latin, incl. accented) characters in ``s``."""
    return re.findall(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", s or "")


def _is_bad(s: str) -> bool:
    """Return True when ``s`` is not an acceptable answer and a retry is needed.

    Empty text and bare dot/ellipsis placeholders are rejected. Otherwise the
    answer passes with at least 4 alphabetic tokens, or with at least 3 when
    one of them is a common unit/term (newton, n, kg, m, s).
    """
    s2 = (s or "").strip()
    if not s2:
        return True
    # Reject answers that are only a placeholder/ellipsis.
    if s2 in {"...", ".", "..", "…"}:
        return True
    toks = _alpha_tokens(s2)
    # Four alphabetic tokens are enough to pass (tolerant of short answers).
    if len(toks) >= 4:
        return False
    # Exception: a short fact that contains a common unit/term still passes.
    if any(t.lower() in {"newton", "n", "kg", "m", "s"} for t in toks) and len(toks) >= 3:
        return False
    return True
579
+
580
+ # Retry hanya jika PASS-1 benar-benar buruk
581
  if _is_bad(cleaned):
582
  prompt_retry = (
583
  prompt
584
+ + "
585
+
586
+ ULANGI DENGAN TAAT FORMAT: Tulis satu kalimat faktual tanpa placeholder/ellipsis, mulai huruf kapital dan akhiri titik. Tulis hanya di dalam <final>...</final>."
587
  )
588
  raw_answer2 = generate(
589
  LLM,
 
595
  ) or ""
596
  raw_answer2 = raw_answer2.strip()
597
  log.info(f"[LLM] Raw answer repr (pass2): {repr(raw_answer2)}")
598
+
599
  text2 = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer2, flags=re.DOTALL | re.IGNORECASE).strip()
600
  text2 = re.sub(r"</?think\b[^>]*>", "", text2, flags=re.IGNORECASE).strip()
601
  m_final2 = re.search(r"<final>\s*(.+)$", text2, flags=re.IGNORECASE | re.DOTALL)
 
608
  log.exception(f"[LLM] generate error: {e}")
609
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
610
 
611
+ # Ambil 1 kalimat pertama (jika model mengeluarkan beberapa kalimat)
612
  m = re.search(r"(.+?[.!?])(\s|$)", answer)
613
  answer = (m.group(1) if m else answer).strip()
614
  answer = strip_meta_sentence(answer)
615
 
616
+ # === Simpan ke history ===
617
  try:
618
  s = db()
619
  uid = session.get("user_id")
 
642
 
643
  return jsonify({"ok": True, "answer": answer})
644
 
645
+ # ===== Admin views & delete actions (tetap) =====
646
+
647
  @app.route("/admin")
648
  @admin_required
649
  def admin_dashboard():
 
655
  total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
656
  finally:
657
  s.close()
658
+ return render_template(
659
+ "admin_dashboard.html",
660
+ total_users=total_users,
661
+ total_active=total_active,
662
+ total_admins=total_admins,
663
+ total_msgs=total_msgs,
664
+ )
665
 
666
  @app.route("/admin/users")
667
  @admin_required
 
669
  q = (request.args.get("q") or "").strip().lower()
670
  page = max(int(request.args.get("page", 1)), 1)
671
  per_page = min(max(int(request.args.get("per_page", 20)), 5), 100)
672
+
673
  s = db()
674
  try:
675
  base = s.query(User)
676
  if q:
677
+ base = base.filter(
678
+ or_(
679
+ func.lower(User.username).like(f"%{q}%"),
680
+ func.lower(User.email).like(f"%{q}%"),
681
+ )
682
+ )
683
  total = base.count()
684
+ users = (
685
+ base.order_by(User.id.asc())
686
+ .offset((page - 1) * per_page)
687
+ .limit(per_page)
688
+ .all()
689
+ )
690
  user_ids = [u.id for u in users] or [-1]
691
+ counts = dict(
692
+ s.query(ChatHistory.user_id, func.count(ChatHistory.id))
693
+ .filter(ChatHistory.user_id.in_(user_ids))
694
+ .group_by(ChatHistory.user_id)
695
+ .all()
696
+ )
697
  finally:
698
  s.close()
699
+
700
  return render_template("admin_users.html", users=users, counts=counts, q=q, page=page, per_page=per_page, total=total)
701
 
702
  @app.route("/admin/history")
 
706
  username = (request.args.get("username") or "").strip().lower()
707
  subject = (request.args.get("subject") or "").strip().lower()
708
  role = (request.args.get("role") or "").strip().lower()
709
+
710
  page = max(int(request.args.get("page", 1)), 1)
711
  per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
712
+
713
  s = db()
714
  try:
715
  base = (s.query(ChatHistory, User).join(User, User.id == ChatHistory.user_id))
716
  if q:
717
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
718
  if username:
719
+ base = base.filter(
720
+ or_(
721
+ func.lower(User.username) == username,
722
+ func.lower(User.email) == username,
723
+ )
724
+ )
725
  if subject:
726
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
727
  if role in ("user", "bot"):
728
  base = base.filter(ChatHistory.role == role)
729
+
730
  total = base.count()
731
+ rows = (
732
+ base.order_by(ChatHistory.id.desc())
733
+ .offset((page - 1) * per_page)
734
+ .limit(per_page)
735
+ .all()
736
+ )
737
  finally:
738
  s.close()
739
+
740
  items = [{
741
  "id": r.ChatHistory.id,
742
  "username": r.User.username,
 
746
  "message": r.ChatHistory.message,
747
  "timestamp": r.ChatHistory.timestamp,
748
  } for r in rows]
749
+
750
+ return render_template(
751
+ "admin_history.html",
752
+ items=items,
753
+ subjects=SUBJECTS,
754
+ q=q,
755
+ username=username,
756
+ subject=subject,
757
+ role=role,
758
+ page=page,
759
+ per_page=per_page,
760
+ total=total,
761
+ )
762
+
763
 
764
  def _is_last_admin(s: Session) -> bool:
765
  return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1