gekina committed on
Commit
a6f330a
·
verified ·
1 Parent(s): a3f643c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -23
app.py CHANGED
@@ -6,14 +6,22 @@ import numpy as np
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import InferenceClient, hf_hub_download
8
 
 
9
  # 1. KONFIGURASI
 
 
 
10
  hf_token = os.getenv("HF_TOKEN")
11
- REPO_ID_DATASET = "gekina/medical_dataset" # <--- ID DATASET ANDA
 
 
12
  client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=hf_token)
13
 
 
14
  # 2. LOAD DATA
15
- print("⬇️ Mendownload data...")
16
  try:
 
17
  path_data = hf_hub_download(repo_id=REPO_ID_DATASET, filename="chunks_data.parquet", repo_type="dataset", token=hf_token)
18
  path_index = hf_hub_download(repo_id=REPO_ID_DATASET, filename="alodokter_index.faiss", repo_type="dataset", token=hf_token)
19
 
@@ -26,40 +34,87 @@ except Exception as e:
26
  df_chunks = pd.DataFrame()
27
  index = None
28
 
29
- # 3. LOGIKA SEARCH
30
def cari(query, k=3):
    """Return the chunk texts of the nearest indexed chunks for *query*.

    The query is prefixed with ``"query: "``, embedded with the module-level
    ``embedder`` and looked up in the module-level ``index``. At most one
    chunk per ``parent_id`` is kept, in the order the index returned them.

    Args:
        query: Free-text question from the user.
        k: Number of neighbours requested from the index (default 3, the
            value that used to be hard-coded).

    Returns:
        A list of ``chunk_text`` values; empty when the index is not loaded
        or when the lookup fails.
    """
    # The loader leaves ``index`` as None when the download/load step failed.
    if index is None:
        return []
    try:
        q_emb = embedder.encode([f"query: {query}"], normalize_embeddings=True)
        _, neighbor_ids = index.search(q_emb, k)

        total = len(df_chunks)
        results = []
        seen = set()
        for idx in neighbor_ids[0]:
            # Skip ids that do not address a row of df_chunks
            # (presumably -1 padding when fewer than k hits exist -- confirm).
            if idx < 0 or idx >= total:
                continue
            row = df_chunks.iloc[idx]
            parent = row['parent_id']
            if parent not in seen:
                seen.add(parent)
                results.append(row['chunk_text'])
        return results
    except Exception as e:
        # Best-effort search: report the failure instead of hiding it, but
        # still degrade to "no documents" so the chat keeps working.
        print(f"⚠️ Pencarian gagal: {e}")
        return []
45
 
46
- # 4. LOGIKA CHAT
47
def respon(message, history):
    """Stream an answer to *message* grounded in the retrieved chunks.

    Generator used as the chat callback: it yields the accumulated answer
    text each time the model streams a new non-empty piece.

    Args:
        message: The user's question.
        history: Previous turns supplied by the chat UI; not used here.

    Yields:
        The answer text accumulated so far.
    """
    docs = cari(message)
    # Fall back to an explicit "no data" marker so the prompt stays well-formed.
    context = "\n\n".join(docs) if docs else "Tidak ada data."

    sys_prompt = "Anda Asisten Medis. Jawab berdasarkan KONTEKS. Jangan resepkan obat keras."
    prompt = f"KONTEKS:\n{context}\n\nPERTANYAAN:\n{message}"

    msg = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": prompt},
    ]

    partial = ""
    for chunk in client.chat_completion(msg, max_tokens=1024, stream=True):
        # A streamed chunk may carry no choices; indexing [0] would raise.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            partial += piece
            yield partial
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- # 5. UI
63
# Chat UI: every user message is handled by respon().
demo = gr.ChatInterface(
    fn=respon,
    theme="soft",
    title="🏥 Chatbot Medis (RAG)",
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import InferenceClient, hf_hub_download
8
 
9
+ # =======================================================
10
  # 1. KONFIGURASI
11
+ # =======================================================
12
+ print("⏳ Memulai Asisten Kesehatan (Mode Strict)...")
13
+
14
  hf_token = os.getenv("HF_TOKEN")
15
+ REPO_ID_DATASET = "gekina/medical_dataset"
16
+
17
+ # Setup Client LLM
18
  client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct", token=hf_token)
19
 
20
+ # =======================================================
21
  # 2. LOAD DATA
22
+ # =======================================================
23
  try:
24
+ print(f"⬇️ Download data dari {REPO_ID_DATASET}...")
25
  path_data = hf_hub_download(repo_id=REPO_ID_DATASET, filename="chunks_data.parquet", repo_type="dataset", token=hf_token)
26
  path_index = hf_hub_download(repo_id=REPO_ID_DATASET, filename="alodokter_index.faiss", repo_type="dataset", token=hf_token)
27
 
 
34
  df_chunks = pd.DataFrame()
35
  index = None
36
 
37
+ # =======================================================
38
+ # 3. LOGIKA PENCARIAN
39
+ # =======================================================
40
def cari_dokumen(query, k=4):
    """Return the chunk texts of the nearest indexed chunks for *query*.

    The query is prefixed with ``"query: "``, embedded with the module-level
    ``embedder`` and looked up in the module-level ``index``. At most one
    chunk per ``parent_id`` is kept, in the order the index returned them.

    Args:
        query: Free-text question from the user.
        k: Number of neighbours requested from the index.

    Returns:
        A list of ``chunk_text`` values; empty when the index is not loaded
        or when the lookup fails.
    """
    # The loader leaves ``index`` as None when the download/load step failed.
    if index is None:
        return []
    try:
        q_emb = embedder.encode([f"query: {query}"], normalize_embeddings=True)
        _, neighbor_ids = index.search(q_emb, k)

        total = len(df_chunks)
        results = []
        seen = set()
        for idx in neighbor_ids[0]:
            # Skip ids that do not address a row of df_chunks
            # (presumably -1 padding when fewer than k hits exist -- confirm).
            if idx < 0 or idx >= total:
                continue
            row = df_chunks.iloc[idx]
            p_id = row['parent_id']
            if p_id not in seen:
                seen.add(p_id)
                results.append(row['chunk_text'])
        return results
    except Exception as e:
        # Best-effort search: report the failure instead of hiding it, but
        # still degrade to "no documents" so the chat keeps working.
        print(f"⚠️ Pencarian dokumen gagal: {e}")
        return []
57
 
58
+ # =======================================================
59
+ # 4. LOGIKA STRICT RAG (MODIFIKASI UTAMA)
60
+ # =======================================================
61
def respon_bot(message, history):
    """Stream a strictly context-grounded answer to *message*.

    Generator used as the chat callback. It retrieves chunks with
    ``cari_dokumen``; when nothing is retrieved it yields a fixed refusal
    and stops without calling the model. Otherwise it sends the chunks and
    the question to the model and yields the accumulated answer text each
    time a new non-empty piece arrives.

    Args:
        message: The user's question.
        history: Previous turns supplied by the chat UI; not used here.

    Yields:
        The refusal message, the answer text accumulated so far, or an
        ``"Error: ..."`` string if the model call raises.
    """
    # A. Retrieve supporting documents.
    docs = cari_dokumen(message)

    # [FILTER 1] Nothing retrieved at all: refuse without calling the model.
    if not docs:
        yield "Mohon maaf, informasi tersebut tidak tersedia di dalam dataset database kami."
        return

    context_str = "\n\n".join(docs)

    # B. System prompt (very strict). Built from explicit pieces so the text
    # sent to the model does not depend on how this source is indented.
    system_prompt = (
        "Anda adalah Asisten Penjawab Data.\n"
        "\n"
        "ATURAN MUTLAK (STRICT MODE):\n"
        "1. Anda HANYA boleh menjawab berdasarkan informasi yang tertulis di 'KONTEKS DATA' di bawah.\n"
        "2. DILARANG menggunakan pengetahuan internal/umum Anda sendiri. Lupakan bahwa Anda adalah AI yang tahu segalanya.\n"
        '3. JIKA JAWABAN TIDAK ADA DI KONTEKS: Katakan "Mohon maaf, informasi spesifik mengenai hal ini tidak ditemukan dalam dataset kami." Jangan mencoba menjawab atau mengarang.\n'
        '4. JANGAN pernah menyebutkan "Berdasarkan konteks...", langsung saja berikan jawabannya.\n'
        "5. Gunakan Bahasa Indonesia yang baik."
    )

    prompt_final = f"KONTEKS DATA:\n{context_str}\n\nPERTANYAAN:\n{message}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt_final},
    ]

    try:
        # [SETTING 2] Low temperature to keep the model from improvising.
        stream = client.chat_completion(
            messages,
            max_tokens=1024,
            stream=True,
            temperature=0.1,
            top_p=0.9,
        )

        partial_text = ""
        for chunk in stream:
            # A streamed chunk may carry no choices; indexing [0] would raise
            # and replace the partial answer with an error message.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                partial_text += piece
                yield partial_text

    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        yield f"Error: {e}"
107
+
108
+ # =======================================================
109
+ # 5. UI GRADIO
110
+ # =======================================================
111
# Chat UI: every user message is handled by respon_bot().
demo = gr.ChatInterface(
    fn=respon_bot,
    theme="soft",
    title="📚 Chatbot Database Medis (Strict)",
    description="Bot ini **hanya** menjawab jika datanya ada di dataset. Jika tidak ada, bot akan menolak menjawab.",
    # Sample questions offered by the interface.
    examples=["Apa obat sakit kepala?", "Cara mengatasi demam"],
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()