irhamni committed on
Commit 0e42f0d · verified · 1 Parent(s): 2167067

Update app.py

Files changed (1)
  1. app.py +62 -93
app.py CHANGED
@@ -1,4 +1,4 @@
- # app.py — RAG + Deterministic QA + Local LLM (CPU-friendly)
  import os, re, json, pickle, hashlib
  from pathlib import Path
  import gradio as gr
@@ -6,23 +6,20 @@ import numpy as np
  from sklearn.neighbors import NearestNeighbors
  from sentence_transformers import SentenceTransformer

- # =================== Configuration ===================
  DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
- CACHE_EMB = Path("embeddings.pkl")
- CACHE_META = Path("meta.json")
-
- EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
- LOCAL_MODEL = os.getenv("LOCAL_MODEL", "microsoft/phi-2")  # runs on CPU
-
- TOP_K_DEFAULT = int(os.getenv("TOP_K_DEFAULT", "4"))
- TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE_DEFAULT", "0.2"))
- MAX_TOKENS = int(os.getenv("MAX_TOKENS", "256"))
- SCORE_THRESHOLD = float(os.getenv("SCORE_THRESHOLD", "0.60"))  # 0..1

  SYSTEM_PROMPT = (
      "You are an Indonesian librarian assistant. Jawab singkat, akurat, dan sopan. "
-     "Jawab HANYA berdasarkan konteks yang diberikan. "
-     "Jika tidak ada jawabannya di konteks, balas persis: Data tidak tersedia."
  )

  # =================== Utilities ===================
@@ -42,8 +39,7 @@ def load_jsonl(path: Path):
      rows = []
      with path.open("r", encoding="utf-8") as f:
          for line in f:
-             line = line.strip()
-             if not line: continue
              obj = json.loads(line)
              q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
              a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
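For reference, load_jsonl expects one JSON object per line, keyed question/answer or the aliases pertanyaan/jawaban and q/a shown above. A minimal illustrative file (made-up records, not the shipped dataset):

    {"question": "Apa itu IPLM?", "answer": "<jawaban singkat>"}
    {"pertanyaan": "Bagaimana menghitung IPLM?", "jawaban": "<jawaban singkat>"}
    {"q": "Apa saja dimensi IPLM?", "a": "<jawaban singkat>"}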
@@ -57,29 +53,31 @@ def load_jsonl(path: Path):
          seen.add(r["question"]); uniq.append(r)
      return uniq

- # =================== Retriever (k-NN over question embeddings) ===================
  class FAQIndex:
      def __init__(self):
          self.rows=None; self.model=None; self.emb=None; self.nn=None

      def build(self, rows, force=False):
          self.rows = rows
-         if not force and CACHE_EMB.exists() and CACHE_META.exists():
              try:
-                 meta = json.loads(CACHE_META.read_text(encoding="utf-8"))
                  if meta.get("hash")==dataset_hash(rows) and meta.get("emb_model")==EMB_MODEL:
-                     cached = pickle.loads(CACHE_EMB.read_bytes())
                      self.emb, self.nn = cached["emb"], cached["nn"]
                      if self.model is None: self.model = SentenceTransformer(EMB_MODEL)
                      return
              except Exception:
                  pass
          self.model = SentenceTransformer(EMB_MODEL)
-         qs = [r["question"] for r in rows]  # embed the questions only
          self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
          self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
-         CACHE_EMB.write_bytes(pickle.dumps({"emb": self.emb, "nn": self.nn}))
-         CACHE_META.write_text(json.dumps({"hash": dataset_hash(rows), "emb_model": EMB_MODEL}, ensure_ascii=False))

      def retrieve(self, query: str, top_k: int):
          if not query.strip(): return []
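dataset_hash is called here but defined outside the diff context. A minimal sketch of a compatible implementation (an assumption, not the file's actual code): hash the Q&A rows so that any edit to the dataset invalidates the cached embeddings; json and hashlib are already imported at the top of app.py.

    def dataset_hash(rows):
        # Hypothetical: stable digest over all Q&A pairs; the meta.json
        # comparison above rejects the cache whenever this value changes.
        blob = json.dumps([[r["question"], r["answer"]] for r in rows], ensure_ascii=False)
        return hashlib.sha256(blob.encode("utf-8")).hexdigest()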
@@ -92,9 +90,9 @@ class FAQIndex:
              out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
          return out

- # =================== Local LLM (transformers pipeline on CPU) ===================
  _local_pipe = None
- def call_local(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
      global _local_pipe
      try:
          if _local_pipe is None:
@@ -102,41 +100,33 @@ def call_local(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
              from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
              tok = AutoTokenizer.from_pretrained(LOCAL_MODEL)
              model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, torch_dtype=torch.float32)
-             _local_pipe = pipeline(
-                 "text-generation",
-                 model=model,
-                 tokenizer=tok,
-                 device=-1,  # CPU
-             )
-         outs = _local_pipe(
-             prompt,
-             do_sample=True,
-             temperature=float(temperature),
-             max_new_tokens=int(max_tokens),
-             return_full_text=False,
-         )
          if isinstance(outs, list) and outs and "generated_text" in outs[0]:
              return outs[0]["generated_text"]
          return str(outs)
      except Exception as e:
          return f"❌ Gagal menjalankan model lokal: {e}"

- # =================== RAG Orchestrator (with deterministic QA) ===================
  def build_context(hits):
-     return "\n\n".join([f"[DOC {i} | {h['score']:.2f}] {h['answer']}" for i, h in enumerate(hits, 1)])

- def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT, threshold=SCORE_THRESHOLD):
-     hits = faq.retrieve(user_msg, top_k=int(top_k))
      if not hits:
-         return "Maaf, saya tidak menemukan referensi di basis pengetahuan Anda."
-     top = hits[0]

-     # 1) If confident → use the source answer directly (no LLM)
-     if top["score"] >= float(threshold):
-         bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
-         return f"{top['answer']}\n\n**Sumber terdekat (deterministik):**\n{bullets}"

-     # 2) Less confident → ask the LLM to summarize the retrieved answers
      context = build_context(hits)
      prompt = (
          f"SISTEM: {SYSTEM_PROMPT}\n\n"
@@ -145,52 +135,31 @@ def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT, threshold=SCORE_THRESHOLD):
          "Instruksi: Jawab singkat dan HANYA berdasarkan KONTEKS di atas. "
          "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
      )
-     llm_out = call_local(prompt, temperature=float(temperature), max_tokens=MAX_TOKENS)
-     bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
-     return f"{llm_out}\n\n**Sumber terdekat (lokal):**\n{bullets}"

- # =================== Load & Upload ===================
  faq = FAQIndex()
- rows = load_jsonl(DATA_PATH)
- faq.build(rows, force=False)
-
- def upload_jsonl(file_obj):
-     if file_obj is None: return gr.update(value="Tidak ada file.")
-     Path(file_obj.name).replace(DATA_PATH)
-     if CACHE_EMB.exists(): CACHE_EMB.unlink()
-     if CACHE_META.exists(): CACHE_META.unlink()
-     global rows, faq
-     rows = load_jsonl(DATA_PATH)
-     faq = FAQIndex(); faq.build(rows, force=True)
-     return f"✅ Basis pengetahuan diperbarui. Total Q&A: {len(rows)}."
-
- # =================== UI ===================
- with gr.Blocks(title="RAG + LLM — Local Model") as demo:
-     gr.Markdown("## 📚 RAG + LLM — Local Model\nMasukkan pertanyaan → retrieve Q&A → model lokal merangkum bila perlu.")
-     with gr.Row():
-         with gr.Column(scale=2):
-             gr.ChatInterface(
-                 fn=lambda msg, hist, k, t, th: rag_answer(msg, top_k=int(k), temperature=float(t), threshold=float(th)),
-                 additional_inputs=[
-                     gr.Slider(1, 10, value=TOP_K_DEFAULT, step=1, label="Top-K dokumen"),
-                     gr.Slider(0.0, 1.0, value=TEMPERATURE_DEFAULT, step=0.05, label="Temperatur"),
-                     gr.Slider(0.0, 1.0, value=SCORE_THRESHOLD, step=0.01, label="Ambil langsung jika skor ≥"),
-                 ],
-                 title="Asisten Perpustakaan (RAG)",
-                 description="Jawab *berdasarkan konteks* dari dokumen JSONL Anda.",
-                 examples=[
-                     ["Apa itu IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
-                     ["Bagaimana menghitung IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
-                     ["Apa saja dimensi IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
-                 ],
-                 cache_examples=False,
-             )
-         with gr.Column(scale=1):
-             gr.Markdown("### 🔄 Perbarui Basis Data")
-             uploader = gr.File(label="Upload JSONL Q&A (keys: question, answer)")
-             status = gr.Textbox(label="Status", interactive=False)
-             uploader.change(fn=upload_jsonl, inputs=uploader, outputs=status)
-             gr.Markdown("_Model berjalan lokal (CPU). Anda dapat mengganti `LOCAL_MODEL` via Settings → Variables._")

  if __name__ == "__main__":
      demo.launch()
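As a smoke test of this pre-change entry point, the routing can be exercised directly (a sketch, assuming IPLM_QnA_Chatbot.jsonl is present; not part of the commit):

    # A top score at or above the threshold returns the stored answer verbatim;
    # anything lower routes through call_local() over the retrieved context.
    print(rag_answer("Apa itu IPLM?", top_k=4, temperature=0.2, threshold=0.60))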
 
+ # app.py — IPLM Chatbot (simple GPT-style UI)
  import os, re, json, pickle, hashlib
  from pathlib import Path
  import gradio as gr
  import numpy as np
  from sklearn.neighbors import NearestNeighbors
  from sentence_transformers import SentenceTransformer

+ # =================== Configuration via ENV ===================
  DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
+ EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+ LOCAL_MODEL= os.getenv("LOCAL_MODEL", "microsoft/phi-2")  # local model (CPU)
+ TOP_K = int(os.getenv("TOP_K", "4"))
+ TEMPERATURE= float(os.getenv("TEMPERATURE", "0.2"))
+ MAX_TOKENS = int(os.getenv("MAX_TOKENS", "256"))
+ THRESHOLD = float(os.getenv("THRESHOLD", "0.60"))  # use the stored answer directly if score >= threshold
+ SHOW_SOURCES = os.getenv("SHOW_SOURCES", "false").lower() == "true"  # set true to display the nearest sources
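On a Hugging Face Space these knobs map to Settings → Variables; for a local run, a sketch of overriding them (example values only). Note the overrides must happen before app.py is imported, since the module reads os.getenv at import time:

    import os
    os.environ["THRESHOLD"] = "0.75"      # raise the bar for the deterministic path
    os.environ["SHOW_SOURCES"] = "true"   # append the nearest-question bullets
    os.environ["TOP_K"] = "6"             # retrieve more candidates
    import app                            # config is frozen from here on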

  SYSTEM_PROMPT = (
      "You are an Indonesian librarian assistant. Jawab singkat, akurat, dan sopan. "
+     "Jawab HANYA berdasarkan konteks yang diberikan. Jika tidak ada jawabannya, "
+     "balas persis: Data tidak tersedia."
  )

  # =================== Utilities ===================

39
  rows = []
40
  with path.open("r", encoding="utf-8") as f:
41
  for line in f:
42
+ if not line.strip(): continue
 
43
  obj = json.loads(line)
44
  q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
45
  a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
 
53
  seen.add(r["question"]); uniq.append(r)
54
  return uniq
55
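The elided middle of load_jsonl normalizes each row and deduplicates by question text via the `seen` set above; the result is a plain list of dicts. Illustrative shape only:

    rows = load_jsonl(Path("IPLM_QnA_Chatbot.jsonl"))
    # e.g. [{"question": "Apa itu IPLM?", "answer": "..."}, ...], duplicate questions dropped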
 
+ # =================== Retriever ===================
  class FAQIndex:
      def __init__(self):
          self.rows=None; self.model=None; self.emb=None; self.nn=None

      def build(self, rows, force=False):
+         cache_emb = Path("embeddings.pkl")
+         cache_meta = Path("meta.json")
          self.rows = rows
+         if not force and cache_emb.exists() and cache_meta.exists():
              try:
+                 meta = json.loads(cache_meta.read_text(encoding="utf-8"))
                  if meta.get("hash")==dataset_hash(rows) and meta.get("emb_model")==EMB_MODEL:
+                     cached = pickle.loads(cache_emb.read_bytes())
                      self.emb, self.nn = cached["emb"], cached["nn"]
                      if self.model is None: self.model = SentenceTransformer(EMB_MODEL)
                      return
              except Exception:
                  pass
          self.model = SentenceTransformer(EMB_MODEL)
+         qs = [r["question"] for r in rows]
          self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
          self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
+         cache_emb.write_bytes(pickle.dumps({"emb": self.emb, "nn": self.nn}))
+         cache_meta.write_text(json.dumps({"hash": dataset_hash(rows), "emb_model": EMB_MODEL}, ensure_ascii=False))

      def retrieve(self, query: str, top_k: int):
          if not query.strip(): return []

              out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
          return out
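The middle of retrieve is collapsed in this diff. A sketch of the likely k-NN step, assuming `sim` recovers cosine similarity from the cosine distance sklearn returns (for L2-normalized embeddings, similarity = 1 - distance); it feeds the out.append shown above:

    q_emb = self.model.encode([query], normalize_embeddings=True, convert_to_numpy=True)
    dist, idx = self.nn.kneighbors(q_emb, n_neighbors=min(int(top_k), len(self.rows)))
    out = []
    for d, i in zip(dist[0], idx[0]):
        sim = 1.0 - float(d)   # cosine distance -> cosine similarity
        r = self.rows[int(i)]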
 
+ # =================== Local LLM (CPU) ===================
  _local_pipe = None
+ def generate_with_local(prompt: str, temperature=TEMPERATURE, max_tokens=MAX_TOKENS):
      global _local_pipe
      try:
          if _local_pipe is None:
              import torch
              from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
              tok = AutoTokenizer.from_pretrained(LOCAL_MODEL)
              model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, torch_dtype=torch.float32)
+             _local_pipe = pipeline("text-generation", model=model, tokenizer=tok, device=-1)  # CPU
+         outs = _local_pipe(prompt, do_sample=True, temperature=float(temperature),
+                            max_new_tokens=int(max_tokens), return_full_text=False)
          if isinstance(outs, list) and outs and "generated_text" in outs[0]:
              return outs[0]["generated_text"]
          return str(outs)
      except Exception as e:
          return f"❌ Gagal menjalankan model lokal: {e}"
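A quick sanity check of the generator (a sketch; the prompt is arbitrary, and the first call downloads microsoft/phi-2, which is slow and memory-hungry on CPU):

    out = generate_with_local("PERTANYAAN: Apa itu IPLM?\nJAWABAN:", temperature=0.2, max_tokens=64)
    print(out)  # on failure this is the error string, since the except clause returns rather than raises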
 
+ # =================== RAG (deterministic → generative when needed) ===================
  def build_context(hits):
+     return "\n\n".join([f"[DOC {i}] {h['answer']}" for i, h in enumerate(hits, 1)])

+ def answer_query(user_msg: str) -> str:
+     hits = faq.retrieve(user_msg, top_k=TOP_K)
      if not hits:
+         return "Data tidak tersedia."

+     # Deterministic: if confident → use the source answer directly
+     if hits[0]["score"] >= THRESHOLD:
+         result = hits[0]['answer']
+         if SHOW_SOURCES:
+             bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
+             result += f"\n\n**Sumber terdekat:**\n{bullets}"
+         return result

+     # Less confident → summarize with the local LLM
      context = build_context(hits)
      prompt = (
          f"SISTEM: {SYSTEM_PROMPT}\n\n"

          "Instruksi: Jawab singkat dan HANYA berdasarkan KONTEKS di atas. "
          "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
      )
+     result = generate_with_local(prompt, temperature=TEMPERATURE, max_tokens=MAX_TOKENS)
+     if SHOW_SOURCES:
+         bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
+         result += f"\n\n**Sumber terdekat (lokal):**\n{bullets}"
+     return result
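With THRESHOLD at its 0.60 default, the routing in answer_query plays out like this (scores are illustrative):

    # top score 0.83 >= 0.60 -> stored answer returned verbatim, no LLM call
    # top score 0.41 <  0.60 -> build_context(hits) + prompt -> generate_with_local(...)
    print(answer_query("Apa itu IPLM?"))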
 
+ # =================== Load data & index ===================
  faq = FAQIndex()
+ _rows = load_jsonl(DATA_PATH)
+ faq.build(_rows, force=False)
+
+ # =================== Minimal UI ===================
+ def chat_fn(message, history):
+     return answer_query(message)
+
+ with gr.Blocks(title="IPLM Chatbot") as demo:
+     gr.Markdown("### 📚 IPLM Chatbot\nTanya apa saja tentang **IPLM**. (UI sengaja disederhanakan)")
+     gr.ChatInterface(
+         fn=chat_fn,
+         title="",
+         description="",
+         examples=["Apa itu IPLM?", "Bagaimana menghitung IPLM?", "Apa saja dimensi IPLM?"],
+         cache_examples=False,
+         autofocus=True,
+     )

  if __name__ == "__main__":
      demo.launch()
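demo.launch() binds to localhost by default; for a container or remote host, a common variant (a deployment assumption, not part of this commit):

    if __name__ == "__main__":
        demo.launch(server_name="0.0.0.0", server_port=7860)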