irhamni committed on
Commit
193eeb6
·
verified ·
1 Parent(s): a41fc66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -68
app.py CHANGED
@@ -1,37 +1,36 @@
1
- # app.py — RAG + Local LLM (TinyLlama) for Hugging Face Spaces (CPU)
2
- import os, re, json, pickle, hashlib, time
3
  from pathlib import Path
4
- import requests # still used for safety, but not calling API now
5
  import gradio as gr
6
  import numpy as np
7
  from sklearn.neighbors import NearestNeighbors
8
  from sentence_transformers import SentenceTransformer
9
 
10
- # ===== Config =====
11
- DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
12
- CACHE_EMB = Path("embeddings.pkl")
13
  CACHE_META = Path("meta.json")
14
 
15
- EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
16
- GEN_MODEL = os.getenv("GEN_MODEL", "TinyLlama/TinyLlama-1.1B-Chat-v1.0") # local small model
17
 
18
  TOP_K_DEFAULT = int(os.getenv("TOP_K_DEFAULT", "4"))
19
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE_DEFAULT", "0.2"))
20
  MAX_TOKENS = int(os.getenv("MAX_TOKENS", "256"))
 
21
 
22
  SYSTEM_PROMPT = (
23
  "You are an Indonesian librarian assistant. Jawab singkat, akurat, dan sopan. "
24
- "Gunakan HANYA informasi dari konteks yang diberikan. "
25
- "Jika konteks tidak memuat jawabannya, balas persis: Data tidak tersedia."
26
  )
27
 
28
- # ===== Utils =====
29
  def norm(s: str) -> str:
30
  if s is None: return ""
31
  return re.sub(r"\s+", " ", str(s).strip())
32
 
33
  def dataset_hash(rows) -> str:
34
- import hashlib
35
  m = hashlib.md5()
36
  for r in rows:
37
  m.update((norm(r.get("question","")) + "|" + norm(r.get("answer",""))).encode("utf-8"))
@@ -43,7 +42,8 @@ def load_jsonl(path: Path):
43
  rows = []
44
  with path.open("r", encoding="utf-8") as f:
45
  for line in f:
46
- if not line.strip(): continue
 
47
  obj = json.loads(line)
48
  q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
49
  a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
@@ -57,7 +57,7 @@ def load_jsonl(path: Path):
57
  seen.add(r["question"]); uniq.append(r)
58
  return uniq
59
 
60
- # ===== Index (retriever) =====
61
  class FAQIndex:
62
  def __init__(self):
63
  self.rows=None; self.model=None; self.emb=None; self.nn=None
@@ -75,13 +75,13 @@ class FAQIndex:
75
  except Exception:
76
  pass
77
  self.model = SentenceTransformer(EMB_MODEL)
78
- qs = [r["question"] for r in rows]
79
  self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
80
  self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
81
  CACHE_EMB.write_bytes(pickle.dumps({"emb": self.emb, "nn": self.nn}))
82
  CACHE_META.write_text(json.dumps({"hash": dataset_hash(rows), "emb_model": EMB_MODEL}, ensure_ascii=False))
83
 
84
- def retrieve(self, query: str, top_k: int = TOP_K_DEFAULT):
85
  if not query.strip(): return []
86
  qv = self.model.encode([query], normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
87
  dists, idxs = self.nn.kneighbors(qv, n_neighbors=min(top_k, len(self.rows)))
@@ -92,31 +92,23 @@ class FAQIndex:
92
  out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
93
  return out
94
 
95
- # ===== Local LLM (transformers pipeline) =====
96
  _local_pipe = None
97
- def get_local_pipe():
98
  global _local_pipe
99
- if _local_pipe is not None:
100
- return _local_pipe
101
- import torch
102
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
103
-
104
- # CPU-only for free Spaces; dtype=float32 for stability on CPU
105
- tok = AutoTokenizer.from_pretrained(GEN_MODEL)
106
- model = AutoModelForCausalLM.from_pretrained(GEN_MODEL, torch_dtype=torch.float32)
107
- _local_pipe = pipeline(
108
- "text-generation",
109
- model=model,
110
- tokenizer=tok,
111
- device=-1, # CPU
112
- # no explicit framework args; transformers picks PyTorch
113
- )
114
- return _local_pipe
115
-
116
- def call_local_llm(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
117
  try:
118
- pipe = get_local_pipe()
119
- outs = pipe(
 
 
 
 
 
 
 
 
 
 
120
  prompt,
121
  do_sample=True,
122
  temperature=float(temperature),
@@ -129,31 +121,35 @@ def call_local_llm(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_
129
  except Exception as e:
130
  return f"❌ Gagal menjalankan model lokal: {e}"
131
 
132
- # ===== RAG Orchestrator =====
133
- def build_context(retrieved):
134
- # kirim HANYA jawaban ke LLM sebagai konteks
135
- return "\n\n".join([f"[DOC {i}] {r['answer']}" for i, r in enumerate(retrieved, 1)])
136
 
137
- def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT):
138
- try:
139
- hits = faq.retrieve(user_msg, top_k=int(top_k))
140
- if not hits:
141
- return "Maaf, saya tidak menemukan referensi di basis pengetahuan Anda."
142
- context = build_context(hits)
143
- prompt = (
144
- f"SISTEM: {SYSTEM_PROMPT}\n\n"
145
- f"KONTEKS:\n{context}\n\n"
146
- f"PERTANYAAN:\n{user_msg}\n\n"
147
- "Instruksi: Jawab singkat, akurat, dan HANYA berdasarkan KONTEKS. "
148
- "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
149
- )
150
- out = call_local_llm(prompt, temperature=float(temperature), max_tokens=MAX_TOKENS)
151
  bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
152
- return f"{out}\n\n**Sumber terdekat:**\n{bullets}"
153
- except Exception as e:
154
- return f"❌ Terjadi error tak terduga: {e}"
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- # ===== Load & Upload =====
157
  faq = FAQIndex()
158
  rows = load_jsonl(DATA_PATH)
159
  faq.build(rows, force=False)
@@ -168,23 +164,24 @@ def upload_jsonl(file_obj):
168
  faq = FAQIndex(); faq.build(rows, force=True)
169
  return f"✅ Basis pengetahuan diperbarui. Total Q&A: {len(rows)}."
170
 
171
- # ===== UI =====
172
- with gr.Blocks(title="RAG + LLM (Local, JSONL)") as demo:
173
- gr.Markdown("# 📚 RAG + LLM — Local Model\nMasukkan pertanyaan → retrieve Q&A → model lokal menjawab berdasar konteks.")
174
  with gr.Row():
175
  with gr.Column(scale=2):
176
  gr.ChatInterface(
177
- fn=lambda msg, hist, k, t: rag_answer(msg, top_k=int(k), temperature=float(t)),
178
  additional_inputs=[
179
  gr.Slider(1, 10, value=TOP_K_DEFAULT, step=1, label="Top-K dokumen"),
180
  gr.Slider(0.0, 1.0, value=TEMPERATURE_DEFAULT, step=0.05, label="Temperatur"),
 
181
  ],
182
  title="Asisten Perpustakaan (RAG)",
183
  description="Jawab *berdasarkan konteks* dari dokumen JSONL Anda.",
184
  examples=[
185
- ["Apa itu IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT],
186
- ["Apa saja dimensi IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT],
187
- ["Apa manfaat IPLM bagi daerah?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT],
188
  ],
189
  cache_examples=False,
190
  )
@@ -193,7 +190,7 @@ with gr.Blocks(title="RAG + LLM (Local, JSONL)") as demo:
193
  uploader = gr.File(label="Upload JSONL Q&A (keys: question, answer)")
194
  status = gr.Textbox(label="Status", interactive=False)
195
  uploader.change(fn=upload_jsonl, inputs=uploader, outputs=status)
196
- gr.Markdown("_Model berjalan lokal; tidak membutuhkan HF_TOKEN._")
197
 
198
  if __name__ == "__main__":
199
  demo.launch()
 
1
+ # app.py — RAG + Deterministic QA + Local LLM (CPU-friendly)
2
+ import os, re, json, pickle, hashlib
3
  from pathlib import Path
 
4
  import gradio as gr
5
  import numpy as np
6
  from sklearn.neighbors import NearestNeighbors
7
  from sentence_transformers import SentenceTransformer
8
 
9
+ # =================== Konfigurasi ===================
10
+ DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
11
+ CACHE_EMB = Path("embeddings.pkl")
12
  CACHE_META = Path("meta.json")
13
 
14
+ EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
15
+ LOCAL_MODEL = os.getenv("LOCAL_MODEL", "microsoft/phi-2") # jalan di CPU
16
 
17
  TOP_K_DEFAULT = int(os.getenv("TOP_K_DEFAULT", "4"))
18
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE_DEFAULT", "0.2"))
19
  MAX_TOKENS = int(os.getenv("MAX_TOKENS", "256"))
20
+ SCORE_THRESHOLD = float(os.getenv("SCORE_THRESHOLD", "0.60")) # 0..1
21
 
22
  SYSTEM_PROMPT = (
23
  "You are an Indonesian librarian assistant. Jawab singkat, akurat, dan sopan. "
24
+ "Jawab HANYA berdasarkan konteks yang diberikan. "
25
+ "Jika tidak ada jawabannya di konteks, balas persis: Data tidak tersedia."
26
  )
27
 
28
+ # =================== Utilitas ===================
29
  def norm(s: str) -> str:
30
  if s is None: return ""
31
  return re.sub(r"\s+", " ", str(s).strip())
32
 
33
  def dataset_hash(rows) -> str:
 
34
  m = hashlib.md5()
35
  for r in rows:
36
  m.update((norm(r.get("question","")) + "|" + norm(r.get("answer",""))).encode("utf-8"))
 
42
  rows = []
43
  with path.open("r", encoding="utf-8") as f:
44
  for line in f:
45
+ line = line.strip()
46
+ if not line: continue
47
  obj = json.loads(line)
48
  q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
49
  a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
 
57
  seen.add(r["question"]); uniq.append(r)
58
  return uniq
59
 
60
+ # =================== Retriever (k-NN atas embedding pertanyaan) ===================
61
  class FAQIndex:
62
  def __init__(self):
63
  self.rows=None; self.model=None; self.emb=None; self.nn=None
 
75
  except Exception:
76
  pass
77
  self.model = SentenceTransformer(EMB_MODEL)
78
+ qs = [r["question"] for r in rows] # embed pertanyaan saja
79
  self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
80
  self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
81
  CACHE_EMB.write_bytes(pickle.dumps({"emb": self.emb, "nn": self.nn}))
82
  CACHE_META.write_text(json.dumps({"hash": dataset_hash(rows), "emb_model": EMB_MODEL}, ensure_ascii=False))
83
 
84
+ def retrieve(self, query: str, top_k: int):
85
  if not query.strip(): return []
86
  qv = self.model.encode([query], normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
87
  dists, idxs = self.nn.kneighbors(qv, n_neighbors=min(top_k, len(self.rows)))
 
92
  out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
93
  return out
94
 
95
+ # =================== Local LLM (transformers pipeline di CPU) ===================
96
  _local_pipe = None
97
+ def call_local(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
98
  global _local_pipe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  try:
100
+ if _local_pipe is None:
101
+ import torch
102
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
103
+ tok = AutoTokenizer.from_pretrained(LOCAL_MODEL)
104
+ model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, torch_dtype=torch.float32)
105
+ _local_pipe = pipeline(
106
+ "text-generation",
107
+ model=model,
108
+ tokenizer=tok,
109
+ device=-1, # CPU
110
+ )
111
+ outs = _local_pipe(
112
  prompt,
113
  do_sample=True,
114
  temperature=float(temperature),
 
121
  except Exception as e:
122
  return f"❌ Gagal menjalankan model lokal: {e}"
123
 
124
+ # =================== RAG Orchestrator (dengan deterministic QA) ===================
125
+ def build_context(hits):
126
+ return "\n\n".join([f"[DOC {i} | {h['score']:.2f}] {h['answer']}" for i, h in enumerate(hits, 1)])
 
127
 
128
+ def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT, threshold=SCORE_THRESHOLD):
129
+ hits = faq.retrieve(user_msg, top_k=int(top_k))
130
+ if not hits:
131
+ return "Maaf, saya tidak menemukan referensi di basis pengetahuan Anda."
132
+ top = hits[0]
133
+
134
+ # 1) Jika yakin → langsung pakai jawaban sumber (tanpa LLM)
135
+ if top["score"] >= float(threshold):
 
 
 
 
 
 
136
  bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
137
+ return f"{top['answer']}\n\n**Sumber terdekat (deterministik):**\n{bullets}"
138
+
139
+ # 2) Kurang yakin → minta LLM merangkum beberapa jawaban
140
+ context = build_context(hits)
141
+ prompt = (
142
+ f"SISTEM: {SYSTEM_PROMPT}\n\n"
143
+ f"KONTEKS:\n{context}\n\n"
144
+ f"PERTANYAAN:\n{user_msg}\n\n"
145
+ "Instruksi: Jawab singkat dan HANYA berdasarkan KONTEKS di atas. "
146
+ "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
147
+ )
148
+ llm_out = call_local(prompt, temperature=float(temperature), max_tokens=MAX_TOKENS)
149
+ bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
150
+ return f"{llm_out}\n\n**Sumber terdekat (lokal):**\n{bullets}"
151
 
152
+ # =================== Load & Upload ===================
153
  faq = FAQIndex()
154
  rows = load_jsonl(DATA_PATH)
155
  faq.build(rows, force=False)
 
164
  faq = FAQIndex(); faq.build(rows, force=True)
165
  return f"✅ Basis pengetahuan diperbarui. Total Q&A: {len(rows)}."
166
 
167
+ # =================== UI ===================
168
+ with gr.Blocks(title="RAG + LLM — Local Model") as demo:
169
+ gr.Markdown("## 📚 RAG + LLM — Local Model\nMasukkan pertanyaan → retrieve Q&A → model lokal merangkum bila perlu.")
170
  with gr.Row():
171
  with gr.Column(scale=2):
172
  gr.ChatInterface(
173
+ fn=lambda msg, hist, k, t, th: rag_answer(msg, top_k=int(k), temperature=float(t), threshold=float(th)),
174
  additional_inputs=[
175
  gr.Slider(1, 10, value=TOP_K_DEFAULT, step=1, label="Top-K dokumen"),
176
  gr.Slider(0.0, 1.0, value=TEMPERATURE_DEFAULT, step=0.05, label="Temperatur"),
177
+ gr.Slider(0.0, 1.0, value=SCORE_THRESHOLD, step=0.01, label="Ambil langsung jika skor ≥"),
178
  ],
179
  title="Asisten Perpustakaan (RAG)",
180
  description="Jawab *berdasarkan konteks* dari dokumen JSONL Anda.",
181
  examples=[
182
+ ["Apa itu IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
183
+ ["Bagaimana menghitung IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
184
+ ["Apa saja dimensi IPLM?", TOP_K_DEFAULT, TEMPERATURE_DEFAULT, SCORE_THRESHOLD],
185
  ],
186
  cache_examples=False,
187
  )
 
190
  uploader = gr.File(label="Upload JSONL Q&A (keys: question, answer)")
191
  status = gr.Textbox(label="Status", interactive=False)
192
  uploader.change(fn=upload_jsonl, inputs=uploader, outputs=status)
193
+ gr.Markdown("_Model berjalan lokal (CPU). Anda dapat mengganti `LOCAL_MODEL` via Settings → Variables._")
194
 
195
  if __name__ == "__main__":
196
  demo.launch()