irhamni committed on
Commit
9a43d06
·
verified ·
1 Parent(s): 1314618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -51
app.py CHANGED
@@ -1,23 +1,19 @@
1
- # app.py β€” RAG + LLM (HF Inference API - TinyLlama, gratis & ringan)
2
-
3
- import os, re, json, pickle, hashlib, requests
4
  from pathlib import Path
 
5
  import gradio as gr
6
  import numpy as np
7
  from sklearn.neighbors import NearestNeighbors
8
  from sentence_transformers import SentenceTransformer
9
 
10
- # =================== Config ===================
11
  DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
12
  CACHE_EMB = Path("embeddings.pkl")
13
  CACHE_META = Path("meta.json")
14
 
15
- # Embedding model untuk retrieval (kecil & cepat)
16
  EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
17
-
18
- # LLM kecil & kompatibel via HF Inference API (gratis)
19
- HF_TOKEN = os.getenv("HF_TOKEN", "")
20
- LLM_MODEL = os.getenv("LLM_MODEL", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
21
 
22
  TOP_K_DEFAULT = int(os.getenv("TOP_K_DEFAULT", "4"))
23
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE_DEFAULT", "0.2"))
@@ -29,12 +25,13 @@ SYSTEM_PROMPT = (
29
  "Jika konteks tidak memuat jawabannya, balas persis: Data tidak tersedia."
30
  )
31
 
32
- # =================== Utils ===================
33
  def norm(s: str) -> str:
34
  if s is None: return ""
35
  return re.sub(r"\s+", " ", str(s).strip())
36
 
37
  def dataset_hash(rows) -> str:
 
38
  m = hashlib.md5()
39
  for r in rows:
40
  m.update((norm(r.get("question","")) + "|" + norm(r.get("answer",""))).encode("utf-8"))
@@ -50,8 +47,7 @@ def load_jsonl(path: Path):
50
  obj = json.loads(line)
51
  q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
52
  a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
53
- if q and a:
54
- rows.append({"question": norm(q), "answer": norm(a)})
55
  if not rows:
56
  raise ValueError("JSONL kosong atau tidak ada pasangan 'question'/'answer'.")
57
  # dedup by question
@@ -61,7 +57,7 @@ def load_jsonl(path: Path):
61
  seen.add(r["question"]); uniq.append(r)
62
  return uniq
63
 
64
- # =================== Index ===================
65
  class FAQIndex:
66
  def __init__(self):
67
  self.rows=None; self.model=None; self.emb=None; self.nn=None
@@ -79,7 +75,6 @@ class FAQIndex:
79
  except Exception:
80
  pass
81
  self.model = SentenceTransformer(EMB_MODEL)
82
- # Embed HANYA pertanyaan agar retrieval fokus
83
  qs = [r["question"] for r in rows]
84
  self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
85
  self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
@@ -97,38 +92,46 @@ class FAQIndex:
97
  out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
98
  return out
99
 
100
- # =================== LLM Caller (HF Inference API /models/<model>) ===================
101
- def call_hf_chat(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
102
- if not HF_TOKEN:
103
- return "⚠️ HF_TOKEN belum diatur di Settings β†’ Secrets."
104
- url = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
105
- headers = {"Authorization": f"Bearer {HF_TOKEN}"}
106
- payload = {
107
- "inputs": prompt,
108
- "parameters": {
109
- "temperature": float(temperature),
110
- "max_new_tokens": int(max_tokens),
111
- "return_full_text": False
112
- }
113
- }
 
 
 
 
 
 
 
 
114
  try:
115
- r = requests.post(url, headers=headers, json=payload, timeout=90)
116
- # Jika model baru bangun, HF mengembalikan 503 "loading". Tunjukkan info ramah.
117
- if r.status_code == 503:
118
- return "⏳ Model sedang loading di Inference API. Coba lagi sebentar."
119
- r.raise_for_status()
120
- data = r.json()
121
- if isinstance(data, list) and data and "generated_text" in data[0]:
122
- return data[0]["generated_text"]
123
- if isinstance(data, dict) and "generated_text" in data:
124
- return data["generated_text"]
125
- return str(data)
126
  except Exception as e:
127
- return f"❌ Error API: {e}\n{r.text[:400] if 'r' in locals() else ''}"
128
 
129
- # =================== RAG Orchestrator ===================
130
  def build_context(retrieved):
131
- # Kirim HANYA jawaban ke LLM sebagai konteks
132
  return "\n\n".join([f"[DOC {i}] {r['answer']}" for i, r in enumerate(retrieved, 1)])
133
 
134
  def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT):
@@ -144,21 +147,19 @@ def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT):
144
  "Instruksi: Jawab singkat, akurat, dan HANYA berdasarkan KONTEKS. "
145
  "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
146
  )
147
- out = call_hf_chat(prompt, temperature=float(temperature), max_tokens=MAX_TOKENS)
148
  bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
149
  return f"{out}\n\n**Sumber terdekat:**\n{bullets}"
150
  except Exception as e:
151
- # Pastikan tidak melempar exception ke UI (biar tak muncul bubble "Error")
152
  return f"❌ Terjadi error tak terduga: {e}"
153
 
154
- # =================== Load & Upload ===================
155
  faq = FAQIndex()
156
  rows = load_jsonl(DATA_PATH)
157
  faq.build(rows, force=False)
158
 
159
  def upload_jsonl(file_obj):
160
- if file_obj is None:
161
- return gr.update(value="Tidak ada file.")
162
  Path(file_obj.name).replace(DATA_PATH)
163
  if CACHE_EMB.exists(): CACHE_EMB.unlink()
164
  if CACHE_META.exists(): CACHE_META.unlink()
@@ -167,9 +168,9 @@ def upload_jsonl(file_obj):
167
  faq = FAQIndex(); faq.build(rows, force=True)
168
  return f"βœ… Basis pengetahuan diperbarui. Total Q&A: {len(rows)}."
169
 
170
- # =================== UI ===================
171
- with gr.Blocks(title="RAG + LLM (JSONL)") as demo:
172
- gr.Markdown("# πŸ“š RAG + LLM β€” dari JSONL Q&A\nMasukkan pertanyaan β†’ retrieve Q&A β†’ LLM menjawab berdasar konteks.")
173
  with gr.Row():
174
  with gr.Column(scale=2):
175
  gr.ChatInterface(
@@ -192,6 +193,7 @@ with gr.Blocks(title="RAG + LLM (JSONL)") as demo:
192
  uploader = gr.File(label="Upload JSONL Q&A (keys: question, answer)")
193
  status = gr.Textbox(label="Status", interactive=False)
194
  uploader.change(fn=upload_jsonl, inputs=uploader, outputs=status)
195
- gr.Markdown("Set **HF_TOKEN** di Settings β†’ Secrets. Model default: TinyLlama-1.1B-Chat.")
 
196
  if __name__ == "__main__":
197
  demo.launch()
 
1
+ # app.py — RAG + Local LLM (TinyLlama) for Hugging Face Spaces (CPU)
2
+ import os, re, json, pickle, hashlib, time
 
3
  from pathlib import Path
4
+ import requests # kept, but no longer used to call the Inference API (generation now runs locally)
5
  import gradio as gr
6
  import numpy as np
7
  from sklearn.neighbors import NearestNeighbors
8
  from sentence_transformers import SentenceTransformer
9
 
10
+ # ===== Config =====
11
  DATA_PATH = Path(os.getenv("DATA_PATH", "IPLM_QnA_Chatbot.jsonl"))
12
  CACHE_EMB = Path("embeddings.pkl")
13
  CACHE_META = Path("meta.json")
14
 
 
15
  EMB_MODEL = os.getenv("EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
16
+ GEN_MODEL = os.getenv("GEN_MODEL", "TinyLlama/TinyLlama-1.1B-Chat-v1.0") # local small model
 
 
 
17
 
18
  TOP_K_DEFAULT = int(os.getenv("TOP_K_DEFAULT", "4"))
19
  TEMPERATURE_DEFAULT = float(os.getenv("TEMPERATURE_DEFAULT", "0.2"))
 
25
  "Jika konteks tidak memuat jawabannya, balas persis: Data tidak tersedia."
26
  )
27
 
28
# ===== Utils =====
def norm(s: str) -> str:
    """Return *s* as a string with trimmed ends and single-spaced whitespace.

    ``None`` becomes the empty string. Any other value is passed through
    ``str()`` first, so non-string inputs are accepted as well.
    """
    if s is None:
        return ""
    trimmed = str(s).strip()
    # Collapse every run of whitespace (spaces, tabs, newlines) to one space.
    return re.sub(r"\s+", " ", trimmed)
32
 
33
  def dataset_hash(rows) -> str:
34
+ import hashlib
35
  m = hashlib.md5()
36
  for r in rows:
37
  m.update((norm(r.get("question","")) + "|" + norm(r.get("answer",""))).encode("utf-8"))
 
47
  obj = json.loads(line)
48
  q = obj.get("question") or obj.get("pertanyaan") or obj.get("q")
49
  a = obj.get("answer") or obj.get("jawaban") or obj.get("a")
50
+ if q and a: rows.append({"question": norm(q), "answer": norm(a)})
 
51
  if not rows:
52
  raise ValueError("JSONL kosong atau tidak ada pasangan 'question'/'answer'.")
53
  # dedup by question
 
57
  seen.add(r["question"]); uniq.append(r)
58
  return uniq
59
 
60
+ # ===== Index (retriever) =====
61
  class FAQIndex:
62
  def __init__(self):
63
  self.rows=None; self.model=None; self.emb=None; self.nn=None
 
75
  except Exception:
76
  pass
77
  self.model = SentenceTransformer(EMB_MODEL)
 
78
  qs = [r["question"] for r in rows]
79
  self.emb = self.model.encode(qs, normalize_embeddings=True, convert_to_numpy=True, show_progress_bar=False)
80
  self.nn = NearestNeighbors(n_neighbors=min(10, len(qs)), metric="cosine").fit(self.emb)
 
92
  out.append({"question": r["question"], "answer": r["answer"], "score": float(sim)})
93
  return out
94
 
95
+ # ===== Local LLM (transformers pipeline) =====
96
# Module-level cache for the generation pipeline; filled on first use.
_local_pipe = None


def get_local_pipe():
    """Return the shared text-generation pipeline, building it on first call.

    The tokenizer and causal LM named by ``GEN_MODEL`` are loaded once and the
    resulting pipeline is stored in the module-level ``_local_pipe``; later
    calls return that same object.
    """
    global _local_pipe
    if _local_pipe is None:
        # Imported inside the function, so these are only loaded when the
        # pipeline is first built.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

        # CPU-only for free Spaces; float32 for stability on CPU.
        tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
        causal_lm = AutoModelForCausalLM.from_pretrained(
            GEN_MODEL, torch_dtype=torch.float32
        )
        # No explicit framework argument is passed to pipeline().
        _local_pipe = pipeline(
            "text-generation",
            model=causal_lm,
            tokenizer=tokenizer,
            device=-1,  # CPU
        )
    return _local_pipe
115
+
116
def call_local_llm(prompt: str, temperature=TEMPERATURE_DEFAULT, max_tokens=MAX_TOKENS):
    """Generate a completion for *prompt* with the local pipeline.

    Args:
        prompt: Full prompt text handed to the text-generation pipeline.
        temperature: Sampling temperature. Values greater than 0 enable
            sampling; 0 or below (or NaN) switch to greedy decoding, because
            sampling with a non-positive temperature is rejected by
            transformers.
        max_tokens: Maximum number of newly generated tokens.

    Returns:
        The generated text only (the prompt is not echoed back). If the
        pipeline returns an unexpected shape, its ``str()`` is returned.
        Any exception is caught and reported as a "❌ ..." message string,
        so this function does not raise to the caller.
    """
    try:
        pipe = get_local_pipe()
        temp = float(temperature)
        gen_kwargs = {
            "max_new_tokens": int(max_tokens),
            "return_full_text": False,
        }
        if temp > 0.0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = temp
        else:
            # Greedy decoding: do not pass a temperature at all, so a slider
            # value of 0 produces an answer instead of a validation error.
            gen_kwargs["do_sample"] = False
        outs = pipe(prompt, **gen_kwargs)
        if isinstance(outs, list) and outs and "generated_text" in outs[0]:
            return outs[0]["generated_text"]
        return str(outs)
    except Exception as e:
        return f"❌ Gagal menjalankan model lokal: {e}"
131
 
132
# ===== RAG Orchestrator =====
def build_context(retrieved):
    """Format retrieved hits as numbered ``[DOC i]`` paragraphs.

    Each item of *retrieved* is indexed by its ``"answer"`` key. Entries are
    numbered from 1 and separated by a blank line.
    """
    # Send ONLY the answers to the LLM as context.
    docs = []
    for idx, hit in enumerate(retrieved, start=1):
        docs.append(f"[DOC {idx}] {hit['answer']}")
    return "\n\n".join(docs)
136
 
137
  def rag_answer(user_msg, top_k=TOP_K_DEFAULT, temperature=TEMPERATURE_DEFAULT):
 
147
  "Instruksi: Jawab singkat, akurat, dan HANYA berdasarkan KONTEKS. "
148
  "Jika tidak ada jawabannya, balas persis: Data tidak tersedia."
149
  )
150
+ out = call_local_llm(prompt, temperature=float(temperature), max_tokens=MAX_TOKENS)
151
  bullets = "\n".join([f"- ({h['score']:.2f}) {h['question']}" for h in hits])
152
  return f"{out}\n\n**Sumber terdekat:**\n{bullets}"
153
  except Exception as e:
 
154
  return f"❌ Terjadi error tak terduga: {e}"
155
 
156
+ # ===== Load & Upload =====
157
  faq = FAQIndex()
158
  rows = load_jsonl(DATA_PATH)
159
  faq.build(rows, force=False)
160
 
161
  def upload_jsonl(file_obj):
162
+ if file_obj is None: return gr.update(value="Tidak ada file.")
 
163
  Path(file_obj.name).replace(DATA_PATH)
164
  if CACHE_EMB.exists(): CACHE_EMB.unlink()
165
  if CACHE_META.exists(): CACHE_META.unlink()
 
168
  faq = FAQIndex(); faq.build(rows, force=True)
169
  return f"βœ… Basis pengetahuan diperbarui. Total Q&A: {len(rows)}."
170
 
171
+ # ===== UI =====
172
+ with gr.Blocks(title="RAG + LLM (Local, JSONL)") as demo:
173
+ gr.Markdown("# πŸ“š RAG + LLM β€” Local Model\nMasukkan pertanyaan β†’ retrieve Q&A β†’ model lokal menjawab berdasar konteks.")
174
  with gr.Row():
175
  with gr.Column(scale=2):
176
  gr.ChatInterface(
 
193
  uploader = gr.File(label="Upload JSONL Q&A (keys: question, answer)")
194
  status = gr.Textbox(label="Status", interactive=False)
195
  uploader.change(fn=upload_jsonl, inputs=uploader, outputs=status)
196
+ gr.Markdown("_Model berjalan lokal; tidak membutuhkan HF_TOKEN._")
197
+
198
  if __name__ == "__main__":
199
  demo.launch()