feryms committed on
Commit
fb7ce8a
·
1 Parent(s): 7d8273c
Dockerfile CHANGED
@@ -1,6 +1,5 @@
1
  FROM python:3.12-slim
2
 
3
- # Set direktori kerja di dalam container
4
  WORKDIR /app
5
 
6
  RUN apt-get update && apt-get install -y \
 
1
  FROM python:3.12-slim
2
 
 
3
  WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y \
chroma_data/cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0/length.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acd769936f1d22a7253e4ff279f1366aac68ebb667c50946415ea98c3dbcc1bd
3
  size 400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2b2eaaf35eded0d090310dd7b61d5b8881b439105becc13f6387989c3fb27a
3
  size 400
chroma_data/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26051dcbbbc7ed704c9c79a0e8770dfe0cf2634b5e8a1649558c284cbff6b780
3
  size 1761280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db126d77883a3f3f04dd9e92788769e76e4a6bff24270eb1150dd94cd95b54c1
3
  size 1761280
db_setup.py CHANGED
@@ -40,11 +40,9 @@ def main():
40
  )
41
  documents.append(doc)
42
 
43
- # 4. Setup Embedding Model
44
  print("Memuat Embedding Model")
45
  embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
46
 
47
- # 5. Simpan ke ChromaDB via LangChain
48
  print("Menyimpan data ke ChromaDB")
49
  Chroma.from_documents(
50
  documents=documents,
 
40
  )
41
  documents.append(doc)
42
 
 
43
  print("Memuat Embedding Model")
44
  embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
45
 
 
46
  print("Menyimpan data ke ChromaDB")
47
  Chroma.from_documents(
48
  documents=documents,
docker-compose.yml CHANGED
@@ -26,7 +26,7 @@ services:
26
  frontend_web:
27
  image: nginx:alpine
28
  container_name: chilicare_frontend
29
- # Nginx untuk menjalankan web HTML
30
  ports:
31
  - "3000:80"
32
  volumes:
 
26
  frontend_web:
27
  image: nginx:alpine
28
  container_name: chilicare_frontend
29
+ # Nginx untuk web HTML
30
  ports:
31
  - "3000:80"
32
  volumes:
frontend/index.html CHANGED
@@ -74,7 +74,6 @@
74
  background: #94a3b8;
75
  }
76
 
77
- /* Sembunyikan scrollbar di menu mobile agar lebih rapi */
78
  .hide-scrollbar::-webkit-scrollbar {
79
  display: none;
80
  }
 
74
  background: #94a3b8;
75
  }
76
 
 
77
  .hide-scrollbar::-webkit-scrollbar {
78
  display: none;
79
  }
frontend/script.js CHANGED
@@ -23,10 +23,8 @@ function parseMarkdown(text) {
23
  tableHtml += '<tr class="border-b border-slate-200 last:border-0 hover:bg-slate-50 transition-colors">';
24
  cols.forEach(col => {
25
  if (index === 0) {
26
- // Baris pertama jadi Header (TH)
27
  tableHtml += `<th class="px-4 py-3 bg-slate-100 font-semibold text-slate-700 whitespace-nowrap">${col}</th>`;
28
  } else {
29
- // Baris selanjutnya jadi Data (TD)
30
  tableHtml += `<td class="px-4 py-3 align-top">${col}</td>`;
31
  }
32
  });
@@ -36,35 +34,27 @@ function parseMarkdown(text) {
36
  return tableHtml;
37
  });
38
 
39
- // 2. Heading 3 (### Teks)
40
  html = html.replace(/^###\s+(.*$)/gim, '<h3 class="text-lg font-bold text-slate-800 mt-5 mb-2">$1</h3>');
41
 
42
- // 3. Heading 2 (## Teks)
43
  html = html.replace(/^##\s+(.*$)/gim, '<h2 class="text-xl font-bold text-slate-800 mt-5 mb-2">$1</h2>');
44
 
45
- // 4. Bold (**Teks**)
46
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong class="font-bold text-slate-800">$1</strong>');
47
 
48
- // 5. Bullet points (* Teks atau - Teks di awal baris)
49
  html = html.replace(/^[\*\-]\s+(.*$)/gim, '<div class="flex gap-2 mt-1.5"><span class="text-rose-500 font-bold shrink-0">•</span><span>$1</span></div>');
50
 
51
- // 6. Italic (*Teks*)
52
  html = html.replace(/(?<!^)\*(.*?)\*/g, '<em class="italic text-slate-700">$1</em>');
53
 
54
- // 7. Ubah Enter (\n) menjadi <br>
55
  html = html.replace(/\n/g, '<br/>');
56
 
57
- // 8. Bersihkan sisa <br> berlebih agar spasi tidak terlalu jauh
58
  html = html.replace(/<\/h3><br\/>/g, '</h3>');
59
  html = html.replace(/<\/h2><br\/>/g, '</h2>');
60
  html = html.replace(/<\/div><br\/>/g, '</div>');
61
- html = html.replace(/<\/div><br\/><br\/>/g, '</div>'); // Jaga jarak bawah tabel
62
  html = html.replace(/(<br\/>){3,}/g, '<br/><br/>');
63
 
64
  return html;
65
  }
66
 
67
- // --- 1. UPDATE FUNGSI NAVIGASI ---
68
  function switchMenu(menu) {
69
  const secDetect = document.getElementById("section-detect");
70
  const secChat = document.getElementById("section-chat");
@@ -91,7 +81,6 @@ function switchMenu(menu) {
91
  }
92
  }
93
 
94
- // --- FUNGSI DETEKSI PENYAKIT ---
95
  function handleFileChange(event) {
96
  const file = event.target.files[0];
97
  if (file) {
@@ -145,7 +134,6 @@ async function handleUpload() {
145
  }
146
  }
147
 
148
- // --- 2. UPDATE KOTAK HASIL DETEKSI ---
149
  function tampilkanHasilDeteksi(data) {
150
  const resultContainer = document.getElementById("detect-result");
151
  const resultImage = document.getElementById("result-image");
@@ -190,7 +178,6 @@ function tampilkanHasilDeteksi(data) {
190
  resultContainer.classList.remove("hidden");
191
  }
192
 
193
- // --- FUNGSI CHATBOT ---
194
  async function handleSendChat(event) {
195
  event.preventDefault();
196
 
@@ -231,7 +218,6 @@ async function handleSendChat(event) {
231
  }
232
  }
233
 
234
- // --- 3. UPDATE CHAT BUBBLE ---
235
  function appendMessage(role, content) {
236
  const chatBox = document.getElementById("chat-messages");
237
  const wrapper = document.createElement("div");
 
23
  tableHtml += '<tr class="border-b border-slate-200 last:border-0 hover:bg-slate-50 transition-colors">';
24
  cols.forEach(col => {
25
  if (index === 0) {
 
26
  tableHtml += `<th class="px-4 py-3 bg-slate-100 font-semibold text-slate-700 whitespace-nowrap">${col}</th>`;
27
  } else {
 
28
  tableHtml += `<td class="px-4 py-3 align-top">${col}</td>`;
29
  }
30
  });
 
34
  return tableHtml;
35
  });
36
 
 
37
  html = html.replace(/^###\s+(.*$)/gim, '<h3 class="text-lg font-bold text-slate-800 mt-5 mb-2">$1</h3>');
38
 
 
39
  html = html.replace(/^##\s+(.*$)/gim, '<h2 class="text-xl font-bold text-slate-800 mt-5 mb-2">$1</h2>');
40
 
 
41
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong class="font-bold text-slate-800">$1</strong>');
42
 
 
43
  html = html.replace(/^[\*\-]\s+(.*$)/gim, '<div class="flex gap-2 mt-1.5"><span class="text-rose-500 font-bold shrink-0">•</span><span>$1</span></div>');
44
 
 
45
  html = html.replace(/(?<!^)\*(.*?)\*/g, '<em class="italic text-slate-700">$1</em>');
46
 
 
47
  html = html.replace(/\n/g, '<br/>');
48
 
 
49
  html = html.replace(/<\/h3><br\/>/g, '</h3>');
50
  html = html.replace(/<\/h2><br\/>/g, '</h2>');
51
  html = html.replace(/<\/div><br\/>/g, '</div>');
52
+ html = html.replace(/<\/div><br\/><br\/>/g, '</div>');
53
  html = html.replace(/(<br\/>){3,}/g, '<br/><br/>');
54
 
55
  return html;
56
  }
57
 
 
58
  function switchMenu(menu) {
59
  const secDetect = document.getElementById("section-detect");
60
  const secChat = document.getElementById("section-chat");
 
81
  }
82
  }
83
 
 
84
  function handleFileChange(event) {
85
  const file = event.target.files[0];
86
  if (file) {
 
134
  }
135
  }
136
 
 
137
  function tampilkanHasilDeteksi(data) {
138
  const resultContainer = document.getElementById("detect-result");
139
  const resultImage = document.getElementById("result-image");
 
178
  resultContainer.classList.remove("hidden");
179
  }
180
 
 
181
  async function handleSendChat(event) {
182
  event.preventDefault();
183
 
 
218
  }
219
  }
220
 
 
221
  function appendMessage(role, content) {
222
  const chatBox = document.getElementById("chat-messages");
223
  const wrapper = document.createElement("div");
ingest.py CHANGED
@@ -5,7 +5,6 @@ sys.path.append(root_dir)
5
 
6
  from src.ingestion.loader import load_data
7
  from src.ingestion.chunker import split_documents
8
- # Hapus import get_embedding_model karena sudah di-handle di dalam vector_store.py
9
  from src.retrieval.vector_store import get_vector_store
10
 
11
  SOURCES = [
@@ -31,10 +30,8 @@ def run_ingestion_pipeline():
31
  if all_chunks:
32
  print(f"\nMenyiapkan model embedding dan menyimpan {len(all_chunks)} chunks ke ChromaDB...")
33
 
34
- # 1. Panggil database yang sudah ada (termasuk embedder-nya)
35
  db = get_vector_store()
36
 
37
- # 2. Tambahkan chunks baru ke dalam koleksi 'chilicare_kb'
38
  db.add_documents(all_chunks)
39
 
40
  print("\nSelesai! Semua data web telah masuk ke database dan siap digunakan oleh API/Streamlit.")
 
5
 
6
  from src.ingestion.loader import load_data
7
  from src.ingestion.chunker import split_documents
 
8
  from src.retrieval.vector_store import get_vector_store
9
 
10
  SOURCES = [
 
30
  if all_chunks:
31
  print(f"\nMenyiapkan model embedding dan menyimpan {len(all_chunks)} chunks ke ChromaDB...")
32
 
 
33
  db = get_vector_store()
34
 
 
35
  db.add_documents(all_chunks)
36
 
37
  print("\nSelesai! Semua data web telah masuk ke database dan siap digunakan oleh API/Streamlit.")
src/chains/chain.py CHANGED
@@ -41,21 +41,14 @@ def create_rag_chain(disease_label=None):
41
  prompt = get_rag_prompt()
42
 
43
  def format_docs(docs):
44
- # ==========================================
45
- # INTERCEPTOR: Print metadata ke Terminal
46
- # ==========================================
47
- print("\n" + "▼"*50)
48
  print("🔍 [DEBUG] DOKUMEN YANG DITARIK MULTI-QUERY RETRIEVER:")
49
  for i, doc in enumerate(docs):
50
  sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
51
  print(f" [{i+1}] Topik/Label: {sumber}")
52
- print("▲"*50 + "\n")
53
- # ==========================================
54
 
55
  # Gabungkan teks dokumen yang berhasil dikumpulkan dari semua query alternatif
56
  return "\n\n".join(doc.page_content for doc in docs)
57
 
58
- # 6. Rangkai menjadi Chain (LCEL)
59
  rag_chain = (
60
  {"context": retriever | format_docs, "input": RunnablePassthrough()}
61
  | prompt
@@ -65,7 +58,6 @@ def create_rag_chain(disease_label=None):
65
 
66
  return rag_chain
67
 
68
- # ======= Blok testing =========
69
  if __name__ == "__main__":
70
  chain = create_rag_chain()
71
 
@@ -75,7 +67,7 @@ if __name__ == "__main__":
75
 
76
  try:
77
  jawaban = chain.invoke(pertanyaan)
78
- print("=== JAWABAN RAG ===")
79
  print(jawaban)
80
  except Exception as e:
81
  print(f"Terjadi kesalahan: {e}")
 
41
  prompt = get_rag_prompt()
42
 
43
  def format_docs(docs):
 
 
 
 
44
  print("🔍 [DEBUG] DOKUMEN YANG DITARIK MULTI-QUERY RETRIEVER:")
45
  for i, doc in enumerate(docs):
46
  sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
47
  print(f" [{i+1}] Topik/Label: {sumber}")
 
 
48
 
49
  # Gabungkan teks dokumen yang berhasil dikumpulkan dari semua query alternatif
50
  return "\n\n".join(doc.page_content for doc in docs)
51
 
 
52
  rag_chain = (
53
  {"context": retriever | format_docs, "input": RunnablePassthrough()}
54
  | prompt
 
58
 
59
  return rag_chain
60
 
 
61
  if __name__ == "__main__":
62
  chain = create_rag_chain()
63
 
 
67
 
68
  try:
69
  jawaban = chain.invoke(pertanyaan)
70
+ print("JAWABAN RAG")
71
  print(jawaban)
72
  except Exception as e:
73
  print(f"Terjadi kesalahan: {e}")
src/ingestion/loader.py CHANGED
@@ -18,7 +18,6 @@ def load_data(source: str):
18
  loader = TextLoader(source, encoding="utf-8")
19
  documents = loader.load()
20
 
21
- # TAMBAHAN BARU UNTUK CSV
22
  elif source.lower().endswith(".csv"):
23
  if not os.path.exists(source): raise FileNotFoundError(f"File tidak ditemukan: {source}")
24
  loader = CSVLoader(source, encoding="utf-8")
 
18
  loader = TextLoader(source, encoding="utf-8")
19
  documents = loader.load()
20
 
 
21
  elif source.lower().endswith(".csv"):
22
  if not os.path.exists(source): raise FileNotFoundError(f"File tidak ditemukan: {source}")
23
  loader = CSVLoader(source, encoding="utf-8")
src/retrieval/vector_store.py CHANGED
@@ -5,12 +5,10 @@ CHROMA_DB_DIR = "chroma_data"
5
  COLLECTION_NAME = "chilicare_kb"
6
 
7
  def get_embedder():
8
- # Wajib sama persis dengan yang ada di db_setup.py
9
  return HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
10
 
11
  def get_vector_store():
12
  embedder = get_embedder()
13
- # Memuat database Chroma yang sudah dibuat oleh db_setup.py
14
  return Chroma(
15
  persist_directory=CHROMA_DB_DIR,
16
  embedding_function=embedder,
 
5
  COLLECTION_NAME = "chilicare_kb"
6
 
7
  def get_embedder():
 
8
  return HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
9
 
10
  def get_vector_store():
11
  embedder = get_embedder()
 
12
  return Chroma(
13
  persist_directory=CHROMA_DB_DIR,
14
  embedding_function=embedder,