Spaces:
Running
Running
finish
Browse files
- Dockerfile +0 -1
- chroma_data/cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0/length.bin +1 -1
- chroma_data/chroma.sqlite3 +1 -1
- db_setup.py +0 -2
- docker-compose.yml +1 -1
- frontend/index.html +0 -1
- frontend/script.js +1 -15
- ingest.py +0 -3
- src/chains/chain.py +1 -9
- src/ingestion/loader.py +0 -1
- src/retrieval/vector_store.py +0 -2
Dockerfile
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
|
| 3 |
-
# Set direktori kerja di dalam container
|
| 4 |
WORKDIR /app
|
| 5 |
|
| 6 |
RUN apt-get update && apt-get install -y \
|
|
|
|
| 1 |
FROM python:3.12-slim
|
| 2 |
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y \
|
chroma_data/cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0/length.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a2b2eaaf35eded0d090310dd7b61d5b8881b439105becc13f6387989c3fb27a
|
| 3 |
size 400
|
chroma_data/chroma.sqlite3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1761280
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db126d77883a3f3f04dd9e92788769e76e4a6bff24270eb1150dd94cd95b54c1
|
| 3 |
size 1761280
|
db_setup.py
CHANGED
|
@@ -40,11 +40,9 @@ def main():
|
|
| 40 |
)
|
| 41 |
documents.append(doc)
|
| 42 |
|
| 43 |
-
# 4. Setup Embedding Model
|
| 44 |
print("Memuat Embedding Model")
|
| 45 |
embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
|
| 46 |
|
| 47 |
-
# 5. Simpan ke ChromaDB via LangChain
|
| 48 |
print("Menyimpan data ke ChromaDB")
|
| 49 |
Chroma.from_documents(
|
| 50 |
documents=documents,
|
|
|
|
| 40 |
)
|
| 41 |
documents.append(doc)
|
| 42 |
|
|
|
|
| 43 |
print("Memuat Embedding Model")
|
| 44 |
embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
|
| 45 |
|
|
|
|
| 46 |
print("Menyimpan data ke ChromaDB")
|
| 47 |
Chroma.from_documents(
|
| 48 |
documents=documents,
|
docker-compose.yml
CHANGED
|
@@ -26,7 +26,7 @@ services:
|
|
| 26 |
frontend_web:
|
| 27 |
image: nginx:alpine
|
| 28 |
container_name: chilicare_frontend
|
| 29 |
-
# Nginx untuk
|
| 30 |
ports:
|
| 31 |
- "3000:80"
|
| 32 |
volumes:
|
|
|
|
| 26 |
frontend_web:
|
| 27 |
image: nginx:alpine
|
| 28 |
container_name: chilicare_frontend
|
| 29 |
+
# Nginx untuk web HTML
|
| 30 |
ports:
|
| 31 |
- "3000:80"
|
| 32 |
volumes:
|
frontend/index.html
CHANGED
|
@@ -74,7 +74,6 @@
|
|
| 74 |
background: #94a3b8;
|
| 75 |
}
|
| 76 |
|
| 77 |
-
/* Sembunyikan scrollbar di menu mobile agar lebih rapi */
|
| 78 |
.hide-scrollbar::-webkit-scrollbar {
|
| 79 |
display: none;
|
| 80 |
}
|
|
|
|
| 74 |
background: #94a3b8;
|
| 75 |
}
|
| 76 |
|
|
|
|
| 77 |
.hide-scrollbar::-webkit-scrollbar {
|
| 78 |
display: none;
|
| 79 |
}
|
frontend/script.js
CHANGED
|
@@ -23,10 +23,8 @@ function parseMarkdown(text) {
|
|
| 23 |
tableHtml += '<tr class="border-b border-slate-200 last:border-0 hover:bg-slate-50 transition-colors">';
|
| 24 |
cols.forEach(col => {
|
| 25 |
if (index === 0) {
|
| 26 |
-
// Baris pertama jadi Header (TH)
|
| 27 |
tableHtml += `<th class="px-4 py-3 bg-slate-100 font-semibold text-slate-700 whitespace-nowrap">${col}</th>`;
|
| 28 |
} else {
|
| 29 |
-
// Baris selanjutnya jadi Data (TD)
|
| 30 |
tableHtml += `<td class="px-4 py-3 align-top">${col}</td>`;
|
| 31 |
}
|
| 32 |
});
|
|
@@ -36,35 +34,27 @@ function parseMarkdown(text) {
|
|
| 36 |
return tableHtml;
|
| 37 |
});
|
| 38 |
|
| 39 |
-
// 2. Heading 3 (### Teks)
|
| 40 |
html = html.replace(/^###\s+(.*$)/gim, '<h3 class="text-lg font-bold text-slate-800 mt-5 mb-2">$1</h3>');
|
| 41 |
|
| 42 |
-
// 3. Heading 2 (## Teks)
|
| 43 |
html = html.replace(/^##\s+(.*$)/gim, '<h2 class="text-xl font-bold text-slate-800 mt-5 mb-2">$1</h2>');
|
| 44 |
|
| 45 |
-
// 4. Bold (**Teks**)
|
| 46 |
html = html.replace(/\*\*(.*?)\*\*/g, '<strong class="font-bold text-slate-800">$1</strong>');
|
| 47 |
|
| 48 |
-
// 5. Bullet points (* Teks atau - Teks di awal baris)
|
| 49 |
html = html.replace(/^[\*\-]\s+(.*$)/gim, '<div class="flex gap-2 mt-1.5"><span class="text-rose-500 font-bold shrink-0">•</span><span>$1</span></div>');
|
| 50 |
|
| 51 |
-
// 6. Italic (*Teks*)
|
| 52 |
html = html.replace(/(?<!^)\*(.*?)\*/g, '<em class="italic text-slate-700">$1</em>');
|
| 53 |
|
| 54 |
-
// 7. Ubah Enter (\n) menjadi <br>
|
| 55 |
html = html.replace(/\n/g, '<br/>');
|
| 56 |
|
| 57 |
-
// 8. Bersihkan sisa <br> berlebih agar spasi tidak terlalu jauh
|
| 58 |
html = html.replace(/<\/h3><br\/>/g, '</h3>');
|
| 59 |
html = html.replace(/<\/h2><br\/>/g, '</h2>');
|
| 60 |
html = html.replace(/<\/div><br\/>/g, '</div>');
|
| 61 |
-
html = html.replace(/<\/div><br\/><br\/>/g, '</div>');
|
| 62 |
html = html.replace(/(<br\/>){3,}/g, '<br/><br/>');
|
| 63 |
|
| 64 |
return html;
|
| 65 |
}
|
| 66 |
|
| 67 |
-
// --- 1. UPDATE FUNGSI NAVIGASI ---
|
| 68 |
function switchMenu(menu) {
|
| 69 |
const secDetect = document.getElementById("section-detect");
|
| 70 |
const secChat = document.getElementById("section-chat");
|
|
@@ -91,7 +81,6 @@ function switchMenu(menu) {
|
|
| 91 |
}
|
| 92 |
}
|
| 93 |
|
| 94 |
-
// --- FUNGSI DETEKSI PENYAKIT ---
|
| 95 |
function handleFileChange(event) {
|
| 96 |
const file = event.target.files[0];
|
| 97 |
if (file) {
|
|
@@ -145,7 +134,6 @@ async function handleUpload() {
|
|
| 145 |
}
|
| 146 |
}
|
| 147 |
|
| 148 |
-
// --- 2. UPDATE KOTAK HASIL DETEKSI ---
|
| 149 |
function tampilkanHasilDeteksi(data) {
|
| 150 |
const resultContainer = document.getElementById("detect-result");
|
| 151 |
const resultImage = document.getElementById("result-image");
|
|
@@ -190,7 +178,6 @@ function tampilkanHasilDeteksi(data) {
|
|
| 190 |
resultContainer.classList.remove("hidden");
|
| 191 |
}
|
| 192 |
|
| 193 |
-
// --- FUNGSI CHATBOT ---
|
| 194 |
async function handleSendChat(event) {
|
| 195 |
event.preventDefault();
|
| 196 |
|
|
@@ -231,7 +218,6 @@ async function handleSendChat(event) {
|
|
| 231 |
}
|
| 232 |
}
|
| 233 |
|
| 234 |
-
// --- 3. UPDATE CHAT BUBBLE ---
|
| 235 |
function appendMessage(role, content) {
|
| 236 |
const chatBox = document.getElementById("chat-messages");
|
| 237 |
const wrapper = document.createElement("div");
|
|
|
|
| 23 |
tableHtml += '<tr class="border-b border-slate-200 last:border-0 hover:bg-slate-50 transition-colors">';
|
| 24 |
cols.forEach(col => {
|
| 25 |
if (index === 0) {
|
|
|
|
| 26 |
tableHtml += `<th class="px-4 py-3 bg-slate-100 font-semibold text-slate-700 whitespace-nowrap">${col}</th>`;
|
| 27 |
} else {
|
|
|
|
| 28 |
tableHtml += `<td class="px-4 py-3 align-top">${col}</td>`;
|
| 29 |
}
|
| 30 |
});
|
|
|
|
| 34 |
return tableHtml;
|
| 35 |
});
|
| 36 |
|
|
|
|
| 37 |
html = html.replace(/^###\s+(.*$)/gim, '<h3 class="text-lg font-bold text-slate-800 mt-5 mb-2">$1</h3>');
|
| 38 |
|
|
|
|
| 39 |
html = html.replace(/^##\s+(.*$)/gim, '<h2 class="text-xl font-bold text-slate-800 mt-5 mb-2">$1</h2>');
|
| 40 |
|
|
|
|
| 41 |
html = html.replace(/\*\*(.*?)\*\*/g, '<strong class="font-bold text-slate-800">$1</strong>');
|
| 42 |
|
|
|
|
| 43 |
html = html.replace(/^[\*\-]\s+(.*$)/gim, '<div class="flex gap-2 mt-1.5"><span class="text-rose-500 font-bold shrink-0">•</span><span>$1</span></div>');
|
| 44 |
|
|
|
|
| 45 |
html = html.replace(/(?<!^)\*(.*?)\*/g, '<em class="italic text-slate-700">$1</em>');
|
| 46 |
|
|
|
|
| 47 |
html = html.replace(/\n/g, '<br/>');
|
| 48 |
|
|
|
|
| 49 |
html = html.replace(/<\/h3><br\/>/g, '</h3>');
|
| 50 |
html = html.replace(/<\/h2><br\/>/g, '</h2>');
|
| 51 |
html = html.replace(/<\/div><br\/>/g, '</div>');
|
| 52 |
+
html = html.replace(/<\/div><br\/><br\/>/g, '</div>');
|
| 53 |
html = html.replace(/(<br\/>){3,}/g, '<br/><br/>');
|
| 54 |
|
| 55 |
return html;
|
| 56 |
}
|
| 57 |
|
|
|
|
| 58 |
function switchMenu(menu) {
|
| 59 |
const secDetect = document.getElementById("section-detect");
|
| 60 |
const secChat = document.getElementById("section-chat");
|
|
|
|
| 81 |
}
|
| 82 |
}
|
| 83 |
|
|
|
|
| 84 |
function handleFileChange(event) {
|
| 85 |
const file = event.target.files[0];
|
| 86 |
if (file) {
|
|
|
|
| 134 |
}
|
| 135 |
}
|
| 136 |
|
|
|
|
| 137 |
function tampilkanHasilDeteksi(data) {
|
| 138 |
const resultContainer = document.getElementById("detect-result");
|
| 139 |
const resultImage = document.getElementById("result-image");
|
|
|
|
| 178 |
resultContainer.classList.remove("hidden");
|
| 179 |
}
|
| 180 |
|
|
|
|
| 181 |
async function handleSendChat(event) {
|
| 182 |
event.preventDefault();
|
| 183 |
|
|
|
|
| 218 |
}
|
| 219 |
}
|
| 220 |
|
|
|
|
| 221 |
function appendMessage(role, content) {
|
| 222 |
const chatBox = document.getElementById("chat-messages");
|
| 223 |
const wrapper = document.createElement("div");
|
ingest.py
CHANGED
|
@@ -5,7 +5,6 @@ sys.path.append(root_dir)
|
|
| 5 |
|
| 6 |
from src.ingestion.loader import load_data
|
| 7 |
from src.ingestion.chunker import split_documents
|
| 8 |
-
# Hapus import get_embedding_model karena sudah di-handle di dalam vector_store.py
|
| 9 |
from src.retrieval.vector_store import get_vector_store
|
| 10 |
|
| 11 |
SOURCES = [
|
|
@@ -31,10 +30,8 @@ def run_ingestion_pipeline():
|
|
| 31 |
if all_chunks:
|
| 32 |
print(f"\nMenyiapkan model embedding dan menyimpan {len(all_chunks)} chunks ke ChromaDB...")
|
| 33 |
|
| 34 |
-
# 1. Panggil database yang sudah ada (termasuk embedder-nya)
|
| 35 |
db = get_vector_store()
|
| 36 |
|
| 37 |
-
# 2. Tambahkan chunks baru ke dalam koleksi 'chilicare_kb'
|
| 38 |
db.add_documents(all_chunks)
|
| 39 |
|
| 40 |
print("\nSelesai! Semua data web telah masuk ke database dan siap digunakan oleh API/Streamlit.")
|
|
|
|
| 5 |
|
| 6 |
from src.ingestion.loader import load_data
|
| 7 |
from src.ingestion.chunker import split_documents
|
|
|
|
| 8 |
from src.retrieval.vector_store import get_vector_store
|
| 9 |
|
| 10 |
SOURCES = [
|
|
|
|
| 30 |
if all_chunks:
|
| 31 |
print(f"\nMenyiapkan model embedding dan menyimpan {len(all_chunks)} chunks ke ChromaDB...")
|
| 32 |
|
|
|
|
| 33 |
db = get_vector_store()
|
| 34 |
|
|
|
|
| 35 |
db.add_documents(all_chunks)
|
| 36 |
|
| 37 |
print("\nSelesai! Semua data web telah masuk ke database dan siap digunakan oleh API/Streamlit.")
|
src/chains/chain.py
CHANGED
|
@@ -41,21 +41,14 @@ def create_rag_chain(disease_label=None):
|
|
| 41 |
prompt = get_rag_prompt()
|
| 42 |
|
| 43 |
def format_docs(docs):
|
| 44 |
-
# ==========================================
|
| 45 |
-
# INTERCEPTOR: Print metadata ke Terminal
|
| 46 |
-
# ==========================================
|
| 47 |
-
print("\n" + "▼"*50)
|
| 48 |
print("🔍 [DEBUG] DOKUMEN YANG DITARIK MULTI-QUERY RETRIEVER:")
|
| 49 |
for i, doc in enumerate(docs):
|
| 50 |
sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
|
| 51 |
print(f" [{i+1}] Topik/Label: {sumber}")
|
| 52 |
-
print("▲"*50 + "\n")
|
| 53 |
-
# ==========================================
|
| 54 |
|
| 55 |
# Gabungkan teks dokumen yang berhasil dikumpulkan dari semua query alternatif
|
| 56 |
return "\n\n".join(doc.page_content for doc in docs)
|
| 57 |
|
| 58 |
-
# 6. Rangkai menjadi Chain (LCEL)
|
| 59 |
rag_chain = (
|
| 60 |
{"context": retriever | format_docs, "input": RunnablePassthrough()}
|
| 61 |
| prompt
|
|
@@ -65,7 +58,6 @@ def create_rag_chain(disease_label=None):
|
|
| 65 |
|
| 66 |
return rag_chain
|
| 67 |
|
| 68 |
-
# ======= Blok testing =========
|
| 69 |
if __name__ == "__main__":
|
| 70 |
chain = create_rag_chain()
|
| 71 |
|
|
@@ -75,7 +67,7 @@ if __name__ == "__main__":
|
|
| 75 |
|
| 76 |
try:
|
| 77 |
jawaban = chain.invoke(pertanyaan)
|
| 78 |
-
print("
|
| 79 |
print(jawaban)
|
| 80 |
except Exception as e:
|
| 81 |
print(f"Terjadi kesalahan: {e}")
|
|
|
|
| 41 |
prompt = get_rag_prompt()
|
| 42 |
|
| 43 |
def format_docs(docs):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
print("🔍 [DEBUG] DOKUMEN YANG DITARIK MULTI-QUERY RETRIEVER:")
|
| 45 |
for i, doc in enumerate(docs):
|
| 46 |
sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
|
| 47 |
print(f" [{i+1}] Topik/Label: {sumber}")
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# Gabungkan teks dokumen yang berhasil dikumpulkan dari semua query alternatif
|
| 50 |
return "\n\n".join(doc.page_content for doc in docs)
|
| 51 |
|
|
|
|
| 52 |
rag_chain = (
|
| 53 |
{"context": retriever | format_docs, "input": RunnablePassthrough()}
|
| 54 |
| prompt
|
|
|
|
| 58 |
|
| 59 |
return rag_chain
|
| 60 |
|
|
|
|
| 61 |
if __name__ == "__main__":
|
| 62 |
chain = create_rag_chain()
|
| 63 |
|
|
|
|
| 67 |
|
| 68 |
try:
|
| 69 |
jawaban = chain.invoke(pertanyaan)
|
| 70 |
+
print("JAWABAN RAG")
|
| 71 |
print(jawaban)
|
| 72 |
except Exception as e:
|
| 73 |
print(f"Terjadi kesalahan: {e}")
|
src/ingestion/loader.py
CHANGED
|
@@ -18,7 +18,6 @@ def load_data(source: str):
|
|
| 18 |
loader = TextLoader(source, encoding="utf-8")
|
| 19 |
documents = loader.load()
|
| 20 |
|
| 21 |
-
# TAMBAHAN BARU UNTUK CSV
|
| 22 |
elif source.lower().endswith(".csv"):
|
| 23 |
if not os.path.exists(source): raise FileNotFoundError(f"File tidak ditemukan: {source}")
|
| 24 |
loader = CSVLoader(source, encoding="utf-8")
|
|
|
|
| 18 |
loader = TextLoader(source, encoding="utf-8")
|
| 19 |
documents = loader.load()
|
| 20 |
|
|
|
|
| 21 |
elif source.lower().endswith(".csv"):
|
| 22 |
if not os.path.exists(source): raise FileNotFoundError(f"File tidak ditemukan: {source}")
|
| 23 |
loader = CSVLoader(source, encoding="utf-8")
|
src/retrieval/vector_store.py
CHANGED
|
@@ -5,12 +5,10 @@ CHROMA_DB_DIR = "chroma_data"
|
|
| 5 |
COLLECTION_NAME = "chilicare_kb"
|
| 6 |
|
| 7 |
def get_embedder():
|
| 8 |
-
# Wajib sama persis dengan yang ada di db_setup.py
|
| 9 |
return HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
|
| 10 |
|
| 11 |
def get_vector_store():
|
| 12 |
embedder = get_embedder()
|
| 13 |
-
# Memuat database Chroma yang sudah dibuat oleh db_setup.py
|
| 14 |
return Chroma(
|
| 15 |
persist_directory=CHROMA_DB_DIR,
|
| 16 |
embedding_function=embedder,
|
|
|
|
| 5 |
COLLECTION_NAME = "chilicare_kb"
|
| 6 |
|
| 7 |
def get_embedder():
|
|
|
|
| 8 |
return HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")
|
| 9 |
|
| 10 |
def get_vector_store():
|
| 11 |
embedder = get_embedder()
|
|
|
|
| 12 |
return Chroma(
|
| 13 |
persist_directory=CHROMA_DB_DIR,
|
| 14 |
embedding_function=embedder,
|