feryms committed on
Commit
1e384db
·
1 Parent(s): 2498457

update docker

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # Set direktori kerja di dalam container
4
+ WORKDIR /app
5
+
6
+ RUN apt-get update && apt-get install -y \
7
+ build-essential \
8
+ libglib2.0-0 \
9
+ libgl1 \
10
+ libxcb1 \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ COPY requirements.txt .
14
+
15
+ RUN pip install --no-cache-dir -r requirements.txt
16
+
17
+ COPY . .
18
+
19
+ ENV PYTHONPATH=/app
app.py CHANGED
@@ -5,8 +5,11 @@ import io
5
  import base64
6
 
7
  # URL untuk kedua endpoint FastAPI
8
- API_DETECT_URL = "http://localhost:8000/detect"
9
- API_ASK_URL = "http://localhost:8000/ask"
 
 
 
10
 
11
  st.set_page_config(page_title="ChiliCare AI", page_icon="🌶️", layout="centered")
12
 
 
5
  import base64
6
 
7
  # URL untuk kedua endpoint FastAPI
8
+ # API_DETECT_URL = "http://localhost:8000/detect"
9
+ # API_ASK_URL = "http://localhost:8000/ask"
10
+
11
+ API_DETECT_URL = "http://backend:8000/detect"
12
+ API_ASK_URL = "http://backend:8000/ask"
13
 
14
  st.set_page_config(page_title="ChiliCare AI", page_icon="🌶️", layout="centered")
15
 
chroma_data/{c3279b3c-8393-4cc2-a5e7-962590e279ef → cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0}/data_level0.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c57e2be68a8bb2dc20a1df18f9858aab631f6fecaff7375e003b362095604a19
3
  size 423600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41b1d6558680207762d59e507f9dcba0cb9fbbd4c23c79e14025206d3742e17f
3
  size 423600
chroma_data/{c3279b3c-8393-4cc2-a5e7-962590e279ef → cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0}/header.bin RENAMED
File without changes
chroma_data/{c3279b3c-8393-4cc2-a5e7-962590e279ef → cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0}/length.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f488958c83249ab7c95ce9427b92f8dfa6f7f024140cb7a76d63ab6d57b359a2
3
  size 400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
3
  size 400
chroma_data/{c3279b3c-8393-4cc2-a5e7-962590e279ef → cdcb1c1c-f374-4f62-9cc7-7e62dcdaccd0}/link_lists.bin RENAMED
File without changes
chroma_data/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f4806fdeb9a3a3ae4b8675347baf0130a40b0067a57e64c9624279fd61f67b2
3
- size 1056768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4591d17a5270b7679f514c4e344b021379bad5bf7730928870061f60f5f44b33
3
+ size 1761280
model/best_YOLOv11L.pt → data/cabai.pdf RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2de6aa02599b51eaec5d2171e117e7ddacee608e39415fc68c2c3e5f0281b97
3
- size 51207065
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd9d64b7fbf5742019eec709b900fd45bd7642939bbf720a221b0b6c830edcb
3
+ size 1640127
docker-compose.yml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ backend:
3
+ build: .
4
+ container_name: chilicare_backend
5
+ # server FastAPI
6
+ command: uvicorn backend.api:app --host 0.0.0.0 --port 8000
7
+ ports:
8
+ - "8000:8000"
9
+ volumes:
10
+ - ./chroma_data:/app/chroma_data
11
+ - ./model:/app/model
12
+ - hf_cache:/root/.cache/huggingface
13
+ env_file:
14
+ - .env
15
+
16
+ streamlit:
17
+ build: .
18
+ container_name: chilicare_streamlit
19
+ # Streamlit
20
+ command: streamlit run app.py --server.address 0.0.0.0
21
+ ports:
22
+ - "8501:8501"
23
+ depends_on:
24
+ - backend
25
+
26
+ frontend_web:
27
+ image: nginx:alpine
28
+ container_name: chilicare_frontend
29
+ # Nginx untuk menjalankan web HTML
30
+ ports:
31
+ - "3000:80"
32
+ volumes:
33
+ - ./frontend:/usr/share/nginx/html
34
+
35
+ volumes:
36
+ hf_cache:
frontend/script.js CHANGED
@@ -1,15 +1,17 @@
1
- const API_DETECT_URL = "http://localhost:8000/detect";
2
- const API_ASK_URL = "http://localhost:8000/ask";
3
 
4
- // const API_DETECT_URL = "https://r7sc5m17-8000.asse.devtunnels.ms/detect";
5
- // const API_ASK_URL = "https://r7sc5m17-8000.asse.devtunnels.ms/ask";
 
 
 
6
 
7
  let selectedFile = null;
8
 
9
  function parseMarkdown(text) {
10
  if (!text) return "";
11
  let html = text;
12
-
13
  html = html.replace(/(?:^\|.*\|(?:\n|\r|$))+/gm, function(match) {
14
  let rows = match.trim().split('\n');
15
  let tableHtml = '<div class="overflow-x-auto my-5 rounded-xl ring-1 ring-slate-200 shadow-sm"><table class="w-full text-sm text-left text-slate-600">';
 
1
+ // const API_DETECT_URL = "http://localhost:8000/detect";
2
+ // const API_ASK_URL = "http://localhost:8000/ask";
3
 
4
+ const API_DETECT_URL = "https://r7sc5m17-8000.asse.devtunnels.ms/detect";
5
+ const API_ASK_URL = "https://r7sc5m17-8000.asse.devtunnels.ms/ask";
6
+
7
+ // API_DETECT_URL = "http://backend:8000/detect";
8
+ // API_ASK_URL = "http://backend:8000/ask";
9
 
10
  let selectedFile = null;
11
 
12
  function parseMarkdown(text) {
13
  if (!text) return "";
14
  let html = text;
 
15
  html = html.replace(/(?:^\|.*\|(?:\n|\r|$))+/gm, function(match) {
16
  let rows = match.trim().split('\n');
17
  let tableHtml = '<div class="overflow-x-auto my-5 rounded-xl ring-1 ring-slate-200 shadow-sm"><table class="w-full text-sm text-left text-slate-600">';
ingest.py CHANGED
@@ -11,7 +11,8 @@ from src.retrieval.vector_store import get_vector_store
11
  SOURCES = [
12
  "https://www.dgwfertilizer.co.id/8-hama-dan-penyakit-penting-pada-tanaman-cabai/",
13
  "https://mitrabertani.com/artikel/detail/Budidaya-Cabai-Sederhana-tapi-Penting-Cara-Tepat-Tanam-Cabai",
14
- "https://digitani.ipb.ac.id/bagaimana-langkah-langkah-budidaya-cabai/"
 
15
  ]
16
 
17
  def run_ingestion_pipeline():
 
11
  SOURCES = [
12
  "https://www.dgwfertilizer.co.id/8-hama-dan-penyakit-penting-pada-tanaman-cabai/",
13
  "https://mitrabertani.com/artikel/detail/Budidaya-Cabai-Sederhana-tapi-Penting-Cara-Tepat-Tanam-Cabai",
14
+ "https://digitani.ipb.ac.id/bagaimana-langkah-langkah-budidaya-cabai/",
15
+ "data/cabai.pdf"
16
  ]
17
 
18
  def run_ingestion_pipeline():
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+ torch
3
+ torchvision
4
+ fastapi
5
+ uvicorn
6
+ python-multipart
7
+ sentence-transformers
8
+ ultralytics
9
+ pillow
10
+ langchain_classic
11
+ langchain_core
12
+ opencv-python-headless
13
+ langchain
14
+ langchain-core
15
+ langchain-openai
16
+ langchain-chroma
17
+ langchain-huggingface
18
+ chromadb
19
+ pypdf
20
+ streamlit
21
+ python-dotenv
src/chains/__pycache__/chain.cpython-312.pyc CHANGED
Binary files a/src/chains/__pycache__/chain.cpython-312.pyc and b/src/chains/__pycache__/chain.cpython-312.pyc differ
 
src/chains/__pycache__/rag.cpython-312.pyc CHANGED
Binary files a/src/chains/__pycache__/rag.cpython-312.pyc and b/src/chains/__pycache__/rag.cpython-312.pyc differ
 
src/chains/chain.py CHANGED
@@ -2,30 +2,22 @@ import sys
2
  import os
3
  from dotenv import load_dotenv
4
 
5
- # Menambahkan root directory ke sys.path agar bisa import dari folder src
6
  root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))
7
  sys.path.append(root_dir)
8
 
9
  from langchain_core.runnables import RunnablePassthrough
10
  from langchain_core.output_parsers import StrOutputParser
11
- from langchain_openai import ChatOpenAI # <- Import ini wajib ada untuk OpenRouter
12
  from langchain_core.messages import SystemMessage
13
- from src.ingestion.embedder import get_embedding_model # Menggunakan fungsi get_embedder yang kita bahas sebelumnya
 
14
  from src.retrieval.vector_store import get_vector_store
15
  from src.retrieval.retriever import get_retriever
16
  from src.chains.prompt import get_rag_prompt
17
 
18
- # Load environment variables (seperti OPENROUTER_API_KEY) dari file .env
19
  load_dotenv()
20
 
21
  def create_rag_chain():
22
- # 1. Setup Komponen Pencarian (Retriever)
23
- vs = get_vector_store()
24
- retriever = get_retriever(vs) # Fungsi ini dari retriever.py yang sudah di-set k=2
25
-
26
- # 2. Setup Prompt & LLM
27
- prompt = get_rag_prompt()
28
-
29
  llm = ChatOpenAI(
30
  model="nvidia/nemotron-3-nano-30b-a3b:free",
31
  temperature=0.2,
@@ -33,28 +25,38 @@ def create_rag_chain():
33
  openai_api_base="https://openrouter.ai/api/v1",
34
  )
35
 
36
- # 3. Fungsi Interceptor untuk Debugging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def format_docs(docs):
38
  # ==========================================
39
- # INTERCEPTOR: Print metadata ke Terminal VS Code
40
  # ==========================================
41
  print("\n" + "▼"*50)
42
- print("🔍 [DEBUG] DOKUMEN YANG DITARIK RETRIEVER:")
43
  for i, doc in enumerate(docs):
44
- # Mengambil informasi 'label' (penyakit) dari metadata db_setup.py
45
  sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
46
-
47
  print(f" [{i+1}] Topik/Label: {sumber}")
48
- # print(f" Teks: {doc.page_content[:75]}...")
49
  print("▲"*50 + "\n")
50
  # ==========================================
51
 
52
- # Gabungkan teks untuk dikirim ke LLM
53
  return "\n\n".join(doc.page_content for doc in docs)
54
 
55
- # 4. Rangkai menjadi Chain (LCEL)
56
  rag_chain = (
57
- # Ubah "question" menjadi "input" agar cocok dengan prompt Anda
58
  {"context": retriever | format_docs, "input": RunnablePassthrough()}
59
  | prompt
60
  | llm
@@ -67,7 +69,6 @@ def create_rag_chain():
67
  if __name__ == "__main__":
68
  chain = create_rag_chain()
69
 
70
- # Menggunakan pertanyaan seputar cabai agar LLM bisa mengambil dari ChromaDB Anda
71
  pertanyaan = "Bagaimana cara menangani penyakit antraknosa (patek) pada tanaman cabai?"
72
  print(f"\nUser: {pertanyaan}")
73
  print("AI sedang berpikir (memproses via OpenRouter)...\n")
 
2
  import os
3
  from dotenv import load_dotenv
4
 
 
5
  root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))
6
  sys.path.append(root_dir)
7
 
8
  from langchain_core.runnables import RunnablePassthrough
9
  from langchain_core.output_parsers import StrOutputParser
10
+ from langchain_openai import ChatOpenAI
11
  from langchain_core.messages import SystemMessage
12
+ from langchain_classic.retrievers import MultiQueryRetriever
13
+ from src.ingestion.embedder import get_embedding_model
14
  from src.retrieval.vector_store import get_vector_store
15
  from src.retrieval.retriever import get_retriever
16
  from src.chains.prompt import get_rag_prompt
17
 
 
18
  load_dotenv()
19
 
20
  def create_rag_chain():
 
 
 
 
 
 
 
21
  llm = ChatOpenAI(
22
  model="nvidia/nemotron-3-nano-30b-a3b:free",
23
  temperature=0.2,
 
25
  openai_api_base="https://openrouter.ai/api/v1",
26
  )
27
 
28
+ vs = get_vector_store()
29
+ base_retriever = get_retriever(vs, search_type="similarity", k=3) # Mengambil 3 chunks teratas
30
+
31
+ # 3. REFACTOR: Bungkus menjadi Multi-Query Retriever
32
+ # LLM akan otomatis membuat ~3 variasi pertanyaan alternatif dari pertanyaan user
33
+ # untuk memastikan dokumen di ChromaDB terambil dengan lebih akurat secara semantik.
34
+ retriever = MultiQueryRetriever.from_llm(
35
+ retriever=base_retriever,
36
+ llm=llm
37
+ )
38
+
39
+ # 4. Setup Prompt
40
+ prompt = get_rag_prompt()
41
+
42
+ # 5. Fungsi Interceptor untuk Debugging di Terminal
43
  def format_docs(docs):
44
  # ==========================================
45
+ # INTERCEPTOR: Print metadata ke Terminal
46
  # ==========================================
47
  print("\n" + "▼"*50)
48
+ print("🔍 [DEBUG] DOKUMEN YANG DITARIK MULTI-QUERY RETRIEVER:")
49
  for i, doc in enumerate(docs):
 
50
  sumber = doc.metadata.get('label', 'Sumber tidak diketahui')
 
51
  print(f" [{i+1}] Topik/Label: {sumber}")
 
52
  print("▲"*50 + "\n")
53
  # ==========================================
54
 
55
+ # Gabungkan teks dokumen yang berhasil dikumpulkan dari semua query alternatif
56
  return "\n\n".join(doc.page_content for doc in docs)
57
 
58
+ # 6. Rangkai menjadi Chain (LCEL)
59
  rag_chain = (
 
60
  {"context": retriever | format_docs, "input": RunnablePassthrough()}
61
  | prompt
62
  | llm
 
69
  if __name__ == "__main__":
70
  chain = create_rag_chain()
71
 
 
72
  pertanyaan = "Bagaimana cara menangani penyakit antraknosa (patek) pada tanaman cabai?"
73
  print(f"\nUser: {pertanyaan}")
74
  print("AI sedang berpikir (memproses via OpenRouter)...\n")
src/chains/rag.py CHANGED
@@ -2,8 +2,6 @@ import os
2
  from langchain_openai import ChatOpenAI
3
  from langchain_chroma import Chroma
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
-
6
- # 1. Import prompt dari file terpisah yang baru dibuat
7
  from src.chains.prompt import DISEASE_PROMPT_TEMPLATE
8
 
9
  llm = ChatOpenAI(
@@ -27,16 +25,23 @@ chain = DISEASE_PROMPT_TEMPLATE | llm
27
  def generate_narrative(disease_name):
28
  print(f"Mencari data untuk label: {disease_name}...")
29
 
 
 
 
 
 
30
  results = vectorstore.similarity_search(
31
- query="berikan penjelasan lengkap", # dummy query karena kita sudah filter berdasarkan label
32
- k=1,
33
  filter={"label": disease_name}
34
  )
35
 
36
  if not results:
37
  return f"Data penyakit '{disease_name}' tidak ditemukan di database."
38
 
39
- retrieved_context = results[0].page_content
 
 
40
 
41
  print("Data ditemukan. Menghasilkan narasi dengan LLM...")
42
 
 
2
  from langchain_openai import ChatOpenAI
3
  from langchain_chroma import Chroma
4
  from langchain_huggingface import HuggingFaceEmbeddings
 
 
5
  from src.chains.prompt import DISEASE_PROMPT_TEMPLATE
6
 
7
  llm = ChatOpenAI(
 
25
  def generate_narrative(disease_name):
26
  print(f"Mencari data untuk label: {disease_name}...")
27
 
28
+ # PERBAIKAN 1: Buat query pencarian yang deskriptif secara semantik
29
+ # Ini membantu model embedding mencari potongan teks yang paling relevan
30
+ search_query = f"Penjelasan lengkap mengenai penyebab, ciri-ciri gejala, dan cara mengatasi penyakit {disease_name} pada tanaman cabai."
31
+
32
+ # PERBAIKAN 2: Tingkatkan nilai k untuk mengambil lebih banyak konteks
33
  results = vectorstore.similarity_search(
34
+ query=search_query,
35
+ k=3, # Mengambil 3 potongan (chunks) teratas
36
  filter={"label": disease_name}
37
  )
38
 
39
  if not results:
40
  return f"Data penyakit '{disease_name}' tidak ditemukan di database."
41
 
42
+ # PERBAIKAN 3: Gabungkan semua teks dari dokumen yang ditemukan
43
+ # Agar LLM mendapatkan informasi yang utuh, tidak hanya dari 1 chunk saja
44
+ retrieved_context = "\n\n".join([doc.page_content for doc in results])
45
 
46
  print("Data ditemukan. Menghasilkan narasi dengan LLM...")
47