File size: 1,809 Bytes
faf22ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c34ad45
faf22ac
 
c34ad45
0dd2dc1
faf22ac
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import os
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Path of the JSON knowledge-base file that main() reads.
JSON_FILE_PATH = "data/data.json"
# Directory handed to Chroma as persist_directory, i.e. where the vector store is saved.
CHROMA_DB_DIR = "chroma_data"

# Sections rendered into each document, as (JSON key, heading) pairs in
# output order.
_SECTIONS = (
    ("penyebab", "Penyebab"),
    ("gejala", "Gejala"),
    ("penanganan", "Penanganan"),
    ("pencegahan", "Pencegahan"),
)


def _format_entry(label, details):
    """Render one knowledge-base entry as the plain text that gets embedded.

    Args:
        label: The entry's key in the JSON file.
        details: The entry's value; expected to be a dict whose section keys
            (see _SECTIONS) map to lists of strings.

    Returns:
        A header line followed by one bulleted block per non-empty section.
        Missing/empty sections are omitted; a non-dict ``details`` yields the
        header line only.
    """
    parts = [f"Penyakit/Kondisi: {label}\n"]
    if not isinstance(details, dict):
        return parts[0]

    for key, heading in _SECTIONS:
        items = details.get(key)
        if not items:
            continue
        # A bare string would otherwise be joined character by character.
        if isinstance(items, str):
            items = [items]
        bullets = "\n- ".join(str(item) for item in items)
        parts.append(f"{heading}:\n- {bullets}\n")

    return "".join(parts)


def main():
    """Build the persisted ChromaDB knowledge base from the JSON file.

    Reads JSON_FILE_PATH, converts every top-level entry into one LangChain
    Document (text from _format_entry, the entry key stored as ``label``
    metadata), embeds the documents and stores them in the ``chilicare_kb``
    collection persisted under CHROMA_DB_DIR.

    Prints a message and returns early when the file is missing, when its
    top-level value is not a JSON object, or when it contains no entries.
    """
    if not os.path.exists(JSON_FILE_PATH):
        print(f"Error: File {JSON_FILE_PATH} not found.")
        return

    print("Reading JSON data...")
    with open(JSON_FILE_PATH, "r", encoding="utf-8") as f:
        chili_data = json.load(f)

    if not isinstance(chili_data, dict):
        print(f"Error: {JSON_FILE_PATH} must contain a JSON object keyed by label.")
        return

    print("Converting nested JSON data into LangChain Document format")
    documents = [
        Document(
            page_content=_format_entry(label, details),
            metadata={"label": label},
        )
        for label, details in chili_data.items()
    ]

    # Bail out before the (expensive) embedding model load when there is
    # nothing to index.
    if not documents:
        print(f"Error: no entries found in {JSON_FILE_PATH}; nothing to store.")
        return

    print("Memuat Embedding Model")
    embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")

    print("Menyimpan data ke ChromaDB")
    Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        # Stable ids keyed on the (unique) JSON label: re-running the script
        # against the same persist directory then overwrites existing entries
        # instead of appending duplicates under fresh random ids.
        ids=[doc.metadata["label"] for doc in documents],
        persist_directory=CHROMA_DB_DIR,
        collection_name="chilicare_kb",
    )
    print(f"Berhasil! Database tersimpan di folder: {CHROMA_DB_DIR}\n")


# Run the ingestion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()