Spaces:
Running
Running
| import json | |
| import os | |
| from langchain_core.documents import Document | |
| from langchain_chroma import Chroma | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
# Path of the JSON knowledge-base file that main() reads.
JSON_FILE_PATH = "data/data.json"
# Directory handed to Chroma as persist_directory when the collection is built.
CHROMA_DB_DIR = "chroma_data"
# (JSON key, heading printed in the document text) for each optional section,
# in the order they appear in the generated text.
_SECTIONS = (
    ("penyebab", "Penyebab"),
    ("gejala", "Gejala"),
    ("penanganan", "Penanganan"),
    ("pencegahan", "Pencegahan"),
)


def _as_items(value):
    """Normalize one section value to a list of strings."""
    if not value:
        return []
    # A bare string would otherwise be joined character by character.
    if isinstance(value, str):
        return [value]
    return [str(item) for item in value]


def _build_entry_text(label, details):
    """Render one knowledge-base entry as the bulleted text that gets embedded."""
    parts = [f"Penyakit/Kondisi: {label}\n"]
    for key, heading in _SECTIONS:
        items = _as_items((details or {}).get(key))
        if items:
            parts.append(f"{heading}:\n- " + "\n- ".join(items) + "\n")
    return "".join(parts)


def main():
    """Build the ``chilicare_kb`` Chroma collection from the JSON knowledge base.

    Reads ``JSON_FILE_PATH``, which must hold a JSON object mapping each
    label to a dict of optional sections (``penyebab``, ``gejala``,
    ``penanganan``, ``pencegahan``). Each entry becomes one ``Document``
    whose text lists the non-empty sections as bullet points and whose
    metadata carries the label. The documents are embedded and stored in
    ``CHROMA_DB_DIR``.

    Prints an error and returns early when the file is missing or does not
    contain a non-empty JSON object. Returns ``None`` in every case.
    """
    if not os.path.exists(JSON_FILE_PATH):
        print(f"Error: File {JSON_FILE_PATH} not found.")
        return

    print("Reading JSON data...")
    with open(JSON_FILE_PATH, "r", encoding="utf-8") as f:
        chili_data = json.load(f)

    # Guard against a wrong top-level shape or an empty file before loading
    # the embedding model; there would be nothing to store.
    if not isinstance(chili_data, dict) or not chili_data:
        print(f"Error: {JSON_FILE_PATH} must contain a non-empty JSON object.")
        return

    print("Converting nested JSON data into LangChain Document format")
    documents = [
        Document(
            page_content=_build_entry_text(label, details),
            metadata={"label": label},
        )
        for label, details in chili_data.items()
    ]

    print("Memuat Embedding Model")
    embeddings = HuggingFaceEmbeddings(model_name="Qwen/Qwen3-Embedding-0.6B")

    print("Menyimpan data ke ChromaDB")
    Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        # Stable ids (the unique JSON keys) let a re-run overwrite existing
        # entries instead of appending duplicates under fresh random ids.
        ids=[doc.metadata["label"] for doc in documents],
        persist_directory=CHROMA_DB_DIR,
        collection_name="chilicare_kb",
    )
    print(f"Berhasil! Database tersimpan di folder: {CHROMA_DB_DIR}\n")
# Run the ingestion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()