persadian committed on
Commit
90f1840
·
verified ·
1 Parent(s): 9c08f2d

Create create_crop_db.py

Browse files
Files changed (1) hide show
  1. crop_db/create_crop_db.py +42 -0
crop_db/create_crop_db.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Populate the persistent ChromaDB ``crop_data`` collection.

Loads the ``train`` split of the ``DARJYO/sawotiQ29_crop_optimization``
dataset, renders each record as a one-line text description, and stores the
descriptions (with region/season metadata) in a ChromaDB collection persisted
under ``crop_db``.
"""
import chromadb
from datasets import load_dataset
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# Number of records written per ChromaDB call. Chroma caps how many records a
# single add/upsert call may carry; 1000 is chosen conservatively to stay under
# that cap (NOTE(review): the exact limit depends on the Chroma/SQLite build).
BATCH_SIZE = 1000

# Initialize ChromaDB
client = chromadb.PersistentClient(path="crop_db")
embedder = SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")

# Create collection with metadata
collection = client.get_or_create_collection(
    name="crop_data",
    embedding_function=embedder,
    metadata={"hnsw:space": "cosine"},
)

# Load crop optimization dataset
dataset = load_dataset("DARJYO/sawotiQ29_crop_optimization")

# Prepare documents and metadata
documents = []
metadatas = []
ids = []

for idx, item in enumerate(dataset['train']):
    doc = f"{item['crop_name']} - {item['region']}. Optimal conditions: {item['optimal_temperature']}°C, {item['annual_rainfall']}mm rainfall. Soil: {item['preferred_soil']}. Yield: {item['average_yield']}"

    documents.append(doc)
    metadatas.append({
        "type": "crop",
        "region": item['region'],
        "season": item['best_season'],
    })
    # The row position in the split doubles as a stable record id.
    ids.append(str(idx))

# Add to collection.
# Written in fixed-size chunks so a large dataset never exceeds Chroma's
# per-call batch limit, and so an empty dataset results in no call at all.
# `upsert` is used instead of `add` because the collection is persistent and
# obtained with get_or_create: re-running the script must refresh the records
# with the same ids rather than collide with the ones already stored.
for start in range(0, len(documents), BATCH_SIZE):
    stop = start + BATCH_SIZE
    collection.upsert(
        documents=documents[start:stop],
        metadatas=metadatas[start:stop],
        ids=ids[start:stop],
    )

print(f"Created crop database with {len(documents)} entries")