Spaces:

arbabarshad
/

agllm2-dev

Sleeping

arbabarshad Claude Opus 4.6 committed on Mar 3

Commit

c43f82f

1 Parent(s): 6dfcfaf

Update retrieval evaluation with k=1,2,4,8 and fix random seed

- Change K values from [1,3,5] to [1,2,4,8] for finer-grained eval
- Add random.seed(42) for reproducible sampling
- Update README evaluation table with new metrics
- Regenerate evaluation results

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (5) hide show

README.md +6 -6
retrieval_evaluation.py +2 -1
retrieval_evaluation_results.json +100 -60
vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species/27ac9297-abc2-406e-8919-7670a60055f1/length.bin +1 -1
vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species/chroma.sqlite3 +1 -1

README.md CHANGED Viewed

@@ -114,12 +114,12 @@ python generate_usa_ipm_info.py --step parse     # Create Excel sheet
 **Output:** Updates `species-organized/PestID Species - Organized.xlsx` with "USA" sheet containing 110 species present in the United States (pests + beneficials).
 ### Evaluation Filters (retrieval_evaluation.py)
-| Filter | P@5 | nDCG@5 |
-|--------|-----|--------|
-| No Filter | 0.82 | 0.72 |
-| Species Only | 0.99 | 0.89 |
-| Region Only | 0.83 | 0.73 |
-| Species + Region | **1.00** | **0.90** |
 ---

 **Output:** Updates `species-organized/PestID Species - Organized.xlsx` with "USA" sheet containing 110 species present in the United States (pests + beneficials).
 ### Evaluation Filters (retrieval_evaluation.py)
+| Filter | P@1 | P@2 | P@4 | P@8 | nDCG@1 | nDCG@2 | nDCG@4 | nDCG@8 |
+|--------|-----|-----|-----|-----|--------|--------|--------|--------|
+| No Filter | 0.64 | 0.76 | 0.81 | 0.85 | 0.64 | 0.72 | 0.74 | 0.75 |
+| Species Only | 0.68 | 0.84 | 0.93 | **1.00** | 0.68 | 0.78 | 0.82 | 0.85 |
+| Region Only | 0.69 | 0.78 | 0.84 | 0.87 | 0.69 | 0.75 | 0.77 | 0.79 |
+| Species + Region | **0.79** | **0.91** | **0.99** | **1.00** | **0.79** | **0.87** | **0.90** | **0.91** |
 ---

retrieval_evaluation.py CHANGED Viewed

@@ -228,6 +228,7 @@ def load_chunks_from_vectordb(persist_directory: str, sample_size: Optional[int]
         chunks.append(chunk_data)
     if sample_size and len(chunks) > sample_size:
         chunks = random.sample(chunks, sample_size)
     return chunks
@@ -238,7 +239,7 @@ def main():
     # Configuration
     VECTOR_DB_PATH = 'vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species'
     SAMPLE_SIZE = 100   # Start with smaller sample for testing
-    K_VALUES = [1, 3, 5]
     OUTPUT_FILE = 'retrieval_evaluation_results.json'
     print("Starting Retrieval Evaluation Pipeline")

         chunks.append(chunk_data)
     if sample_size and len(chunks) > sample_size:
+        random.seed(42)
         chunks = random.sample(chunks, sample_size)
     return chunks
     # Configuration
     VECTOR_DB_PATH = 'vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species'
     SAMPLE_SIZE = 100   # Start with smaller sample for testing
+    K_VALUES = [1, 2, 4, 8]
     OUTPUT_FILE = 'retrieval_evaluation_results.json'
     print("Starting Retrieval Evaluation Pipeline")

retrieval_evaluation_results.json CHANGED Viewed

@@ -1,129 +1,169 @@
 {
   "no_filter": {
     "precision@1": {
-      "mean": 0.55,
-      "std": 0.49749371855331,
       "count": 100
     },
-    "precision@3": {
-      "mean": 0.74,
-      "std": 0.4386342439892262,
       "count": 100
     },
-    "precision@5": {
-      "mean": 0.8,
-      "std": 0.4,
       "count": 100
     },
     "ndcg@1": {
-      "mean": 0.55,
-      "std": 0.49749371855331,
       "count": 100
     },
-    "ndcg@3": {
-      "mean": 0.6620208679642894,
-      "std": 0.4224663020789173,
       "count": 100
     },
-    "ndcg@5": {
-      "mean": 0.6865467489235274,
-      "std": 0.39428047037500696,
       "count": 100
     }
   },
   "species_only": {
     "precision@1": {
-      "mean": 0.72,
-      "std": 0.4489988864128729,
       "count": 100
     },
-    "precision@3": {
-      "mean": 1.0,
-      "std": 0.0,
       "count": 100
     },
-    "precision@5": {
       "mean": 1.0,
       "std": 0.0,
       "count": 100
     },
     "ndcg@1": {
-      "mean": 0.72,
-      "std": 0.4489988864128729,
       "count": 100
     },
-    "ndcg@3": {
-      "mean": 0.8861859507142915,
-      "std": 0.18517270734359137,
       "count": 100
     },
-    "ndcg@5": {
-      "mean": 0.8861859507142915,
-      "std": 0.18517270734359137,
       "count": 100
     }
   },
   "region_only": {
     "precision@1": {
-      "mean": 0.56,
-      "std": 0.4963869458396343,
       "count": 100
     },
-    "precision@3": {
-      "mean": 0.74,
-      "std": 0.4386342439892262,
       "count": 100
     },
-    "precision@5": {
-      "mean": 0.82,
-      "std": 0.38418745424597095,
       "count": 100
     },
     "ndcg@1": {
-      "mean": 0.56,
-      "std": 0.4963869458396343,
       "count": 100
     },
-    "ndcg@3": {
-      "mean": 0.6683301655000039,
-      "std": 0.423160630771083,
       "count": 100
     },
-    "ndcg@5": {
-      "mean": 0.7010313401123213,
-      "std": 0.38430545848027436,
       "count": 100
     }
   },
   "species_and_region": {
     "precision@1": {
-      "mean": 0.74,
-      "std": 0.4386342439892262,
       "count": 100
     },
-    "precision@3": {
-      "mean": 1.0,
-      "std": 0.0,
       "count": 100
     },
-    "precision@5": {
       "mean": 1.0,
       "std": 0.0,
       "count": 100
     },
     "ndcg@1": {
-      "mean": 0.74,
-      "std": 0.4386342439892262,
       "count": 100
     },
-    "ndcg@3": {
-      "mean": 0.8961859507142915,
-      "std": 0.17738436382801476,
       "count": 100
     },
-    "ndcg@5": {
-      "mean": 0.8961859507142915,
-      "std": 0.17738436382801476,
       "count": 100
     }
   }

 {
   "no_filter": {
     "precision@1": {
+      "mean": 0.64,
+      "std": 0.48,
       "count": 100
     },
+    "precision@2": {
+      "mean": 0.76,
+      "std": 0.4270831300812524,
       "count": 100
     },
+    "precision@4": {
+      "mean": 0.81,
+      "std": 0.39230090491866054,
+      "count": 100
+    },
+    "precision@8": {
+      "mean": 0.85,
+      "std": 0.3570714214271425,
       "count": 100
     },
     "ndcg@1": {
+      "mean": 0.64,
+      "std": 0.48,
       "count": 100
     },
+    "ndcg@2": {
+      "mean": 0.7157115704285749,
+      "std": 0.41895779074707734,
       "count": 100
     },
+    "ndcg@4": {
+      "mean": 0.7400183360093088,
+      "std": 0.38986711559166154,
+      "count": 100
+    },
+    "ndcg@8": {
+      "mean": 0.754344341157148,
+      "std": 0.3684202134419992,
       "count": 100
     }
   },
   "species_only": {
     "precision@1": {
+      "mean": 0.68,
+      "std": 0.466476151587624,
       "count": 100
     },
+    "precision@2": {
+      "mean": 0.84,
+      "std": 0.3666060555964672,
+      "count": 100
+    },
+    "precision@4": {
+      "mean": 0.93,
+      "std": 0.25514701644346144,
       "count": 100
     },
+    "precision@8": {
       "mean": 1.0,
       "std": 0.0,
       "count": 100
     },
     "ndcg@1": {
+      "mean": 0.68,
+      "std": 0.466476151587624,
+      "count": 100
+    },
+    "ndcg@2": {
+      "mean": 0.7809487605714333,
+      "std": 0.36580132584863706,
       "count": 100
     },
+    "ndcg@4": {
+      "mean": 0.824562291732901,
+      "std": 0.29154459287790513,
       "count": 100
     },
+    "ndcg@8": {
+      "mean": 0.8508003372990416,
+      "std": 0.22561628946474954,
       "count": 100
     }
   },
   "region_only": {
     "precision@1": {
+      "mean": 0.69,
+      "std": 0.462493243193887,
       "count": 100
     },
+    "precision@2": {
+      "mean": 0.78,
+      "std": 0.4142463035441596,
       "count": 100
     },
+    "precision@4": {
+      "mean": 0.84,
+      "std": 0.36660605559646725,
+      "count": 100
+    },
+    "precision@8": {
+      "mean": 0.87,
+      "std": 0.33630343441600474,
       "count": 100
     },
     "ndcg@1": {
+      "mean": 0.69,
+      "std": 0.462493243193887,
       "count": 100
     },
+    "ndcg@2": {
+      "mean": 0.7467836778214312,
+      "std": 0.4100495706548825,
       "count": 100
     },
+    "ndcg@4": {
+      "mean": 0.7747039745636328,
+      "std": 0.37245769233603554,
+      "count": 100
+    },
+    "ndcg@8": {
+      "mean": 0.7857743640416571,
+      "std": 0.35433577663047267,
       "count": 100
     }
   },
   "species_and_region": {
     "precision@1": {
+      "mean": 0.79,
+      "std": 0.40730823708832603,
       "count": 100
     },
+    "precision@2": {
+      "mean": 0.91,
+      "std": 0.2861817604250837,
+      "count": 100
+    },
+    "precision@4": {
+      "mean": 0.99,
+      "std": 0.09949874371066199,
       "count": 100
     },
+    "precision@8": {
       "mean": 1.0,
       "std": 0.0,
       "count": 100
     },
     "ndcg@1": {
+      "mean": 0.79,
+      "std": 0.40730823708832603,
+      "count": 100
+    },
+    "ndcg@2": {
+      "mean": 0.8657115704285749,
+      "std": 0.2971736181074952,
       "count": 100
     },
+    "ndcg@4": {
+      "mean": 0.9036318671707767,
+      "std": 0.19819838555518796,
       "count": 100
     },
+    "ndcg@8": {
+      "mean": 0.907500395243122,
+      "std": 0.18377359707383636,
       "count": 100
     }
   }

vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species/27ac9297-abc2-406e-8919-7670a60055f1/length.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bb9e60f9d3b2f5f6a730ae39372f43f54d173ad0cfb5e463b21ab0794b67883
 size 40000

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f2fe12bccc27d05f521a0a00eb43912172a7113283d0a99b45af0b2da569582
 size 40000

vector-databases-deployed/db5-agllm-data-isu-field-insects-all-species/chroma.sqlite3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d8f6141f09d3335edbde43ae25cdd9763b4328ae01daac0f4195563188d2fe
 size 10715136

 version https://git-lfs.github.com/spec/v1
+oid sha256:08dce2645f4aa422368f3fc4e975e58a518875b335458e83f60a0205c46f27b8
 size 10715136