Create modules/evaluation.py
modules/evaluation.py  ADDED  (+87 -0)
@@ -0,0 +1,87 @@
+from sklearn.metrics import accuracy_score, precision_score, recall_score
+from typing import List, Dict, Any, Optional
+import pandas as pd
+from llama_index import GPTVectorStoreIndex
+
+class EvaluationModule:
+    def __init__(self):
+        self.metric_functions = {
+            "Accuracy": accuracy_score,
+            "Precision": lambda y, p: precision_score(y, p, average="weighted", zero_division=0),
+            "Recall": lambda y, p: recall_score(y, p, average="weighted", zero_division=0)
+        }
+        self.evaluation_results = []
+
+    def evaluate_model(
+        self,
+        index: GPTVectorStoreIndex,
+        selected_metrics: List[str],
+        test_data: Optional[pd.DataFrame] = None
+    ) -> Dict[str, float]:
+        """Evaluate model performance using the selected metrics."""
+        results = {}
+
+        # If no test data is provided, fall back to the default test cases
+        if test_data is None:
+            # _run_default_evaluation currently returns placeholder scores
+            return self._run_default_evaluation(index, selected_metrics)
+
+        for metric in selected_metrics:
+            if metric in self.metric_functions:
+                # Calculate predictions using the index
+                predictions = self._get_predictions(index, test_data)
+                true_values = test_data['target'].values  # Adjust column name as needed
+
+                # Calculate metric
+                score = self.metric_functions[metric](true_values, predictions)
+                results[metric] = score
+
+        return results
+
+    def _get_predictions(
+        self,
+        index: GPTVectorStoreIndex,
+        test_data: pd.DataFrame
+    ) -> List[Any]:
+        """Get predictions for test data using the index."""
+        predictions = []
+
+        for _, row in test_data.iterrows():
+            # Convert the row to a query string
+            query = " ".join([f"{col}: {val}" for col, val in row.items()])
+
+            # Query the index
+            response = index.as_query_engine().query(query)
+            predictions.append(response.response)
+
+        return predictions
+
+    def _run_default_evaluation(
+        self,
+        index: GPTVectorStoreIndex,
+        selected_metrics: List[str]
+    ) -> Dict[str, float]:
+        """Run evaluation using default test cases."""
+        # Define some default test cases
+        default_tests = [
+            {
+                "query": "What is machine learning?",
+                "expected": "Machine learning is a branch of artificial intelligence..."
+            },
+            # Add more default test cases as needed
+        ]  # not used yet; placeholder scores are returned below
+
+        results = {}
+        for metric in selected_metrics:
+            # For demonstration, return placeholder scores
+            results[metric] = 0.85  # Replace with actual evaluation logic
+
+        return results
+
+    def save_evaluation_results(self, results: Dict[str, float]):
+        """Save evaluation results for tracking."""
+        self.evaluation_results.append(results)
+
+    def get_evaluation_history(self) -> List[Dict[str, float]]:
+        """Get history of evaluation results."""
+        return self.evaluation_results
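For context, here is a minimal usage sketch (not part of this commit) showing how the module could be driven from the Space. It assumes the legacy llama_index API used in the imports above (GPTVectorStoreIndex, SimpleDirectoryReader), an OpenAI key in the environment, a local data/ directory of documents, and a hypothetical test DataFrame whose label column is named "target", matching what evaluate_model expects.

import pandas as pd
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

from modules.evaluation import EvaluationModule

# Build an index over local documents (hypothetical data/ directory)
documents = SimpleDirectoryReader("data").load_data()
index = GPTVectorStoreIndex.from_documents(documents)

# Hypothetical labelled test set; the "target" column is what evaluate_model reads
test_df = pd.DataFrame({
    "question": ["What is machine learning?"],
    "target": ["Machine learning is a branch of artificial intelligence..."],
})

evaluator = EvaluationModule()
results = evaluator.evaluate_model(index, ["Accuracy", "Precision"], test_data=test_df)
evaluator.save_evaluation_results(results)
print(results)
print(evaluator.get_evaluation_history())

Note that _get_predictions folds every column of the row (including "target") into the query, and the sklearn metrics compare raw response strings against the targets, so this sketch will only score meaningfully once the test set uses categorical labels or the responses are normalized before comparison.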