Ashar086 committed on
Commit
c038ce2
·
verified ·
1 Parent(s): eca89a1

Create modules/evaluation.py

Browse files
Files changed (1) hide show
  1. modules/evaluation.py +87 -0
modules/evaluation.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any, Dict, List, Optional

import pandas as pd
from llama_index import GPTVectorStoreIndex
from sklearn.metrics import accuracy_score, precision_score, recall_score
+
6
class EvaluationModule:
    """Evaluate a llama_index vector-store index against labelled test data.

    Metric names ("Accuracy", "Precision", "Recall") map to their sklearn
    scoring functions. Results of each run can be stored in an in-memory
    history via ``save_evaluation_results`` / ``get_evaluation_history``.

    NOTE(review): precision/recall default to sklearn's ``average='binary'``,
    which assumes binary labels with a ``pos_label`` of 1 — confirm against
    the labels actually used before relying on those two metrics.
    """

    def __init__(self):
        # Supported metric names mapped to their sklearn implementations.
        self.metric_functions = {
            "Accuracy": accuracy_score,
            "Precision": precision_score,
            "Recall": recall_score,
        }
        # History of result dicts appended by save_evaluation_results().
        self.evaluation_results = []

    def evaluate_model(
        self,
        index: "GPTVectorStoreIndex",
        selected_metrics: List[str],
        test_data: Optional[pd.DataFrame] = None,
        target_column: str = "target",
    ) -> Dict[str, float]:
        """Evaluate model performance using the selected metrics.

        Args:
            index: Index whose query engine produces the predictions.
            selected_metrics: Metric names; unknown names are skipped.
            test_data: Rows of features plus a label column. When ``None``,
                falls back to ``_run_default_evaluation``.
            target_column: Name of the label column in ``test_data``.

        Returns:
            Mapping of metric name to score (empty if no known metric
            was requested).
        """
        if test_data is None:
            return self._run_default_evaluation(index, selected_metrics)

        requested = [m for m in selected_metrics if m in self.metric_functions]
        if not requested:
            return {}

        # Query the index ONCE and reuse the predictions for every metric.
        # (Previously predictions were recomputed inside the metric loop,
        # i.e. one full pass of index queries per metric.)
        predictions = self._get_predictions(index, test_data, target_column)
        true_values = test_data[target_column].values

        return {
            metric: self.metric_functions[metric](true_values, predictions)
            for metric in requested
        }

    def _get_predictions(
        self,
        index: "GPTVectorStoreIndex",
        test_data: pd.DataFrame,
        target_column: str = "target",
    ) -> List[Any]:
        """Return one prediction (response text) per row of ``test_data``.

        The label column is dropped before building the query text so the
        ground-truth answer is never leaked to the model being evaluated.
        """
        # Exclude the label; errors="ignore" keeps this safe if the caller
        # already stripped it.
        feature_data = test_data.drop(columns=[target_column], errors="ignore")

        # Build the query engine once, not per row.
        query_engine = index.as_query_engine()

        predictions = []
        for _, row in feature_data.iterrows():
            # Serialize the feature row as "col: val" pairs.
            query = " ".join(f"{col}: {val}" for col, val in row.items())
            predictions.append(query_engine.query(query).response)
        return predictions

    def _run_default_evaluation(
        self,
        index: "GPTVectorStoreIndex",
        selected_metrics: List[str],
    ) -> Dict[str, float]:
        """Fallback used when no test data is supplied.

        TODO(review): run real queries against built-in test cases; this
        currently returns a fixed placeholder score of 0.85 for every
        requested metric (the index is not consulted).
        """
        return {metric: 0.85 for metric in selected_metrics}

    def save_evaluation_results(self, results: Dict[str, float]):
        """Append one evaluation run's results to the in-memory history."""
        self.evaluation_results.append(results)

    def get_evaluation_history(self) -> List[Dict[str, float]]:
        """Return all results saved so far, oldest first."""
        return self.evaluation_results