from typing import Any, Dict, List, Optional

import pandas as pd
from llama_index import GPTVectorStoreIndex
from sklearn.metrics import accuracy_score, precision_score, recall_score


class EvaluationModule:
    """Evaluate a llama_index vector-store index against labelled test data.

    Metrics are looked up by display name in ``metric_functions``; results
    of individual runs can be accumulated via ``save_evaluation_results``
    and read back with ``get_evaluation_history``.
    """

    def __init__(self):
        # Map of user-facing metric names to sklearn scoring callables.
        # NOTE(review): precision/recall use sklearn's default
        # ``average='binary'``, which raises for multiclass or free-text
        # labels -- confirm the intended label space before relying on them.
        self.metric_functions = {
            "Accuracy": accuracy_score,
            "Precision": precision_score,
            "Recall": recall_score,
        }
        # History of result dicts appended by save_evaluation_results().
        self.evaluation_results = []

    def evaluate_model(
        self,
        index: GPTVectorStoreIndex,
        selected_metrics: List[str],
        test_data: Optional[pd.DataFrame] = None,
    ) -> Dict[str, float]:
        """Evaluate model performance using the selected metrics.

        Args:
            index: Index whose query engine produces predictions.
            selected_metrics: Metric names; names not present in
                ``metric_functions`` are silently skipped (as before).
            test_data: Frame with feature columns plus a ``target`` column
                holding the true labels. When omitted, the built-in default
                evaluation is run instead.

        Returns:
            Mapping of metric name to score.

        Raises:
            KeyError: If ``test_data`` is provided without a ``target`` column.
        """
        if test_data is None:
            return self._run_default_evaluation(index, selected_metrics)

        # Fail fast with a clear message instead of an opaque pandas KeyError.
        if "target" not in test_data.columns:
            raise KeyError("test_data must contain a 'target' column")

        # Query the index ONCE and reuse the predictions for every metric.
        # (Bug fix: the original re-ran the full prediction pass inside the
        # per-metric loop -- one expensive LLM query sweep per metric.)
        predictions = self._get_predictions(index, test_data)
        true_values = test_data["target"].values

        results = {}
        for metric in selected_metrics:
            scorer = self.metric_functions.get(metric)
            if scorer is not None:
                results[metric] = scorer(true_values, predictions)
        return results

    def _get_predictions(
        self,
        index: GPTVectorStoreIndex,
        test_data: pd.DataFrame,
    ) -> List[Any]:
        """Return one prediction per row of ``test_data``.

        Each row is flattened to a ``"col: val"`` query string; the raw
        ``response`` text from the query engine is used as the prediction.
        """
        # Build the query engine once, outside the loop.
        # (Bug fix: the original called index.as_query_engine() per row.)
        query_engine = index.as_query_engine()
        predictions = []
        for _, row in test_data.iterrows():
            query = " ".join(f"{col}: {val}" for col, val in row.items())
            response = query_engine.query(query)
            predictions.append(response.response)
        return predictions

    def _run_default_evaluation(
        self,
        index: GPTVectorStoreIndex,
        selected_metrics: List[str],
    ) -> Dict[str, float]:
        """Run evaluation using built-in default test cases.

        NOTE(review): placeholder implementation -- it returns a fixed 0.85
        for every requested metric and never queries ``index`` or uses
        ``default_tests``. Replace with real evaluation logic.
        """
        default_tests = [
            {
                "query": "What is machine learning?",
                "expected": "Machine learning is a branch of artificial intelligence...",
            },
            # Add more default test cases as needed
        ]
        # TODO: drive actual scoring from ``default_tests`` instead of the
        # placeholder constant below.
        return {metric: 0.85 for metric in selected_metrics}

    def save_evaluation_results(self, results: Dict[str, float]) -> None:
        """Append one evaluation's result dict to the in-memory history."""
        self.evaluation_results.append(results)

    def get_evaluation_history(self) -> List[Dict[str, float]]:
        """Return the accumulated history of evaluation results."""
        return self.evaluation_results