Spaces:
Sleeping
Sleeping
| # evaluate.py | |
| import pandas as pd | |
| from sklearn.metrics import ( | |
| accuracy_score, precision_score, | |
| recall_score, f1_score, classification_report | |
| ) | |
| from ml_utils import ScamDetectionService | |
def run_evaluation(csv_path: str = "spam.csv", sample_size: int = 200):
    """Evaluate the scam detection model on the SMS Spam Collection dataset.

    A class-balanced sample is scored instead of the whole file to keep
    runtime reasonable (zero-shot is slow on CPU). Metrics are printed to
    stdout; nothing is returned.

    Args:
        csv_path: path to the downloaded spam.csv. It is read as latin-1 and
            must contain the columns ``v1`` (ham/spam label) and ``v2``
            (message text).
        sample_size: how many rows to evaluate in total (200 is enough for
            metrics). At most ``sample_size // 2`` rows are drawn per class.

    Raises:
        ValueError: if ``sample_size`` is below 2, or if the file contains
            no rows labelled ``ham`` or ``spam``.
    """
    per_class = sample_size // 2
    if per_class < 1:
        raise ValueError(f"sample_size must be at least 2, got {sample_size}")

    print("Loading dataset...")
    df = pd.read_csv(csv_path, encoding='latin-1')[['v1', 'v2']]
    df.columns = ['label', 'text']

    # Map ham/spam -> Safe/Scam. Any other label maps to NaN, and groupby
    # skips NaN keys by default, so such rows never reach the sample.
    df['true_label'] = df['label'].map({'ham': 'Safe', 'spam': 'Scam'})

    # Sample evenly across both classes. The groups are iterated directly
    # instead of going through GroupBy.apply: apply's treatment of the
    # grouping column changed in recent pandas releases, which can leave the
    # result without 'true_label'. Iterating always yields full sub-frames.
    class_samples = [
        group.sample(min(len(group), per_class), random_state=42)
        for _, group in df.groupby('true_label')
    ]
    if not class_samples:
        raise ValueError(f"No rows labelled 'ham' or 'spam' found in {csv_path!r}")
    sample_df = pd.concat(class_samples).reset_index(drop=True)

    total = len(sample_df)
    print(f"Evaluating on {total} samples...")

    service = ScamDetectionService()
    predictions = []
    # Count processed rows with enumerate so the progress output does not
    # depend on the DataFrame index.
    for done, text in enumerate(sample_df['text'], start=1):
        result = service.analyze_text_scam(text)
        # Collapse Suspicious -> Scam for binary eval
        pred = "Scam" if result["risk_level"] in ("Scam", "Suspicious") else "Safe"
        predictions.append(pred)
        if done % 20 == 0:
            print(f" Progress: {done}/{total}")

    true_labels = sample_df['true_label'].tolist()

    print("\n========== EVALUATION RESULTS ==========")
    print(f"Accuracy : {accuracy_score(true_labels, predictions):.4f}")
    print(f"Precision : {precision_score(true_labels, predictions, pos_label='Scam'):.4f}")
    print(f"Recall : {recall_score(true_labels, predictions, pos_label='Scam'):.4f}")
    print(f"F1 Score : {f1_score(true_labels, predictions, pos_label='Scam'):.4f}")
    print("\nFull Report:")
    print(classification_report(true_labels, predictions))
    print("=========================================")
    print("\nCopy these numbers into your README and resume.")
# Script entry point: run the evaluation with its default arguments when this
# file is executed directly; importing the module does not trigger it.
if __name__ == "__main__":
    run_evaluation()