Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test suite for verifying that kpi_correlation_app correctly matches emails | |
| and retrieves corresponding values from KPI and scores CSV files. | |
| """ | |
| import os | |
| import sys | |
| import pandas as pd | |
| import numpy as np | |
| import tempfile | |
| import unittest | |
| from io import StringIO | |
| # Add the current directory to Python path for imports | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from kpi_correlation_app import analyze_correlations, convert_percentage_to_numeric | |
| from csv_utils import robust_csv_loader, find_email_column, find_ipm_columns | |
| class TestEmailMatching(unittest.TestCase): | |
| """Test suite for email matching and value correspondence""" | |
| def setUp(self): | |
| """Set up test data files""" | |
| self.test_dir = tempfile.mkdtemp() | |
| # Create test scores data | |
| self.scores_data = pd.DataFrame({ | |
| 'email': [ | |
| 'john.doe@company.com', | |
| 'jane.smith@company.com', | |
| 'MIKE.JONES@company.com', # Different case | |
| 'sarah.williams@company.com', | |
| 'tom.brown@company.com' | |
| ], | |
| 'problem_score': [85.5, 92.0, 78.5, 88.0, 90.5], | |
| 'ability_score': [4.2, 4.8, 3.9, 4.5, 4.6] | |
| }) | |
| # Create test KPI data with known values | |
| self.kpi_data = pd.DataFrame({ | |
| 'Email': [ | |
| 'John.Doe@company.com', # Different case than scores | |
| 'jane.smith@company.com', # Same case | |
| 'mike.jones@company.com', # Different case than scores | |
| 'sarah.williams@company.com', | |
| 'alice.wonder@company.com' # Not in scores | |
| ], | |
| 'FY23/24 全年IPM': ['85%', '90%', '75%', '88%', '92%'], | |
| 'FY24/25 全年IPM': ['87%', '93%', '78%', '89%', '94%'], | |
| 'Department': ['Sales', 'Marketing', 'IT', 'HR', 'Finance'] | |
| }) | |
| # Save test files | |
| self.scores_file = os.path.join(self.test_dir, 'lenovo-scores-0603.csv') | |
| self.kpi_file = os.path.join(self.test_dir, 'test_kpi.csv') | |
| self.scores_data.to_csv(self.scores_file, index=False) | |
| self.kpi_data.to_csv(self.kpi_file, index=False) | |
| def tearDown(self): | |
| """Clean up test files""" | |
| import shutil | |
| if os.path.exists(self.test_dir): | |
| shutil.rmtree(self.test_dir) | |
| def test_email_case_insensitive_matching(self): | |
| """Test that emails are matched case-insensitively""" | |
| # Temporarily change directory to test directory | |
| original_dir = os.getcwd() | |
| os.chdir(self.test_dir) | |
| try: | |
| # Analyze correlations | |
| results_text, _, _, _, _, correlation_results = analyze_correlations(self.kpi_file) | |
| # Check that case differences don't prevent matching | |
| self.assertIn("Matched emails: 4 records", results_text) | |
| # Verify that john.doe, jane.smith, mike.jones, and sarah.williams were matched | |
| # (alice.wonder and tom.brown should not match) | |
| finally: | |
| os.chdir(original_dir) | |
| def test_correct_value_correspondence(self): | |
| """Test that correct values are retrieved for matched emails""" | |
| # Create a simplified version for direct testing | |
| scores_df = self.scores_data.copy() | |
| kpi_df = self.kpi_data.copy() | |
| # Normalize emails | |
| scores_df['email_normalized'] = scores_df['email'].str.lower() | |
| kpi_df['email_normalized'] = kpi_df['Email'].str.lower() | |
| # Merge datasets | |
| merged_df = pd.merge(scores_df, kpi_df, on='email_normalized', how='inner') | |
| # Test specific email correspondences | |
| john_row = merged_df[merged_df['email_normalized'] == 'john.doe@company.com'] | |
| self.assertEqual(len(john_row), 1) | |
| self.assertAlmostEqual(john_row['problem_score'].iloc[0], 85.5) | |
| self.assertAlmostEqual(john_row['ability_score'].iloc[0], 4.2) | |
| self.assertEqual(john_row['FY23/24 全年IPM'].iloc[0], '85%') | |
| self.assertEqual(john_row['FY24/25 全年IPM'].iloc[0], '87%') | |
| # Test another email | |
| jane_row = merged_df[merged_df['email_normalized'] == 'jane.smith@company.com'] | |
| self.assertEqual(len(jane_row), 1) | |
| self.assertAlmostEqual(jane_row['problem_score'].iloc[0], 92.0) | |
| self.assertAlmostEqual(jane_row['ability_score'].iloc[0], 4.8) | |
| self.assertEqual(jane_row['FY23/24 全年IPM'].iloc[0], '90%') | |
| self.assertEqual(jane_row['FY24/25 全年IPM'].iloc[0], '93%') | |
| def test_percentage_conversion(self): | |
| """Test that percentage strings are correctly converted to numeric values""" | |
| # Test conversion function | |
| test_series = pd.Series(['85%', '90.5%', '100%', '0%', 'invalid']) | |
| converted = convert_percentage_to_numeric(test_series) | |
| self.assertAlmostEqual(converted.iloc[0], 0.85) | |
| self.assertAlmostEqual(converted.iloc[1], 0.905) | |
| self.assertAlmostEqual(converted.iloc[2], 1.0) | |
| self.assertAlmostEqual(converted.iloc[3], 0.0) | |
| self.assertTrue(pd.isna(converted.iloc[4])) # 'invalid' should be NaN | |
| def test_missing_emails_handling(self): | |
| """Test that non-matching emails are handled correctly""" | |
| scores_df = self.scores_data.copy() | |
| kpi_df = self.kpi_data.copy() | |
| # Normalize emails | |
| scores_df['email_normalized'] = scores_df['email'].str.lower() | |
| kpi_df['email_normalized'] = kpi_df['Email'].str.lower() | |
| # Merge datasets | |
| merged_df = pd.merge(scores_df, kpi_df, on='email_normalized', how='inner') | |
| # tom.brown@company.com is in scores but not in KPI | |
| self.assertFalse('tom.brown@company.com' in merged_df['email_normalized'].values) | |
| # alice.wonder@company.com is in KPI but not in scores | |
| self.assertFalse('alice.wonder@company.com' in merged_df['email_normalized'].values) | |
| # Only 4 emails should match | |
| self.assertEqual(len(merged_df), 4) | |
| def test_excel_file_support(self): | |
| """Test that Excel files are correctly processed""" | |
| # Save KPI data as Excel | |
| excel_file = os.path.join(self.test_dir, 'test_kpi.xlsx') | |
| self.kpi_data.to_excel(excel_file, index=False) | |
| # Test loading Excel file | |
| loaded_df = robust_csv_loader(excel_file) | |
| # Verify data integrity | |
| self.assertEqual(len(loaded_df), len(self.kpi_data)) | |
| self.assertListEqual(loaded_df.columns.tolist(), self.kpi_data.columns.tolist()) | |
| def test_correlation_calculation_integrity(self): | |
| """Test that correlations are calculated on correctly matched data""" | |
| # Create specific test data with known correlations | |
| test_scores = pd.DataFrame({ | |
| 'email': ['user1@test.com', 'user2@test.com', 'user3@test.com', 'user4@test.com', 'user5@test.com'], | |
| 'problem_score': [10, 20, 30, 40, 50], | |
| 'ability_score': [1, 2, 3, 4, 5] | |
| }) | |
| test_kpi = pd.DataFrame({ | |
| 'Email': ['user1@test.com', 'user2@test.com', 'user3@test.com', 'user4@test.com', 'user5@test.com'], | |
| 'FY23/24 全年IPM': ['20%', '40%', '60%', '80%', '100%'], # Perfect correlation | |
| 'FY24/25 全年IPM': ['100%', '80%', '60%', '40%', '20%'] # Perfect negative correlation | |
| }) | |
| # Save test files | |
| test_scores_file = os.path.join(self.test_dir, 'lenovo-scores-0603.csv') | |
| test_kpi_file = os.path.join(self.test_dir, 'perfect_correlation_kpi.csv') | |
| test_scores.to_csv(test_scores_file, index=False) | |
| test_kpi.to_csv(test_kpi_file, index=False) | |
| # Change to test directory and analyze | |
| original_dir = os.getcwd() | |
| os.chdir(self.test_dir) | |
| try: | |
| results_text, _, _, _, _, correlation_results = analyze_correlations(test_kpi_file) | |
| # Check AC correlation (problem_score vs FY23/24 IPM) - should be perfect positive | |
| if correlation_results and 'AC' in correlation_results: | |
| self.assertAlmostEqual(correlation_results['AC']['pearson'], 1.0, places=5) | |
| # Check AD correlation (problem_score vs FY24/25 IPM) - should be perfect negative | |
| if correlation_results and 'AD' in correlation_results: | |
| self.assertAlmostEqual(correlation_results['AD']['pearson'], -1.0, places=5) | |
| finally: | |
| os.chdir(original_dir) | |
| class TestColumnFinding(unittest.TestCase): | |
| """Test suite for column finding utilities""" | |
| def test_find_email_column_variations(self): | |
| """Test finding email column with various naming conventions""" | |
| test_cases = [ | |
| pd.DataFrame({'Email': [], 'Name': []}), | |
| pd.DataFrame({'email': [], 'Name': []}), | |
| pd.DataFrame({'EMAIL': [], 'Name': []}), | |
| pd.DataFrame({'Email Address': [], 'Name': []}), | |
| pd.DataFrame({'user_email': [], 'Name': []}) | |
| ] | |
| for df in test_cases: | |
| email_col = find_email_column(df) | |
| self.assertIsNotNone(email_col, f"Failed to find email column in {df.columns.tolist()}") | |
| self.assertTrue('email' in email_col.lower()) | |
| def test_find_ipm_columns(self): | |
| """Test finding IPM columns with exact naming""" | |
| df = pd.DataFrame({ | |
| 'Email': [], | |
| 'FY23/24 全年IPM': [], | |
| 'FY24/25 全年IPM': [], # Note the extra space | |
| 'Other Column': [] | |
| }) | |
| fy2425_col, fy2324_col = find_ipm_columns(df) | |
| self.assertEqual(fy2324_col, 'FY23/24 全年IPM') | |
| self.assertEqual(fy2425_col, 'FY24/25 全年IPM') | |
| def run_detailed_tests(): | |
| """Run tests with detailed output""" | |
| # Create a test suite | |
| loader = unittest.TestLoader() | |
| suite = unittest.TestSuite() | |
| # Add all test classes | |
| suite.addTests(loader.loadTestsFromTestCase(TestEmailMatching)) | |
| suite.addTests(loader.loadTestsFromTestCase(TestColumnFinding)) | |
| # Run with verbose output | |
| runner = unittest.TextTestRunner(verbosity=2) | |
| result = runner.run(suite) | |
| return result.wasSuccessful() | |
| if __name__ == '__main__': | |
| print("=" * 70) | |
| print("Running Email Matching and Value Correspondence Tests") | |
| print("=" * 70) | |
| success = run_detailed_tests() | |
| if success: | |
| print("\n✅ All tests passed! The app correctly matches emails and retrieves corresponding values.") | |
| else: | |
| print("\n❌ Some tests failed. Please review the output above.") | |
| sys.exit(0 if success else 1) |