Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| from evidently import Report | |
| from evidently.presets import DataDriftPreset | |
# Input CSV splits: the training split is the drift baseline ("reference")
# and the test split is the data compared against it ("current").
REFERENCE_DATA_PATH = "data/splits/train.csv"
CURRENT_DATA_PATH = "data/splits/test.csv"

# Destination of the rendered HTML drift report.
REPORT_OUTPUT_PATH = "reports/data_drift_report.html"
def _text_features(frame):
    """Build the numeric drift features from ``frame["clean_text"]``.

    Missing cells (NaN, which ``pd.read_csv`` produces for empty fields) are
    treated as empty strings so they yield a length and word count of 0
    instead of raising ``TypeError`` from ``len(float)``.

    Returns a new DataFrame with ``text_length`` and ``word_count`` columns,
    indexed like ``frame``; ``frame`` itself is not modified.
    """
    text = frame["clean_text"].fillna("").astype(str)
    return pd.DataFrame(
        {
            "text_length": text.str.len(),
            # Whitespace split, same rule as str.split() with no separator.
            "word_count": text.str.split().str.len(),
        }
    )


def generate_drift_report():
    """Generate an HTML data-drift report comparing the two CSV splits.

    Reads the CSV files at ``REFERENCE_DATA_PATH`` and ``CURRENT_DATA_PATH``,
    derives two simple numeric features from their ``clean_text`` column
    (character length and whitespace-separated word count), runs Evidently's
    ``DataDriftPreset`` on those features, and saves the result as HTML to
    ``REPORT_OUTPUT_PATH``, creating the parent directory when needed.

    Raises:
        FileNotFoundError: If either input CSV does not exist.
        KeyError: If an input CSV has no ``clean_text`` column.
    """
    reference_data = pd.read_csv(REFERENCE_DATA_PATH)
    current_data = pd.read_csv(CURRENT_DATA_PATH)

    # Use simple numerical features for drift monitoring.
    reference_features = _text_features(reference_data)
    current_features = _text_features(current_data)

    report = Report([DataDriftPreset()])
    result = report.run(
        reference_data=reference_features,
        current_data=current_features,
    )

    # Derive the directory from the output path so the two cannot drift apart;
    # skip makedirs when the path has no directory part (makedirs("") raises).
    output_dir = os.path.dirname(REPORT_OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    result.save_html(REPORT_OUTPUT_PATH)

    print("Data drift report generated successfully.")
    print(f"Report saved at: {REPORT_OUTPUT_PATH}")
if __name__ == "__main__":
    # Build the report only when this file is executed as a script,
    # not when it is imported as a module.
    generate_drift_report()