"""Generate an Evidently data-drift report comparing two CSV splits.

Two numeric features are derived from a text column (character length and
whitespace-delimited word count) and compared between a reference dataset
and a current dataset using Evidently's ``DataDriftPreset``.
"""

import os

import pandas as pd
from evidently import Report
from evidently.presets import DataDriftPreset

REFERENCE_DATA_PATH = "data/splits/train.csv"
CURRENT_DATA_PATH = "data/splits/test.csv"
REPORT_OUTPUT_PATH = "reports/data_drift_report.html"


def _build_text_features(frame, text_column):
    """Return a new DataFrame with ``text_length`` and ``word_count`` columns.

    Missing values in *text_column* are treated as empty text, so they
    contribute a length of 0 and a word count of 0 instead of raising on
    ``len(NaN)`` or being counted as the literal word "nan".

    Raises:
        KeyError: If *text_column* is not a column of *frame*.
    """
    # Normalise once so both features are computed from the same values.
    text = frame[text_column].fillna("").astype(str)
    return pd.DataFrame(
        {
            "text_length": text.map(len),
            "word_count": text.map(lambda value: len(value.split())),
        },
        index=frame.index,
    )


def generate_drift_report(
    reference_path=REFERENCE_DATA_PATH,
    current_path=CURRENT_DATA_PATH,
    output_path=REPORT_OUTPUT_PATH,
    text_column="clean_text",
):
    """Build a data-drift HTML report from two CSV files.

    Args:
        reference_path: CSV file used as the reference (baseline) dataset.
        current_path: CSV file compared against the reference.
        output_path: Destination of the generated HTML report. Its parent
            directory is created if it does not exist.
        text_column: Name of the text column the numeric features are
            derived from.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If *text_column* is missing from either CSV file.
    """
    reference_data = pd.read_csv(reference_path)
    current_data = pd.read_csv(current_path)

    # Use simple numerical features for drift monitoring.
    reference_features = _build_text_features(reference_data, text_column)
    current_features = _build_text_features(current_data, text_column)

    report = Report([DataDriftPreset()])
    result = report.run(
        reference_data=reference_features,
        current_data=current_features,
    )

    # Derive the directory from the output path so the two cannot drift
    # apart; a bare filename has no directory component to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    result.save_html(output_path)

    print("Data drift report generated successfully.")
    print(f"Report saved at: {output_path}")


if __name__ == "__main__":
    generate_drift_report()