# Web-page header captured together with this file (not part of the program):
#   pearlll's picture
#   Deploy document classifier app
#   492754f
#   Raw
#   History Blame Contribute Delete
#   1.35 kB
import os
import pandas as pd
from evidently import Report
from evidently.presets import DataDriftPreset
# CSV loaded as the reference (baseline) dataset for the drift comparison.
REFERENCE_DATA_PATH = "data/splits/train.csv"
# CSV loaded as the current dataset that is compared against the reference.
CURRENT_DATA_PATH = "data/splits/test.csv"
# Destination of the rendered HTML drift report.
REPORT_OUTPUT_PATH = "reports/data_drift_report.html"
def _text_features(frame):
    """Return a frame with ``text_length`` and ``word_count`` built from ``clean_text``."""
    # Empty CSV cells come back from read_csv as float NaN. Treat them as
    # empty text so len() cannot fail and both features agree on such rows
    # (0 characters, 0 words) instead of measuring the literal string "nan".
    text = frame["clean_text"].fillna("").astype(str)
    return pd.DataFrame(
        {
            "text_length": text.str.len(),
            # Whitespace split, same tokenisation as str.split() with no args.
            "word_count": text.str.split().str.len(),
        }
    )


def generate_drift_report():
    """Build an HTML data-drift report comparing the two dataset splits.

    Reads the CSVs at ``REFERENCE_DATA_PATH`` and ``CURRENT_DATA_PATH``,
    derives two numerical features from their ``clean_text`` column
    (character count and whitespace-separated word count), runs Evidently's
    ``DataDriftPreset`` on those features and writes the result to
    ``REPORT_OUTPUT_PATH``, creating the parent directory if needed.

    Returns:
        None. The report is written to disk and a confirmation is printed.

    Raises:
        FileNotFoundError: If either input CSV does not exist.
        KeyError: If an input CSV has no ``clean_text`` column.
    """
    reference_data = pd.read_csv(REFERENCE_DATA_PATH)
    current_data = pd.read_csv(CURRENT_DATA_PATH)

    # Use simple numerical features for drift monitoring
    reference_features = _text_features(reference_data)
    current_features = _text_features(current_data)

    report = Report([DataDriftPreset()])
    result = report.run(
        reference_data=reference_features,
        current_data=current_features,
    )

    # Derive the directory from the output path so the two cannot drift
    # apart; fall back to "." when the path has no directory component.
    os.makedirs(os.path.dirname(REPORT_OUTPUT_PATH) or ".", exist_ok=True)
    result.save_html(REPORT_OUTPUT_PATH)

    print("Data drift report generated successfully.")
    print(f"Report saved at: {REPORT_OUTPUT_PATH}")
# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate_drift_report()