import os
import sys
import json

import mlflow
import pandas as pd
import requests
from dotenv import load_dotenv
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

load_dotenv()

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from src.utils import Timer

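# Paths to the processed test split, the run-info file written by training, and the metrics output.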
TEST_PATH = "data/processed/test.parquet"
RUN_INFO_PATH = "reports/last_run_info.json"
METRICS_PATH = "reports/eval_metrics.json"

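# Address of the local MLflow tracking server.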
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000


def main():
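    # --- Load the run info written by the training script ---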
    if not os.path.exists(RUN_INFO_PATH):
        raise FileNotFoundError(f"{RUN_INFO_PATH} not found. Run training first.")

    print("Loading last MLflow run info...")
    with open(RUN_INFO_PATH) as f:
        run_info = json.load(f)

    run_id = run_info["run_id"]
    model_uri = run_info["pipeline_model_uri"]

    print(f"Run ID: {run_id}")
    print(f"Model URI: {model_uri}")
    print()

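    # --- Verify that Google application credentials are configured ---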
    GOOGLE_CREDS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if not GOOGLE_CREDS or not os.path.isfile(GOOGLE_CREDS):
        raise FileNotFoundError(
            f"GOOGLE_APPLICATION_CREDENTIALS not set or invalid: {GOOGLE_CREDS}"
        )
    print(f"Using Google credentials: {GOOGLE_CREDS}")

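    # --- Make sure the MLflow tracking server is reachable before doing any work ---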
    try:
        r = requests.get(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}", timeout=3)
        if r.status_code != 200:
            raise requests.exceptions.RequestException(f"Unexpected status code: {r.status_code}")
    except requests.exceptions.RequestException:
        raise ConnectionError(
            f"MLflow tracking server not reachable at "
            f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}. "
            f"Start the server before evaluation."
        )

    mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")
    mlflow.set_registry_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")

    print(f"Connected to MLflow: {mlflow.get_tracking_uri()}")
    print(f"Using run ID: {run_id}")
    print()

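    # --- Download and load the logged pipeline model ---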
    with Timer("Load MLflow model"):
        model = mlflow.pyfunc.load_model(model_uri)

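    # --- Load the test split and separate features from the price target ---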
    print("Loading test data...")
    df_test = pd.read_parquet(TEST_PATH)
    X_test = df_test.drop(columns=["price"])
    y_test = df_test["price"]

    print("Running inference on test set...")
    with Timer("Model inference"):
        preds = model.predict(X_test)

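    # --- Compute regression metrics (R², MAE, MAPE) on the test predictions ---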
    print("Computing metrics...")
    metrics = {
        "r2": round(r2_score(y_test, preds), 4),
        "mae": round(mean_absolute_error(y_test, preds), 2),
        "mape": round(mean_absolute_percentage_error(y_test, preds), 4),
    }

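    # --- Log the metrics back to the original training run in MLflow ---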
    print("Logging metrics to MLflow...")
    with mlflow.start_run(run_id=run_id):
        mlflow.log_metrics(metrics)

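    # --- Persist the metrics locally as JSON ---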
    os.makedirs(os.path.dirname(METRICS_PATH), exist_ok=True)
    with open(METRICS_PATH, "w") as f:
        json.dump(metrics, f, indent=2)

    print("Evaluation complete!")
    print(json.dumps(metrics, indent=2))
    print(f"MLflow UI: {run_info['mlflow_ui_link']}")


if __name__ == "__main__":
    main()