import pickle
import mlflow
from fastapi import FastAPI
from pydantic import BaseModel
from mlflow import MlflowClient
from dotenv import load_dotenv
import os
import pandas as pd
import xgboost as xgb
# Log in to Databricks
load_dotenv(override=True)  # Load the variables from the .env file
mlflow.set_tracking_uri("databricks")
client = MlflowClient()
EXPERIMENT_NAME = "/Users/pipochatgpt@gmail.com/Depression_Classification_prefect"
# Find the best model among the 3 candidates (highest F1)
run_ = mlflow.search_runs(
    order_by=["metrics.f1 DESC"],
    output_format="list",
    filter_string="tags.candidate = 'true'",
    experiment_names=[EXPERIMENT_NAME],
)[0]
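# Note: the [0] above assumes at least one run is tagged candidate = 'true';
# otherwise search_runs returns an empty list and this raises an IndexError.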
run_id = run_.info.run_id
# Download the artifacts (preprocessor)
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path=".",
)
with open("preprocessor/encoder.pkl", "rb") as f_in:
encoder = pickle.load(f_in)
with open("preprocessor/scaler.pkl", "rb") as f_in:
scaler = pickle.load(f_in)
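# Note: unpickling these preprocessors assumes the same scikit-learn version
# used at training time; a version mismatch can break loading or change behavior.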
# Load the champion model from the Model Registry
model_name = "workspace.default.DepressionClassificationPrefect"
alias = "champion"
model_uri = f"models:/{model_name}@{alias}"
model = mlflow.pyfunc.load_model(model_uri)
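# Optional sanity check (a sketch, assuming the alias is registered):
# resolve which model version "champion" currently points to.
# version = client.get_model_version_by_alias(model_name, alias)
# print(f"Serving {model_name} version {version.version}")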
# Preprocess the incoming request
def preprocess(input_data):
    # .dict() is the Pydantic v1 API; on Pydantic v2 use .model_dump()
    df = pd.DataFrame([input_data.dict()])
    # Rename columns to the format the model was trained on
    df = df.rename(columns={
        "AcademicPressure": "Academic Pressure",
        "StudySatisfaction": "Study Satisfaction",
        "SleepDuration": "Sleep Duration",
        "DietaryHabits": "Dietary Habits",
        "FamilyHistory": "Family History of Mental Illness",
        "SuicidalThoughts": "Have you ever had suicidal thoughts ?",
        "FinancialStress": "Financial Stress",
        "WorkStudyHours": "Work/Study Hours",
    })
    # Reorder the columns
    columnas = [
        "Gender", "Age", "City", "Academic Pressure", "CGPA",
        "Study Satisfaction", "Sleep Duration", "Dietary Habits", "Degree",
        "Have you ever had suicidal thoughts ?", "Work/Study Hours",
        "Financial Stress", "Family History of Mental Illness",
    ]
    df = df[columnas]
    # Apply the one-hot encoder to City (assumes the encoder returns a
    # dense array, e.g. sparse_output=False; a sparse matrix would need .toarray())
    city_encoded = encoder.transform(df[["City"]])
    city_cols = encoder.get_feature_names_out(["City"])
    city_df = pd.DataFrame(city_encoded, columns=city_cols)
    # Drop the raw City column
    df = df.drop(columns=["City"])
    # Join both frames column-wise
    df_proc = pd.concat([df, city_df], axis=1)
    # Scale the features
    df_scaled = scaler.transform(df_proc)
    return df_scaled
# Run a prediction
def make_prediction(input_data):
    X = preprocess(input_data)
    pred = model.predict(X)
    return int(pred[0])
# FastAPI application
app = FastAPI()
# Pydantic model describing the expected input payload
class InputData(BaseModel):
    Gender: int
    Age: int
    AcademicPressure: float
    CGPA: float
    FinancialStress: float
    StudySatisfaction: float
    SleepDuration: int
    DietaryHabits: int
    WorkStudyHours: float
    Degree: int
    City: str
    FamilyHistory: int
    SuicidalThoughts: int
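# Example payload (hypothetical values; valid City strings depend on the
# categories the encoder was fitted on):
# {"Gender": 1, "Age": 21, "AcademicPressure": 3.0, "CGPA": 8.5,
#  "FinancialStress": 2.0, "StudySatisfaction": 4.0, "SleepDuration": 2,
#  "DietaryHabits": 1, "WorkStudyHours": 6.0, "Degree": 5,
#  "City": "Delhi", "FamilyHistory": 0, "SuicidalThoughts": 0}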
@app.post("/predict")
def predict_endpoint(input_data: InputData):
    result = make_prediction(input_data)
    return {"prediction": result}
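# To serve the app locally (a sketch: the module name "main" is an
# assumption; adjust it to this file's actual name):
#   uvicorn main:app --host 0.0.0.0 --port 8000
# Then call the endpoint, for example:
#   curl -X POST http://localhost:8000/predict \
#        -H "Content-Type: application/json" \
#        -d @payload.json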