aiko-isnt-ded commited on
Commit
99fe3f6
1 Parent(s): 9388847

config space

Browse files
Files changed (4) hide show
  1. Dockerfile +17 -0
  2. README.md +1 -0
  3. api.py +114 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.13-slim

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

# Install procps (process tools), set the container timezone to
# America/Mexico_City, and install the Python dependencies.
# apt package lists and the pip cache are removed in the SAME layer so the
# final image stays small (removing them in a later layer would not shrink
# the image).
RUN apt-get update && apt-get install -y --no-install-recommends procps && \
    rm -rf /var/lib/apt/lists/* && \
    pip install -U --no-cache-dir pip && \
    rm /etc/localtime && \
    ln -s /usr/share/zoneinfo/America/Mexico_City /etc/localtime && \
    pip install --no-cache-dir -r ./requirements.txt

COPY ./api.py /code/

EXPOSE 8000

# Run the FastAPI app with uvicorn (uvicorn must be listed in requirements.txt).
CMD ["uvicorn","api:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -7,6 +7,7 @@ sdk: docker
7
  pinned: false
8
  license: mit
9
  short_description: API of the Depression Classification project
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  pinned: false
8
  license: mit
9
  short_description: API of the Depression Classification project
10
+ app_port: 8000
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import mlflow
3
+ from fastapi import FastAPI
4
+ from pydantic import BaseModel
5
+ from mlflow import MlflowClient
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import pandas as pd
9
+ import xgboost as xgb
# Log in to Databricks: load credentials (e.g. DATABRICKS_HOST / DATABRICKS_TOKEN)
# from the .env file into the process environment.
load_dotenv(override=True)  # Loads the variables from the .env file

mlflow.set_tracking_uri("databricks")
client = MlflowClient()

EXPERIMENT_NAME = "/Users/pipochatgpt@gmail.com/Depression_Classification_prefect"

# Pick the best candidate model: among runs tagged candidate='true',
# take the one with the highest f1 metric.
run_ = mlflow.search_runs(order_by=["metrics.f1 DESC"],
                          output_format="list",
                          filter_string="tags.candidate = 'true'",
                          experiment_names=[EXPERIMENT_NAME]
                          )[0]
run_id = run_.info.run_id

# Download the preprocessing artifacts (encoder + scaler pickles) into ./preprocessor
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path="."
)

# NOTE(review): these pickles presumably contain fitted scikit-learn
# transformers — confirm the matching library is installed at runtime,
# otherwise pickle.load will fail.
with open("preprocessor/encoder.pkl", "rb") as f_in:
    encoder = pickle.load(f_in)

with open("preprocessor/scaler.pkl", "rb") as f_in:
    scaler = pickle.load(f_in)

# Load the champion model from the Model Registry by alias
model_name = "workspace.default.DepressionClassificationPrefect"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"
model = mlflow.pyfunc.load_model(model_uri)
46
+
47
# Preprocess the request payload
def preprocess(input_data):
    """Turn an InputData payload into the scaled feature matrix the model expects.

    Steps: rename API field names to the column names used at training time,
    reorder the columns, one-hot encode City with the downloaded encoder,
    and scale everything with the downloaded scaler.

    Returns the scaler's output (a 2-D array with one row).
    """
    # pydantic v2 (pinned in requirements.txt): model_dump() replaces the
    # deprecated BaseModel.dict()
    df = pd.DataFrame([input_data.model_dump()])

    # Rename columns to the format of the trained model
    df = df.rename(columns={
        "AcademicPressure": "Academic Pressure",
        "StudySatisfaction": "Study Satisfaction",
        "SleepDuration": "Sleep Duration",
        "DietaryHabits": "Dietary Habits",
        "FamilyHistory": "Family History of Mental Illness",
        "SuicidalThoughts": "Have you ever had suicidal thoughts ?",
        "FinancialStress": "Financial Stress",
        "WorkStudyHours": "Work/Study Hours"
    })

    # Reorder columns to match the training order
    columnas = ["Gender","Age","City","Academic Pressure","CGPA","Study Satisfaction","Sleep Duration","Dietary Habits","Degree","Have you ever had suicidal thoughts ?","Work/Study Hours","Financial Stress","Family History of Mental Illness"]

    df = df[columnas]

    # Apply the one-hot encoder to City.
    # NOTE(review): assumes the encoder returns a dense array — confirm it was
    # fitted with sparse_output=False.
    city_encoded = encoder.transform(df[["City"]])
    city_cols = encoder.get_feature_names_out(["City"])
    # Reuse df's index so the concat below aligns rows instead of producing NaNs
    city_df = pd.DataFrame(city_encoded, columns=city_cols, index=df.index)

    # Drop the raw City column (replaced by its one-hot columns)
    df = df.drop(columns=["City"])

    # Join both frames column-wise
    df_proc = pd.concat([df, city_df], axis=1)

    # Scale the data
    df_scaled = scaler.transform(df_proc)

    return df_scaled
84
+
85
+
86
def make_prediction(input_data):
    """Run the champion model on one request payload and return the result as an int."""
    features = preprocess(input_data)
    prediction = model.predict(features)
    return int(prediction[0])
91
+
# FastAPI application serving the prediction endpoint
app = FastAPI()
94
+
# Pydantic schema describing one prediction request
class InputData(BaseModel):
    """Request body for /predict.

    City is the raw city name (one-hot encoded server-side by `preprocess`).
    The integer fields are presumably already label-encoded to match the
    training pipeline — TODO confirm the expected codes against the
    training preprocessor.
    """
    Gender: int
    Age: int
    AcademicPressure: float
    CGPA: float
    FinancialStress: float
    StudySatisfaction: float
    SleepDuration: int
    DietaryHabits: int
    WorkStudyHours: float
    Degree: int
    City: str
    FamilyHistory: int
    SuicidalThoughts: int
110
+
@app.post("/predict")
def predict_endpoint(input_data: InputData):
    """Return the model's prediction for one request payload as {"prediction": <int>}."""
    return {"prediction": make_prediction(input_data)}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
fastapi==0.122.0
mlflow==3.6.0
mlflow_skinny==3.6.0
mlflow_tracing==3.6.0
pandas==2.3.3
pydantic==2.12.4
python-dotenv==1.2.1
# scikit-learn is needed to unpickle the encoder/scaler artifacts
# (pin to the version used when the preprocessor was trained).
scikit-learn
# uvicorn is the ASGI server launched by the Dockerfile CMD; without it
# the container fails at startup.
uvicorn
xgboost==3.1.2