Spaces:
Sleeping
Sleeping
File size: 3,574 Bytes
419e5f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# create_demo_data.py - Cria dados de demonstração e modelos base
import time

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
def create_demo_dataset(n_samples=2000, random_state=42):
    """Create a realistic synthetic hotel-booking demo dataset.

    Args:
        n_samples: number of synthetic bookings to generate (default 2000,
            matching the original hard-coded size).
        random_state: seed for NumPy's global RNG, so runs are reproducible.

    Returns:
        Tuple ``(X, y)``: ``X`` is a DataFrame with 9 numeric features
        modeled on the real hotel-bookings dataset; ``y`` is a 0/1 Series
        marking cancellations, derived from a noisy logistic relation over
        the features.
    """
    np.random.seed(random_state)

    # Feature distributions loosely matched to the real hotel dataset.
    features = {
        'lead_time': np.random.gamma(2, 50, n_samples),
        'adr': np.random.normal(100, 30, n_samples),
        'adults': np.random.poisson(2, n_samples),
        'children': np.random.poisson(0.3, n_samples),
        'previous_cancellations': np.random.poisson(0.1, n_samples),
        'is_repeated_guest': np.random.binomial(1, 0.1, n_samples),
        'required_car_parking_spaces': np.random.binomial(1, 0.2, n_samples),
        'total_of_special_requests': np.random.poisson(0.5, n_samples),
        'booking_changes': np.random.poisson(0.3, n_samples),
    }
    X = pd.DataFrame(features)

    # Target: sigmoid of a linear combination plus Gaussian noise — long
    # lead times and high rates raise cancellation odds; loyalty signals
    # (repeat guest, parking, special requests) lower them.
    cancellation_prob = 1 / (1 + np.exp(-(
        X['lead_time'] * 0.01 +
        X['adr'] * 0.005 -
        X['is_repeated_guest'] * 0.8 -
        X['required_car_parking_spaces'] * 0.3 +
        X['total_of_special_requests'] * -0.4 +
        np.random.normal(0, 0.5, n_samples)
    )))
    y = (cancellation_prob > 0.5).astype(int)
    return X, y
def train_and_save_models():
    """Train demo classifiers and persist everything to 'modelos_treinados.pkl'.

    Trains three models (logistic regression, KNN, SVM) on the synthetic
    demo dataset, evaluates them on a held-out stratified split, and saves
    the fitted models, scaled splits, and metric table with joblib.

    Side effects: writes 'modelos_treinados.pkl' to the current directory
    and prints a confirmation message.
    """
    X, y = create_demo_dataset()

    # Stratified split keeps the class balance equal in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    # Fit the scaler on the training set only, to avoid test-set leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # (key, estimator) pairs; probability=True lets SVC expose predict_proba.
    candidates = [
        ('RL_Padrao', LogisticRegression(random_state=42, max_iter=1000)),
        ('KNN_Padrao', KNeighborsClassifier(n_neighbors=5)),
        ('SVM_Padrao', SVC(probability=True, random_state=42)),
    ]

    models = {}
    train_times = {}
    for name, model in candidates:
        # Fix: measure the real fit time instead of reporting a hard-coded 0.
        start = time.perf_counter()
        model.fit(X_train_scaled, y_train)
        train_times[name] = time.perf_counter() - start
        models[name] = model

    # Evaluate every model on the held-out test set.
    results = {}
    for name, model in models.items():
        y_pred = model.predict(X_test_scaled)
        y_proba = model.predict_proba(X_test_scaled)[:, 1]
        results[name] = {
            'Acurácia': accuracy_score(y_test, y_pred),
            'Precisão': precision_score(y_test, y_pred, zero_division=0),
            'Recall': recall_score(y_test, y_pred, zero_division=0),
            'F1-Score': f1_score(y_test, y_pred, zero_division=0),
            'AUC-ROC': roc_auc_score(y_test, y_proba),
            'Tempo Treino (s)': train_times[name],
        }

    data_to_save = {
        'models': models,
        'X_train': X_train_scaled,
        'X_test': X_test_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'results': results,
    }
    joblib.dump(data_to_save, 'modelos_treinados.pkl')
    print("✅ Dados de demonstração e modelos salvos em 'modelos_treinados.pkl'")
# Fix: drop the stray trailing "|" artifact that broke the original line.
if __name__ == "__main__":
    train_and_save_models()