# File size: 5,842 Bytes (revision 3c4de5e)
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from .shared_state import state # Estado compartilhado
import io
from PIL import Image
# Module-level holder for the most recently trained model, the scaler that was
# fitted with it, and the feature column names; all three start out as None.
global_model = dict.fromkeys(("model", "scaler", "columns"))
# Maps every model name offered in the UI to a zero-argument factory. Lambdas
# defer construction so each training run gets a fresh, unfitted estimator.
_MODEL_FACTORIES = {
    "Linear Regression": lambda: LinearRegression(),
    "Ridge Regression": lambda: Ridge(alpha=0.5),
    "Bayesian Ridge": lambda: BayesianRidge(),
    "Decision Tree": lambda: DecisionTreeRegressor(),
    "Random Forest": lambda: RandomForestRegressor(),
    "Support Vector Regression (SVR)": lambda: SVR(),
    "Neural Network (MLP)": lambda: MLPRegressor(max_iter=5000, tol=0.1, random_state=1),
    "K-Neighbors Regressor": lambda: KNeighborsRegressor(n_neighbors=5),
}


# Train the model
def apply_ml(df, var_dep, ml_model_name, test_size):
    """Train the selected regressor and return a bar chart of its R² scores.

    Rows containing NaN are dropped, the features are scaled with
    ``MinMaxScaler``, the data is split with ``random_state=1`` and the model
    is fitted on the training part. The fitted model, the scaler and the
    feature column names are stored in ``global_model`` for later predictions.

    Args:
        df: DataFrame with the features and the dependent variable. When
            ``None``, the ``'new_df'`` entry of the shared state is used.
        var_dep: Name of the dependent (target) column.
        ml_model_name: One of the keys of ``_MODEL_FACTORIES``.
        test_size: Value forwarded to ``train_test_split(test_size=...)``.

    Returns:
        A PIL image of a bar chart with the train and test R² scores.

    Raises:
        ValueError: If no DataFrame is available, ``var_dep`` is not a column
            of the DataFrame, or ``ml_model_name`` is unknown.
    """
    if df is None:
        df = state.get('new_df')  # Fall back to the DataFrame in the shared state
    if df is None:
        raise ValueError("Nenhum DataFrame disponível para aplicação.")

    # Validate the user's selections up front so a bad choice fails with a
    # clear message before any scaling or training work is done.
    if var_dep not in df.columns:
        raise ValueError(f"Variável dependente inválida: {var_dep!r}.")
    model_factory = _MODEL_FACTORIES.get(ml_model_name)
    if model_factory is None:
        raise ValueError("Modelo de ML inválido.")

    df = df.dropna()
    y = df[var_dep]
    X = df.drop(columns=[var_dep])
    # Drop the "Índice" column if it exists
    if "Índice" in X.columns:
        X = X.drop(columns=["Índice"])

    # Capture the names of the columns the scaler and model are actually fitted
    # on, so the saved column list always matches the expected feature count.
    feature_names = X.columns.tolist()

    # Normalize the data with MinMaxScaler.
    # NOTE(review): the scaler is fitted on the full dataset before the split,
    # so test rows influence the scaling; kept as-is to preserve reported scores.
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    # Train/test split with an adjustable test_size
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=test_size, random_state=1
    )

    # Training and evaluation
    model = model_factory()
    model.fit(X_train, y_train)
    train_r2 = r2_score(y_train, model.predict(X_train))
    test_r2 = r2_score(y_test, model.predict(X_test))
    print(f"Train R²: {train_r2}, Test R²: {test_r2}")

    # Save the trained model, scaler, and column names for prediction
    global_model["model"] = model
    global_model["scaler"] = scaler
    global_model["columns"] = feature_names

    # Draw the chart on an explicit figure and always close it, so a failure
    # while plotting does not leave an open figure behind.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.bar(["Treino", "Teste"], [train_r2, test_r2], color=["blue", "orange"])
        ax.set_title(f"Desempenho do Modelo: {ml_model_name} - Test Size: {test_size}")
        ax.set_ylabel("R²")
        ax.set_ylim(0, 1)  # Fixed 0..1 range for readability; negative R² is not shown
        fig.tight_layout()
        # Render the chart into an in-memory PNG buffer
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        plt.close(fig)

    # Convert the buffer to a PIL Image
    buffer.seek(0)
    return Image.open(buffer)
# Refresh the options of the dependent-variable dropdown
def update_var_dep_dropdown(df):
    """Return a Gradio update whose choices are the DataFrame's column names.

    When ``df`` is ``None`` the ``'new_df'`` entry of the shared state is used
    instead; if that is ``None`` as well, the choice list is emptied.
    """
    frame = state.get('new_df') if df is None else df
    column_choices = [] if frame is None else frame.columns.tolist()
    return gr.update(choices=column_choices)
def predict_new_values(*inputs):
    """Predict the dependent variable for one new observation.

    Args:
        *inputs: One value per feature, each convertible with ``float``. They
            are passed to the saved scaler as a single row, in the given order.

    Returns:
        A user-facing message: the formatted prediction, a notice that no
        model has been trained yet, or a notice that an input is not numeric.
    """
    if global_model["model"] is None:
        return "O modelo ainda não foi treinado. Execute o modelo primeiro."

    # Text inputs may be empty or non-numeric; report that to the user in the
    # output box instead of letting the conversion error escape the callback.
    try:
        new_data = [float(value) for value in inputs]
    except (TypeError, ValueError):
        return "Entrada inválida: todos os valores devem ser numéricos."

    # The scaler expects a 2-D structure, hence the single-row wrapper list
    new_data_scaled = global_model["scaler"].transform([new_data])

    # Predict
    prediction = global_model["model"].predict(new_data_scaled)[0]
    return f"Previsão: {prediction:.4f}"
# Build the "Machine Learning" tab
def ml_tab(new_df_output):
    """Build the "Machine Learning" tab and wire its callbacks.

    Args:
        new_df_output: Component passed as the first input of ``apply_ml`` and
            whose ``change`` event refreshes the dependent-variable dropdown
            (presumably the component holding the working DataFrame; confirm
            against the caller).

    Returns:
        The result of ``locals()``: a dict of every local name in this
        function, including the created components. Local variable names are
        therefore part of the return value and must not be renamed casually.
    """
    with gr.Tab("Machine Learning"):
        var_dep_dropdown = gr.Dropdown(choices=[], label="Variável Dependente")
        ml_model_dropdown = gr.Dropdown(
            choices=[
                "Linear Regression", "Ridge Regression", "Bayesian Ridge",
                "Decision Tree", "Random Forest", "Support Vector Regression (SVR)",
                "Neural Network (MLP)", "K-Neighbors Regressor"
            ],
            label="Modelo de Machine Learning"
        )
        test_size_slider = gr.Slider(minimum=0.1, maximum=0.5, step=0.05, value=0.3, label="Tamanho do Teste")
        submit_button = gr.Button("Executar Modelo")
        r2_graph_output = gr.Image(label="Gráfico de Desempenho")
        # Callback to execute the function
        submit_button.click(
            apply_ml,
            inputs=[new_df_output, var_dep_dropdown, ml_model_dropdown, test_size_slider],
            outputs=[r2_graph_output]
        )
        # Update dropdown options
        new_df_output.change(update_var_dep_dropdown, inputs=[new_df_output], outputs=[var_dep_dropdown])
        # Add prediction section
        gr.Markdown("### Previsão de Novos Valores")
        inputs = []
        # NOTE(review): global_model["columns"] is read once, while the tab is
        # being built. If no model has been trained by then, no input boxes are
        # created and the predict button below is left without a click handler.
        if global_model["columns"]:  # Check if columns exist
            for col in global_model["columns"]:
                inputs.append(gr.Textbox(label=f"Valor para '{col}'"))
        else:
            gr.Markdown("O modelo ainda não foi treinado. Execute o modelo primeiro para realizar previsões.")
        predict_button = gr.Button("Prever Valores")
        prediction_output = gr.Textbox(label="Resultado da Previsão")
        # Predict only if inputs were generated
        if inputs:
            predict_button.click(predict_new_values, inputs=inputs, outputs=prediction_output)
    return locals()
|