File size: 5,842 Bytes
3c4de5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from .shared_state import state  # Estado compartilhado
import io
from PIL import Image

# Module-level holder for the most recently trained estimator, the scaler
# fitted alongside it, and the feature column names; all start out unset.
global_model = dict(model=None, scaler=None, columns=None)

# Train the selected regression model and chart its train/test R² scores
def apply_ml(df, var_dep, ml_model_name, test_size):
    """Fit a regressor on ``df`` and return a bar chart of its R² scores.

    Rows containing missing values are dropped, the ``"Índice"`` column (if
    present) is excluded from the features, and the features are min-max
    scaled before fitting.

    Args:
        df: DataFrame holding the feature columns and the target column.
            When ``None``, the frame stored under ``'new_df'`` in the shared
            state is used instead.
        var_dep: Name of the dependent (target) column.
        ml_model_name: Display name of the regressor to train; must be one of
            the keys of the dispatch table below.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A PIL image of a bar chart comparing the train and test R² scores.

    Raises:
        ValueError: If no DataFrame is available, ``var_dep`` is not a column
            of the DataFrame, or ``ml_model_name`` is not a known model.

    Side effects:
        Prints both R² scores and stores the fitted model, the fitted scaler
        and the feature column names in ``global_model``.
    """
    if df is None:
        df = state.get('new_df')  # Fall back to the shared-state DataFrame
    if df is None:
        raise ValueError("Nenhum DataFrame disponível para aplicação.")
    if var_dep is None or var_dep not in df.columns:
        # Fail with a readable message instead of a bare KeyError when the
        # dropdown was left empty or points at a column that no longer exists.
        raise ValueError("Variável dependente inválida.")

    # Display name -> zero-argument factory for the matching estimator
    model_factories = {
        "Linear Regression": LinearRegression,
        "Ridge Regression": lambda: Ridge(alpha=0.5),
        "Bayesian Ridge": BayesianRidge,
        "Decision Tree": DecisionTreeRegressor,
        "Random Forest": RandomForestRegressor,
        "Support Vector Regression (SVR)": SVR,
        "Neural Network (MLP)": lambda: MLPRegressor(max_iter=5000, tol=0.1, random_state=1),
        "K-Neighbors Regressor": lambda: KNeighborsRegressor(n_neighbors=5),
    }
    try:
        build_model = model_factories[ml_model_name]
    except (KeyError, TypeError):
        raise ValueError("Modelo de ML inválido.") from None

    df = df.dropna()
    y = df[var_dep]
    X = df.drop(columns=[var_dep])

    # Drop the "Índice" column if it exists; it is not a feature
    if "Índice" in X.columns:
        X = X.drop(columns=["Índice"])

    # Remember exactly the columns the scaler/model are fitted on, so the
    # saved column list always matches what prediction has to supply.
    feature_columns = X.columns.tolist()

    # Train/test split with an adjustable test_size
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=1)

    # Fit the scaler on the training portion only so that no statistics from
    # the held-out rows leak into training; the test rows are only transformed.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Training and evaluation
    model = build_model()
    model.fit(X_train, y_train)
    train_r2 = r2_score(y_train, model.predict(X_train))
    test_r2 = r2_score(y_test, model.predict(X_test))

    print(f"Train R²: {train_r2}, Test R²: {test_r2}")

    # Save the trained model, scaler, and column names for prediction
    global_model["model"] = model
    global_model["scaler"] = scaler
    global_model["columns"] = feature_columns

    # Draw the chart on an explicit figure and always close it, even if
    # plotting or saving fails, so figures do not pile up across calls.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.bar(["Treino", "Teste"], [train_r2, test_r2], color=["blue", "orange"])
        ax.set_title(f"Desempenho do Modelo: {ml_model_name} - Test Size: {test_size}")
        ax.set_ylabel("R²")
        # R² can be negative for a poor fit; extend the lower limit in that
        # case so the bar stays visible instead of being clipped at zero.
        ax.set_ylim(min(0, train_r2, test_r2), 1)
        fig.tight_layout()

        # Render the chart into an in-memory PNG
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        plt.close(fig)
    buffer.seek(0)

    # Convert the buffer to a PIL Image
    return Image.open(buffer)

# Refresh the choices offered for the dependent variable
def update_var_dep_dropdown(df):
    """Return a Gradio update listing the DataFrame's columns as choices.

    When ``df`` is ``None`` the frame stored under ``'new_df'`` in the shared
    state is used; if that is also ``None`` the choices are emptied.
    """
    frame = df if df is not None else state.get('new_df')
    column_choices = [] if frame is None else frame.columns.tolist()
    return gr.update(choices=column_choices)

def predict_new_values(*inputs):
    """Predict the target for one new observation using the saved model.

    Args:
        *inputs: One value per feature, in the order the scaler was fitted
            with. Strings are accepted; surrounding whitespace is ignored and
            a decimal comma is treated as a decimal point.

    Returns:
        A user-facing message: the formatted prediction, or an explanation
        when the model has not been trained or the input is unusable.
    """
    model = global_model["model"]
    scaler = global_model["scaler"]
    if model is None:
        return "O modelo ainda não foi treinado. Execute o modelo primeiro."

    def to_number(value):
        # Textbox values arrive as strings; accept "1,5" as well as "1.5"
        if isinstance(value, str):
            value = value.strip().replace(",", ".")
        return float(value)

    try:
        new_data = [to_number(value) for value in inputs]
    except (TypeError, ValueError):
        # Blank or non-numeric fields: report it instead of raising
        return "Entrada inválida: informe apenas valores numéricos em todos os campos."

    # Fitted scikit-learn transformers expose the feature count they were
    # trained with; check it up front so a mismatch gives a clear message
    # rather than an error from inside transform().
    expected = getattr(scaler, "n_features_in_", None)
    if expected is not None and len(new_data) != expected:
        return (
            f"Entrada inválida: o modelo espera {expected} valor(es), "
            f"mas {len(new_data)} foram informados."
        )

    # The scaler expects a 2-D input: a single row holding all feature values
    new_data_scaled = scaler.transform([new_data])

    # Predict
    prediction = model.predict(new_data_scaled)[0]
    return f"Previsão: {prediction:.4f}"

# Adapter between the single free-text prediction field and predict_new_values
def _predict_from_text(raw_text):
    """Split a ';'-separated string into values and run the prediction.

    Empty segments are ignored and decimal commas are rewritten as dots so
    values typed as "1,5" can be parsed as numbers.
    """
    pieces = (piece.strip() for piece in (raw_text or "").split(";"))
    values = [piece.replace(",", ".") for piece in pieces if piece]
    return predict_new_values(*values)


# Build the "Machine Learning" tab
def ml_tab(new_df_output):
    """Create the Machine Learning tab and wire its callbacks.

    Args:
        new_df_output: Gradio component carrying the DataFrame; it is used as
            the data input for training and its ``change`` event refreshes
            the dependent-variable dropdown.

    Returns:
        The function's ``locals()`` dict, i.e. the components created here
        keyed by their variable names.
    """
    with gr.Tab("Machine Learning"):
        var_dep_dropdown = gr.Dropdown(choices=[], label="Variável Dependente")
        ml_model_dropdown = gr.Dropdown(
            choices=[
                "Linear Regression", "Ridge Regression", "Bayesian Ridge",
                "Decision Tree", "Random Forest", "Support Vector Regression (SVR)",
                "Neural Network (MLP)", "K-Neighbors Regressor"
            ],
            label="Modelo de Machine Learning"
        )
        test_size_slider = gr.Slider(minimum=0.1, maximum=0.5, step=0.05, value=0.3, label="Tamanho do Teste")
        submit_button = gr.Button("Executar Modelo")
        r2_graph_output = gr.Image(label="Gráfico de Desempenho")

        # Callback to execute the function
        submit_button.click(
            apply_ml,
            inputs=[new_df_output, var_dep_dropdown, ml_model_dropdown, test_size_slider],
            outputs=[r2_graph_output]
        )

        # Update dropdown options
        new_df_output.change(update_var_dep_dropdown, inputs=[new_df_output], outputs=[var_dep_dropdown])

        # Add prediction section
        gr.Markdown("### Previsão de Novos Valores")

        inputs = []
        if global_model["columns"]:
            # A model already exists when the UI is built: one field per feature
            for col in global_model["columns"]:
                inputs.append(gr.Textbox(label=f"Valor para '{col}'"))
            predict_handler = predict_new_values
        else:
            # The layout is built before any training has happened, so the
            # feature columns are not known yet. Offer a single free-text
            # field instead; otherwise the predict button would never get a
            # handler and prediction would be unreachable.
            gr.Markdown(
                "Execute o modelo primeiro. Depois, informe os valores das variáveis "
                "independentes separados por ';', na mesma ordem das colunas usadas no treinamento."
            )
            inputs.append(
                gr.Textbox(
                    label="Valores das variáveis independentes (separados por ';')",
                    placeholder="ex.: 1,5; 2; 3,75",
                )
            )
            predict_handler = _predict_from_text

        predict_button = gr.Button("Prever Valores")
        prediction_output = gr.Textbox(label="Resultado da Previsão")

        # The button is always wired; an untrained model is reported by the
        # prediction function itself.
        predict_button.click(predict_handler, inputs=inputs, outputs=prediction_output)

    return locals()