Spaces:

252106862eder
/

churn

Sleeping

App Files Files Community

252106862eder committed on Oct 26, 2025

Commit

4248ee7

verified ·

1 Parent(s): d804c4f

Update model_utils.py

Browse files

Files changed (1) hide show

model_utils.py +62 -56

model_utils.py CHANGED Viewed

@@ -19,7 +19,7 @@ import re # Para auxiliar na limpeza de markdown para LaTeX
 # Importações para LaTeX (pylatex)
 from pylatex import Document, Section, Command, LongTable, Tabular, Figure, NoEscape, Math, LineBreak
-from pylatex.utils import italic, NoEscape
 from pylatex.base_classes import Environment
 # --- DEFINIÇÃO DAS FEATURES PREDITIVAS E COLUNA ALVO PARA SEU data.csv ---
@@ -139,7 +139,7 @@ class ChurnModelPipeline:
         plot_dir = tempfile.mkdtemp()
         self.plot_paths = {}
-        dpi = 100 # Qualidade da imagem para Gradio
         # --- 1. Correlation Heatmap ---
         if self.df_raw_for_plots is not None and not self.df_raw_for_plots.empty:
@@ -189,12 +189,7 @@ class ChurnModelPipeline:
         plt.tight_layout()
         cm_path = os.path.join(plot_dir, 'confusion_matrix.png')
         plt.savefig(cm_path)
-            # Ajuste para garantir que o plot_dir é um diretório válido
-        if not os.path.exists(plot_dir):
-            os.makedirs(plot_dir)
-        cm_path = os.path.join(plot_dir, 'confusion_matrix.png')
-        plt.savefig(cm_path)
-        plt.close()
         self.plot_paths['confusion_matrix'] = cm_path
         # --- 4. ROC Curve ---
@@ -214,7 +209,7 @@ class ChurnModelPipeline:
         plt.tight_layout()
         roc_path = os.path.join(plot_dir, 'roc_curve.png')
         plt.savefig(roc_path)
-        plt.close()
         self.plot_paths['roc_curve'] = roc_path
     def predict_churn(self, input_data: pd.DataFrame) -> Tuple[int, float]:
@@ -276,6 +271,7 @@ class ChurnModelPipeline:
             latex_story.append(NoEscape(sample_display_df.to_latex(index=False, caption='Características do Cliente Simulado', label='tab:sim_customer', longtable=False)))
             markdown_story.append(f"**Resultado da Simulação:** O cliente **{churn_status_sample}** (Probabilidade de Churn: **{prob_sample:.2%}**)\n")
             latex_story.append(NoEscape(f'\textbf{{Resultado da Simulação:}} O cliente \textbf{{{churn_status_sample}}} (Probabilidade de Churn: \textbf{{{prob_sample:.2f}\%}})\n\n'))
         else:
             markdown_story.append("Não foi possível realizar uma simulação pois o DataFrame de teste ou dados interativos não estão disponíveis.\n")
@@ -295,12 +291,14 @@ class ChurnModelPipeline:
             training_details_markdown += f"- **Shape X_train (antes pré-processamento):** `{self.training_details.get('X_train_shape', 'N/A')}`\n"
             y_train_before_smote = self.training_details.get('y_train_value_counts_before_smote', {})
-            training_details_markdown += f"- **Balanceamento \`Exited\` (antes SMOTE):** `Não Churn: {y_train_before_smote.get(0, 'N/A')}, Churn: {y_train_before_smote.get(1, 'N/A')}`\n"
             training_details_markdown += f"- **Shape X_train (após pré-processamento):** `{self.training_details.get('X_train_processed_shape', 'N/A')}`\n"
             y_train_after_smote = self.training_details.get('y_train_resampled_value_counts_after_smote', {})
-            training_details_markdown += f"- **Balanceamento \`Exited\` (após SMOTE):** `Não Churn: {y_train_after_smote.get(0, 'N/A')}, Churn: {y_train_after_smote.get(1, 'N/A')}`\n"
             training_details_markdown += f"- **Modelo Treinado:** `{'Sim' if self.training_details.get('model_trained_successfully', False) else 'Não'}`\n"
@@ -309,10 +307,10 @@ class ChurnModelPipeline:
             training_details_latex += fr'\item \textbf{{Dataset Carregado:}} {self.training_details.get("dataset_rows", "N/A")} linhas' + '\n'
             training_details_latex += fr'\item \textbf{{Features Preditivas:}} \texttt{{{", ".join(self.training_details.get("predictor_features", ["N/A"]))}}}.' + '\n'
             training_details_latex += fr'\item \textbf{{Coluna Alvo:}} \texttt{{{self.training_details.get("target_column", "N/A")}}}.' + '\n'
-            training_details_latex += fr'\item \textbf{{Shape $X_{train}$ (antes pré-processamento):}} {self.training_details.get("X_train_shape", "N/A")}.' + '\n'
-            training_details_latex += fr'\item \textbf{{Balanceamento \texttt{Exited} (antes SMOTE):}} Não Churn: {y_train_before_smote.get(0, "N/A")}, Churn: {y_train_before_smote.get(1, "N/A")}.' + '\n'
-            training_details_latex += fr'\item \textbf{{Shape $X_{train}$ (após pré-processamento):}} {self.training_details.get("X_train_processed_shape", "N/A")}.' + '\n'
-            training_details_latex += fr'\item \textbf{{Balanceamento \texttt{Exited} (após SMOTE):}} Não Churn: {y_train_after_smote.get(0, "N/A")}, Churn: {y_train_after_smote.get(1, "N/A")}.' + '\n'
             training_details_latex += fr'\item \textbf{{Modelo Treinado:}} {"Sim" if self.training_details.get("model_trained_successfully", False) else "Não"}.' + '\n'
             training_details_latex += r'\end{itemize}' + '\n\n'
@@ -445,9 +443,10 @@ class ChurnModelPipeline:
         doc.append(Command('graphicspath', NoEscape(r'{./}'))) # Para imagens no mesmo diretório
         # --- Cabeçalho Personalizado (com base nas informações do usuário) ---
-        doc.append(NoEscape(r'\title{MODELAGEM PREDITIVA DE CHURN DE CLIENTES BANCÁRIOS UTILIZANDO REGRESSÃO LOGÍSTICA}'))
-        doc.append(NoEscape(r'\author{ÉDER MARCELO PONTES CUNHA}'))
-        doc.append(NoEscape(r'\date{26 de Outubro de 2025}')) # Ajuste conforme necessário
         doc.append(NoEscape(r'\begin{titlepage}'))
         doc.append(Command('centering'))
@@ -457,7 +456,8 @@ class ChurnModelPipeline:
         if os.path.exists(logo_filename):
             with doc.create(Figure(position='h!')) as logo_fig:
                 logo_fig.add_image(logo_filename, width='0.25\textwidth')
-                logo_fig.add_caption(NoEscape(r'\vspace{-1.5cm}')) # Ajuste vertical para o texto abaixo do logo
         else:
             doc.append(Command('textbf', 'AVISO: Logo da UnB não encontrado! Certifique-se de que "marcador.png" esteja no mesmo diretório do arquivo .tex.'))
@@ -478,13 +478,38 @@ class ChurnModelPipeline:
         # Título do Trabalho (do usuário, ajustado para LaTeX)
         # Quebra de linha manual para o título
-        title_parts = header_info["titulo_trabalho"].replace('UTILIZANDO', r'\UTILIZANDO').split(r'\')
         doc.append(Command('Huge'))
-        doc.append(Command('textbf', NoEscape(title_parts[0])))
-        for part in title_parts[1:]:
-             doc.append(LineBreak())
-             doc.append(Command('textbf', NoEscape(part)))
-        doc.append(LineBreak())
         doc.append(Command('vspace', '1.0cm'))
@@ -507,49 +532,30 @@ class ChurnModelPipeline:
         doc.append(Command('vfill')) # Empurra o conteúdo para cima
         doc.append(Command('end{titlepage}'))
-        doc.append(Command('maketitle')) # Gera um título padrão (mas estamos sobrescrevendo com o titlepage)
         doc.append(Command('clearpage'))
         # Conteúdo do Resumo
         for item in latex_content_parts:
-            if isinstance(item, str) and item.strip().startswith('\begin{tabular}'):
-                # Handle DataFrame.to_latex output manually to fit pylatex's tabular environment
-                # This is a bit tricky. pylatex's tabular works best with specific object types.
-                # For simplicity, we'll embed the raw latex table string
-                doc.append(NoEscape(item))
-            elif isinstance(item, str) and (item.startswith('\section') or item.startswith('\subsection')):
-                doc.append(NoEscape(item + '\n'))
-            elif isinstance(item, str):
-                # Process common markdown-like elements in raw string for LaTeX
-                processed_str = item.replace('**', '\textbf{').replace('*', '\emph{').replace('`', '\texttt{')
-                processed_str = processed_str.replace('}', '}}') # close bold/emph/texttt
-                processed_str = processed_str.replace('}}', '}') # fix double close
-                processed_str = processed_str.replace('%', '\%').replace('&', '\&').replace('_', '_') # escape LaTeX special chars
-                doc.append(NoEscape(processed_str))
-            else:
-                doc.append(item) # Assume it's a pylatex object (Section, Math etc.)
-            doc.append(LineBreak()) # Add a line break after each item
         # Adicionar imagens ao final do documento LaTeX
         doc.append(NoEscape(r'\clearpage'))
-        doc.append(NoEscape(r'\section*{Visualizações Gráficas do Modelo}'))
-        doc.append(NoEscape(r'\addcontentsline{toc}{section}{Visualizações Gráficas do Modelo}')) # Adicionar ao sumário
         for key, path in plot_paths.items():
             if os.path.exists(path):
-                doc.append(NoEscape(r'\subsection*{'+ key.replace('_', ' ').title() +'}'))
-                doc.append(NoEscape(r'\begin{figure}[htbp]'))
-                doc.append(NoEscape(r'\centering'))
-                # Ajustar width para caber na página, altura automática
-                doc.append(NoEscape(f'\includegraphics[width=0.9\textwidth]{{{path}}}'))
-                doc.append(NoEscape(f'\caption{{{key.replace("_", " ").title()}}}'))
-                doc.append(NoEscape(f'\label{{fig:{key}}}'))
-                doc.append(NoEscape(r'\end{figure}'))
-                doc.append(NoEscape(r'\clearpage')) # Cada imagem em uma nova página
         # Salvar o arquivo .tex
         latex_output_dir = tempfile.mkdtemp()
         output_filename = os.path.join(latex_output_dir, 'relatorio_churn.tex')
         doc.generate_tex(output_filename) # Salva o arquivo .tex
-        return output_filename

 # Importações para LaTeX (pylatex)
 from pylatex import Document, Section, Command, LongTable, Tabular, Figure, NoEscape, Math, LineBreak
+from pylatex.utils import italic
 from pylatex.base_classes import Environment
 # --- DEFINIÇÃO DAS FEATURES PREDITIVAS E COLUNA ALVO PARA SEU data.csv ---
         plot_dir = tempfile.mkdtemp()
         self.plot_paths = {}
+        dpi = 150 # Aumentado DPI para melhor qualidade em relatórios
         # --- 1. Correlation Heatmap ---
         if self.df_raw_for_plots is not None and not self.df_raw_for_plots.empty:
         plt.tight_layout()
         cm_path = os.path.join(plot_dir, 'confusion_matrix.png')
         plt.savefig(cm_path)
+        plt.close() # Fechar a figura para liberar memória
         self.plot_paths['confusion_matrix'] = cm_path
         # --- 4. ROC Curve ---
         plt.tight_layout()
         roc_path = os.path.join(plot_dir, 'roc_curve.png')
         plt.savefig(roc_path)
+        plt.close() # Fechar a figura para liberar memória
         self.plot_paths['roc_curve'] = roc_path
     def predict_churn(self, input_data: pd.DataFrame) -> Tuple[int, float]:
             latex_story.append(NoEscape(sample_display_df.to_latex(index=False, caption='Características do Cliente Simulado', label='tab:sim_customer', longtable=False)))
             markdown_story.append(f"**Resultado da Simulação:** O cliente **{churn_status_sample}** (Probabilidade de Churn: **{prob_sample:.2%}**)\n")
+            # Corrigido o SyntaxWarning para '%' no f-string para LaTeX
             latex_story.append(NoEscape(f'\textbf{{Resultado da Simulação:}} O cliente \textbf{{{churn_status_sample}}} (Probabilidade de Churn: \textbf{{{prob_sample:.2f}\%}})\n\n'))
         else:
             markdown_story.append("Não foi possível realizar uma simulação pois o DataFrame de teste ou dados interativos não estão disponíveis.\n")
             training_details_markdown += f"- **Shape X_train (antes pré-processamento):** `{self.training_details.get('X_train_shape', 'N/A')}`\n"
             y_train_before_smote = self.training_details.get('y_train_value_counts_before_smote', {})
+            # Corrigido o SyntaxWarning para '`' no f-string para Markdown
+            training_details_markdown += f"- **Balanceamento `Exited` (antes SMOTE):** `Não Churn: {y_train_before_smote.get(0, 'N/A')}, Churn: {y_train_before_smote.get(1, 'N/A')}`\n"
             training_details_markdown += f"- **Shape X_train (após pré-processamento):** `{self.training_details.get('X_train_processed_shape', 'N/A')}`\n"
             y_train_after_smote = self.training_details.get('y_train_resampled_value_counts_after_smote', {})
+            # Corrigido o SyntaxWarning para '`' no f-string para Markdown
+            training_details_markdown += f"- **Balanceamento `Exited` (após SMOTE):** `Não Churn: {y_train_after_smote.get(0, 'N/A')}, Churn: {y_train_after_smote.get(1, 'N/A')}`\n"
             training_details_markdown += f"- **Modelo Treinado:** `{'Sim' if self.training_details.get('model_trained_successfully', False) else 'Não'}`\n"
             training_details_latex += fr'\item \textbf{{Dataset Carregado:}} {self.training_details.get("dataset_rows", "N/A")} linhas' + '\n'
             training_details_latex += fr'\item \textbf{{Features Preditivas:}} \texttt{{{", ".join(self.training_details.get("predictor_features", ["N/A"]))}}}.' + '\n'
             training_details_latex += fr'\item \textbf{{Coluna Alvo:}} \texttt{{{self.training_details.get("target_column", "N/A")}}}.' + '\n'
+            training_details_latex += fr'\item \textbf{{Shape $X_{{train}}$ (antes pré-processamento):}} {self.training_details.get("X_train_shape", "N/A")}.' + '\n' # $X_{train}$ corrigido
+            training_details_latex += fr'\item \textbf{{Balanceamento \texttt{{Exited}} (antes SMOTE):}} Não Churn: {y_train_before_smote.get(0, "N/A")}, Churn: {y_train_before_smote.get(1, "N/A")}.' + '\n'
+            training_details_latex += fr'\item \textbf{{Shape $X_{{train}}$ (após pré-processamento):}} {self.training_details.get("X_train_processed_shape", "N/A")}.' + '\n' # $X_{train}$ corrigido
+            training_details_latex += fr'\item \textbf{{Balanceamento \texttt{{Exited}} (após SMOTE):}} Não Churn: {y_train_after_smote.get(0, "N/A")}, Churn: {y_train_after_smote.get(1, "N/A")}.' + '\n'
             training_details_latex += fr'\item \textbf{{Modelo Treinado:}} {"Sim" if self.training_details.get("model_trained_successfully", False) else "Não"}.' + '\n'
             training_details_latex += r'\end{itemize}' + '\n\n'
         doc.append(Command('graphicspath', NoEscape(r'{./}'))) # Para imagens no mesmo diretório
         # --- Cabeçalho Personalizado (com base nas informações do usuário) ---
+        # Removendo title, author, date pois o titlepage vai sobrescrevê-los
+        # doc.append(NoEscape(r'\title{MODELAGEM PREDITIVA DE CHURN DE CLIENTES BANCÁRIOS UTILIZANDO REGRESSÃO LOGÍSTICA}'))
+        # doc.append(NoEscape(r'\author{ÉDER MARCELO PONTES CUNHA}'))
+        # doc.append(NoEscape(r'\date{26 de Outubro de 2025}')) # Ajuste conforme necessário
         doc.append(NoEscape(r'\begin{titlepage}'))
         doc.append(Command('centering'))
         if os.path.exists(logo_filename):
             with doc.create(Figure(position='h!')) as logo_fig:
                 logo_fig.add_image(logo_filename, width='0.25\textwidth')
+                # A caption vazia ou um vspace garante que não haja texto extra colado no logo
+                logo_fig.add_caption(NoEscape(r'\vspace{-0.5cm}'))
         else:
             doc.append(Command('textbf', 'AVISO: Logo da UnB não encontrado! Certifique-se de que "marcador.png" esteja no mesmo diretório do arquivo .tex.'))
         # Título do Trabalho (do usuário, ajustado para LaTeX)
         # Quebra de linha manual para o título
+        # CORRIGIDO: title_parts = header_info["titulo_trabalho"].replace('UTILIZANDO', r'\UTILIZANDO').split(r'\')
+        # AQUI FOI O ERRO DE SYNTAX. Deve ser assim:
+        title_parts_raw = header_info["titulo_trabalho"].replace(' UTILIZANDO ', r'\ \large ').split(r'\')
         doc.append(Command('Huge'))
+        doc.append(Command('textbf', NoEscape(title_parts_raw[0]))) # Primeira parte do título
+        # As partes restantes são separadas por `\` que adicionamos
+        # Iterar sobre as partes restantes e adicionar com quebra de linha
+        # A lógica de split mudou para apenas quebrar em ' ' e adicionar o comando LaTeX manualmente
+        title_words = header_info["titulo_trabalho"].split()
+        latex_title_lines = []
+        current_line = []
+        for word in title_words:
+            if word == 'UTILIZANDO':
+                if current_line:
+                    latex_title_lines.append(" ".join(current_line))
+                    current_line = []
+                latex_title_lines.append(r'\ \large UTILIZANDO') # Comando LaTeX para quebra de linha e tamanho da fonte
+            else:
+                current_line.append(word)
+        if current_line:
+            latex_title_lines.append(" ".join(current_line))
+        doc.append(Command('Huge'))
+        doc.append(Command('textbf', NoEscape(latex_title_lines[0]))) # Primeira linha do título
+        for line_idx in range(1, len(latex_title_lines)):
+            doc.append(LineBreak())
+            # Se for a linha com 'UTILIZANDO', já está formatada, caso contrário, use textbf
+            if 'UTILIZANDO' in latex_title_lines[line_idx]:
+                 doc.append(NoEscape(latex_title_lines[line_idx]))
+            else:
+                 doc.append(Command('textbf', NoEscape(latex_title_lines[line_idx])))
         doc.append(Command('vspace', '1.0cm'))
         doc.append(Command('vfill')) # Empurra o conteúdo para cima
         doc.append(Command('end{titlepage}'))
+        # doc.append(Command('maketitle')) # Não precisamos de maketitle pois usamos titlepage
+        doc.append(Command('clearpage'))
+        doc.append(Command('tableofcontents')) # Sumário
         doc.append(Command('clearpage'))
         # Conteúdo do Resumo
         for item in latex_content_parts:
+            doc.append(item) # pylatex objects (Section, Math, NoEscape) are directly appended
         # Adicionar imagens ao final do documento LaTeX
         doc.append(NoEscape(r'\clearpage'))
+        doc.append(Section(NoEscape(r'Visualizações Gráficas do Modelo')))
+        # Aumentado a largura para preencher mais a página e centralizar
         for key, path in plot_paths.items():
             if os.path.exists(path):
+                with doc.create(Figure(position='htbp')) as plot_fig:
+                    plot_fig.add_image(path, width='0.8\textwidth')
+                    plot_fig.add_caption(NoEscape(f'{key.replace("_", " ").title()}'))
+                doc.append(Command('clearpage')) # Cada imagem em uma nova página
         # Salvar o arquivo .tex
         latex_output_dir = tempfile.mkdtemp()
         output_filename = os.path.join(latex_output_dir, 'relatorio_churn.tex')
         doc.generate_tex(output_filename) # Salva o arquivo .tex
+        return output_filename