Spaces:

APPONTE
/

DataGraph

Sleeping

App Files Files Community

rwayz committed on Jul 11, 2025

Commit

6d012ce

verified ·

1 Parent(s): 2ab9338

Delete nodes

Browse files

Files changed (9) hide show

nodes/agent_node.py +0 -126
nodes/cache_node.py +0 -230
nodes/csv_processing_node.py +0 -815
nodes/custom_nodes.py +0 -297
nodes/database_node.py +0 -226
nodes/graph_generation_node.py +0 -1015
nodes/graph_selection_node.py +0 -147
nodes/query_node.py +0 -232
nodes/refinement_node.py +0 -141

nodes/agent_node.py DELETED Viewed

@@ -1,126 +0,0 @@
-"""
-Nó principal do agente SQL para LangGraph - Versão refatorada
-"""
-import logging
-from typing import Dict, Any, TypedDict, Optional
-from utils.object_manager import get_object_manager
class AgentState(TypedDict):
    """State carried through the LangGraph agent; serializable data only."""

    # --- Request / response ---
    user_input: str
    selected_model: str
    response: str
    advanced_mode: bool
    execution_time: float
    error: Optional[str]
    intermediate_steps: list

    # --- Serializable sample of the database ---
    db_sample_dict: dict

    # --- IDs used to retrieve non-serializable objects ---
    agent_id: str
    engine_id: str
    cache_id: str

    # --- Chart-related fields ---
    query_type: str  # 'sql_query', 'sql_query_graphic', 'prediction'
    sql_query_extracted: Optional[str]  # SQL query extracted from the agent's response
    graph_type: Optional[str]  # Chart type chosen by the LLM
    graph_data: Optional[dict]  # Data prepared for the chart (serializable)
    graph_image_id: Optional[str]  # ID of the chart image in the ObjectManager
    graph_generated: bool  # Whether the chart was generated successfully
    graph_error: Optional[str]  # Chart generation error, if any
async def initialize_agent_components_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that checks the agent's runtime components can be resolved.

    Args:
        state: Initial state; expected to carry ``agent_id``, ``engine_id``
            and ``cache_id``.

    Returns:
        The same state dict with ``components_ready`` set; on failure the
        ``error`` key holds the failure message.
    """
    try:
        manager = get_object_manager()

        # Stop at the first required ID that is absent or empty
        missing_id = next(
            (key for key in ("agent_id", "engine_id", "cache_id") if not state.get(key)),
            None,
        )
        if missing_id is not None:
            raise ValueError(f"ID necessário não encontrado: {missing_id}")

        # Resolve all three objects before judging them
        components = (
            manager.get_sql_agent(state["agent_id"]),
            manager.get_engine(state["engine_id"]),
            manager.get_cache_manager(state["cache_id"]),
        )
        if not all(components):
            raise ValueError("Um ou mais componentes não foram encontrados")

        state["components_ready"] = True
        logging.info("[AGENT] Componentes inicializados com sucesso")
    except Exception as exc:
        failure = f"Erro ao inicializar componentes: {exc}"
        logging.error(f"[AGENT] {failure}")
        state["error"] = failure
        state["components_ready"] = False
    return state
def should_refine_response(state: Dict[str, Any]) -> str:
    """
    Routing function: pick the node that follows the agent's answer.

    Args:
        state: Current agent state.

    Returns:
        ``"refine_response"`` when advanced mode is on and no error is set,
        otherwise ``"cache_response"``.
    """
    wants_refinement = state.get("advanced_mode", False)
    return (
        "refine_response"
        if wants_refinement and not state.get("error")
        else "cache_response"
    )
def should_generate_graph(state: Dict[str, Any]) -> str:
    """
    Routing function: decide whether a chart should be generated.

    Args:
        state: Current agent state.

    Returns:
        ``"graph_selection"`` when ``query_type`` is ``"sql_query_graphic"``
        and no error is set; otherwise whatever ``should_refine_response``
        returns for the same state.
    """
    query_type = state.get("query_type", "sql_query")
    # Truthiness test, matching should_refine_response: an empty-string
    # error counts as "no error" in both routers.
    has_error = bool(state.get("error"))

    # A chart is only produced for sql_query_graphic with no error
    if query_type == "sql_query_graphic" and not has_error:
        return "graph_selection"

    # Otherwise continue to refinement or caching, depending on advanced mode
    return should_refine_response(state)
class AgentNodeManager:
    """
    Name-based registry of the agent's node and routing functions.
    """

    def __init__(self):
        # Graph nodes, keyed by the name used to look them up
        self.node_functions = dict(
            initialize_components=initialize_agent_components_node,
        )
        # Conditional (routing) functions
        self.conditional_functions = dict(
            should_refine=should_refine_response,
            should_generate_graph=should_generate_graph,
        )

    def get_node_function(self, node_name: str):
        """Return the node function registered under ``node_name``, or None."""
        registry = self.node_functions
        return registry[node_name] if node_name in registry else None

    def get_conditional_function(self, condition_name: str):
        """Return the routing function registered under ``condition_name``, or None."""
        registry = self.conditional_functions
        return registry[condition_name] if condition_name in registry else None

nodes/cache_node.py DELETED Viewed

@@ -1,230 +0,0 @@
-"""
-Nó para gerenciamento de cache e histórico
-"""
-import logging
-from typing import Dict, Any
-from utils.object_manager import get_object_manager
async def update_history_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that records the current interaction in the history log.

    Args:
        state: Current agent state.

    Returns:
        The same state dict; ``history_updated`` is set on success and
        ``history_error`` when an exception was raised.
    """
    try:
        manager = get_object_manager()

        cache_key = state.get("cache_id")
        if not cache_key:
            logging.warning("[HISTORY] ID do cache não encontrado")
            return state

        cache = manager.get_cache_manager(cache_key)
        if not cache:
            logging.warning("[HISTORY] Cache manager não encontrado")
            return state

        question = state.get("user_input", "")
        answer = state.get("response", "")

        # One row of the history log
        log_row = {
            "Modelo AgentSQL": state.get("selected_model", ""),
            "Pergunta": question,
            "Resposta": answer,
            "Tempo de Resposta (s)": round(state.get("execution_time", 0.0), 2),
            "Modo Avançado": state.get("advanced_mode", False),
            "Refinado": state.get("refined", False),
            "Erro": state.get("error"),
            "Tipo de Query": state.get("query_type", "sql_query"),
        }
        cache.add_to_history(log_row)

        # Recent history keeps only the question/answer pair
        cache.update_recent_history(question, answer)

        state["history_updated"] = True
        logging.info("[HISTORY] Histórico atualizado")
    except Exception as exc:
        failure = f"Erro ao atualizar histórico: {exc}"
        logging.error(f"[HISTORY] {failure}")
        state["history_error"] = failure
    return state
async def cache_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that stores the current response in the cache.

    Args:
        state: State holding the response to be cached.

    Returns:
        The same state dict; ``cached`` reports whether the response was
        stored, and ``cache_error`` is set when an exception was raised.
    """
    try:
        manager = get_object_manager()

        cache_key = state.get("cache_id")
        if not cache_key:
            logging.warning("[CACHE] ID do cache não encontrado")
            return state

        cache = manager.get_cache_manager(cache_key)
        if not cache:
            logging.warning("[CACHE] Cache manager não encontrado")
            return state

        question = state.get("user_input", "")
        answer = state.get("response", "")

        # Only error-free, non-empty question/answer pairs are cached
        cacheable = bool(question and answer and not state.get("error"))
        if not cacheable:
            state["cached"] = False
            logging.info("[CACHE] Resposta não cacheada (erro ou dados insuficientes)")
        else:
            cache.cache_response(question, answer)
            state["cached"] = True
            logging.info(f"[CACHE] Resposta cacheada para: {question[:50]}...")
    except Exception as exc:
        failure = f"Erro ao cachear resposta: {exc}"
        logging.error(f"[CACHE] {failure}")
        state["cache_error"] = failure
    return state
async def get_cache_stats_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that collects cache statistics.

    Args:
        state: Current state.

    Returns:
        The same state dict with ``cache_stats`` filled in (an empty dict
        when the cache cannot be reached or an exception was raised).
    """
    try:
        manager = get_object_manager()

        cache_key = state.get("cache_id")
        cache = manager.get_cache_manager(cache_key) if cache_key else None
        if not cache:
            state["cache_stats"] = {}
            return state

        stats = {
            "cached_queries": len(cache.query_cache),
            "history_entries": len(cache.history_log),
            "recent_history_size": len(cache.recent_history),
            "cache_hit_rate": 0.0,  # Placeholder until estimated below
        }

        # Rough hit-rate estimate: share of logged questions that are repeats
        total_entries = stats["history_entries"]
        if total_entries > 0:
            distinct_questions = len({row.get("Pergunta", "") for row in cache.history_log})
            if distinct_questions > 0:
                stats["cache_hit_rate"] = max(0, 1 - (distinct_questions / total_entries))

        state["cache_stats"] = stats
        logging.info(f"[CACHE] Estatísticas coletadas: {stats}")
    except Exception as exc:
        failure = f"Erro ao obter estatísticas do cache: {exc}"
        logging.error(f"[CACHE] {failure}")
        state["cache_stats"] = {}
    return state
async def clear_cache_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that empties the cache.

    Args:
        state: Current state.

    Returns:
        The same state dict; ``cache_cleared`` reports the outcome and
        ``cache_error`` is set when an exception was raised.
    """
    try:
        manager = get_object_manager()

        cache_key = state.get("cache_id")
        cache = manager.get_cache_manager(cache_key) if cache_key else None
        if not cache:
            # Nothing to clear without a reachable cache manager
            state["cache_cleared"] = False
            return state

        cache.clear_cache()
        state["cache_cleared"] = True
        logging.info("[CACHE] Cache limpo")
    except Exception as exc:
        failure = f"Erro ao limpar cache: {exc}"
        logging.error(f"[CACHE] {failure}")
        state["cache_cleared"] = False
        state["cache_error"] = failure
    return state
async def check_cache_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that looks for a cached response to the user's query.

    Args:
        state: State holding the user's query.

    Returns:
        The same state dict with ``cache_hit`` set. On a hit, ``response``
        is replaced by the cached value, ``execution_time`` is set to 0.0
        and ``error`` is cleared.
    """
    try:
        manager = get_object_manager()

        cache_key = state.get("cache_id")
        question = state.get("user_input", "")
        if not (cache_key and question):
            state["cache_hit"] = False
            return state

        cache = manager.get_cache_manager(cache_key)
        if not cache:
            state["cache_hit"] = False
            return state

        cached_answer = cache.get_cached_response(question)
        if not cached_answer:
            state["cache_hit"] = False
            logging.info(f"[CACHE] Miss para: {question[:50]}...")
        else:
            state["cache_hit"] = True
            state["response"] = cached_answer
            state["execution_time"] = 0.0
            state["error"] = None
            logging.info(f"[CACHE] Hit para: {question[:50]}...")
    except Exception as exc:
        failure = f"Erro ao verificar cache: {exc}"
        logging.error(f"[CACHE] {failure}")
        state["cache_hit"] = False
        state["cache_error"] = failure
    return state

nodes/csv_processing_node.py DELETED Viewed

@@ -1,815 +0,0 @@
-"""
-Nó para processamento de arquivos CSV
-"""
-import os
-import shutil
-import logging
-import time
-import pandas as pd
-import numpy as np
-from typing import Dict, Any, TypedDict, List, Optional
-from sqlalchemy.types import DateTime, Integer, Float, String, Boolean
-from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
-import multiprocessing as mp
-from utils.config import UPLOADED_CSV_PATH
-from utils.object_manager import get_object_manager
-import numpy as np
def analyze_numeric_column(sample_values: pd.Series) -> Dict[str, Any]:
    """
    Decide whether a sample of column values is numeric.

    Values are compared as stripped strings. Null-like tokens ('', 'nan',
    'null', 'none', '-') are discarded case-insensitively, and a comma is
    read as the decimal separator.

    Args:
        sample_values: Sample of the column's values.

    Returns:
        Dict with ``is_numeric`` (more than 80% of the remaining values parse
        as numbers), ``is_integer``, ``numeric_ratio`` and ``has_decimals``.
    """
    analysis = {
        "is_numeric": False,
        "is_integer": False,
        "numeric_ratio": 0.0,
        "has_decimals": False
    }
    if len(sample_values) == 0:
        return analysis

    text = sample_values.astype(str).str.strip()

    # Case-insensitive so that "None", "Null", "NAN", ... are dropped too
    null_tokens = ['', 'nan', 'null', 'none', '-']
    cleaned = text[~text.str.lower().isin(null_tokens)]
    if len(cleaned) == 0:
        return analysis

    try:
        # Comma -> dot to accept the Brazilian decimal format
        converted = pd.to_numeric(
            cleaned.str.replace(',', '.', regex=False), errors='coerce'
        )
        valid_numbers = converted.dropna()
        analysis["numeric_ratio"] = len(valid_numbers) / len(cleaned)

        # More than 80% valid numbers -> treat the column as numeric
        if analysis["numeric_ratio"] > 0.8:
            analysis["is_numeric"] = True
            if len(valid_numbers) > 0:
                # Magnitudes >= 1e15 (and infinities) are left out of the
                # integer test, as float precision is unreliable there
                bounded = valid_numbers[valid_numbers.abs() < 1e15]
                analysis["is_integer"] = bool((bounded % 1 == 0).all())
                analysis["has_decimals"] = not analysis["is_integer"]
    except Exception as e:
        # Best effort: a failed analysis means "not numeric"
        logging.debug(f"Erro na análise numérica: {e}")
        analysis["is_numeric"] = False
    return analysis
def detect_date_format(date_string: str) -> str:
    """
    Guess the most likely layout of a date string.

    Args:
        date_string: Text to inspect.

    Returns:
        ``'iso'`` for year-first text (YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD),
        ``'american'`` for slash-separated text whose second field can only
        be a day (MM/DD/YYYY), and ``'brazilian'`` (day first) for
        everything else, including ambiguous or unparseable input.
    """
    date_str = str(date_string).strip()
    separators = ('-', '/', '.')

    # Year-first layout: four digits, a separator, two chars, a separator
    if len(date_str) >= 10 and date_str[4] in separators and date_str[7] in separators:
        year = date_str[:4]
        # isdecimal() only accepts characters int() can parse
        if year.isdecimal() and int(year) > 1900:
            return 'iso'

    # Slash-separated layouts: tell MM/DD/YYYY apart from DD/MM/YYYY
    if '/' in date_str:
        parts = date_str.split('/')
        if len(parts) == 3:
            try:
                month, day, _year = (int(part) for part in parts)
            except ValueError:
                # Non-numeric fields: fall through to the default
                pass
            else:
                # Second field > 12 with a valid month first -> month first.
                # First field > 12, or both <= 12 (ambiguous) -> day first.
                if month <= 12 < day:
                    return 'american'
                return 'brazilian'

    # Day-first is the default (DD/MM/YYYY, DD-MM-YYYY, DD.MM.YYYY)
    return 'brazilian'
def smart_date_conversion(date_string: str):
    """
    Parse a date string using the layout reported by ``detect_date_format``.

    Args:
        date_string: Date text.

    Returns:
        The value returned by ``pd.to_datetime``; parsing errors propagate
        because every call uses ``errors='raise'``.
    """
    detected = detect_date_format(date_string)

    if detected == 'american':
        return pd.to_datetime(date_string, format='%m/%d/%Y', errors='raise')
    if detected == 'brazilian':
        return pd.to_datetime(date_string, dayfirst=True, errors='raise')

    # 'iso' and any other label: let pandas infer the layout
    return pd.to_datetime(date_string, errors='raise')
async def process_dates_advanced(series: pd.Series) -> pd.Series:
    """
    Convert a text series to datetimes, trying several layouts per value.

    Each value is first passed to ``smart_date_conversion``; if that fails,
    the explicit formats below are tried in order. Null-like tokens and
    values that match nothing become ``NaT`` (the latter with a warning).

    Args:
        series: Series holding dates as text.

    Returns:
        ``datetime64[ns]`` series with the same index as ``series``.
    """
    # Explicit formats, in priority order
    date_formats = (
        '%d/%m/%Y',     # 01/12/2024
        '%d-%m-%Y',     # 01-12-2024
        '%Y-%m-%d',     # 2024-12-01
        '%d/%m/%y',     # 01/12/24
        '%d-%m-%y',     # 01-12-24
        '%Y/%m/%d',     # 2024/12/01
        '%d.%m.%Y',     # 01.12.2024
        '%Y.%m.%d',     # 2024.12.01
        '%d/%m/%Y %H:%M:%S',  # 01/12/2024 14:30:00
        '%Y-%m-%d %H:%M:%S',  # 2024-12-01 14:30:00
    )
    null_tokens = ('', 'nan', 'null', 'none', '-')

    # Starts out all-NaT, so only successful conversions need to be written
    result_series = pd.Series(index=series.index, dtype='datetime64[ns]')

    # Positional writes: label-based assignment would overwrite every row
    # that shares a duplicated index label.
    for position, value in enumerate(series):
        if pd.isna(value):
            continue
        value_str = str(value).strip()
        if value_str.lower() in null_tokens:
            continue

        parsed = None
        try:
            parsed = smart_date_conversion(value_str)
        except Exception:
            # Fall back to the explicit formats
            for fmt in date_formats:
                try:
                    parsed = pd.to_datetime(value_str, format=fmt, errors='raise')
                    break
                except Exception:
                    continue

        if parsed is None:
            logging.warning(f"Não foi possível converter '{value_str}' para data")
        else:
            result_series.iloc[position] = parsed
    return result_series
class CSVProcessingState(TypedDict):
    """State exchanged while a CSV file is processed."""

    # Input file
    file_path: str

    # Outcome of the processing step
    success: bool
    message: str

    # Data extracted from the CSV
    csv_data_sample: dict
    column_info: dict
    processing_stats: dict
def _parses_as_date(value_str: str, date_formats) -> bool:
    """Return True when the text parses via smart detection or one of ``date_formats``."""
    try:
        smart_date_conversion(value_str)
        return True
    except Exception:
        pass
    for fmt in date_formats:
        try:
            pd.to_datetime(value_str, format=fmt, errors='raise')
            return True
        except Exception:
            continue
    return False


def _looks_like_date_column(sample_values: pd.Series) -> bool:
    """Return True when more than 70% of the sampled values parse as dates."""
    if len(sample_values) == 0:
        return False
    # Common date formats tried after smart detection
    date_formats = (
        '%d/%m/%Y',     # 01/12/2024
        '%d-%m-%Y',     # 01-12-2024
        '%Y-%m-%d',     # 2024-12-01
        '%d/%m/%y',     # 01/12/24
        '%d-%m-%y',     # 01-12-24
        '%Y/%m/%d',     # 2024/12/01
        '%d.%m.%Y',     # 01.12.2024
        '%Y.%m.%d',     # 2024.12.01
    )
    date_success_count = 0
    for val in sample_values:
        val_str = str(val).strip()
        if not val_str or val_str.lower() in ('nan', 'null', 'none', '-'):
            continue
        if _parses_as_date(val_str, date_formats):
            date_success_count += 1
    return date_success_count / len(sample_values) > 0.7


async def detect_column_types(df: pd.DataFrame, sample_size: int = 1000) -> Dict[str, Any]:
    """
    Classify every column of ``df`` as numeric, date or text.

    Columns already typed int64/Int64/float64/Float64 are kept as numbers.
    Object columns are tested for dates first (first 20 non-null sampled
    values), then for numbers stored as text (first 50 non-null sampled
    values, via ``analyze_numeric_column``). Anything else is text, so
    every column always receives an SQL type and a processing rule.

    Args:
        df: pandas DataFrame.
        sample_size: Maximum number of rows sampled for the analysis.

    Returns:
        Dict with ``detected_types``, ``sql_types``, ``date_columns``,
        ``numeric_columns``, ``text_columns`` and ``processing_rules``.
    """
    column_info = {
        "detected_types": {},
        "sql_types": {},
        "date_columns": [],
        "numeric_columns": [],
        "text_columns": [],
        "processing_rules": {}
    }

    def register(col, bucket, sql_type, rule):
        # Single place where a classification is recorded
        column_info[bucket].append(col)
        column_info["sql_types"][col] = sql_type
        column_info["processing_rules"][col] = rule

    # Fixed-seed sample keeps detection cheap and repeatable on large frames
    if len(df) > sample_size:
        sample_df = df.sample(n=sample_size, random_state=42)
    else:
        sample_df = df
    logging.info(f"[OPTIMIZATION] Usando amostra de {len(sample_df)} linhas para detecção de tipos")

    for col in df.columns:
        column_info["detected_types"][col] = str(df[col].dtype)
        sample_col = sample_df[col]

        # Numbers already parsed by pandas
        if sample_col.dtype in ['int64', 'Int64', 'float64', 'Float64']:
            if 'int' in str(sample_col.dtype).lower():
                register(col, "numeric_columns", Integer(), "keep_as_int")
            else:
                register(col, "numeric_columns", Float(), "keep_as_float")
            continue

        # Non-object dtypes other than the numeric ones above stay as text
        if sample_col.dtype != 'object':
            register(col, "text_columns", String(), "keep_as_text")
            continue

        non_null = sample_col.dropna()

        # Dates stored as text
        if _looks_like_date_column(non_null.head(20)):
            register(col, "date_columns", DateTime(), "parse_dates_advanced")
            continue

        # Numbers stored as text (an empty sample is reported as non-numeric)
        numeric_analysis = analyze_numeric_column(non_null.head(50))
        if not numeric_analysis["is_numeric"]:
            register(col, "text_columns", String(), "keep_as_text")
        elif numeric_analysis["is_integer"]:
            register(col, "numeric_columns", Integer(), "convert_text_to_int_safe")
        else:
            register(col, "numeric_columns", Float(), "convert_text_to_float_safe")
    return column_info


async def process_dataframe_generic(df: pd.DataFrame, column_info: Dict[str, Any]) -> pd.DataFrame:
    """
    Apply the detected processing rules to the DataFrame.

    No copy is made: ``df`` itself is modified and returned.

    Args:
        df: Original DataFrame.
        column_info: Detection result; only ``processing_rules`` is read.

    Returns:
        The same DataFrame object, after processing.
    """
    logging.info(f"[ULTRA_OPTIMIZATION] Iniciando processamento ULTRA-OTIMIZADO de {len(df)} linhas")
    start_time = time.time()
    processed_df = df

    # Bucket the columns by the kind of processing their rule asks for
    processing_groups = {
        'dates': [],
        'keep_numeric': [],
        'convert_numeric': [],
        'text': []
    }
    for col, rule in column_info["processing_rules"].items():
        if col not in processed_df.columns:
            continue
        if 'date' in rule:
            group = 'dates'
        elif 'keep_as' in rule:
            # Also receives "keep_as_text", which the batch step leaves untouched
            group = 'keep_numeric'
        elif 'convert' in rule:
            group = 'convert_numeric'
        else:
            group = 'text'
        processing_groups[group].append((col, rule))

    await process_groups_parallel(processed_df, processing_groups)

    total_time = time.time() - start_time
    logging.info(f"[ULTRA_OPTIMIZATION] Processamento ULTRA-OTIMIZADO concluído em {total_time:.2f}s")
    return processed_df


async def process_groups_parallel(df: pd.DataFrame, groups: Dict[str, List]):
    """
    Run the batch processor of every non-empty column group.

    The coroutines are handed to ``asyncio.gather``. The batch processors
    in this module contain no ``await``, so each one runs to completion
    before the next starts.

    Args:
        df: DataFrame modified in place.
        groups: Mapping of group name to a list of ``(column, rule)`` pairs.
    """
    tasks = []
    for group_name, columns in groups.items():
        if not columns:
            continue
        if group_name == 'dates':
            tasks.append(process_date_columns_batch(df, columns))
        elif group_name == 'keep_numeric':
            tasks.append(process_keep_numeric_batch(df, columns))
        elif group_name == 'convert_numeric':
            tasks.append(process_convert_numeric_batch(df, columns))
        # 'text' needs no processing

    if tasks:
        import asyncio
        await asyncio.gather(*tasks)


async def process_date_columns_batch(df: pd.DataFrame, date_columns: List[tuple]):
    """
    Convert the given columns of ``df`` to datetimes, in place.

    Args:
        df: DataFrame modified in place.
        date_columns: ``(column, rule)`` pairs; a failing column is logged
            and left unchanged.
    """
    for col, rule in date_columns:
        try:
            if rule == "parse_dates_advanced":
                # process_dates_vectorized is defined outside this block
                df[col] = process_dates_vectorized(df[col])
            else:
                df[col] = pd.to_datetime(df[col], dayfirst=True, errors='coerce')
        except Exception as e:
            logging.warning(f"Erro ao processar data {col}: {e}")


async def process_keep_numeric_batch(df: pd.DataFrame, numeric_columns: List[tuple]):
    """
    Normalize the dtype of columns that are already numeric, in place.

    Args:
        df: DataFrame modified in place.
        numeric_columns: ``(column, rule)`` pairs; rules other than
            ``keep_as_int`` / ``keep_as_float`` are ignored.
    """
    for col, rule in numeric_columns:
        try:
            if rule == "keep_as_int" and df[col].dtype != 'Int64':
                df[col] = df[col].astype("Int64")
            elif rule == "keep_as_float" and df[col].dtype != 'float64':
                df[col] = df[col].astype("float64")
        except Exception as e:
            logging.warning(f"Erro ao manter tipo {col}: {e}")


async def process_convert_numeric_batch(df: pd.DataFrame, convert_columns: List[tuple]):
    """
    Convert numeric-looking text columns of ``df`` to numbers, in place.

    Args:
        df: DataFrame modified in place.
        convert_columns: ``(column, rule)`` pairs; a failing column is
            logged and left unchanged.
    """
    for col, rule in convert_columns:
        try:
            # The *_ultra_optimized converters are defined outside this block
            if rule == "convert_text_to_int_safe":
                df[col] = convert_to_int_ultra_optimized(df[col])
            elif rule == "convert_text_to_float_safe":
                df[col] = convert_to_float_ultra_optimized(df[col])
        except Exception as e:
            logging.warning(f"Erro ao converter {col}: {e}")
def convert_to_int_optimized(series: pd.Series) -> pd.Series:
    """
    Convert a text series to nullable integers.

    Values are stripped, commas are read as decimal separators, and
    anything that does not parse as a number becomes missing.

    Args:
        series: Series to convert.

    Returns:
        ``Int64`` series when every parsed value is a whole number (or when
        nothing parses at all); ``Float64`` series when decimals are
        present. The result keeps the index of ``series``. If the
        conversion raises, the error is logged and ``series`` is returned
        unchanged.
    """
    try:
        # Null-like tokens ('', 'nan', 'null', '-', ...) need no special
        # handling: errors='coerce' turns every unparseable value into NaN.
        cleaned = series.astype(str).str.strip().str.replace(',', '.', regex=False)
        numeric = pd.to_numeric(cleaned, errors='coerce')

        valid_numbers = numeric.dropna()
        if valid_numbers.empty:
            # No usable value: all-missing result aligned to the input index
            return pd.Series(pd.NA, index=series.index, dtype="Int64")

        # Whole numbers only (within float tolerance) -> lossless Int64
        is_whole = np.abs(valid_numbers - np.round(valid_numbers)) < 1e-10
        if is_whole.all():
            return numeric.round().astype("Int64")

        # Decimals present: keep them rather than truncate
        logging.warning("Coluna contém decimais, mantendo como float")
        return numeric.astype("Float64")
    except Exception as e:
        logging.error(f"Erro na conversão otimizada para int: {e}")
        return series
-def convert_to_float_optimized(series: pd.Series) -> pd.Series:
-    """
-    Convert a text-like series to float64 with vectorized pandas ops.
-    Values are stringified and stripped, decimal commas (Brazilian format)
-    become dots, and anything that does not parse becomes NaN.
-    Args:
-        series: Series to convert
-    Returns:
-        A float64 series with the input index, or the original series
-        unchanged if the conversion itself raises.
-    """
-    try:
-        cleaned = series.astype(str).str.strip()
-        # Decimal comma -> decimal point. Done before the placeholder replacement:
-        # depending on the pandas version, replace() may downcast an all-missing
-        # object column to float, after which the .str accessor is unavailable.
-        cleaned = cleaned.str.replace(',', '.', regex=False)
-        # Placeholder tokens that stand for "no value"
-        cleaned = cleaned.replace(['', 'nan', 'null', 'none', '-', 'NaN', 'NULL'], np.nan)
-        # to_numeric yields int64 for all-integer text; cast so the documented
-        # float64 result holds for every input
-        return pd.to_numeric(cleaned, errors='coerce').astype('float64')
-    except Exception as e:
-        logging.error(f"Erro na conversão otimizada para float: {e}")
-        return series
-def convert_to_int_ultra_optimized(series: pd.Series) -> pd.Series:
-    """
-    Convert a series to nullable Int64, keeping only values that are integral.
-    Numeric input is cast directly. Text input has decimal commas turned into
-    dots and is parsed element-wise; entries that are placeholders, do not
-    parse, carry a fractional part or do not fit in int64 become <NA>.
-    Args:
-        series: Series to convert
-    Returns:
-        An Int64 series with the input index, or the original series unchanged
-        if the conversion raises (e.g. numeric input with fractional values).
-    """
-    try:
-        # Already numeric: let pandas cast; a non-integral float makes this raise,
-        # which the handler below turns into "return the input unchanged"
-        if pd.api.types.is_numeric_dtype(series):
-            return pd.Series(series.values, index=series.index, dtype="Int64")
-        str_values = series.astype(str)
-        # Placeholder tokens that stand for "no value"
-        valid_mask = ~str_values.isin(['', 'nan', 'null', 'none', '-', 'NaN', 'NULL'])
-        # pandas string ops are used instead of np.char.replace, which rejects
-        # the object-dtype array a stringified Series converts to
-        cleaned = str_values.str.replace(',', '.', regex=False)
-        numeric_values = pd.to_numeric(cleaned, errors='coerce').astype('float64').to_numpy()
-        valid = valid_mask.to_numpy()
-        with np.errstate(invalid='ignore'):
-            # NaN compares False, so unparseable entries drop out here
-            int_mask = np.abs(numeric_values - np.round(numeric_values)) < 1e-10
-            # Guard the int64 cast against magnitudes it cannot represent
-            int_mask &= np.abs(numeric_values) < 2.0 ** 63
-        int_mask &= valid
-        int_values = np.zeros(len(numeric_values), dtype=np.int64)
-        int_values[int_mask] = np.round(numeric_values[int_mask]).astype(np.int64)
-        # IntegerArray takes the data plus a mask where True marks a missing entry
-        return pd.Series(pd.arrays.IntegerArray(int_values, ~int_mask), index=series.index)
-    except Exception as e:
-        logging.error(f"Erro na conversão ultra-otimizada para int: {e}")
-        return series
-def convert_to_float_ultra_optimized(series: pd.Series) -> pd.Series:
-    """
-    Convert a series to float64.
-    Numeric input is cast directly. Text input has decimal commas (Brazilian
-    format) turned into dots and is parsed element-wise; placeholders and
-    entries that do not parse become NaN.
-    Args:
-        series: Series to convert
-    Returns:
-        A float64 series with the input index, or the original series
-        unchanged if the conversion itself raises.
-    """
-    try:
-        if pd.api.types.is_numeric_dtype(series):
-            return series.astype('float64')
-        str_values = series.astype(str)
-        # Placeholder tokens that stand for "no value"
-        valid_mask = ~str_values.isin(['', 'nan', 'null', 'none', '-', 'NaN', 'NULL'])
-        # pandas string ops are used instead of np.char.replace, which rejects
-        # the object-dtype array a stringified Series converts to
-        cleaned = str_values.str.replace(',', '.', regex=False)
-        result = pd.to_numeric(cleaned, errors='coerce').astype('float64')
-        # Positional mask (ndarray) so no index alignment is involved
-        return result.where(valid_mask.to_numpy())
-    except Exception as e:
-        logging.error(f"Erro na conversão ultra-otimizada para float: {e}")
-        return series
-def process_dates_vectorized(series: pd.Series) -> pd.Series:
-    """
-    Parse a series into datetimes with vectorized pandas calls.
-    A day-first parse is attempted first; entries that cannot be parsed become
-    NaT. Only if that call raises is the format guessed from the first
-    non-missing value (year-first text is parsed without dayfirst).
-    Args:
-        series: Series holding date-like values
-    Returns:
-        The parsed datetime series, or the original series unchanged if
-        every parsing attempt raises.
-    """
-    try:
-        # Fast path: direct day-first conversion
-        try:
-            return pd.to_datetime(series, dayfirst=True, errors='coerce')
-        except Exception as e:
-            # Exception rather than a bare except, so KeyboardInterrupt and
-            # SystemExit are not swallowed on the way to the fallback
-            logging.debug(f"Conversão direta de datas falhou, usando detecção de formato: {e}")
-        # Drop missing values BEFORE stringifying; afterwards they would be the
-        # text 'nan' and could be picked as the format sample
-        sample = series.dropna().astype(str).head(100)
-        if len(sample) > 0:
-            first_val = sample.iloc[0]
-            # A separator at position 4 means the value starts with a 4-digit year
-            if len(first_val) >= 10 and first_val[4] in ('-', '/'):
-                return pd.to_datetime(series, errors='coerce')
-            # Otherwise assume the Brazilian day-first layout
-            return pd.to_datetime(series, dayfirst=True, errors='coerce')
-        return pd.to_datetime(series, errors='coerce')
-    except Exception as e:
-        logging.error(f"Erro no processamento vetorizado de datas: {e}")
-        return series
-async def csv_processing_node(state: CSVProcessingState) -> CSVProcessingState:
-    """
-    Main CSV processing node: read the file, detect column types, convert the
-    data and publish the processed DataFrame through the object manager.
-    Args:
-        state: CSV processing state; "file_path" must point to the CSV to read
-    Returns:
-        The same state object, updated in place. On success it carries
-        "success", "message", "csv_data_sample", "column_info",
-        "processing_stats" and "dataframe_id"; on failure "success" is False,
-        "message" holds the error and the three data fields are empty dicts.
-    """
-    try:
-        file_path = state["file_path"]
-        # Keep a copy of the input at the upload location
-        try:
-            shutil.copy(file_path, UPLOADED_CSV_PATH)
-            logging.info(f"[CSV_PROCESSING] Arquivo copiado para: {UPLOADED_CSV_PATH}")
-        except shutil.SameFileError:
-            # The input already is the uploaded copy; there is nothing to copy
-            logging.info(f"[CSV_PROCESSING] Arquivo já está em: {UPLOADED_CSV_PATH}")
-        separators = [';', ',', '\t', '|']
-        df = None
-        used_separator = None
-        for sep in separators:
-            try:
-                # Cheap probe on 3 rows: a separator is accepted when it
-                # yields more than one column
-                test_df = pd.read_csv(file_path, sep=sep, nrows=3, engine='c')
-                if len(test_df.columns) > 1:
-                    # Full read; everything as text, typing happens afterwards
-                    df = pd.read_csv(
-                        file_path,
-                        sep=sep,
-                        encoding='utf-8',
-                        on_bad_lines="skip",
-                        engine='c',
-                        low_memory=False,
-                        dtype=str
-                    )
-                    used_separator = sep
-                    break
-            except Exception as e:
-                # A failing candidate is expected; move on to the next one.
-                # Exception (not a bare except) lets KeyboardInterrupt and
-                # SystemExit propagate.
-                logging.debug(f"[CSV_PROCESSING] Separador {sep!r} descartado: {e}")
-                continue
-        if df is None:
-            raise ValueError("Não foi possível detectar o formato do CSV")
-        logging.info(f"[CSV_PROCESSING] CSV lido com separador '{used_separator}', {len(df)} linhas, {len(df.columns)} colunas")
-        # Detect column types, then convert the DataFrame accordingly
-        column_info = await detect_column_types(df)
-        processed_df = await process_dataframe_generic(df, column_info)
-        processing_stats = {
-            "original_rows": len(df),
-            "processed_rows": len(processed_df),
-            "original_columns": len(df.columns),
-            "processed_columns": len(processed_df.columns),
-            "separator_used": used_separator,
-            "date_columns_detected": len(column_info["date_columns"]),
-            "numeric_columns_detected": len(column_info["numeric_columns"]),
-            "text_columns_detected": len(column_info["text_columns"])
-        }
-        # Small preview of the processed data for the state
-        csv_data_sample = {
-            "head": processed_df.head(5).to_dict(),
-            "dtypes": processed_df.dtypes.astype(str).to_dict(),
-            "columns": list(processed_df.columns)
-        }
-        # The DataFrame itself goes to the object manager; the state keeps its id
-        obj_manager = get_object_manager()
-        df_id = obj_manager.store_object(processed_df, "processed_dataframe")
-        state.update({
-            "success": True,
-            "message": f"✅ CSV processado com sucesso! {processing_stats['processed_rows']} linhas, {processing_stats['processed_columns']} colunas",
-            "csv_data_sample": csv_data_sample,
-            "column_info": column_info,
-            "processing_stats": processing_stats,
-            "dataframe_id": df_id
-        })
-        logging.info(f"[CSV_PROCESSING] Processamento concluído: {processing_stats}")
-    except Exception as e:
-        error_msg = f"❌ Erro ao processar CSV: {e}"
-        logging.error(f"[CSV_PROCESSING] {error_msg}")
-        state.update({
-            "success": False,
-            "message": error_msg,
-            "csv_data_sample": {},
-            "column_info": {},
-            "processing_stats": {}
-        })
-    return state

nodes/custom_nodes.py DELETED Viewed

@@ -1,297 +0,0 @@
-"""
-Nós personalizados para funcionalidades específicas
-"""
-import os
-import shutil
-import logging
-from typing import Dict, Any, TypedDict
-from utils.database import create_sql_database
-from utils.config import UPLOADED_CSV_PATH, SQL_DB_PATH, DEFAULT_CSV_PATH
-from agents.sql_agent import SQLAgentManager
-from nodes.csv_processing_node import csv_processing_node
-from nodes.database_node import create_database_from_dataframe_node, load_existing_database_node
-class FileUploadState(TypedDict):
-    """State dictionary used by the CSV upload node (handle_csv_upload_node)."""
-    # Path of the CSV file to ingest; read by the upload node
-    file_path: str
-    # Outcome flag and status message written by the upload node
-    success: bool
-    message: str
-    # Database engine retrieved after the new database is built
-    engine: Any
-    # SQL agent recreated on top of the new database
-    sql_agent: SQLAgentManager
-    # Cache manager; the upload node calls clear_cache() on it
-    cache_manager: Any
-class ResetState(TypedDict):
-    """State dictionary for a system reset."""
-    # NOTE(review): reset_system_node is annotated with Dict[str, Any] and works
-    # with "engine_id" / "agent_id" / "cache_id" keys rather than the object
-    # fields below — confirm whether this type is still in use.
-    # Outcome flag and status message
-    success: bool
-    message: str
-    # Presumably the database engine, SQL agent and cache manager objects — TODO confirm
-    engine: Any
-    sql_agent: SQLAgentManager
-    cache_manager: Any
-async def handle_csv_upload_node(state: FileUploadState) -> FileUploadState:
-    """
-    Handle a CSV upload: process the file, rebuild the database and recreate
-    the SQL agent.
-    Args:
-        state: Upload state; "file_path" is required, "cache_manager" is
-            cleared when present
-    Returns:
-        The same state, updated in place with "engine", "sql_agent",
-        "success" and "message"
-    """
-    try:
-        file_path = state["file_path"]
-        # Step 1: process the CSV
-        csv_state = {
-            "file_path": file_path,
-            "success": False,
-            "message": "",
-            "csv_data_sample": {},
-            "column_info": {},
-            "processing_stats": {}
-        }
-        csv_result = await csv_processing_node(csv_state)
-        if not csv_result["success"]:
-            raise Exception(csv_result["message"])
-        # Step 2: create the database from the processed DataFrame
-        db_result = await create_database_from_dataframe_node(csv_result)
-        if not db_result["success"]:
-            raise Exception(db_result["message"])
-        # Fetch the objects the database node stored in the object manager
-        from utils.object_manager import get_object_manager
-        obj_manager = get_object_manager()
-        engine = obj_manager.get_engine(db_result["engine_id"])
-        db = obj_manager.get_object(db_result["db_id"])
-        logging.info("[UPLOAD] Novo banco carregado e DB atualizado usando nova arquitetura.")
-        sql_agent = SQLAgentManager(db)
-        # Publish the new engine/agent first: the database has already been
-        # replaced, so a cache problem must not leave stale objects in the state
-        state["engine"] = engine
-        state["sql_agent"] = sql_agent
-        # Clear the cache only when one is available (same guard as the reset node)
-        cache_manager = state.get("cache_manager")
-        if cache_manager:
-            cache_manager.clear_cache()
-        else:
-            logging.warning("[UPLOAD] Nenhum cache_manager no estado; limpeza de cache ignorada.")
-        state["success"] = True
-        state["message"] = "✅ CSV carregado com sucesso!"
-        logging.info("[UPLOAD] Novo banco carregado e agente recriado. Cache limpo.")
-    except Exception as e:
-        error_msg = f"❌ Erro ao processar CSV: {e}"
-        logging.error(f"[ERRO] Falha ao processar novo CSV: {e}")
-        state["success"] = False
-        state["message"] = error_msg
-    return state
-async def reset_system_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Reset the system to its initial state: drop the uploaded CSV, rebuild the
-    database from the default CSV, recreate the SQL agent and clear the cache.
-    Args:
-        state: Reset state; "cache_id" is optional and selects the cache to clear
-    Returns:
-        The same state, updated in place with "engine_id", "agent_id",
-        "success" and "message" (only the last two on failure)
-    """
-    try:
-        from utils.object_manager import get_object_manager
-        obj_manager = get_object_manager()
-        # Remove the custom CSV if there is one
-        if os.path.exists(UPLOADED_CSV_PATH):
-            os.remove(UPLOADED_CSV_PATH)
-            logging.info("[RESET] CSV personalizado removido.")
-        # Rebuild from the default CSV
-        csv_state = {
-            "file_path": DEFAULT_CSV_PATH,
-            "success": False,
-            "message": "",
-            "csv_data_sample": {},
-            "column_info": {},
-            "processing_stats": {}
-        }
-        csv_result = await csv_processing_node(csv_state)
-        # csv_processing_node copies its input to UPLOADED_CSV_PATH, which would
-        # leave the default CSV behind as a "custom" upload; remove that copy.
-        # The database is built from the stored DataFrame, not from this file.
-        if os.path.exists(UPLOADED_CSV_PATH):
-            os.remove(UPLOADED_CSV_PATH)
-        if not csv_result["success"]:
-            raise Exception(csv_result["message"])
-        db_result = await create_database_from_dataframe_node(csv_result)
-        if not db_result["success"]:
-            raise Exception(db_result["message"])
-        # Fetch the objects the database node stored in the object manager
-        engine = obj_manager.get_engine(db_result["engine_id"])
-        db = obj_manager.get_object(db_result["db_id"])
-        sql_agent = SQLAgentManager(db)
-        # Register engine and agent and expose their ids through the state
-        # NOTE(review): the engine is stored again although db_result already
-        # carries an engine_id — confirm whether the second registration is needed
-        engine_id = obj_manager.store_engine(engine)
-        agent_id = obj_manager.store_sql_agent(sql_agent)
-        # Clear the cache when one is referenced and still available
-        cache_id = state.get("cache_id")
-        if cache_id:
-            cache_manager = obj_manager.get_cache_manager(cache_id)
-            if cache_manager:
-                cache_manager.clear_cache()
-        state.update({
-            "engine_id": engine_id,
-            "agent_id": agent_id,
-            "success": True,
-            "message": "🔄 Sistema resetado para o estado inicial."
-        })
-        logging.info("[RESET] Sistema resetado com sucesso.")
-    except Exception as e:
-        error_msg = f"❌ Erro ao resetar: {e}"
-        logging.error(f"[ERRO] Falha ao resetar sistema: {e}")
-        state.update({
-            "success": False,
-            "message": error_msg
-        })
-    return state
-async def validate_system_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Check the database engine, SQL agent and cache found in the state.
-    Args:
-        state: Current system state; "engine", "sql_agent" and
-            "cache_manager" are inspected when present
-    Returns:
-        The same state with a "validation" dict holding one flag per
-        component plus "overall_valid" (True only when all three pass)
-    """
-    component_keys = ("database_valid", "agent_valid", "cache_valid")
-    report = dict.fromkeys(component_keys + ("overall_valid",), False)
-    try:
-        db_engine = state.get("engine")
-        if db_engine:
-            from utils.database import validate_database
-            report["database_valid"] = validate_database(db_engine)
-        agent = state.get("sql_agent")
-        if agent:
-            report["agent_valid"] = agent.validate_agent()
-        # A cache counts as valid simply by being present
-        if state.get("cache_manager"):
-            report["cache_valid"] = True
-        report["overall_valid"] = all([report[key] for key in component_keys])
-        state["validation"] = report
-        logging.info(f"[VALIDATION] Sistema válido: {report['overall_valid']}")
-    except Exception as e:
-        logging.error(f"[VALIDATION] Erro na validação: {e}")
-        state["validation"] = report
-    return state
-async def get_system_info_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Collect descriptive information about the running system.
-    Args:
-        state: Current system state; "sql_agent" and "cache_manager" are
-            consulted when present
-    Returns:
-        The same state with a "system_info" dict (active CSV path, database
-        path, agent info and cache statistics); fields that could not be
-        collected stay None
-    """
-    info = {
-        "csv_active": None,
-        "database_path": SQL_DB_PATH,
-        "agent_info": None,
-        "cache_stats": None
-    }
-    try:
-        from utils.config import get_active_csv_path
-        info["csv_active"] = get_active_csv_path()
-        agent = state.get("sql_agent")
-        if agent:
-            info["agent_info"] = agent.get_agent_info()
-        cache = state.get("cache_manager")
-        if cache:
-            # Sizes of the cache manager's three collections
-            cached_count = len(cache.query_cache)
-            history_count = len(cache.history_log)
-            recent_count = len(cache.recent_history)
-            info["cache_stats"] = {
-                "cached_queries": cached_count,
-                "history_entries": history_count,
-                "recent_history_size": recent_count
-            }
-        state["system_info"] = info
-        logging.info("[SYSTEM_INFO] Informações do sistema coletadas")
-    except Exception as e:
-        logging.error(f"[SYSTEM_INFO] Erro ao coletar informações: {e}")
-        state["system_info"] = info
-    return state
-class CustomNodeManager:
-    """
-    Registry that maps node names to the custom node coroutines
-    """
-    def __init__(self):
-        self.node_functions = dict(
-            csv_upload=handle_csv_upload_node,
-            system_reset=reset_system_node,
-            system_validation=validate_system_node,
-            system_info=get_system_info_node,
-        )
-    def get_node_function(self, node_name: str):
-        """Look up a node function by name; None when it is not registered"""
-        return self.node_functions.get(node_name)
-    async def execute_node(self, node_name: str, state: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Run the node registered under the given name
-        Args:
-            node_name: Name of the node
-            state: Current state
-        Returns:
-            The state returned by the node, or the input state untouched
-            when no node has that name
-        """
-        handler = self.get_node_function(node_name)
-        if not handler:
-            logging.error(f"Nó não encontrado: {node_name}")
-            return state
-        return await handler(state)

nodes/database_node.py DELETED Viewed

@@ -1,226 +0,0 @@
-"""
-Nó para operações de banco de dados
-"""
-import os
-import logging
-import pandas as pd
-from typing import Dict, Any, TypedDict, Optional
-from sqlalchemy import create_engine
-from utils.config import SQL_DB_PATH
-from utils.database import create_sql_database, validate_database
-from utils.object_manager import get_object_manager
-class DatabaseState(TypedDict):
-    """State dictionary for database operations."""
-    # Outcome flag and status message written by the database nodes
-    success: bool
-    message: str
-    # Descriptive data about the database (path, table name, columns, ...);
-    # an empty dict when the operation failed
-    database_info: dict
-    # Object-manager ids of the engine and the SQLDatabase object;
-    # empty strings when the operation failed
-    engine_id: str
-    db_id: str
-async def create_database_from_dataframe_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Create the SQLite database from the processed DataFrame.
-    The DataFrame is fetched from the object manager via state["dataframe_id"],
-    written to table "tabela" (replacing an existing one) and wrapped in a
-    SQLDatabase object; engine and database are stored in the object manager.
-    Args:
-        state: State carrying "dataframe_id" and, optionally, "column_info"
-            whose "sql_types" mapping is forwarded to DataFrame.to_sql
-    Returns:
-        The same state, updated in place with "success", "message",
-        "database_info", "engine_id" and "db_id" (empty values on failure)
-    """
-    # Bound before the try block so the failure path can release it
-    engine = None
-    try:
-        obj_manager = get_object_manager()
-        df_id = state.get("dataframe_id")
-        if not df_id:
-            raise ValueError("ID do DataFrame não encontrado no estado")
-        processed_df = obj_manager.get_object(df_id)
-        if processed_df is None:
-            raise ValueError("DataFrame processado não encontrado")
-        # "or {}" also covers keys that are present but set to None
-        column_info = state.get("column_info") or {}
-        sql_types = column_info.get("sql_types") or {}
-        engine = create_engine(f"sqlite:///{SQL_DB_PATH}")
-        processed_df.to_sql(
-            "tabela",
-            engine,
-            index=False,
-            if_exists="replace",
-            dtype=sql_types
-        )
-        logging.info(f"[DATABASE] Banco criado com {len(processed_df)} registros")
-        db = create_sql_database(engine)
-        # The validation result is reported in database_info; it does not
-        # change the success flag
-        is_valid = validate_database(engine)
-        engine_id = obj_manager.store_engine(engine)
-        db_id = obj_manager.store_database(db)
-        database_info = {
-            "path": SQL_DB_PATH,
-            "table_name": "tabela",
-            "total_records": len(processed_df),
-            "columns": list(processed_df.columns),
-            "column_types": {col: str(dtype) for col, dtype in processed_df.dtypes.items()},
-            "is_valid": is_valid,
-            "sql_types_used": {col: str(sql_type) for col, sql_type in sql_types.items()}
-        }
-        state.update({
-            "success": True,
-            "message": f"✅ Banco de dados criado com sucesso! {len(processed_df)} registros salvos",
-            "database_info": database_info,
-            "engine_id": engine_id,
-            "db_id": db_id
-        })
-        logging.info(f"[DATABASE] Banco criado e validado: {database_info}")
-    except Exception as e:
-        # The state will not reference this engine, so release its connections
-        if engine is not None:
-            engine.dispose()
-        error_msg = f"❌ Erro ao criar banco de dados: {e}"
-        logging.error(f"[DATABASE] {error_msg}")
-        state.update({
-            "success": False,
-            "message": error_msg,
-            "database_info": {},
-            "engine_id": "",
-            "db_id": ""
-        })
-    return state
-async def load_existing_database_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Load the existing SQLite database and describe it.
-    Args:
-        state: Current state
-    Returns:
-        The same state, updated in place with "success", "message",
-        "database_info", "engine_id" and "db_id" (empty values on failure)
-    """
-    # Bound before the try block so the failure path can release it
-    engine = None
-    try:
-        if not os.path.exists(SQL_DB_PATH):
-            raise ValueError("Banco de dados não encontrado")
-        engine = create_engine(f"sqlite:///{SQL_DB_PATH}")
-        db = create_sql_database(engine)
-        is_valid = validate_database(engine)
-        # Detailed information is best-effort: a failure here is reported
-        # inside database_info instead of failing the whole node
-        try:
-            sample_df = pd.read_sql_query("SELECT * FROM tabela LIMIT 5", engine)
-            total_records_df = pd.read_sql_query("SELECT COUNT(*) as count FROM tabela", engine)
-            # Plain int rather than numpy.int64, so the state stays serializable
-            total_records = int(total_records_df.iloc[0]['count'])
-            database_info = {
-                "path": SQL_DB_PATH,
-                "table_name": "tabela",
-                "total_records": total_records,
-                "columns": list(sample_df.columns),
-                "column_types": {col: str(dtype) for col, dtype in sample_df.dtypes.items()},
-                "is_valid": is_valid,
-                "sample_data": sample_df.head(3).to_dict()
-            }
-        except Exception as e:
-            logging.warning(f"Erro ao obter informações detalhadas do banco: {e}")
-            database_info = {
-                "path": SQL_DB_PATH,
-                "table_name": "tabela",
-                "is_valid": is_valid,
-                "error": str(e)
-            }
-        obj_manager = get_object_manager()
-        engine_id = obj_manager.store_engine(engine)
-        db_id = obj_manager.store_database(db)
-        state.update({
-            "success": True,
-            "message": "✅ Banco de dados existente carregado com sucesso",
-            "database_info": database_info,
-            "engine_id": engine_id,
-            "db_id": db_id
-        })
-        logging.info(f"[DATABASE] Banco existente carregado: {database_info}")
-    except Exception as e:
-        # The state will not reference this engine, so release its connections
-        if engine is not None:
-            engine.dispose()
-        error_msg = f"❌ Erro ao carregar banco existente: {e}"
-        logging.error(f"[DATABASE] {error_msg}")
-        state.update({
-            "success": False,
-            "message": error_msg,
-            "database_info": {},
-            "engine_id": "",
-            "db_id": ""
-        })
-    return state
-async def get_database_sample_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Read a small sample of rows from the database into the state.
-    Args:
-        state: State carrying "engine_id"
-    Returns:
-        The same state with "db_sample_dict" filled in; on failure that key
-        is an empty dict and "error" holds the message
-    """
-    try:
-        manager = get_object_manager()
-        engine_key = state.get("engine_id")
-        if not engine_key:
-            raise ValueError("ID da engine não encontrado")
-        db_engine = manager.get_engine(engine_key)
-        if not db_engine:
-            raise ValueError("Engine não encontrada")
-        # First 10 rows of the table
-        preview = pd.read_sql_query("SELECT * FROM tabela LIMIT 10", db_engine)
-        row_count = preview.shape[0]
-        # Plain-container view of the sample, stored directly on the state
-        state["db_sample_dict"] = {
-            "data": preview.to_dict('records'),
-            "columns": list(preview.columns),
-            "dtypes": preview.dtypes.astype(str).to_dict(),
-            "shape": preview.shape
-        }
-        logging.info(f"[DATABASE] Amostra obtida: {row_count} registros")
-    except Exception as e:
-        error_msg = f"Erro ao obter amostra do banco: {e}"
-        logging.error(f"[DATABASE] {error_msg}")
-        state["db_sample_dict"] = {}
-        state["error"] = error_msg
-    return state

nodes/graph_generation_node.py DELETED Viewed

@@ -1,1015 +0,0 @@
-"""
-Nó para geração de gráficos
-"""
-import io
-import logging
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
-from PIL import Image
-from typing import Dict, Any, Optional
-from utils.object_manager import get_object_manager
async def graph_generation_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Node that renders the chart for the graph type selected earlier in the flow.

    The node does nothing when no graph type was selected or when a previous
    step already recorded a graph error.

    Args:
        state: Current agent state. Reads "graph_type", "graph_error",
            "graph_data" (its "data_id" entry) and "user_input".
    Returns:
        The same state object, updated with "graph_image_id",
        "graph_generated" and "graph_error".
    """
    try:
        logging.info("[GRAPH_GENERATION] Iniciando geração de gráfico")
        # Nothing to do when no graph type was selected
        graph_type = state.get("graph_type")
        if not graph_type:
            logging.info("[GRAPH_GENERATION] Nenhum tipo de gráfico selecionado, pulando geração")
            return state
        # A previous step already failed: keep its error untouched
        if state.get("graph_error"):
            logging.info("[GRAPH_GENERATION] Erro anterior detectado, pulando geração")
            return state
        # "graph_data" can be present with value None, so `.get(key, {})`
        # is not enough to guarantee a dict here.
        graph_data = state.get("graph_data") or {}
        data_id = graph_data.get("data_id")
        if not data_id:
            return _register_graph_failure(state, "ID dos dados do gráfico não encontrado")
        # Fetch the DataFrame stored under that ID
        obj_manager = get_object_manager()
        df = obj_manager.get_object(data_id)
        if df is None or df.empty:
            return _register_graph_failure(state, "Dados do gráfico não encontrados ou vazios")
        # Chart title derived from the user question, truncated to 50 characters
        user_query = state.get("user_input") or ""
        title = f"Visualização: {user_query[:50]}..." if len(user_query) > 50 else f"Visualização: {user_query}"
        graph_image = await generate_graph(df, graph_type, title, user_query)
        if graph_image is None:
            return _register_graph_failure(state, f"Falha ao gerar gráfico do tipo {graph_type}")
        # Only the ID of the stored image goes into the state
        graph_image_id = obj_manager.store_object(graph_image, "graph_image")
        state.update({
            "graph_image_id": graph_image_id,
            "graph_generated": True,
            "graph_error": None
        })
        logging.info(f"[GRAPH_GENERATION] Gráfico gerado com sucesso: {graph_type}")
    except Exception as e:
        # Node boundary: report the failure in the state instead of raising
        _register_graph_failure(state, f"Erro na geração de gráfico: {e}")
    return state


def _register_graph_failure(state: Dict[str, Any], error_msg: str) -> Dict[str, Any]:
    """Log ``error_msg`` and mark the state as having no generated graph."""
    logging.error(f"[GRAPH_GENERATION] {error_msg}")
    state.update({
        "graph_error": error_msg,
        "graph_generated": False
    })
    return state
async def generate_graph(df: pd.DataFrame, graph_type: str, title: Optional[str] = None, user_query: Optional[str] = None) -> Optional[Image.Image]:
    """
    Generate a chart image from a DataFrame for the requested graph type.

    Args:
        df: DataFrame holding the data to plot
        graph_type: Graph type identifier (e.g. 'line_simple', 'pie');
            unknown types fall back to a vertical bar chart
        title: Chart title
        user_query: Original user question, forwarded to data preparation
    Returns:
        PIL image of the chart, or None when the data is empty or
        generation fails
    """
    logging.info(f"[GRAPH_GENERATION] Gerando gráfico tipo {graph_type}. DataFrame: {len(df)} linhas")
    if df.empty:
        logging.warning("[GRAPH_GENERATION] DataFrame vazio")
        return None
    try:
        # Select/sort the columns suited to this graph type
        prepared_df = prepare_data_for_graph_unified(df, graph_type, user_query)
        if prepared_df.empty:
            logging.warning("[GRAPH_GENERATION] DataFrame preparado está vazio")
            return None
        # Global matplotlib settings shared by every generator
        plt.style.use('default')
        colors = plt.cm.tab10.colors
        # Dispatch to the generator for the requested type
        if graph_type == 'line_simple':
            return await generate_line_simple(prepared_df, title, colors)
        elif graph_type == 'multiline':
            return await generate_multiline(prepared_df, title, colors)
        elif graph_type == 'area':
            return await generate_area(prepared_df, title, colors)
        elif graph_type == 'bar_vertical':
            return await generate_bar_vertical(prepared_df, title, colors)
        elif graph_type == 'bar_horizontal':
            return await generate_bar_horizontal(prepared_df, title, colors)
        elif graph_type == 'bar_grouped':
            return await generate_bar_grouped(prepared_df, title, colors)
        elif graph_type == 'bar_stacked':
            return await generate_bar_stacked(prepared_df, title, colors)
        elif graph_type == 'pie':
            return await generate_pie(prepared_df, title, colors)
        elif graph_type == 'donut':
            return await generate_donut(prepared_df, title, colors)
        elif graph_type == 'pie_multiple':
            return await generate_pie_multiple(prepared_df, title, colors)
        else:
            logging.warning(f"[GRAPH_GENERATION] Tipo '{graph_type}' não reconhecido, usando bar_vertical")
            return await generate_bar_vertical(prepared_df, title, colors)
    except Exception as e:
        logging.error(f"[GRAPH_GENERATION] Erro ao gerar gráfico: {e}")
        # A generator may have failed after opening a figure; pyplot keeps
        # figures alive until closed, so release them to avoid a leak.
        plt.close('all')
        return None
def analyze_dataframe_structure(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Classify every column of the DataFrame as numeric, date or categorical.

    Text columns count as numeric when more than 80% of their values parse
    as numbers (a comma is accepted as decimal separator). Text columns
    whose name contains a date hint count as dates when their first three
    values parse as datetimes.

    Args:
        df: DataFrame to analyse
    Returns:
        Dictionary with the column lists ('numeric_cols', 'date_cols',
        'categorical_cols'), 'total_cols' and the boolean flags
        'has_multiple_numerics', 'has_multiple_categoricals' and
        'is_suitable_for_grouping'
    """
    if df.empty:
        return {
            'numeric_cols': [],
            'date_cols': [],
            'categorical_cols': [],
            'total_cols': 0,
            'has_multiple_numerics': False,
            'has_multiple_categoricals': False,
            'is_suitable_for_grouping': False
        }
    numeric_cols = []
    date_cols = []
    categorical_cols = []
    buckets = {'numeric': numeric_cols, 'date': date_cols, 'categorical': categorical_cols}
    for col in df.columns:
        col_data = df[col]
        if pd.api.types.is_numeric_dtype(col_data):
            kind = 'numeric'
        elif col_data.dtype == 'object' or isinstance(col_data.dtype, pd.StringDtype):
            # Text may arrive as plain object dtype or as the dedicated string dtype
            kind = _classify_text_column(col, col_data)
        elif pd.api.types.is_datetime64_any_dtype(col_data):
            kind = 'date'
        else:
            kind = 'categorical'
        buckets[kind].append(col)
    return {
        'numeric_cols': numeric_cols,
        'date_cols': date_cols,
        'categorical_cols': categorical_cols,
        'total_cols': len(df.columns),
        'has_multiple_numerics': len(numeric_cols) >= 2,
        'has_multiple_categoricals': len(categorical_cols) >= 2,
        'is_suitable_for_grouping': len(categorical_cols) >= 2 or (len(categorical_cols) >= 1 and len(numeric_cols) >= 2)
    }


def _classify_text_column(col, col_data: pd.Series) -> str:
    """Return 'numeric', 'date' or 'categorical' for a text column."""
    date_hints = ('data', 'date', 'time', 'dia', 'mes', 'ano')
    try:
        as_numbers = pd.to_numeric(col_data.astype(str).str.replace(',', '.'), errors='coerce')
        if as_numbers.notna().sum() > len(col_data) * 0.8:  # more than 80% valid numbers
            return 'numeric'
        if any(hint in str(col).lower() for hint in date_hints):
            try:
                # Only a small head is probed to keep the check cheap
                pd.to_datetime(col_data.head(3), errors='raise')
                return 'date'
            except Exception:
                return 'categorical'
        return 'categorical'
    except Exception:
        # Best-effort classification: anything unexpected is treated as categorical
        return 'categorical'
def prepare_data_for_graph_unified(df: pd.DataFrame, graph_type: str, user_query: str = None) -> pd.DataFrame:
    """
    Single entry point that shapes a DataFrame for the requested graph type.

    Args:
        df: Original DataFrame (never modified; a copy is prepared)
        graph_type: Graph type identifier
        user_query: User question (accepted but not used by this function)
    Returns:
        DataFrame reduced to the columns suited to the graph type; an empty
        input is returned as-is
    """
    logging.info(f"[GRAPH_GENERATION] 🔧 Preparação UNIFICADA para {graph_type}")
    if df.empty:
        logging.warning("[GRAPH_GENERATION] DataFrame vazio")
        return df
    # Work on a copy and inspect its column types once
    working = df.copy()
    layout = analyze_dataframe_structure(working)
    nums = layout['numeric_cols']
    dates = layout['date_cols']
    cats = layout['categorical_cols']
    logging.info(f"[GRAPH_GENERATION] 📊 Estrutura: {len(nums)} numéricas, {len(dates)} datas, {len(cats)} categóricas")
    # Route to the preparation routine of each graph family
    if graph_type == 'multiline':
        return _prepare_for_multiline_graphs(working, layout)
    if graph_type in ('line_simple', 'area'):
        return _prepare_for_temporal_graphs(working, dates, nums, cats)
    if graph_type in ('bar_vertical', 'bar_horizontal'):
        return _prepare_for_simple_bar_graphs(working, cats, nums, graph_type)
    if graph_type in ('bar_grouped', 'bar_stacked'):
        return _prepare_for_grouped_graphs(working, layout, graph_type)
    if graph_type in ('pie', 'donut', 'pie_multiple'):
        return _prepare_for_pie_graphs(working, cats, nums, graph_type)
    logging.warning(f"[GRAPH_GENERATION] Tipo {graph_type} não reconhecido, usando preparação básica")
    return _prepare_basic_fallback(working, cats, nums)
-def _prepare_for_temporal_graphs(df: pd.DataFrame, date_cols: list, numeric_cols: list, categorical_cols: list) -> pd.DataFrame:
-    """Prepara dados para gráficos temporais (linha, área)"""
-    if date_cols and numeric_cols:
-        # Usar primeira coluna de data e primeira numérica
-        x_col, y_col = date_cols[0], numeric_cols[0]
-        result_df = df[[x_col, y_col]].sort_values(by=x_col)
-        logging.info(f"[GRAPH_GENERATION] 📅 Temporal: {x_col} (data) + {y_col} (numérica)")
-        return result_df
-    elif categorical_cols and numeric_cols:
-        # Usar primeira categórica e primeira numérica
-        x_col, y_col = categorical_cols[0], numeric_cols[0]
-        result_df = df[[x_col, y_col]].sort_values(by=y_col)
-        logging.info(f"[GRAPH_GENERATION] 📊 Categórico: {x_col} + {y_col}")
-        return result_df
-    else:
-        logging.warning("[GRAPH_GENERATION] Dados insuficientes para gráfico temporal")
-        return df
-def _prepare_for_simple_bar_graphs(df: pd.DataFrame, categorical_cols: list, numeric_cols: list, graph_type: str) -> pd.DataFrame:
-    """Prepara dados para gráficos de barras simples"""
-    if categorical_cols and numeric_cols:
-        x_col, y_col = categorical_cols[0], numeric_cols[0]
-        result_df = df[[x_col, y_col]].sort_values(by=y_col, ascending=False)
-        # Limitar categorias para barras verticais
-        if graph_type == 'bar_vertical' and len(result_df) > 15:
-            result_df = result_df.head(15)
-            logging.info(f"[GRAPH_GENERATION] 📊 Limitado a 15 categorias para {graph_type}")
-        logging.info(f"[GRAPH_GENERATION] 📊 Barras simples: {x_col} + {y_col}")
-        return result_df
-    else:
-        logging.warning("[GRAPH_GENERATION] Dados insuficientes para gráfico de barras")
-        return df
-def _prepare_for_grouped_graphs(df: pd.DataFrame, structure: dict, graph_type: str) -> pd.DataFrame:
-    """
-    FUNÇÃO CRÍTICA: Prepara dados para gráficos agrupados com lógica inteligente
-    """
-    numeric_cols = structure['numeric_cols']
-    categorical_cols = structure['categorical_cols']
-    has_multiple_numerics = structure['has_multiple_numerics']
-    has_multiple_categoricals = structure['has_multiple_categoricals']
-    logging.info(f"[GRAPH_GENERATION] 🎯 Preparando agrupado: {len(numeric_cols)} num, {len(categorical_cols)} cat")
-    if has_multiple_numerics:
-        # CENÁRIO 1: Múltiplas numéricas - usar primeira categórica + todas numéricas
-        cols_to_keep = [categorical_cols[0]] + numeric_cols
-        result_df = df[cols_to_keep]
-        logging.info(f"[GRAPH_GENERATION] ✅ Múltiplas numéricas: {cols_to_keep}")
-        return result_df
-    elif len(numeric_cols) == 1 and has_multiple_categoricals:
-        # CENÁRIO 2: 1 numérica + múltiplas categóricas - AGRUPAMENTO POR COR
-        # Usar TODAS as categóricas + a numérica
-        cols_to_keep = categorical_cols + numeric_cols
-        result_df = df[cols_to_keep]
-        logging.info(f"[GRAPH_GENERATION] ✅ Agrupamento por cor: {cols_to_keep}")
-        return result_df
-    elif len(numeric_cols) == 1 and len(categorical_cols) == 1:
-        # CENÁRIO 3: 1 numérica + 1 categórica - gráfico simples
-        cols_to_keep = categorical_cols + numeric_cols
-        result_df = df[cols_to_keep]
-        logging.info(f"[GRAPH_GENERATION] ⚠️ Dados simples para agrupado: {cols_to_keep}")
-        return result_df
-    else:
-        # CENÁRIO 4: Dados inadequados
-        logging.warning("[GRAPH_GENERATION] ❌ Dados inadequados para gráfico agrupado")
-        return df
-def _prepare_for_pie_graphs(df: pd.DataFrame, categorical_cols: list, numeric_cols: list, graph_type: str) -> pd.DataFrame:
-    """Prepara dados para gráficos de pizza"""
-    if categorical_cols and numeric_cols:
-        cat_col, val_col = categorical_cols[0], numeric_cols[0]
-        if graph_type == 'pie_multiple' and len(categorical_cols) >= 2:
-            # Para pizzas múltiplas, manter 2 categóricas + 1 numérica
-            result_df = df[[categorical_cols[0], categorical_cols[1], val_col]]
-            logging.info(f"[GRAPH_GENERATION] 🥧 Pizzas múltiplas: {result_df.columns.tolist()}")
-        else:
-            # Agrupar e somar valores para pizza simples/donut
-            result_df = df.groupby(cat_col)[val_col].sum().reset_index()
-            result_df = result_df.sort_values(by=val_col, ascending=False)
-            # Limitar a 10 categorias
-            if len(result_df) > 10:
-                top_9 = result_df.head(9)
-                others_sum = result_df.iloc[9:][val_col].sum()
-                if others_sum > 0:
-                    others_row = pd.DataFrame({cat_col: ['Outros'], val_col: [others_sum]})
-                    result_df = pd.concat([top_9, others_row], ignore_index=True)
-                else:
-                    result_df = top_9
-            logging.info(f"[GRAPH_GENERATION] 🥧 Pizza: {cat_col} + {val_col} ({len(result_df)} categorias)")
-        return result_df
-    else:
-        logging.warning("[GRAPH_GENERATION] Dados insuficientes para gráfico de pizza")
-        return df
-def _prepare_for_multiline_graphs(df: pd.DataFrame, structure: dict) -> pd.DataFrame:
-    """Prepara dados para gráficos de múltiplas linhas"""
-    date_cols = structure['date_cols']
-    numeric_cols = structure['numeric_cols']
-    categorical_cols = structure['categorical_cols']
-    if date_cols and len(numeric_cols) >= 2:
-        # Data + múltiplas numéricas
-        cols_to_keep = [date_cols[0]] + numeric_cols
-        result_df = df[cols_to_keep].sort_values(by=date_cols[0])
-        logging.info(f"[GRAPH_GENERATION] 📈 Multilinhas temporais: {cols_to_keep}")
-        return result_df
-    elif categorical_cols and len(numeric_cols) >= 2:
-        # Categórica + múltiplas numéricas
-        cols_to_keep = [categorical_cols[0]] + numeric_cols
-        result_df = df[cols_to_keep]
-        logging.info(f"[GRAPH_GENERATION] 📈 Multilinhas categóricas: {cols_to_keep}")
-        return result_df
-    else:
-        logging.warning("[GRAPH_GENERATION] Dados insuficientes para multilinhas")
-        return df
-def _prepare_basic_fallback(df: pd.DataFrame, categorical_cols: list, numeric_cols: list) -> pd.DataFrame:
-    """Preparação básica de fallback"""
-    if categorical_cols and numeric_cols:
-        result_df = df[[categorical_cols[0], numeric_cols[0]]]
-        logging.info(f"[GRAPH_GENERATION] 🔄 Fallback básico: {result_df.columns.tolist()}")
-        return result_df
-    else:
-        logging.warning("[GRAPH_GENERATION] Dados inadequados para qualquer gráfico")
-        return df
def save_plot_to_image() -> Image.Image:
    """
    Render the current matplotlib figure to a PIL image.

    The current figure is always closed afterwards, including when
    rendering fails, so that failed charts do not pile up in pyplot.

    Returns:
        PIL image opened from an in-memory PNG of the figure
    """
    buf = io.BytesIO()
    try:
        plt.savefig(buf, format='png', dpi=100, bbox_inches='tight', facecolor='white')
        buf.seek(0)
        # The image keeps reading from ``buf``, so the buffer is left open
        return Image.open(buf)
    finally:
        plt.close()  # Important: close the plot to release memory
-# ==================== FUNÇÕES DE GERAÇÃO ESPECÍFICAS ====================
async def generate_line_simple(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """Draw a single-series line chart from the first two columns of ``df``."""
    if len(df.columns) < 2:
        return None
    x_col, y_col = df.columns[:2]
    x_is_datetime = pd.api.types.is_datetime64_any_dtype(df[x_col])
    plt.figure(figsize=(12, 6))
    line_style = dict(marker='o', linewidth=2, color=colors[0])
    if x_is_datetime:
        # Real dates go straight on the axis with day/month/year tick labels
        plt.plot(df[x_col], df[y_col], **line_style)
        plt.gcf().autofmt_xdate()
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
    else:
        # Anything else is plotted by position and labelled with the X values
        positions = range(len(df))
        plt.plot(positions, df[y_col], **line_style)
        plt.xticks(positions, df[x_col], rotation=45, ha='right')
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.title(title or f"{y_col} por {x_col}")
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    return save_plot_to_image()
async def generate_multiline(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """Draw one line per numeric column, using the first column as the X axis."""
    if len(df.columns) < 2:
        return None
    x_col = df.columns[0]
    series_names = [name for name in df.columns[1:] if pd.api.types.is_numeric_dtype(df[name])]
    if not series_names:
        # No numeric series available: defer to the single-line chart
        return await generate_line_simple(df, title, colors)
    x_is_datetime = pd.api.types.is_datetime64_any_dtype(df[x_col])
    plt.figure(figsize=(12, 6))
    palette_size = len(colors)
    for index, name in enumerate(series_names):
        # Dates are plotted as-is; other X values are plotted by position
        x_values = df[x_col] if x_is_datetime else range(len(df))
        plt.plot(x_values, df[name], marker='o', linewidth=2,
                 label=name, color=colors[index % palette_size])
    if x_is_datetime:
        plt.gcf().autofmt_xdate()
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
    else:
        plt.xticks(range(len(df)), df[x_col], rotation=45, ha='right')
    plt.xlabel(x_col)
    plt.ylabel("Valores")
    plt.title(title or f"Comparação por {x_col}")
    plt.legend(title="Séries", loc='best')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    return save_plot_to_image()
async def generate_area(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """Draw a filled area chart from the first two columns of ``df``."""
    if len(df.columns) < 2:
        return None
    x_col, y_col = df.columns[:2]
    x_is_datetime = pd.api.types.is_datetime64_any_dtype(df[x_col])
    plt.figure(figsize=(12, 6))
    main_color = colors[0]
    # Dates are used directly on the axis; other X values are plotted by position
    x_values = df[x_col] if x_is_datetime else range(len(df))
    plt.fill_between(x_values, df[y_col], alpha=0.5, color=main_color)
    plt.plot(x_values, df[y_col], color=main_color, linewidth=2)
    if x_is_datetime:
        plt.gcf().autofmt_xdate()
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
    else:
        plt.xticks(range(len(df)), df[x_col], rotation=45, ha='right')
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.title(title or f"{y_col} por {x_col}")
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    return save_plot_to_image()
async def generate_bar_vertical(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """
    Draw a vertical bar chart from the first two columns of ``df``.

    Args:
        df: DataFrame whose first column holds the labels and second column
            the values (text values with a decimal comma are converted)
        title: Chart title; a default is built from the column names if empty
        colors: Colour sequence; only the first colour is used
    Returns:
        PIL image of the chart, or None when fewer than two columns are
        given or no numeric value remains after conversion
    """
    if len(df.columns) < 2:
        return None
    x_col, y_col = df.columns[0], df.columns[1]
    # Work on a copy so the conversion never touches the caller's data
    df_plot = df.copy()
    try:
        if not pd.api.types.is_numeric_dtype(df_plot[y_col]):
            # Convert text to numbers, treating commas as the decimal separator
            df_plot[y_col] = pd.to_numeric(df_plot[y_col].astype(str).str.replace(',', '.'), errors='coerce')
        # Drop rows whose value is not numeric
        df_plot = df_plot.dropna(subset=[y_col])
        if df_plot.empty:
            logging.error(f"[GRAPH_GENERATION] Nenhum valor numérico válido encontrado na coluna {y_col}")
            return None
    except Exception as e:
        logging.error(f"[GRAPH_GENERATION] Erro ao converter dados para numérico: {e}")
        return None
    plt.figure(figsize=(12, 8))
    bars = plt.bar(range(len(df_plot)), df_plot[y_col], color=colors[0])
    # Value labels above the bars (best effort: a failure only skips labels)
    try:
        max_value = df_plot[y_col].max()
        for bar in bars:
            # Bar heights can be NumPy scalars (e.g. int64), which are not
            # instances of the built-in int; normalise to float first.
            height = float(bar.get_height())
            if not pd.isna(height):
                plt.text(bar.get_x() + bar.get_width()/2., height + 0.02 * max_value,
                        f'{height:,.0f}', ha='center', fontsize=9)
    except Exception as e:
        logging.warning(f"[GRAPH_GENERATION] Erro ao adicionar valores nas barras: {e}")
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.title(title or f"{y_col} por {x_col}")
    plt.xticks(range(len(df_plot)), df_plot[x_col], rotation=45, ha='right')
    plt.grid(True, linestyle='--', alpha=0.7, axis='y')
    plt.tight_layout()
    return save_plot_to_image()
async def generate_bar_horizontal(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """
    Draw a horizontal bar chart from the first two columns of ``df``.

    Args:
        df: DataFrame whose first column holds the labels and second column
            the values (text values with a decimal comma are converted)
        title: Chart title; a default is built from the column names if empty
        colors: Colour sequence; only the first colour is used
    Returns:
        PIL image of the chart, or None when fewer than two columns are
        given or no numeric value remains after conversion
    """
    if len(df.columns) < 2:
        return None
    x_col, y_col = df.columns[0], df.columns[1]
    # Work on a copy so the conversion never touches the caller's data
    df_plot = df.copy()
    try:
        if not pd.api.types.is_numeric_dtype(df_plot[y_col]):
            # Convert text to numbers, treating commas as the decimal separator
            df_plot[y_col] = pd.to_numeric(df_plot[y_col].astype(str).str.replace(',', '.'), errors='coerce')
        # Drop rows whose value is not numeric
        df_plot = df_plot.dropna(subset=[y_col])
        if df_plot.empty:
            logging.error(f"[GRAPH_GENERATION] Nenhum valor numérico válido encontrado na coluna {y_col}")
            return None
    except Exception as e:
        logging.error(f"[GRAPH_GENERATION] Erro ao converter dados para numérico: {e}")
        return None
    # Figure height grows with the number of bars so labels stay readable
    plt.figure(figsize=(12, max(6, len(df_plot) * 0.4)))
    bars = plt.barh(range(len(df_plot)), df_plot[y_col], color=colors[0])
    # Value labels beside the bars (best effort: a failure only skips labels)
    try:
        max_value = df_plot[y_col].max()
        for bar in bars:
            # Bar widths can be NumPy scalars (e.g. int64), which are not
            # instances of the built-in int; normalise to float first.
            width = float(bar.get_width())
            if not pd.isna(width):
                plt.text(width + 0.02 * max_value, bar.get_y() + bar.get_height()/2.,
                        f'{width:,.0f}', va='center', fontsize=9)
    except Exception as e:
        logging.warning(f"[GRAPH_GENERATION] Erro ao adicionar valores nas barras: {e}")
    plt.xlabel(y_col)
    plt.ylabel(x_col)
    plt.title(title or f"{y_col} por {x_col}")
    plt.yticks(range(len(df_plot)), df_plot[x_col])
    plt.grid(True, linestyle='--', alpha=0.7, axis='x')
    plt.tight_layout()
    return save_plot_to_image()
async def generate_bar_grouped(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
    """
    Draw a grouped bar chart, choosing the layout from the column types.

    Layouts, in order of preference: several numeric columns side by side
    per label; one numeric column grouped by a second categorical column;
    otherwise a plain vertical bar chart.

    Args:
        df: Prepared DataFrame
        title: Chart title
        colors: Colour sequence used for the series
    Returns:
        PIL image of the chart, or None when fewer than two columns are given
        or the delegated generator fails
    """
    logging.info(f"[GRAPH_GENERATION] 🎯 Gerando barras agrupadas REFATORADO. Colunas: {df.columns.tolist()}")
    if len(df.columns) < 2:
        logging.warning("[GRAPH_GENERATION] ❌ Dados insuficientes para gráfico agrupado")
        return None
    # Inspect the column types to decide the layout
    structure = analyze_dataframe_structure(df)
    numeric_cols = structure['numeric_cols']
    categorical_cols = structure['categorical_cols']
    logging.info(f"[GRAPH_GENERATION] 📊 Estrutura: {len(numeric_cols)} numéricas, {len(categorical_cols)} categóricas")
    if not numeric_cols:
        logging.warning("[GRAPH_GENERATION] ❌ Nenhuma coluna numérica encontrada")
        return await generate_bar_vertical(df, title, colors)
    if len(numeric_cols) >= 2:
        # SCENARIO 1: several numerics - traditional grouped chart.
        # A label column is needed for the X axis; when there is no
        # categorical column use a date column, and if neither exists fall
        # back to the simple chart instead of indexing an empty list.
        label_candidates = list(categorical_cols) + list(structure['date_cols'])
        if not label_candidates:
            logging.warning("[GRAPH_GENERATION] ❌ Estrutura de dados inadequada para agrupamento")
            return await generate_bar_vertical(df, title, colors)
        return await _generate_multi_numeric_grouped(df, title, colors, label_candidates[0], numeric_cols)
    elif len(numeric_cols) == 1 and len(categorical_cols) >= 2:
        # SCENARIO 2: one numeric + several categoricals - grouping by colour
        return await _generate_color_grouped_bars(df, title, colors, categorical_cols, numeric_cols[0])
    elif len(numeric_cols) == 1 and len(categorical_cols) == 1:
        # SCENARIO 3: simple data - fall back to vertical bars
        logging.info("[GRAPH_GENERATION] ⚠️ Dados simples, usando barras verticais")
        return await generate_bar_vertical(df, title, colors)
    else:
        # SCENARIO 4: structure not suited to grouping
        logging.warning("[GRAPH_GENERATION] ❌ Estrutura de dados inadequada para agrupamento")
        return await generate_bar_vertical(df, title, colors)
-async def _generate_multi_numeric_grouped(df: pd.DataFrame, title: str, colors, x_col: str, y_cols: list) -> Optional[Image.Image]:
-    """
-    Gera gráfico agrupado com múltiplas colunas numéricas (cenário tradicional)
-    """
-    logging.info(f"[GRAPH_GENERATION] 📊 Múltiplas numéricas: {x_col} + {y_cols}")
-    # Converter colunas numéricas se necessário
-    for col in y_cols:
-        if df[col].dtype == 'object':
-            df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', '.'), errors='coerce')
-    # Remover linhas com valores NaN
-    df_clean = df.dropna(subset=y_cols)
-    if df_clean.empty:
-        logging.error("[GRAPH_GENERATION] ❌ Todos os valores são NaN após conversão")
-        return None
-    # Verificar diferença de escala entre colunas
-    col_ranges = {col: df_clean[col].max() - df_clean[col].min() for col in y_cols}
-    max_range = max(col_ranges.values())
-    min_range = min(col_ranges.values())
-    if max_range > 0 and min_range > 0 and (max_range / min_range) > 100:
-        # Escalas muito diferentes - usar eixos duplos
-        logging.info("[GRAPH_GENERATION] 📊 Escalas diferentes, usando eixos duplos")
-        return await _generate_dual_axis_chart(df_clean, title, colors, x_col, y_cols[0], y_cols[1])
-    # Gráfico agrupado normal
-    x_pos = np.arange(len(df_clean))
-    width = 0.8 / len(y_cols)
-    fig, ax = plt.subplots(figsize=(14, 8))
-    for i, col in enumerate(y_cols):
-        offset = width * i - width * (len(y_cols) - 1) / 2
-        bars = ax.bar(x_pos + offset, df_clean[col], width, label=col,
-                     color=colors[i % len(colors)], alpha=0.8)
-        # Adicionar valores nas barras
-        for bar in bars:
-            height = bar.get_height()
-            if height > 0:
-                ax.text(bar.get_x() + bar.get_width()/2., height + height * 0.02,
-                       f'{height:.0f}', ha='center', fontsize=8)
-    ax.set_xlabel(x_col)
-    ax.set_ylabel('Valores')
-    ax.set_title(title or f"Comparação de {', '.join(y_cols)} por {x_col}")
-    ax.set_xticks(x_pos)
-    ax.set_xticklabels(df_clean[x_col], rotation=45, ha='right')
-    ax.legend()
-    ax.grid(True, linestyle='--', alpha=0.7, axis='y')
-    plt.tight_layout()
-    logging.info(f"[GRAPH_GENERATION] ✅ Gráfico agrupado tradicional criado: {len(y_cols)} métricas")
-    return save_plot_to_image()
-async def _generate_color_grouped_bars(df: pd.DataFrame, title: str, colors, categorical_cols: list, y_col: str) -> Optional[Image.Image]:
-    """
-    Gera gráfico agrupado por cor usando múltiplas categóricas (CENÁRIO CRÍTICO)
-    """
-    x_col = categorical_cols[0]
-    group_col = categorical_cols[1] if len(categorical_cols) > 1 else None
-    logging.info(f"[GRAPH_GENERATION] 🎨 Agrupamento por cor: {x_col} (X) + {y_col} (Y) + {group_col} (cor)")
-    if not group_col:
-        logging.warning("[GRAPH_GENERATION] ⚠️ Sem coluna para agrupamento, usando gráfico simples")
-        return await generate_bar_vertical(df[[x_col, y_col]], title, colors)
-    # Converter coluna numérica se necessário
-    if df[y_col].dtype == 'object':
-        df[y_col] = pd.to_numeric(df[y_col].astype(str).str.replace(',', '.'), errors='coerce')
-    # Remover linhas com valores NaN
-    df_clean = df.dropna(subset=[y_col])
-    if df_clean.empty:
-        logging.error("[GRAPH_GENERATION] ❌ Todos os valores são NaN após conversão")
-        return None
-    # Obter categorias únicas
-    unique_groups = df_clean[group_col].unique()
-    unique_x = df_clean[x_col].unique()
-    logging.info(f"[GRAPH_GENERATION] 🎯 Grupos: {unique_groups} | X: {len(unique_x)} categorias")
-    # Configurar gráfico
-    x_pos = np.arange(len(unique_x))
-    width = 0.8 / len(unique_groups)
-    fig, ax = plt.subplots(figsize=(14, 8))
-    # Criar barras para cada grupo
-    for i, group in enumerate(unique_groups):
-        group_data = df_clean[df_clean[group_col] == group]
-        # Criar array de valores para cada posição X
-        values = []
-        for x_val in unique_x:
-            matching_rows = group_data[group_data[x_col] == x_val]
-            if not matching_rows.empty:
-                values.append(matching_rows[y_col].iloc[0])
-            else:
-                values.append(0)
-        # Calcular posição das barras
-        offset = width * i - width * (len(unique_groups) - 1) / 2
-        bars = ax.bar(x_pos + offset, values, width, label=f"{group_col}: {group}",
-                     color=colors[i % len(colors)], alpha=0.8)
-        # Adicionar valores nas barras
-        for bar, value in zip(bars, values):
-            if value > 0:
-                ax.text(bar.get_x() + bar.get_width()/2., value + value * 0.02,
-                       f'{value:.0f}', ha='center', fontsize=8)
-    # Configurações do gráfico
-    ax.set_xlabel(x_col)
-    ax.set_ylabel(y_col)
-    ax.set_title(title or f"{y_col} por {x_col} (agrupado por {group_col})")
-    ax.set_xticks(x_pos)
-    ax.set_xticklabels(unique_x, rotation=45, ha='right')
-    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
-    ax.grid(True, linestyle='--', alpha=0.7, axis='y')
-    plt.tight_layout()
-    logging.info(f"[GRAPH_GENERATION] ✅ Gráfico agrupado por cor criado: {len(unique_groups)} grupos")
-    return save_plot_to_image()
async def _generate_dual_axis_chart(df: pd.DataFrame, title: str, colors, x_col: str, y1_col: str, y2_col: str) -> Optional[Image.Image]:
    """
    Draw two metrics as side-by-side bars, each on its own Y axis.

    ``y1_col`` uses the left axis and ``y2_col`` the right axis.
    """
    logging.info(f"[GRAPH_GENERATION] 📊 Eixos duplos: {y1_col} (esq) + {y2_col} (dir)")
    figure, left_axis = plt.subplots(figsize=(14, 8))
    slots = np.arange(len(df))
    bar_width = 0.35
    half = bar_width/2
    # Left Y axis: first metric, drawn left of each tick
    left_bars = left_axis.bar(slots - half, df[y1_col], bar_width, label=y1_col,
                              color=colors[0], alpha=0.8)
    left_axis.set_xlabel(x_col)
    left_axis.set_ylabel(y1_col, color=colors[0])
    left_axis.tick_params(axis='y', labelcolor=colors[0])
    # Right Y axis: second metric, drawn right of each tick
    right_axis = left_axis.twinx()
    right_bars = right_axis.bar(slots + half, df[y2_col], bar_width, label=y2_col,
                                color=colors[1], alpha=0.8)
    right_axis.set_ylabel(y2_col, color=colors[1])
    right_axis.tick_params(axis='y', labelcolor=colors[1])
    # Shared X ticks and grid live on the left axis
    left_axis.set_xticks(slots)
    left_axis.set_xticklabels(df[x_col], rotation=45, ha='right')
    left_axis.grid(True, linestyle='--', alpha=0.7, axis='y')
    # Value labels on positive bars, left axis first and then right axis
    for axis, rectangles in ((left_axis, left_bars), (right_axis, right_bars)):
        for rectangle in rectangles:
            top = rectangle.get_height()
            if top > 0:
                axis.text(rectangle.get_x() + rectangle.get_width()/2., top + top * 0.02,
                          f'{top:.0f}', ha='center', fontsize=8)
    plt.title(title or f"{y1_col} e {y2_col} por {x_col}")
    plt.tight_layout()
    logging.info(f"[GRAPH_GENERATION] ✅ Gráfico com eixos duplos criado: {y1_col} + {y2_col}")
    return save_plot_to_image()
-# Função removida - substituída pela nova lógica unificada
-# Função removida - substituída pela nova lógica unificada em _generate_color_grouped_bars()
-async def generate_bar_stacked(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
-    """
-    Generate a stacked bar chart with one stack per row and one segment per numeric column.
-    Args:
-        df: Data to plot. The first column supplies the X labels; every numeric
-            column after it becomes one layer of the stack.
-        title: Chart title; a default based on the X column is used when falsy.
-        colors: Indexable palette, cycled when there are more layers than colors.
-    Returns:
-        Whatever save_plot_to_image() returns, or the result of
-        generate_bar_vertical() when there are fewer than three columns or no
-        numeric value column.
-    """
-    if len(df.columns) < 3:
-        return await generate_bar_vertical(df, title, colors)
-    x_col = df.columns[0]
-    y_cols = [col for col in df.columns[1:] if pd.api.types.is_numeric_dtype(df[col])]
-    if not y_cols:
-        return await generate_bar_vertical(df, title, colors)
-    fig, ax = plt.subplots(figsize=(12, 8))
-    positions = range(len(df))
-    bottom = np.zeros(len(df))
-    for i, col in enumerate(y_cols):
-        bars = ax.bar(positions, df[col], bottom=bottom, label=col, color=colors[i % len(colors)])
-        # Label each positive segment at its vertical centre
-        for j, bar in enumerate(bars):
-            height = bar.get_height()
-            # Heights come back as NumPy scalars; np.int64 is not a Python int,
-            # so NumPy integer/float types must be accepted explicitly or
-            # integer columns would never be labelled.
-            if isinstance(height, (int, float, np.integer, np.floating)) and height > 0:
-                ax.text(bar.get_x() + bar.get_width()/2., bottom[j] + height/2,
-                        f'{height:.2f}', ha='center', va='center', fontsize=8, color='white')
-        # Missing values contribute nothing to the running stack height
-        bottom += df[col].fillna(0)
-    ax.set_xlabel(x_col)
-    ax.set_ylabel('Valores')
-    ax.set_title(title or f"Distribuição por {x_col}")
-    ax.set_xticks(positions)
-    ax.set_xticklabels(df[x_col], rotation=45, ha='right')
-    ax.legend()
-    plt.tight_layout()
-    return save_plot_to_image()
-async def generate_pie(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
-    """
-    Generate a pie chart from the first two columns of df.
-    Args:
-        df: Data to plot; column 0 holds the slice labels, column 1 the values.
-        title: Chart title; a default built from the column names is used when falsy.
-        colors: Palette, sliced to the number of slices drawn.
-    Returns:
-        None when df has fewer than two columns; the result of
-        generate_bar_vertical() when no positive numeric value is left or the
-        conversion fails; otherwise whatever save_plot_to_image() returns.
-    """
-    if len(df.columns) < 2:
-        return None
-    label_col, value_col = df.columns[:2]
-    # Work on a copy so the caller's frame is never modified
-    df_plot = df.copy()
-    try:
-        raw_values = df_plot[value_col]
-        if raw_values.dtype == 'object':
-            # Text values: treat a comma as the decimal separator, then parse
-            as_text = raw_values.astype(str).str.replace(',', '.')
-            df_plot[value_col] = pd.to_numeric(as_text, errors='coerce')
-        # Keep only rows whose value is a number greater than zero
-        df_plot = df_plot.dropna(subset=[value_col])
-        is_positive = df_plot[value_col] > 0
-        df_plot = df_plot[is_positive]
-        if df_plot.empty:
-            logging.error(f"[GRAPH_GENERATION] Nenhum valor numérico positivo encontrado na coluna {value_col}")
-            return await generate_bar_vertical(df, title, colors)
-    except Exception as e:
-        logging.error(f"[GRAPH_GENERATION] Erro ao converter dados para numérico: {e}")
-        return await generate_bar_vertical(df, title, colors)
-    plt.figure(figsize=(10, 10))
-    # Each label shows the category, its absolute value and its share of the total
-    total = df_plot[value_col].sum()
-    labels = []
-    for label, val in zip(df_plot[label_col], df_plot[value_col]):
-        labels.append(f'{label} ({val:,.0f}, {val/total:.1%})')
-    slice_colors = colors[:len(df_plot)]
-    plt.pie(df_plot[value_col], labels=labels, autopct='%1.1f%%',
-            startangle=90, shadow=False, colors=slice_colors)
-    plt.axis('equal')
-    plt.title(title or f"Distribuição de {value_col} por {label_col}")
-    plt.tight_layout()
-    return save_plot_to_image()
-async def generate_donut(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
-    """
-    Generate a donut chart (a pie drawn as a ring) from the first two columns of df.
-    Args:
-        df: Data to plot; column 0 holds the slice labels, column 1 the values.
-        title: Chart title; a default built from the column names is used when falsy.
-        colors: Palette, sliced to the number of slices drawn.
-    Returns:
-        None when df has fewer than two columns; the result of
-        generate_bar_vertical() when no positive numeric value is left or the
-        conversion fails; otherwise whatever save_plot_to_image() returns.
-    """
-    if len(df.columns) < 2:
-        return None
-    label_col = df.columns[0]
-    value_col = df.columns[1]
-    # Operate on a copy; the original frame is still needed for the fallback chart
-    df_plot = df.copy()
-    try:
-        if df_plot[value_col].dtype == 'object':
-            # Strings are parsed as numbers with the comma read as decimal separator
-            normalized = df_plot[value_col].astype(str).str.replace(',', '.')
-            df_plot[value_col] = pd.to_numeric(normalized, errors='coerce')
-        # Discard rows that are non-numeric, negative or zero
-        df_plot = df_plot.dropna(subset=[value_col])
-        df_plot = df_plot[df_plot[value_col] > 0]
-        if df_plot.empty:
-            logging.error(f"[GRAPH_GENERATION] Nenhum valor numérico positivo encontrado na coluna {value_col}")
-            return await generate_bar_vertical(df, title, colors)
-    except Exception as e:
-        logging.error(f"[GRAPH_GENERATION] Erro ao converter dados para numérico: {e}")
-        return await generate_bar_vertical(df, title, colors)
-    plt.figure(figsize=(10, 10))
-    # Labels carry the category, the absolute value and the share of the total
-    total = df_plot[value_col].sum()
-    pairs = zip(df_plot[label_col], df_plot[value_col])
-    labels = [f'{label} ({val:,.0f}, {val/total:.1%})' for label, val in pairs]
-    # A wedge width below 1 leaves the centre empty, turning the pie into a ring
-    ring_style = dict(width=0.5)
-    plt.pie(df_plot[value_col], labels=labels, autopct='%1.1f%%',
-            startangle=90, shadow=False, colors=colors[:len(df_plot)],
-            wedgeprops=ring_style)
-    plt.axis('equal')
-    plt.title(title or f"Distribuição de {value_col} por {label_col}")
-    plt.tight_layout()
-    return save_plot_to_image()
-async def generate_pie_multiple(df: pd.DataFrame, title: str, colors) -> Optional[Image.Image]:
-    """
-    Generate a grid of pie charts, one per distinct value of the first column.
-    Args:
-        df: Data to plot. Column 0 selects the pie, column 1 names the slices
-            and column 2 holds the numeric values (summed per pair).
-        title: Figure title; a default built from the column names is used when falsy.
-        colors: Palette, sliced to the number of slices of each pie.
-    Returns:
-        The result of generate_pie() when df has fewer than three columns, the
-        result of generate_bar_grouped() when the value column is not numeric,
-        None when grouping yields no rows, otherwise whatever
-        save_plot_to_image() returns.
-    """
-    if len(df.columns) < 3:
-        return await generate_pie(df, title, colors)
-    cat1, cat2, val_col = df.columns[0], df.columns[1], df.columns[2]
-    # The value column must be numeric to be summed and drawn as slices
-    if not pd.api.types.is_numeric_dtype(df[val_col]):
-        return await generate_bar_grouped(df, title, colors)
-    # One row per cat1 value, one column per cat2 value
-    grouped = df.groupby([cat1, cat2])[val_col].sum().unstack().fillna(0)
-    n_groups = len(grouped)
-    if n_groups == 0:
-        return None
-    # Grid layout: at most 3 columns, rows rounded up
-    cols = min(3, n_groups)
-    rows = (n_groups + cols - 1) // cols
-    # squeeze=False always yields a 2-D array of axes, so flatten() is safe
-    # even for a 1x1 grid
-    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
-    axes = axes.flatten()
-    for i, (group_name, group_data) in enumerate(grouped.iterrows()):
-        # Only strictly positive values are drawn as slices
-        data = group_data[group_data > 0]
-        if data.empty:
-            # Nothing to draw for this group: hide the cell instead of leaving
-            # an empty framed subplot in the grid
-            axes[i].axis('off')
-            continue
-        total = data.sum()
-        # Labels carry the slice name, its value and its share of the pie
-        labels = [f'{idx} ({val:.2f}, {val/total:.1%})' for idx, val in data.items()]
-        axes[i].pie(data, labels=labels, autopct='%1.1f%%',
-                    startangle=90, colors=colors[:len(data)])
-        axes[i].set_title(f"{group_name}")
-        axes[i].axis('equal')
-    # Hide the grid cells that have no group assigned
-    for unused_ax in axes[n_groups:]:
-        unused_ax.axis('off')
-    plt.suptitle(title or f"Distribuição de {val_col} por {cat2} para cada {cat1}", fontsize=16)
-    plt.tight_layout()
-    plt.subplots_adjust(top=0.9)
-    return save_plot_to_image()

nodes/graph_selection_node.py DELETED Viewed

@@ -1,147 +0,0 @@
-"""
-Nó para seleção do tipo de gráfico usando LLM - REFATORADO COMPLETO
-"""
-import logging
-import re
-import pandas as pd
-from typing import Dict, Any, Optional
-from agents.tools import (
-    generate_graph_type_context,
-    extract_sql_query_from_response
-)
-from utils.config import OPENAI_API_KEY
-from langchain_openai import ChatOpenAI
-from utils.object_manager import get_object_manager
-# Mapping kept DIRECTLY in this file to avoid depending on external modules.
-# Keys are the option numbers (as strings) matched in the LLM reply by
-# call_llm_for_graph_selection(); values are the chart-type identifiers.
-GRAPH_TYPE_MAPPING = {
-    "1": "line_simple",
-    "2": "multiline",
-    "3": "area",
-    "4": "bar_vertical",
-    "5": "bar_horizontal",
-    "6": "bar_grouped",
-    "7": "bar_stacked",
-    "8": "pie",
-    "9": "donut",
-    "10": "pie_multiple"
-}
-async def graph_selection_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Choose a chart type for a graphic query by re-running its SQL and asking the LLM.
-    Args:
-        state: Graph state. Reads "query_type", "sql_query_extracted",
-            "response", "engine_id" and "user_input".
-    Returns:
-        The same state dict. States whose "query_type" is not
-        "sql_query_graphic" are returned untouched. On success the state gains
-        "graph_type", "graph_data" (object-manager ID of the result frame,
-        column names, row count and a 3-row sample) and "graph_error" = None.
-        On any failure "graph_error" holds the reason and "graph_generated"
-        is set to False.
-    """
-    logging.info("[GRAPH_SELECTION_NEW] 🚀 Iniciando seleção REFATORADA")
-    try:
-        # 1. Basic checks: only graphic queries need a chart type
-        if state.get("query_type") != "sql_query_graphic":
-            logging.info("[GRAPH_SELECTION_NEW] Query não requer gráfico")
-            return state
-        # 2. Get the SQL query, falling back to parsing it out of the agent response
-        sql_query = state.get("sql_query_extracted")
-        if not sql_query:
-            sql_query = extract_sql_query_from_response(state.get("response", ""))
-        if not sql_query:
-            logging.error("[GRAPH_SELECTION_NEW] ❌ SQL query não encontrada")
-            state.update({"graph_error": "SQL query não encontrada", "graph_generated": False})
-            return state
-        # 3. Resolve the database engine from its ID
-        obj_manager = get_object_manager()
-        engine = obj_manager.get_engine(state.get("engine_id"))
-        if not engine:
-            logging.error("[GRAPH_SELECTION_NEW] ❌ Engine não encontrada")
-            state.update({"graph_error": "Engine não encontrada", "graph_generated": False})
-            return state
-        # 4. Run the query to obtain the data the chart will be built from
-        try:
-            df_result = pd.read_sql_query(sql_query, engine)
-            if df_result.empty:
-                logging.error("[GRAPH_SELECTION_NEW] ❌ Dados vazios")
-                state.update({"graph_error": "Dados vazios", "graph_generated": False})
-                return state
-        except Exception as e:
-            logging.error(f"[GRAPH_SELECTION_NEW] ❌ Erro na query: {e}")
-            state.update({"graph_error": f"Erro na query: {e}", "graph_generated": False})
-            return state
-        # 5. Build the prompt context from the question, the SQL and a small sample
-        user_query = state.get("user_input", "")
-        df_sample = df_result.head(3)
-        graph_context = generate_graph_type_context(user_query, sql_query, df_result.columns.tolist(), df_sample)
-        # 6. Ask the LLM for the chart type
-        graph_type = await call_llm_for_graph_selection(graph_context, user_query)
-        # A successful selection is routine information, not an error
-        logging.info(f"🎯 [RESULTADO_FINAL] Tipo selecionado: '{graph_type}'")
-        # 7. Store the full frame in the object manager; the state keeps only its ID
-        graph_data_id = obj_manager.store_object(df_result, "graph_data")
-        state.update({
-            "graph_type": graph_type,
-            "graph_data": {
-                "data_id": graph_data_id,
-                "columns": df_result.columns.tolist(),
-                "rows": len(df_result),
-                "sample": df_sample.to_dict()
-            },
-            "graph_error": None
-        })
-        return state
-    except Exception as e:
-        logging.error(f"[GRAPH_SELECTION_NEW] ❌ Erro geral: {e}")
-        state.update({"graph_error": f"Erro geral: {e}", "graph_generated": False})
-        return state
-async def call_llm_for_graph_selection(graph_context: str, user_query: str) -> str:
-    """
-    Ask the LLM which chart type to use and map its numeric answer to a type name.
-    Args:
-        graph_context: Full prompt sent to the model.
-        user_query: Original user question; currently unused, kept so the
-            signature stays compatible with existing callers.
-    Returns:
-        The GRAPH_TYPE_MAPPING entry for the first number from 1 to 10 found in
-        the reply. Falls back to "line_simple" when OPENAI_API_KEY is not set,
-        when no such number is found, or when the call raises.
-    """
-    logging.info("🔥 [LLM_CALL] Iniciando chamada LIMPA da LLM")
-    # Without an API key the call cannot succeed; use the default chart type
-    if not OPENAI_API_KEY:
-        logging.error("🔥 [LLM_CALL] OpenAI não configurada")
-        return "line_simple"
-    try:
-        # Deterministic, very short completion: only an option number is expected
-        llm = ChatOpenAI(
-            model="gpt-4o",
-            temperature=0,
-            max_tokens=5,
-            timeout=30
-        )
-        # The prompt is verbose, so it is only emitted at debug level
-        logging.debug("🔥 [LLM_CALL] Contexto enviado:")
-        logging.debug(f"'{graph_context}...'")
-        # Use the async API so the event loop is not blocked while waiting
-        real_response = await llm.ainvoke(graph_context)
-        real_content = real_response.content.strip()
-        logging.info(f"🔥 [LLM_CALL] Resposta REAL: '{real_content}'")
-        # Extract the first standalone number between 1 and 10 from the reply
-        number_match = re.search(r'\b([1-9]|10)\b', real_content)
-        if number_match:
-            number = number_match.group(0)
-            graph_type = GRAPH_TYPE_MAPPING.get(number, "line_simple")
-            logging.info(f"🔥 [LLM_CALL] Número: {number} → Tipo: {graph_type}")
-            return graph_type
-        else:
-            logging.warning(f"🔥 [LLM_CALL] Número não encontrado em: '{real_content}'")
-            return "line_simple"
-    except Exception as e:
-        logging.error(f"🔥 [LLM_CALL] ERRO: {e}")
-        return "line_simple"

nodes/query_node.py DELETED Viewed

@@ -1,232 +0,0 @@
-"""
-Nó para processamento de consultas SQL
-"""
-import time
-import logging
-import pandas as pd
-from typing import Dict, Any, TypedDict
-from agents.tools import is_greeting, detect_query_type, prepare_sql_context
-from agents.sql_agent import SQLAgentManager
-from utils.object_manager import get_object_manager
-class QueryState(TypedDict):
-    """State used while processing a query."""
-    # Raw question typed by the user
-    user_input: str
-    # Name of the model chosen for this run
-    selected_model: str
-    # Text returned to the user
-    response: str
-    # Processing time, in seconds
-    execution_time: float
-    # Error message.
-    # NOTE(review): the node functions in this file store None here on success,
-    # so Optional[str] may be the intended type - confirm.
-    error: str
-    # Intermediate steps reported by the SQL agent
-    intermediate_steps: list
-    # NOTE(review): not read or written by the node functions in this file - confirm usage
-    llama_instruction: str
-    # Raw result dict returned by the SQL agent
-    sql_result: dict
-async def process_user_query_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Run the main user-query pipeline: greeting shortcut, cache lookup, SQL agent call.
-    Args:
-        state: Current state. "user_input" and "selected_model" are read by
-            direct indexing before the try block, so a missing key raises
-            KeyError; "cache_id", "db_sample_dict" and "agent_id" are read
-            with .get().
-    Returns:
-        The same state dict, updated in place with "response", "error" and
-        "execution_time" and, depending on the path taken, "query_type",
-        "sql_context", "sql_result", "intermediate_steps" and
-        "sql_query_extracted".
-    """
-    started_at = time.time()
-    user_input = state["user_input"]
-    # Not used below, but the lookup is kept so a missing key still raises
-    selected_model = state["selected_model"]
-    logging.info(f"[QUERY] Processando: {user_input[:50]}...")
-    def elapsed() -> float:
-        # Seconds spent since this node started
-        return time.time() - started_at
-    try:
-        # Greetings are answered directly, without touching the database
-        if is_greeting(user_input):
-            state.update({
-                "response": "Olá! Estou aqui para ajudar com suas consultas. Pergunte algo relacionado aos dados carregados no agente!",
-                "execution_time": elapsed(),
-                "error": None
-            })
-            return state
-        obj_manager = get_object_manager()
-        # The cache manager is optional: it is only resolved when an ID is present
-        cache_id = state.get("cache_id")
-        cache_manager = None
-        if cache_id:
-            cache_manager = obj_manager.get_cache_manager(cache_id)
-        # Serve from the cache when a stored answer exists
-        if cache_manager:
-            cached_response = cache_manager.get_cached_response(user_input)
-            if cached_response:
-                logging.info(f"[CACHE] Retornando resposta do cache")
-                state.update({
-                    "response": cached_response,
-                    "execution_time": elapsed(),
-                    "error": None
-                })
-                return state
-        # Rebuild the database sample as a DataFrame
-        sample_payload = state.get("db_sample_dict", {})
-        if not sample_payload:
-            raise ValueError("Amostra do banco não disponível")
-        db_sample = pd.DataFrame(sample_payload.get("data", []))
-        if db_sample.empty:
-            raise ValueError("Dados de amostra vazios")
-        # Classify the query; only SQL-based types are handled for now
-        query_type = detect_query_type(user_input)
-        state["query_type"] = query_type
-        if query_type not in ('sql_query', 'sql_query_graphic'):
-            # Reserved for future types (prediction)
-            error_msg = f"Tipo de query '{query_type}' ainda não implementado."
-            state.update({
-                "error": error_msg,
-                "response": error_msg,
-                "execution_time": elapsed()
-            })
-            return state
-        # Context that is sent straight to the SQL agent
-        sql_context = prepare_sql_context(user_input, db_sample)
-        state["sql_context"] = sql_context
-        logging.info(f"[DEBUG] Tipo de query detectado: {query_type}")
-        logging.info(f"[DEBUG] Contexto preparado para agentSQL:\n{sql_context}\n")
-        # Resolve the SQL agent from its ID
-        agent_id = state.get("agent_id")
-        if not agent_id:
-            raise ValueError("ID do agente SQL não encontrado")
-        sql_agent = obj_manager.get_sql_agent(agent_id)
-        if not sql_agent:
-            raise ValueError("Agente SQL não encontrado")
-        sql_result = await sql_agent.execute_query(state["sql_context"])
-        succeeded = sql_result["success"]
-        if succeeded:
-            # Keep the SQL statement captured by the agent for later nodes
-            captured_sql = sql_result.get("sql_query")
-            state.update({
-                "response": sql_result["output"],
-                "intermediate_steps": sql_result["intermediate_steps"],
-                "sql_result": sql_result,
-                "sql_query_extracted": captured_sql,
-                "error": None
-            })
-            if not captured_sql:
-                logging.warning("[QUERY] ⚠️ Nenhuma query SQL foi capturada pelo handler")
-        else:
-            # The agent output doubles as error text and user-facing response
-            state.update({
-                "error": sql_result["output"],
-                "response": sql_result["output"],
-                "sql_result": sql_result
-            })
-        # Only successful answers are cached
-        if cache_manager and succeeded:
-            cache_manager.cache_response(user_input, state["response"])
-        state["execution_time"] = elapsed()
-        logging.info(f"[QUERY] Concluído em {state['execution_time']:.2f}s")
-    except Exception as e:
-        error_msg = f"Erro ao processar query: {e}"
-        logging.error(f"[QUERY] {error_msg}")
-        state.update({
-            "error": error_msg,
-            "response": error_msg,
-            "execution_time": elapsed()
-        })
-    return state
-async def validate_query_input_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Validate the user's question before it is processed.
-    Args:
-        state: State holding the question under "user_input".
-    Returns:
-        The same state dict. An empty (after stripping) or over-1000-character
-        input sets "error", a user-facing "response" and "execution_time" = 0.0;
-        a valid input only resets "error" to None.
-    """
-    question = state.get("user_input", "").strip()
-    # (error, response) pair describing why the input is rejected, if it is
-    rejection = None
-    if not question:
-        rejection = ("Entrada vazia", "Por favor, digite uma pergunta.")
-    elif len(question) > 1000:
-        rejection = ("Entrada muito longa", "Pergunta muito longa. Por favor, seja mais conciso.")
-    if rejection is not None:
-        error_text, reply_text = rejection
-        state.update({
-            "error": error_text,
-            "response": reply_text,
-            "execution_time": 0.0
-        })
-        return state
-    # Validation passed
-    state["error"] = None
-    logging.info(f"[VALIDATION] Entrada validada: {len(question)} caracteres")
-    return state
-async def prepare_query_context_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Check that every object the query needs can be resolved from its ID.
-    Args:
-        state: Current state; must carry "agent_id", "engine_id" and "cache_id".
-    Returns:
-        The same state dict with "context_ready" set to True when all three
-        objects are found, or "error" set and "context_ready" set to False
-        when an ID is missing or an object cannot be resolved.
-    """
-    # Name of the object-manager getter that resolves each ID, in check order
-    getter_by_id = {
-        "agent_id": "get_sql_agent",
-        "engine_id": "get_engine",
-        "cache_id": "get_cache_manager",
-    }
-    try:
-        required_ids = list(getter_by_id)
-        missing_ids = []
-        for id_name in required_ids:
-            if not state.get(id_name):
-                missing_ids.append(id_name)
-        if missing_ids:
-            raise ValueError(f"IDs necessários não encontrados: {missing_ids}")
-        obj_manager = get_object_manager()
-        # Resolve each ID and fail on the first one that yields nothing
-        for id_name in required_ids:
-            obj_id = state[id_name]
-            resolve = getattr(obj_manager, getter_by_id[id_name])
-            if resolve(obj_id) is None:
-                raise ValueError(f"Objeto não encontrado para {id_name}: {obj_id}")
-        # Every component is available
-        state["context_ready"] = True
-        logging.info("[CONTEXT] Contexto da consulta preparado")
-    except Exception as e:
-        error_msg = f"Erro ao preparar contexto: {e}"
-        logging.error(f"[CONTEXT] {error_msg}")
-        state.update({
-            "error": error_msg,
-            "context_ready": False
-        })
-    return state

nodes/refinement_node.py DELETED Viewed

@@ -1,141 +0,0 @@
-"""
-Nó para refinamento de respostas
-"""
-import logging
-from typing import Dict, Any
-from agents.tools import refine_response_with_llm
-async def refine_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Refine the current response with an additional LLM pass.
-    Args:
-        state: Current agent state; reads "advanced_mode", "error",
-            "response" and "user_input".
-    Returns:
-        The same state dict. When refinement runs, "response" is replaced and
-        "refined" is set to True. When it fails, the original response is kept
-        and "refinement_error" holds the message. The state is returned
-        unchanged when advanced mode is off, an error is present, or the
-        response or user input is empty.
-    """
-    wants_refinement = state.get("advanced_mode", False) and not state.get("error")
-    if not wants_refinement:
-        # Nothing to do: advanced mode is disabled or an earlier step failed
-        logging.info("[REFINE] Pulando refinamento - modo avançado desabilitado ou erro presente")
-        return state
-    logging.info("[REFINE] Iniciando refinamento da resposta")
-    try:
-        draft = state.get("response", "")
-        question = state.get("user_input", "")
-        if not (draft and question):
-            logging.warning("[REFINE] Resposta ou entrada do usuário não disponível")
-            return state
-        # Second LLM pass over the draft answer
-        state["response"] = await refine_response_with_llm(question, draft)
-        state["refined"] = True
-        logging.info("[REFINE] Resposta refinada com sucesso")
-    except Exception as e:
-        error_msg = f"Erro ao refinar resposta: {e}"
-        logging.error(f"[REFINE] {error_msg}")
-        # The original response stays in place when refinement fails
-        state["refinement_error"] = error_msg
-    return state
-async def check_refinement_quality_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Rate the refined response against the raw SQL agent output using simple heuristics.
-    Args:
-        state: State holding the refined "response", the "refined" flag and
-            the agent result under "sql_result" (its "output" is the baseline).
-    Returns:
-        The same state dict with "refinement_quality" set to "not_refined",
-        "high", "medium", "low" or "error"; when a rating is computed,
-        "quality_metrics" holds the individual measurements.
-    """
-    insight_words = ["insight", "análise", "interpretação", "conclusão", "tendência"]
-    statistic_words = ["média", "total", "percentual", "proporção", "estatística"]
-    try:
-        baseline = state.get("sql_result", {}).get("output", "")
-        candidate = state.get("response", "")
-        if not state.get("refined", False):
-            state["refinement_quality"] = "not_refined"
-            return state
-        # Simple quality heuristics: growth in length and presence of key words
-        growth = len(candidate) - len(baseline)
-        lowered = candidate.lower()
-        mentions_insights = any(word in lowered for word in insight_words)
-        mentions_statistics = any(word in lowered for word in statistic_words)
-        # "Improved" means the refined text is more than 10% longer
-        grew_enough = len(candidate) > len(baseline) * 1.1
-        quality_metrics = {
-            "length_increase": growth,
-            "has_insights": mentions_insights,
-            "has_statistics": mentions_statistics,
-            "improved": grew_enough
-        }
-        # Overall rating
-        if grew_enough and (mentions_insights or mentions_statistics):
-            quality_score = "high"
-        else:
-            quality_score = "medium" if growth > 0 else "low"
-        state["refinement_quality"] = quality_score
-        state["quality_metrics"] = quality_metrics
-        logging.info(f"[REFINE] Qualidade avaliada: {quality_score}")
-    except Exception as e:
-        logging.error(f"[REFINE] Erro ao avaliar qualidade: {e}")
-        state["refinement_quality"] = "error"
-    return state
-async def format_final_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Apply the final formatting to the response text.
-    Args:
-        state: State holding the processed "response" plus "execution_time",
-            "advanced_mode", "refined" and "refinement_quality".
-    Returns:
-        The same state dict with the stripped, annotated "response" and
-        "formatted" set to True. If formatting raises, the error is logged
-        and the state is left as it was.
-    """
-    try:
-        response = state.get("response", "")
-        execution_time = state.get("execution_time", 0.0)
-        advanced_mode = state.get("advanced_mode", False)
-        refined = state.get("refined", False)
-        # A quality note is only added to answers refined in advanced mode
-        if advanced_mode and refined:
-            quality = state.get("refinement_quality", "unknown")
-            quality_note = (
-                "\n\n💡 *Resposta aprimorada com análise avançada*" if quality == "high"
-                else "\n\n🔍 *Resposta complementada*" if quality == "medium"
-                else None
-            )
-            if quality_note is not None:
-                response += quality_note
-        # The execution time is shown only when it is noticeable
-        if execution_time > 2.0:
-            response += f"\n\n⏱️ *Processado em {execution_time:.1f}s*"
-        state["response"] = response.strip()
-        state["formatted"] = True
-        logging.info(f"[FORMAT] Resposta formatada - {len(response)} caracteres")
-    except Exception as e:
-        # The original response is kept when formatting fails
-        logging.error(f"[FORMAT] Erro na formatação: {e}")
-    return state