Spaces:

akra35567
/

akira

Running

App Files Community

akra35567 committed on 14 days ago

Commit

c2e2750

verified ·

1 Parent(s): 5aba635

Upload 29 files

Browse files

Files changed (14) hide show

modules/context_builder.py +7 -6
modules/database.py +0 -0
modules/grouped_skills_adapter.py +2 -53
modules/lstm_extension.py +147 -51
modules/lstm_memory_system.py +52 -45
modules/reply_context_handler.py +781 -758
modules/self_awareness.py +86 -0
modules/sender_attribution_fix.py +55 -0
modules/short_term_memory.py +10 -0
modules/skills_library.py +0 -0
modules/thinking_engine.py +374 -0
modules/treinamento.py +5 -36
modules/twitter_api.py +79 -100
modules/unified_context.py +1041 -1182

modules/context_builder.py CHANGED Viewed

@@ -210,12 +210,13 @@ class ContextBuilder:
         Args:
             db: Instância de Database
         """
-        if get_lstm_extension:
-            try:
-                self.lstm_extension = get_lstm_extension(db)
-                logger.info("✅ LSTM Extension habilitado em ContextBuilder")
-            except Exception as e:
-                logger.debug(f"LSTM initialization: {e}")
     def build_prompt(
         self,

         Args:
             db: Instância de Database
         """
+        try:
+            from .lstm_extension import get_lstm_extension as _get_lstm
+            self.lstm_extension = _get_lstm(db)
+            logger.info("✅ LSTM Extension habilitado em ContextBuilder")
+        except Exception as e:
+            logger.debug(f"LSTM initialization: {e}")
     def build_prompt(
         self,

modules/database.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

modules/grouped_skills_adapter.py CHANGED Viewed

@@ -11,8 +11,7 @@ from modules.skills import (
     WeatherSkill,
     EntertainmentSkill,
     ArtSkill,
-    MusicSkill,
-    ManusSkill
 )
 # ========================================
@@ -23,7 +22,6 @@ _weather_skill = WeatherSkill()
 _entertainment_skill = EntertainmentSkill()
 _art_skill = ArtSkill()
 _music_skill = MusicSkill()
-_manus_skill = ManusSkill()
 # ========================================
@@ -314,20 +312,6 @@ def music_tool(tipo: str = "genre", mood: str = "random", anime: str = None, son
                 "provider": result["provider"]
             }
-        elif dados.get("tipo") == "lyrics":
-            fragmento = dados.get("fragmento", "Letra não encontrada.")
-            url = dados.get("url", "")
-            fonte = dados.get("fonte", "desconhecida")
-            return {
-                "sucesso": True,
-                "tipo": "lyrics",
-                "musica": dados.get("musica"),
-                "artista": dados.get("artista"),
-                "conteudo": f"Fragmento da letra:\n\n{fragmento}\n\nFonte: {fonte}\nLink: {url}",
-                "provider": result["provider"]
-            }
         else:
             return {
                 "sucesso": True,
@@ -342,40 +326,6 @@ def music_tool(tipo: str = "genre", mood: str = "random", anime: str = None, son
         }
-@skill(
-    name="manus_research",
-    description="Realiza pesquisas profundas, análise de mercado ou tarefas autônomas complexas via Manus AI. Use para perguntas que exigem investigação séria.",
-    parameters={
-        "type": "object",
-        "properties": {
-            "prompt": {
-                "type": "string",
-                "description": "O que você quer que o Manus pesquise ou resolva detalhadamente."
-            }
-        },
-        "required": ["prompt"]
-    }
-)
-def manus_research_tool(prompt: str):
-    """
-    Wrapper para ManusSkill
-    """
-    result = _manus_skill.execute(prompt=prompt)
-    if result.get("sucesso"):
-        return {
-            "sucesso": True,
-            "analise": result["dados"].get("resultado"),
-            "provider": "Manus AI Agent",
-            "status": "Finalizado com sucesso"
-        }
-    else:
-        return {
-            "sucesso": False,
-            "erro": result.get("erro")
-        }
 # ========================================
 # Helper para stats
 # ========================================
@@ -386,6 +336,5 @@ def get_grouped_skills_stats() -> Dict[str, Any]:
         "weather": _weather_skill.get_stats(),
         "entertainment": _entertainment_skill.get_stats(),
         "art": _art_skill.get_stats(),
-        "music": _music_skill.get_stats(),
-        "manus": _manus_skill.get_stats()
     }

     WeatherSkill,
     EntertainmentSkill,
     ArtSkill,
+    MusicSkill
 )
 # ========================================
 _entertainment_skill = EntertainmentSkill()
 _art_skill = ArtSkill()
 _music_skill = MusicSkill()
 # ========================================
                 "provider": result["provider"]
             }
         else:
             return {
                 "sucesso": True,
         }
 # ========================================
 # Helper para stats
 # ========================================
         "weather": _weather_skill.get_stats(),
         "entertainment": _entertainment_skill.get_stats(),
         "art": _art_skill.get_stats(),
+        "music": _music_skill.get_stats()
     }

modules/lstm_extension.py CHANGED Viewed

@@ -20,7 +20,6 @@ Features:
 """
 import json
-import re
 import threading
 from typing import Dict, Any, Optional, List
 from dataclasses import dataclass
@@ -75,7 +74,8 @@ class LSTMExtension:
         context_id: str,
         numero_usuario: str,
         message: str,
-        role: str = "user"
     ) -> None:
         """
         Processa mensagem em background thread. NÃO BLOQUEIA.
@@ -85,11 +85,12 @@ class LSTMExtension:
             numero_usuario: Número do usuário
             message: Texto da mensagem
             role: "user" ou "assistant"
         """
         # Dispara em thread para não bloquear
         thread = threading.Thread(
             target=self._analyze_and_store,
-            args=(context_id, numero_usuario, message, role),
             daemon=True
         )
         thread.start()
@@ -99,10 +100,19 @@ class LSTMExtension:
         context_id: str,
         numero_usuario: str,
         message: str,
-        role: str
     ) -> None:
         """Análise interna (roda em thread separada)."""
         try:
             # 1. Recuperar contexto existente
             existing = self._get_from_db(context_id)
             summary = existing or LSTMContextSummary(
@@ -135,6 +145,17 @@ class LSTMExtension:
             # 5. Salvar em DB
             self._save_to_db(summary)
             self.context_cache[context_id] = summary
             logger.debug(f"✅ LSTM context saved: {context_id} (topic: {summary.topic_principal})")
@@ -145,7 +166,8 @@ class LSTMExtension:
     def get_context_for_prompt(
         self,
         context_id: str,
-        numero_usuario: str
     ) -> Optional[Dict[str, Any]]:
         """
         Recupera contexto LSTM para enriquecer prompt.
@@ -153,73 +175,95 @@ class LSTMExtension:
         Args:
             context_id: ID da conversa
-            numero_usuario: Número do usuário
         Returns:
-            Dict com contexto de longo prazo, ou None
         """
-        # Tentar cache primeiro
-        if context_id in self.context_cache:
-            summary = self.context_cache[context_id]
-        else:
-            # Buscar DB
-            summary = self._get_from_db(context_id)
-        if not summary or not summary.topic_principal:
-            return None
-        # Formatar para uso em prompt
-        return {
-            "topic_principal": summary.topic_principal,
-            "subtopicas": summary.subtopicas,
-            "conversation_path": summary.conversation_path,
-            "interaction_pattern": summary.interaction_pattern,
-            "unanswered_questions": summary.unanswered_questions[:3],  # Top 3
-            "assumed_knowledge": summary.assumed_knowledge[:3],  # Top 3
-            "context_switches": summary.context_switches,
-        }
     def _extract_topic_simple(self, message: str, current_topic: Optional[str]) -> Optional[str]:
         """
         Extrai tópico de forma simples (sem LLM).
-        Heurísticas aprimoradas para evitar drift.
         """
         msg_lower = message.lower()
-        # Stopwords básicas para evitar tópicos inúteis
-        stopwords = {
-            "está", "como", "você", "para", "mais", "tudo", "bem", "pode", "fazer",
-            "quando", "onde", "quem", "porque", "qual", "quais", "muito", "pouco",
-            "esse", "essa", "aquele", "aquela", "coisa", "nada", "algo", "isso"
-        }
-        # Detectar palavras-chave comuns (Tópicos Fortes)
         topics_keywords = {
-            "saúde": ["doença", "medicina", "cura", "tratamento", "sintoma", "hospital", "dor", "médico"],
-            "tecnologia": ["código", "python", "função", "erro", "bug", "programação", "ia", "api", "pc", "software"],
-            "relacionamento": ["namoro", "amor", "casal", "relacionamento", "ex", "beijo", "casar"],
-            "trabalho": ["emprego", "trabalho", "chefe", "salário", "despedida", "empresa", "vaga"],
-            "escola": ["escola", "universidade", "prova", "nota", "aula", "estudar", "curso"],
-            "entretenimento": ["filme", "série", "musica", "jogo", "game", "futebol", "esporte"],
-            "finanças": ["dinheiro", "preço", "valor", "kwanza", "aoa", "dólar", "comprar", "venda"]
         }
         for topic, keywords in topics_keywords.items():
             if any(kw in msg_lower for kw in keywords):
                 return topic
-        # Se tem pergunta, tenta extrair um substantivo provável
         if "?" in message:
-            words = [w for w in re.sub(r'[^\w\s]', '', msg_lower).split() if len(w) > 4]
-            # Filtra stopwords
-            filtered_words = [w for w in words if w not in stopwords]
-            if filtered_words:
-                return filtered_words[0]
-        # Se a mensagem for muito curta, mantém o tópico atual (evita drift por ruído)
-        if len(message.split()) < 3:
-            return current_topic
         return current_topic
     def _detect_pattern(self, message: str) -> Optional[str]:
@@ -289,6 +333,58 @@ class LSTMExtension:
             logger.warning(f"Error loading LSTM from DB: {e}")
             return None
     def _save_to_db(self, summary: LSTMContextSummary) -> None:
         """Salva contexto no banco de dados usando Database._execute_with_retry()."""
         try:

 """
 import json
 import threading
 from typing import Dict, Any, Optional, List
 from dataclasses import dataclass
         context_id: str,
         numero_usuario: str,
         message: str,
+        role: str = "user",
+        message_id: Optional[str] = None
     ) -> None:
         """
         Processa mensagem em background thread. NÃO BLOQUEIA.
             numero_usuario: Número do usuário
             message: Texto da mensagem
             role: "user" ou "assistant"
+            message_id: ID único da mensagem (para evitar duplicados)
         """
         # Dispara em thread para não bloquear
         thread = threading.Thread(
             target=self._analyze_and_store,
+            args=(context_id, numero_usuario, message, role, message_id),
             daemon=True
         )
         thread.start()
         context_id: str,
         numero_usuario: str,
         message: str,
+        role: str,
+        message_id: Optional[str] = None
     ) -> None:
         """Análise interna (roda em thread separada)."""
         try:
+            # 0. Verificação de idempotência (Anti-Duplicate)
+            if message_id:
+                query_check = "SELECT id FROM lstm_message_links WHERE context_id = ? AND message_id = ? LIMIT 1"
+                res = self.db._execute_with_retry(query_check, (context_id, message_id))
+                if res:
+                    # logger.debug(f"⏭️ LSTM skip duplicate: {message_id}")
+                    return
             # 1. Recuperar contexto existente
             existing = self._get_from_db(context_id)
             summary = existing or LSTMContextSummary(
             # 5. Salvar em DB
             self._save_to_db(summary)
+            # 6. Registrar link da mensagem (idempotência)
+            if message_id:
+                try:
+                    query_link = """INSERT INTO lstm_message_links
+                                    (context_id, message_id, numero_usuario, created_at)
+                                    VALUES (?, ?, ?, CURRENT_TIMESTAMP)"""
+                    self.db._execute_with_retry(query_link, (context_id, message_id, numero_usuario), commit=True)
+                except Exception:
+                    pass # Provavelmente já existe (race condition), ignorar
             self.context_cache[context_id] = summary
             logger.debug(f"✅ LSTM context saved: {context_id} (topic: {summary.topic_principal})")
     def get_context_for_prompt(
         self,
         context_id: str,
+        numero_usuario: str = None,
+        is_group: bool = False
     ) -> Optional[Dict[str, Any]]:
         """
         Recupera contexto LSTM para enriquecer prompt.
         Args:
             context_id: ID da conversa
+            numero_usuario: Número do usuário (pode ser None em grupos)
+            is_group: Se True, retorna contexto para TODOS os speakers do grupo
         Returns:
+            Dict com contexto de longo prazo enriquecido com speaker tracking, ou None
         """
+        if is_group:
+            # Recupera contexto para TODOS os speakers do grupo
+            summaries = self._get_from_db_all_speakers(context_id)
+            if not summaries:
+                return None
+            # Agrupa contexto: qual speaker falou sobre qual tópico
+            speakers_topics = {}
+            total_context_switches = 0
+            for summary in summaries:
+                if summary.numero_usuario and summary.topic_principal:
+                    speakers_topics[summary.numero_usuario] = {
+                        "topic_principal": summary.topic_principal,
+                        "interaction_pattern": summary.interaction_pattern or "regular",
+                        "unanswered_questions": summary.unanswered_questions[:2] if summary.unanswered_questions else [],
+                        "assumed_knowledge": summary.assumed_knowledge[:1] if summary.assumed_knowledge else [],
+                    }
+                    total_context_switches += summary.context_switches or 0
+            if not speakers_topics:
+                return None
+            return {
+                "context_id": context_id,
+                "tipo": "grupo",
+                "speakers_topics": speakers_topics,  # ✅ Rastreia quem falou o quê
+                "context_switches": total_context_switches,
+            }
+        else:
+            # Código original para PV/direto
+            # Tentar cache primeiro
+            if context_id in self.context_cache:
+                summary = self.context_cache[context_id]
+            else:
+                # Buscar DB (vai retornar primeiro speaker se houver múltiplos em grupo)
+                summary = self._get_from_db(context_id)
+            if not summary or not summary.topic_principal:
+                return None
+            # Formatar para uso em prompt
+            return {
+                "topic_principal": summary.topic_principal,
+                "subtopicas": summary.subtopicas,
+                "conversation_path": summary.conversation_path,
+                "interaction_pattern": summary.interaction_pattern,
+                "unanswered_questions": summary.unanswered_questions[:3] if summary.unanswered_questions else [],
+                "assumed_knowledge": summary.assumed_knowledge[:3] if summary.assumed_knowledge else [],
+                "context_switches": summary.context_switches,
+            }
     def _extract_topic_simple(self, message: str, current_topic: Optional[str]) -> Optional[str]:
         """
         Extrai tópico de forma simples (sem LLM).
+        Heurísticas básicas.
         """
         msg_lower = message.lower()
+        # Detectar palavras-chave comuns
         topics_keywords = {
+            "saúde": ["doença", "medicina", "cura", "tratamento", "sintoma", "hospital"],
+            "técnica": ["código", "python", "função", "erro", "bug", "programação"],
+            "relacionamento": ["namoro", "amor", "casal", "relacionamento", "ex"],
+            "trabalho": ["emprego", "trabalho", "chefe", "salário", "despedida"],
+            "escola": ["escola", "universidade", "prova", "nota", "aula"],
+            "esportes": ["futebol", "basquete", "games", "competição", "time"],
         }
         for topic, keywords in topics_keywords.items():
             if any(kw in msg_lower for kw in keywords):
                 return topic
+        # Se tem pergunta, extrai dela
         if "?" in message:
+            # Pega primeira palavra significativa
+            words = [w for w in msg_lower.split() if len(w) > 3]
+            if words:
+                return words[0]
         return current_topic
     def _detect_pattern(self, message: str) -> Optional[str]:
             logger.warning(f"Error loading LSTM from DB: {e}")
             return None
+    def _get_from_db_all_speakers(self, context_id: str) -> List[LSTMContextSummary]:
+        """
+        Recupera contexto para TODOS os speakers em um contexto de grupo.
+        Essencial para rastrear quem falou o quê em grupos.
+        """
+        try:
+            rows = self.db._execute_with_retry(
+                "SELECT * FROM lstm_contexto WHERE context_id = ? ORDER BY last_updated DESC",
+                (context_id,)
+            )
+            if not rows:
+                return []
+            summaries = []
+            for row in rows:
+                data = dict(row)
+                # Desserializar JSON fields
+                if data.get('subtopicas'):
+                    data['subtopicas'] = json.loads(data['subtopicas'])
+                if data.get('conversation_path'):
+                    data['conversation_path'] = json.loads(data['conversation_path'])
+                if data.get('unanswered_questions'):
+                    data['unanswered_questions'] = json.loads(data['unanswered_questions'])
+                if data.get('assumed_knowledge'):
+                    data['assumed_knowledge'] = json.loads(data['assumed_knowledge'])
+                if data.get('contradictions'):
+                    data['contradictions'] = json.loads(data['contradictions'])
+                # Limpar campos legados
+                data.pop('created_at', None)
+                data.pop('last_updated', None)
+                data.pop('metadata', None)
+                data.pop('emotional_state', None)
+                data.pop('contexto_geral', None)
+                # Filtro genérico
+                import inspect
+                valid_keys = inspect.signature(LSTMContextSummary).parameters.keys()
+                filtered_data = {k: v for k, v in data.items() if k in valid_keys}
+                summary = LSTMContextSummary(**filtered_data)
+                summaries.append(summary)
+            logger.debug(f"✅ Loaded LSTM speakers: context_id={context_id}, {len(summaries)} speakers")
+            return summaries
+        except Exception as e:
+            logger.warning(f"Error loading LSTM speakers from DB: {e}")
+            return []
     def _save_to_db(self, summary: LSTMContextSummary) -> None:
         """Salva contexto no banco de dados usando Database._execute_with_retry()."""
         try:

modules/lstm_memory_system.py CHANGED Viewed

@@ -173,6 +173,10 @@ class LSTMMemorySystem:
         self.processing_queue: List[Dict[str, Any]] = []
         self.processing_lock = threading.Lock()
         # Inicializar tabelas no DB
         self._initialize_database()
@@ -181,13 +185,13 @@ class LSTMMemorySystem:
     def _initialize_database(self) -> None:
         """Cria tabelas necessárias no banco de dados."""
         try:
-            # As tabelas lstm_contexto e lstm_message_links já são criadas
-            # pelo database.py _init_db(). Aqui apenas garantimos que existem.
             self.db._execute_with_retry("""
                 CREATE TABLE IF NOT EXISTS lstm_contexto (
-                    context_id TEXT PRIMARY KEY,
-                    numero_usuario TEXT NOT NULL,
-                    topic_principal TEXT,
                     subtopicas TEXT,
                     conversation_path TEXT,
                     last_key_message TEXT,
@@ -206,27 +210,19 @@ class LSTMMemorySystem:
             self.db._execute_with_retry("""
                 CREATE TABLE IF NOT EXISTS lstm_message_links (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
-                    context_id TEXT NOT NULL,
-                    message_id TEXT,
-                    parent_message_id TEXT,
                     topic_changed BOOLEAN DEFAULT FALSE,
-                    context_switch_type TEXT,
-                    relevance_score REAL DEFAULT 1.0,
-                    created_at REAL
                 )
             """, commit=True)
-            self.db._execute_with_retry("""
-                CREATE INDEX IF NOT EXISTS idx_lstm_usuario
-                ON lstm_contexto(numero_usuario)
-            """, commit=True)
-            self.db._execute_with_retry("""
-                CREATE INDEX IF NOT EXISTS idx_lstm_links_context
-                ON lstm_message_links(context_id)
-            """, commit=True)
-            logger.info("✅ Tabelas LSTM verificadas/inicializadas")
         except Exception as e:
             logger.error(f"❌ Erro ao inicializar tabelas LSTM: {e}")
@@ -247,6 +243,8 @@ class LSTMMemorySystem:
         Processa mensagem de forma assíncrona para extrair contexto LSTM.
         Não bloqueia a resposta. Funciona em background thread.
         Args:
             context_id: ID do contexto (PV ou Grupo)
             numero_usuario: ID do usuário
@@ -255,6 +253,28 @@ class LSTMMemorySystem:
             parent_message_id: ID da mensagem anterior (para linked context)
             llm_client: Client LLM para análise (opcional)
         """
         # Adiciona à queue para processamento assíncrono
         with self.processing_lock:
             self.processing_queue.append({
@@ -263,7 +283,7 @@ class LSTMMemorySystem:
                 'message': message,
                 'role': role,
                 'parent_message_id': parent_message_id,
-                'timestamp': time.time()
             })
         # Dispara thread de processamento se não estiver rodando
@@ -315,7 +335,7 @@ class LSTMMemorySystem:
             lstm_summary.topic_principal = new_topic
             # Armazenar link entre mensagens
-            self._record_context_switch(context_id, parent_message_id, new_topic)
         # ✅ ANÁLISE 3: Adicionar subtópicos
         subtopics = self._extract_subtopics(message, new_topic)
@@ -381,21 +401,8 @@ class LSTMMemorySystem:
             'política': ['presidente', 'eleição', 'política', 'governo', 'ministro'],
             'clima': ['tempo', 'chuva', 'temperatura', 'previsão', 'clima'],
             'saúde': ['doença', 'médico', 'hospital', 'sintomas', 'saúde'],
-            'entretenimento': ['pedro orochi', 'orochinho', 'weedzao', 'mitada', 'rei das mitadas', 'youtuber', 'streamer'],
         }
-        # 🛡️ FILTRO ANTI-NOISE: Ignora termos vazios ou clickbaits genéricos
-        clickbait_patterns = [
-            'veja o que aconteceu', 'não acredito', 'olha isso', 'surpreendente',
-            'morreu hoje', 'luto', 'urgente'
-        ]
-        if any(p in message_lower for p in clickbait_patterns):
-            # Se contém clickbait, mas não contém um tópico real forte, ignora
-            has_real_topic = any(kw in message_lower for topic, kws in keywords_map.items() for kw in kws)
-            if not has_real_topic:
-                return None
         for topic, keywords in keywords_map.items():
             if any(kw in message_lower for kw in keywords):
                 return topic
@@ -403,11 +410,6 @@ class LSTMMemorySystem:
         # Se não detectar via keywords, tenta extrair primeira entidade nomeada
         # (simplificado - em produção usaria NER)
         if len(message.split()) >= 3:
-            # Pela experiência, temas com "morreu" ou "luto" sem fonte são ruído
-            if 'morreu' in message_lower or 'luto' in message_lower:
-                if not any(kw in message_lower for kw in ['notícia', 'jornal', 'confirmado']):
-                    return None
             # Pega primeiras 3-4 palavras como possível tema
             words = message.split()[:4]
             if all(w[0].isupper() for w in words if w):
@@ -627,19 +629,24 @@ class LSTMMemorySystem:
     def _record_context_switch(
         self,
         context_id: str,
         parent_message_id: Optional[str],
         new_topic: str
     ) -> None:
         """Registra mudança de contexto/tópico."""
         try:
             self.db._execute_with_retry("""
                 INSERT INTO lstm_message_links
-                (context_id, message_id, parent_message_id, topic_changed,
                  context_switch_type, created_at)
-                VALUES (?, ?, ?, ?, ?, ?)
             """, (
                 context_id,
-                None,  # message_id será gerado externamente
                 parent_message_id,
                 True,
                 'topic_change',

         self.processing_queue: List[Dict[str, Any]] = []
         self.processing_lock = threading.Lock()
+        # ✅ PROTEÇÃO CONTRA DUPLICAÇÃO: Track mensagens processadas recentemente
+        self.recently_processed: Dict[str, float] = {}  # {hash(context+user+msg): timestamp}
+        self.dedup_timeout = 5  # Segundos - evita duplicação em 5s
         # Inicializar tabelas no DB
         self._initialize_database()
     def _initialize_database(self) -> None:
         """Cria tabelas necessárias no banco de dados."""
         try:
+            # As tabelas já são criadas pelo database.py _init_db().
+            # Aqui apenas garantimos redundância segura com o esquema oficial.
             self.db._execute_with_retry("""
                 CREATE TABLE IF NOT EXISTS lstm_contexto (
+                    context_id VARCHAR(255) PRIMARY KEY,
+                    numero_usuario VARCHAR(50) NOT NULL,
+                    topic_principal VARCHAR(255),
                     subtopicas TEXT,
                     conversation_path TEXT,
                     last_key_message TEXT,
             self.db._execute_with_retry("""
                 CREATE TABLE IF NOT EXISTS lstm_message_links (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    context_id VARCHAR(255) NOT NULL,
+                    message_id VARCHAR(255) NOT NULL,
+                    numero_usuario VARCHAR(50) NOT NULL,
+                    parent_message_id VARCHAR(255),
                     topic_changed BOOLEAN DEFAULT FALSE,
+                    context_switch_type VARCHAR(50),
+                    relevance_score FLOAT DEFAULT 1.0,
+                    created_at REAL,
+                    FOREIGN KEY (context_id) REFERENCES lstm_contexto(context_id) ON DELETE CASCADE
                 )
             """, commit=True)
+            logger.info("✅ Tabelas LSTM sincronizadas")
         except Exception as e:
             logger.error(f"❌ Erro ao inicializar tabelas LSTM: {e}")
         Processa mensagem de forma assíncrona para extrair contexto LSTM.
         Não bloqueia a resposta. Funciona em background thread.
+        ✅ Proteção: Evita duplicação em 5 segundos
         Args:
             context_id: ID do contexto (PV ou Grupo)
             numero_usuario: ID do usuário
             parent_message_id: ID da mensagem anterior (para linked context)
             llm_client: Client LLM para análise (opcional)
         """
+        # ✅ DEDUPLICATION: Verifica se a mensagem já foi processada recentemente
+        import hashlib
+        if message_id:
+            msg_hash = hashlib.md5(f"msgid:{message_id}".encode()).hexdigest()
+        else:
+            msg_hash = hashlib.md5(f"{context_id}:{numero_usuario}:{message[:100]}".encode()).hexdigest()
+        now = time.time()
+        # Limpa entries expiradas
+        expired = [k for k, v in self.recently_processed.items() if now - v > self.dedup_timeout]
+        for k in expired:
+            del self.recently_processed[k]
+        # Verifica se já foi processada recentemente
+        if msg_hash in self.recently_processed:
+            logger.debug(f"⚠️ [LSTM DEDUP] Mensagem duplicada ignorada: {message[:50]}...")
+            return
+        # Marca como processada
+        self.recently_processed[msg_hash] = now
         # Adiciona à queue para processamento assíncrono
         with self.processing_lock:
             self.processing_queue.append({
                 'message': message,
                 'role': role,
                 'parent_message_id': parent_message_id,
+                'timestamp': now
             })
         # Dispara thread de processamento se não estiver rodando
             lstm_summary.topic_principal = new_topic
             # Armazenar link entre mensagens
+            self._record_context_switch(context_id, numero_usuario, parent_message_id, new_topic)
         # ✅ ANÁLISE 3: Adicionar subtópicos
         subtopics = self._extract_subtopics(message, new_topic)
             'política': ['presidente', 'eleição', 'política', 'governo', 'ministro'],
             'clima': ['tempo', 'chuva', 'temperatura', 'previsão', 'clima'],
             'saúde': ['doença', 'médico', 'hospital', 'sintomas', 'saúde'],
         }
         for topic, keywords in keywords_map.items():
             if any(kw in message_lower for kw in keywords):
                 return topic
         # Se não detectar via keywords, tenta extrair primeira entidade nomeada
         # (simplificado - em produção usaria NER)
         if len(message.split()) >= 3:
             # Pega primeiras 3-4 palavras como possível tema
             words = message.split()[:4]
             if all(w[0].isupper() for w in words if w):
     def _record_context_switch(
         self,
         context_id: str,
+        numero_usuario: str,
         parent_message_id: Optional[str],
         new_topic: str
     ) -> None:
         """Registra mudança de contexto/tópico."""
         try:
+            # Gera um ID temporário se não houver
+            msg_id = f"switch_{int(time.time())}_{hashlib.md5(new_topic.encode()).hexdigest()[:8]}"
             self.db._execute_with_retry("""
                 INSERT INTO lstm_message_links
+                (context_id, message_id, numero_usuario, parent_message_id, topic_changed,
                  context_switch_type, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
             """, (
                 context_id,
+                msg_id,
+                numero_usuario,
                 parent_message_id,
                 True,
                 'topic_change',

modules/reply_context_handler.py CHANGED Viewed

@@ -1,758 +1,781 @@
-# type: ignore
-"""
-================================================================================
-AKIRA V21 ULTIMATE - REPLY CONTEXT HANDLER MODULE
-================================================================================
-Sistema dedicado para processar e priorizar contexto de replies.
-Garante que replies tenham prioridade ligeiramente maior que o contexto geral,
-especialmente em perguntas curtas.
-Features:
-- Extração e processamento de metadados de reply
-- 4 níveis de prioridade (1=normal, 2=reply, 3=reply-to-bot, 4=reply-to-bot+pergunta-curta)
-- Construção de prompt sections otimizadas para replies
-- Integração com ShortTermMemory
-- Context hint extraction para melhor compreensão
-================================================================================
-"""
-import os
-import sys
-import time
-import json
-import re
-import logging
-from typing import Optional, Dict, Any, List, Tuple
-from dataclasses import dataclass, field
-# Imports robustos com fallback - CORRIGIDO para usar modules.
-try:
-    from . import config
-    from .short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
-    REPLY_HANDLER_AVAILABLE = True
-except ImportError:
-    try:
-        import modules.config as config
-        from modules.short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
-        REPLY_HANDLER_AVAILABLE = True
-    except ImportError:
-        try:
-            from short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
-            REPLY_HANDLER_AVAILABLE = True
-        except ImportError:
-            REPLY_HANDLER_AVAILABLE = False
-            config = None
-logger = logging.getLogger(__name__)
-# ============================================================
-# NÍVEIS DE PRIORIDADE
-# ============================================================
# Priority levels assigned to an incoming message; a larger value is more urgent.
PRIORITY_NORMAL = 1
PRIORITY_REPLY = 2
PRIORITY_REPLY_TO_BOT = 3
PRIORITY_REPLY_TO_BOT_SHORT_QUESTION = 4  # highest priority

# Maximum number of words for a message to count as a "short question".
PERGUNTA_CURTA_LIMITE: int = 5


@dataclass
class ProcessedReplyContext:
    """Reply context after processing, ready to be used by the prompt pipeline.

    Attributes:
        is_reply: Whether the message is a reply.
        reply_to_bot: Whether the reply is addressed to the bot.
        priority_level: Priority level (1-4, see the PRIORITY_* constants).
        quoted_author_name: Name of the author of the quoted message.
        quoted_author_numero: Number/id of the author of the quoted message.
        quoted_text_original: Original quoted text.
        mensagem_citada: Text of the quoted message.
        context_hint: Context hint extracted from the quoted text.
        importancia: Computed importance weight.
        prompt_section: Section already formatted for the prompt.
        should_prioritize_reply: Whether the reply should be prioritised.
        adaptive_multiplier: Multiplier derived from the message length.
    """

    is_reply: bool = False
    reply_to_bot: bool = False
    priority_level: int = PRIORITY_NORMAL
    quoted_author_name: str = ""
    quoted_author_numero: str = ""
    quoted_text_original: str = ""
    mensagem_citada: str = ""
    context_hint: str = ""
    importancia: float = 1.0
    prompt_section: str = ""
    should_prioritize_reply: bool = False
    adaptive_multiplier: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain ``{field_name: value}`` dictionary."""
        # __dataclass_fields__ preserves declaration order, so the key order
        # matches the order in which the fields are declared above.
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedReplyContext':
        """Build an instance from ``data``, ignoring keys that are not fields."""
        known_fields = cls.__dataclass_fields__
        kwargs = {}
        for key, value in data.items():
            if key in known_fields:
                kwargs[key] = value
        return cls(**kwargs)
-# ============================================================
-# FUNÇÕES AUXILIARES
-# ============================================================
def contar_palavras(texto: str) -> int:
    """Return the number of whitespace-separated words in ``texto`` (0 when empty)."""
    return len(texto.split()) if texto else 0
# Interrogative/addressing words that mark a message as a question. They are
# matched as WHOLE words: the previous substring test made 'n', 'tu' and 'que'
# match "não", "tudo" and "quero", so almost every short message was treated
# as a question. The phrases 'por que', 'para que', 'o que' and 'é o que' are
# covered by the whole word 'que'.
_INTERROGATIVE_RE = re.compile(
    r"(?<!\w)(?:qual|quais|quem|como|onde|quando|porque|que|oq|"
    r"vc|você|tu|meu|minha|n)(?!\w)"
)


def is_pergunta_curta(texto: str, limite: Optional[int] = None) -> bool:
    """Tell whether ``texto`` is a short question.

    A text qualifies when it has at most ``limite`` whitespace-separated words
    and either contains a ``?`` or contains one of the interrogative words as
    a whole word (case-insensitive).

    Args:
        texto: Text to check.
        limite: Maximum word count; defaults to ``PERGUNTA_CURTA_LIMITE``.

    Returns:
        True when the text is a question with few words, False otherwise
        (including for empty/None input).
    """
    if not texto:
        return False
    if limite is None:
        limite = PERGUNTA_CURTA_LIMITE

    # Same counting rule as contar_palavras(): whitespace-separated tokens.
    if len(texto.split()) > limite:
        return False

    if '?' in texto:
        return True
    return _INTERROGATIVE_RE.search(texto.strip().lower()) is not None
def is_mensagem_vazia_ou_reconhecimento(texto: str) -> bool:
    """Tell whether ``texto`` is empty, bare punctuation or a terse acknowledgement.

    Used to avoid the self-reply hallucination, where the bot ends up talking
    to itself about the quoted message.

    Returns True when the text is falsy, when its stripped form has at most
    two characters and none of them is an ASCII letter or digit (".", "..",
    "!"), or when the stripped, lower-cased text is one of the fixed
    acknowledgement tokens.
    """
    if not texto:
        return True

    stripped = texto.strip()
    only_symbols = len(stripped) <= 2 and re.search(r'[a-zA-Z0-9]', stripped) is None
    return only_symbols or stripped.lower() in (".", "vc", "ah", "ok", "hm", "ta")
# (hint label, pattern) pairs, evaluated in this order against the lower-cased
# quoted text. Plain words use word boundaries so that they no longer fire
# inside unrelated words ('tu' in "tudo", 'acho' in "cachorro", 'eita' in
# "receita", 'o que' in "acho que"). Laughter, emojis and the bot name are
# still matched anywhere, since they are commonly stretched ("kkkkk").
_CONTEXT_HINT_PATTERNS = (
    ("pergunta_sobre_akira", re.compile(r"akira|\b(?:bot|você|vc|tu)\b")),
    ("pergunta_factual", re.compile(r"\b(?:oq|o\s+que|qual|quantos?|a?onde|quando)\b")),
    ("tom_irreverente", re.compile(r"kkk|haha|😂|🤣|\beita\b")),
    ("expressao_opiniao", re.compile(r"\b(?:acho|penso|creio|imagino)\b")),
)


def extrair_context_hint(quoted_text: str, mensagem_atual: str) -> str:
    """Derive a context hint from the quoted text of a reply.

    Args:
        quoted_text: Original quoted text (may be empty or None).
        mensagem_atual: Current user message. Accepted for interface
            compatibility; it is not inspected.

    Returns:
        The matching hint labels joined by ``" | "`` in a fixed order, or
        ``"contexto_geral"`` when nothing matches.
    """
    quoted_lower = quoted_text.lower() if quoted_text else ""
    hints = [
        label
        for label, pattern in _CONTEXT_HINT_PATTERNS
        if pattern.search(quoted_lower)
    ]
    return " | ".join(hints) if hints else "contexto_geral"
def calcular_prioridade(
    is_reply: bool,
    reply_to_bot: bool,
    mensagem: str,
    quoted_text: str = ""
) -> Tuple[int, float]:
    """Compute the priority level and importance weight of a message.

    Args:
        is_reply: Whether the message is a reply.
        reply_to_bot: Whether the reply targets the bot.
        mensagem: Current user message; only its "short question" status is used.
        quoted_text: Quoted text. Accepted but not used by the calculation.

    Returns:
        Tuple ``(priority_level, importancia)``.
    """
    if is_reply and reply_to_bot:
        # A short question answering the bot gets the highest priority.
        if is_pergunta_curta(mensagem):
            return PRIORITY_REPLY_TO_BOT_SHORT_QUESTION, IMPORTANCIA_PERGUNTA_CURTA_REPLY
        return PRIORITY_REPLY_TO_BOT, IMPORTANCIA_REPLY_TO_BOT

    if is_reply:
        # Reply addressed to another participant.
        return PRIORITY_REPLY, IMPORTANCIA_REPLY

    return PRIORITY_NORMAL, 1.0
-# ============================================================
-# CLASSE PRINCIPAL
-# ============================================================
class ReplyContextHandler:
    """Process reply metadata and decide how much weight a reply gets in the prompt.

    Responsibilities:
    - normalise reply metadata coming from the request payload
    - compute priority level and importance weight of a reply
    - build the prompt section that describes the quoted message
    - record replies in the optional short-term memory
    - split the token budget between reply context and general context

    Type names that come from optional imports are quoted in annotations so
    that the class can still be defined when those imports failed.
    """

    # Used when the config module is missing or does not define BOT_NUMERO.
    _DEFAULT_BOT_NUMERO = '37839265886398'
    # Author name used whenever the quoted message belongs to the bot itself.
    _BOT_AUTHOR_NAME = "Akira (você mesmo)"
    # Placeholder author names that carry no real attribution (lower-case).
    _UNKNOWN_AUTHOR_NAMES = frozenset({'desconhecido', 'unknown'})
    # Substrings suggesting that the quoted text was written by the bot.
    _BOT_TEXT_PATTERNS = ('akira:', 'eu sou', 'eu sou a akira', 'sou um bot', 'oi!', 'eae!')
    _SYSTEM_CLOSE_TAG = "[/SYSTEM]"

    def __init__(self, short_term_memory: Optional["ShortTermMemory"] = None):
        """Create the handler.

        Args:
            short_term_memory: Optional ShortTermMemory instance that receives
                every processed reply.
        """
        self.short_term_memory = short_term_memory
        self.lstm_extension = None  # set later through enable_lstm()
        logger.debug("✅ ReplyContextHandler inicializado")

    def enable_lstm(self, lstm_ext: Any) -> None:
        """Attach the LSTM extension to this handler."""
        self.lstm_extension = lstm_ext
        logger.debug("✅ LSTM enabled em ReplyContextHandler")

    # ------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------
    @classmethod
    def _bot_numero(cls) -> Any:
        """Return the bot's own id from config, or the built-in fallback.

        ``config`` is looked up through ``globals()`` because the import
        fallbacks at the top of the module may leave it as None or undefined.
        """
        return getattr(globals().get('config'), 'BOT_NUMERO', cls._DEFAULT_BOT_NUMERO)

    @staticmethod
    def _extract_pure_number(id_str: Any) -> str:
        """Return the bare id from values such as ``'lid_123456'`` or ``'123456'``."""
        if not id_str:
            return ''
        if isinstance(id_str, str) and id_str.startswith('lid_'):
            return id_str[4:]
        return str(id_str)

    @classmethod
    def _is_unknown_author(cls, name: str) -> bool:
        """Tell whether ``name`` is empty or one of the placeholder names."""
        return not name or name.lower() in cls._UNKNOWN_AUTHOR_NAMES

    @staticmethod
    def _preview(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` chars, adding "..." only when cut."""
        return text[:limit] + ("..." if len(text) > limit else "")

    # ------------------------------------------------------------
    # Main processing
    # ------------------------------------------------------------
    def process_reply(
        self,
        mensagem: str,
        reply_metadata: Dict[str, Any],
        historico_geral: Optional[List[Dict[str, Any]]] = None
    ) -> "ProcessedReplyContext":
        """Turn raw reply metadata into a ProcessedReplyContext.

        Author attribution is only attempted for actual replies, in this order:
        1. the quoted author id equals the bot id -> reply to the bot;
        2. the payload already flags ``reply_to_bot``;
        3. the author is unknown and the quoted text looks like a bot message;
        4. the author is unknown, there is quoted text and history -> the
           author is recorded as ``"mensagem_anterior"``;
        5. otherwise an unknown author becomes ``"participante_desconhecido"``.

        Args:
            mensagem: Current user message.
            reply_metadata: Reply metadata taken from the payload.
            historico_geral: General history (only its presence is checked).

        Returns:
            The processed context. When a short-term memory is attached and
            the message is a reply, the message is also stored there.
        """
        is_reply = reply_metadata.get('is_reply', False)
        reply_to_bot = reply_metadata.get('reply_to_bot', False)
        quoted_author_name = reply_metadata.get('quoted_author_name', '')
        quoted_author_numero = reply_metadata.get('quoted_author_numero', '')
        quoted_text_original = reply_metadata.get('quoted_text_original', '')
        mensagem_citada = reply_metadata.get('mensagem_citada', '') or quoted_text_original

        if is_reply:
            bot_numero = self._bot_numero()

            # Self-reply recognition: the quoted message was sent by the bot.
            quoted_author_pure = self._extract_pure_number(quoted_author_numero)
            if quoted_author_pure and quoted_author_pure == self._extract_pure_number(bot_numero):
                logger.info(f"🔄 [REPLY AO BOT] Usuário está respondendo a uma mensagem da Akira ({quoted_author_pure}).")
                reply_to_bot = True

            # Unknown author: fall back to content/history heuristics.
            if not reply_to_bot and self._is_unknown_author(quoted_author_name):
                quoted_lower = quoted_text_original.lower() if quoted_text_original else ""
                if any(p in quoted_lower for p in self._BOT_TEXT_PATTERNS):
                    reply_to_bot = True
                elif mensagem_citada and historico_geral:
                    quoted_author_name = "mensagem_anterior"
                    quoted_author_numero = "unknown"

            if reply_to_bot:
                # A reply to the bot always carries the bot's own identity,
                # whatever name/number came in the payload.
                quoted_author_name = self._BOT_AUTHOR_NAME
                quoted_author_numero = bot_numero
            elif self._is_unknown_author(quoted_author_name):
                quoted_author_name = "participante_desconhecido"

        priority_level, importancia = calcular_prioridade(
            is_reply=is_reply,
            reply_to_bot=reply_to_bot,
            mensagem=mensagem,
            quoted_text=quoted_text_original
        )
        context_hint = extrair_context_hint(quoted_text_original, mensagem)
        adaptive_multiplier = self._calculate_adaptive_multiplier(
            mensagem=mensagem,
            is_reply=is_reply,
            priority_level=priority_level
        )
        should_prioritize = is_reply and priority_level >= PRIORITY_REPLY
        prompt_section = self._build_reply_prompt_section(
            mensagem=mensagem,
            mensagem_citada=mensagem_citada,
            quoted_author_name=quoted_author_name,
            reply_to_bot=reply_to_bot,
            context_hint=context_hint,
            priority_level=priority_level
        )

        reply_context = ProcessedReplyContext(
            is_reply=is_reply,
            reply_to_bot=reply_to_bot,
            priority_level=priority_level,
            quoted_author_name=quoted_author_name,
            quoted_author_numero=quoted_author_numero,
            quoted_text_original=quoted_text_original,
            mensagem_citada=mensagem_citada,
            context_hint=context_hint,
            importancia=importancia * adaptive_multiplier,
            prompt_section=prompt_section,
            should_prioritize_reply=should_prioritize,
            adaptive_multiplier=adaptive_multiplier
        )

        if self.short_term_memory and is_reply:
            self.short_term_memory.add_message(
                role="user",
                content=mensagem,
                importancia=reply_context.importancia,
                reply_info={
                    "is_reply": True,
                    "reply_to_bot": reply_to_bot,
                    "quoted_text_original": quoted_text_original,
                    "priority_level": priority_level
                }
            )
        return reply_context

    def _calculate_adaptive_multiplier(
        self,
        mensagem: str,
        is_reply: bool,
        priority_level: int
    ) -> float:
        """Return the importance multiplier derived from the message length.

        Short replies depend more on the quoted context, so they get a larger
        multiplier; a bare acknowledgement gets a reduced one.

        Returns:
            0.5 for an empty/acknowledgement message of up to two words,
            1.5 for other messages of up to two words, 1.3 up to
            ``PERGUNTA_CURTA_LIMITE`` words, otherwise 1.2 / 1.1 / 1.0
            depending on the priority level. Non-replies always get 1.0.
        """
        if not is_reply:
            return 1.0

        word_count = contar_palavras(mensagem)
        if word_count <= 2:
            # Guard against the bot elaborating on the quoted text when the
            # user only acknowledged it.
            if is_mensagem_vazia_ou_reconhecimento(mensagem):
                return 0.5
            return 1.5
        if word_count <= PERGUNTA_CURTA_LIMITE:
            return 1.3

        if priority_level == PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
            return 1.2
        if priority_level == PRIORITY_REPLY_TO_BOT:
            return 1.1
        return 1.0

    def _build_reply_prompt_section(
        self,
        mensagem: str,
        mensagem_citada: str,
        quoted_author_name: str,
        reply_to_bot: bool,
        context_hint: str,
        priority_level: int
    ) -> str:
        """Build the prompt section that describes the quoted message.

        Returns an empty string when there is no quoted text. ``context_hint``
        is accepted for interface compatibility and is not rendered.
        """
        if not mensagem_citada:
            return ""

        sections = []
        if priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
            sections.append("[REPLY CRÍTICO]")
        elif reply_to_bot:
            sections.append("[REPLY AO BOT]")

        if reply_to_bot:
            sections.append(f"Você citou anteriormente: \"{self._preview(mensagem_citada, 150)}\"")
            if is_mensagem_vazia_ou_reconhecimento(mensagem):
                sections.append("💡 NOTA: Apenas reconhecimento. Não repita o contexto.")
            else:
                sections.append("💡 Responda ao comentário do usuário sobre sua fala anterior sem narrar o processo.")
        else:
            # The ellipsis is only added when the quote was actually truncated.
            sections.append(f"Respondendo a {quoted_author_name}: \"{self._preview(mensagem_citada, 100)}\"")
        return "\n".join(sections)

    def prioritize_reply_context(
        self,
        prompt: str,
        reply_context: "ProcessedReplyContext",
        historico_geral: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Inject the reply section into ``prompt`` with high priority.

        The block goes right after the ``[/SYSTEM]`` closing tag (and the
        newline following it, if any) when the prompt has a ``[SYSTEM]``
        section; otherwise it is prepended. The prompt is returned unchanged
        when the context is not a reply or has no prompt section.

        Args:
            prompt: Original prompt.
            reply_context: Processed reply context.
            historico_geral: Not used; kept for interface compatibility.
        """
        if not reply_context.is_reply or not reply_context.prompt_section:
            return prompt

        rule = '=' * 60
        reply_block = f"\n{rule}\n{reply_context.prompt_section}\n{rule}\n"

        if "[SYSTEM]" in prompt:
            tag_start = prompt.find(self._SYSTEM_CLOSE_TAG)
            if tag_start != -1:
                cut = tag_start + len(self._SYSTEM_CLOSE_TAG)
                # Keep a newline that directly follows the tag on the SYSTEM
                # side, but never swallow a regular character.
                if prompt[cut:cut + 1] == "\n":
                    cut += 1
                return prompt[:cut] + reply_block + prompt[cut:]
        return reply_block + "\n" + prompt

    def get_reply_summary_for_llm(self, reply_context: "ProcessedReplyContext") -> str:
        """Return a one-line summary of the reply, or "" for non-replies."""
        if not reply_context.is_reply:
            return ""

        parts = []
        if reply_context.reply_to_bot:
            parts.append("REPLY DIRETO AO BOT")
        else:
            parts.append(f"REPLY a {reply_context.quoted_author_name}")
        if reply_context.mensagem_citada:
            cited = reply_context.mensagem_citada[:100]
            parts.append(f"Citando: \"{cited}\"")
        if reply_context.priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
            parts.append("PERGUNTA CURTA - Prioridade Alta")
        return " | ".join(parts)

    def merge_reply_into_history(
        self,
        reply_context: "ProcessedReplyContext",
        history: List[Dict[str, str]]
    ) -> List[Dict[str, str]]:
        """Return ``history`` with a reply entry inserted at the beginning.

        Non-replies return ``history`` itself; otherwise a new list is built
        and the input list is not modified.
        """
        if not reply_context.is_reply:
            return history

        # The summary helper lives on the handler, not on the context object.
        content = f"[REPLY] {self.get_reply_summary_for_llm(reply_context)}"
        if reply_context.mensagem_citada:
            content += f"\n\nMensagem citada:\n{reply_context.mensagem_citada}"
        return [{"role": "user", "content": content}] + history

    def calculate_token_budget(
        self,
        reply_context: "ProcessedReplyContext",
        total_budget: int = 8000
    ) -> Tuple[int, int]:
        """Split ``total_budget`` tokens between reply and general context.

        Returns:
            Tuple ``(reply_tokens, context_tokens)``. The reply share is
            25% (max 1500) for short questions to the bot, 15% (max 1000) for
            other replies to the bot, 10% (max 800) for other replies and 0
            for non-replies.
        """
        if not reply_context.is_reply:
            return 0, total_budget

        if reply_context.priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
            reply_tokens = min(1500, int(total_budget * 0.25))
        elif reply_context.reply_to_bot:
            reply_tokens = min(1000, int(total_budget * 0.15))
        else:
            reply_tokens = min(800, int(total_budget * 0.10))
        return reply_tokens, total_budget - reply_tokens

    # ============================================================
    # API HELPERS
    # ============================================================
    @staticmethod
    def extract_reply_metadata_from_request(data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract reply metadata from an API request payload.

        Uses ``data['reply_metadata']`` when present and non-empty; otherwise
        falls back to the top-level ``mensagem_citada`` field. Without either,
        returns ``{'is_reply': False}``.

        Returns:
            Dict with every reply field filled in with a default.
        """
        reply_metadata = data.get('reply_metadata', {})
        if not reply_metadata:
            mensagem_citada = data.get('mensagem_citada', '')
            if not mensagem_citada:
                return {'is_reply': False}
            reply_metadata = {
                'is_reply': True,
                'quoted_text_original': mensagem_citada,
                'mensagem_citada': mensagem_citada
            }

        return {
            'is_reply': reply_metadata.get('is_reply', False),
            'reply_to_bot': reply_metadata.get('reply_to_bot', False),
            'quoted_author_name': reply_metadata.get('quoted_author_name', ''),
            'quoted_author_numero': reply_metadata.get('quoted_author_numero', ''),
            'quoted_type': reply_metadata.get('quoted_type', 'texto'),
            'quoted_text_original': reply_metadata.get('quoted_text_original', ''),
            'context_hint': reply_metadata.get('context_hint', ''),
            'mensagem_citada': reply_metadata.get('mensagem_citada', '')
        }

    def validate_reply_priority(self, reply_context: "ProcessedReplyContext") -> bool:
        """Check that the priority level is consistent with the reply flags.

        The short-question level depends on the user's message, which the
        context does not store, so for replies to the bot any level from
        ``PRIORITY_REPLY_TO_BOT`` upwards is accepted. (Testing the quoted
        text instead rejected valid contexts.)
        """
        if not reply_context.is_reply:
            return reply_context.priority_level == PRIORITY_NORMAL
        if reply_context.reply_to_bot:
            return reply_context.priority_level >= PRIORITY_REPLY_TO_BOT
        return reply_context.priority_level >= PRIORITY_REPLY

    def __repr__(self) -> str:
        """Short textual representation showing whether a memory is attached."""
        mem_status = "com STM" if self.short_term_memory else "sem STM"
        return f"ReplyContextHandler({mem_status})"
-# ============================================================
-# FUNÇÕES DE FÁBRICA
-# ============================================================
def criar_reply_handler(
    short_term_memory: Optional["ShortTermMemory"] = None
) -> "ReplyContextHandler":
    """Factory for ReplyContextHandler.

    The annotations are quoted so that defining this function does not raise
    NameError when the ShortTermMemory import fallback failed.

    Args:
        short_term_memory: Optional ShortTermMemory instance.

    Returns:
        A new ReplyContextHandler.
    """
    return ReplyContextHandler(short_term_memory=short_term_memory)
def processar_reply_request(
    mensagem: str,
    request_data: Dict[str, Any],
    short_term_memory: Optional["ShortTermMemory"] = None
) -> "ProcessedReplyContext":
    """Process the reply information of a request in a single call.

    Creates a handler, extracts the reply metadata from ``request_data`` and
    processes it. The annotations are quoted so that defining this function
    does not raise NameError when the ShortTermMemory import fallback failed.

    Args:
        mensagem: Current user message.
        request_data: Request payload.
        short_term_memory: Optional ShortTermMemory instance.

    Returns:
        The ProcessedReplyContext for the request.
    """
    handler = criar_reply_handler(short_term_memory)
    reply_metadata = handler.extract_reply_metadata_from_request(request_data)
    return handler.process_reply(mensagem, reply_metadata)
-# ============================================================
-# COMPATIBILIDADE — aliases para imports legados
-# ============================================================
_reply_handler_singleton = None


def get_context_handler(short_term_memory=None) -> "ReplyContextHandler":
    """Legacy alias: return the module-wide ReplyContextHandler singleton.

    The singleton is created on first use. If it was created without a
    short-term memory and a later call supplies one, the memory is attached
    instead of being silently dropped. An already attached memory is never
    replaced.
    """
    global _reply_handler_singleton
    if _reply_handler_singleton is None:
        _reply_handler_singleton = ReplyContextHandler(short_term_memory=short_term_memory)
    elif short_term_memory is not None and _reply_handler_singleton.short_term_memory is None:
        _reply_handler_singleton.short_term_memory = short_term_memory
    return _reply_handler_singleton
-# type: ignore

+# type: ignore
+"""
+================================================================================
+AKIRA V21 ULTIMATE - REPLY CONTEXT HANDLER MODULE
+================================================================================
+Sistema dedicado para processar e priorizar contexto de replies.
+Garante que replies tenham prioridade ligeiramente maior que o contexto geral,
+especialmente em perguntas curtas.
+Features:
+- Extração e processamento de metadados de reply
+- 4 níveis de prioridade (1=normal, 2=reply, 3=reply-to-bot, 4=reply-to-bot+pergunta-curta)
+- Construção de prompt sections otimizadas para replies
+- Integração com ShortTermMemory
+- Context hint extraction para melhor compreensão
+================================================================================
+"""
+import os
+import sys
+import time
+import json
+import re
+import logging
+from typing import Optional, Dict, Any, List, Tuple
+from dataclasses import dataclass, field
+# Imports robustos com fallback - CORRIGIDO para usar modules.
+try:
+    from . import config
+    from .short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
+    REPLY_HANDLER_AVAILABLE = True
+except ImportError:
+    try:
+        import modules.config as config
+        from modules.short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
+        REPLY_HANDLER_AVAILABLE = True
+    except ImportError:
+        try:
+            from short_term_memory import ShortTermMemory, MessageWithContext, IMPORTANCIA_REPLY, IMPORTANCIA_REPLY_TO_BOT, IMPORTANCIA_PERGUNTA_CURTA_REPLY
+            REPLY_HANDLER_AVAILABLE = True
+        except ImportError:
+            REPLY_HANDLER_AVAILABLE = False
+            config = None
+logger = logging.getLogger(__name__)
+# ============================================================
+# NÍVEIS DE PRIORIDADE
+# ============================================================
# Priority levels assigned to an incoming message; a larger value is more urgent.
PRIORITY_NORMAL = 1
PRIORITY_REPLY = 2
PRIORITY_REPLY_TO_BOT = 3
PRIORITY_REPLY_TO_BOT_SHORT_QUESTION = 4  # highest priority

# Maximum number of words for a message to count as a "short question".
PERGUNTA_CURTA_LIMITE: int = 5


@dataclass
class ProcessedReplyContext:
    """Reply context after processing, ready to be used by the prompt pipeline.

    Attributes:
        is_reply: Whether the message is a reply.
        reply_to_bot: Whether the reply is addressed to the bot.
        priority_level: Priority level (1-4, see the PRIORITY_* constants).
        quoted_author_name: Name of the author of the quoted message.
        quoted_author_numero: Number/id of the author of the quoted message.
        quoted_text_original: Original quoted text.
        mensagem_citada: Text of the quoted message.
        context_hint: Context hint extracted from the quoted text.
        importancia: Computed importance weight.
        prompt_section: Section already formatted for the prompt.
        should_prioritize_reply: Whether the reply should be prioritised.
        adaptive_multiplier: Multiplier derived from the message length.
    """

    is_reply: bool = False
    reply_to_bot: bool = False
    priority_level: int = PRIORITY_NORMAL
    quoted_author_name: str = ""
    quoted_author_numero: str = ""
    quoted_text_original: str = ""
    mensagem_citada: str = ""
    context_hint: str = ""
    importancia: float = 1.0
    prompt_section: str = ""
    should_prioritize_reply: bool = False
    adaptive_multiplier: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain ``{field_name: value}`` dictionary."""
        # __dataclass_fields__ preserves declaration order, so the key order
        # matches the order in which the fields are declared above.
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ProcessedReplyContext':
        """Build an instance from ``data``, ignoring keys that are not fields."""
        known_fields = cls.__dataclass_fields__
        kwargs = {}
        for key, value in data.items():
            if key in known_fields:
                kwargs[key] = value
        return cls(**kwargs)
+# ============================================================
+# FUNÇÕES AUXILIARES
+# ============================================================
def contar_palavras(texto: str) -> int:
    """Return the number of whitespace-separated words in ``texto`` (0 when empty)."""
    return len(texto.split()) if texto else 0
# Interrogative/addressing words that mark a message as a question. They are
# matched as WHOLE words: the previous substring test made 'n', 'tu' and 'que'
# match "não", "tudo" and "quero", so almost every short message was treated
# as a question. The phrases 'por que', 'para que', 'o que' and 'é o que' are
# covered by the whole word 'que'.
_INTERROGATIVE_RE = re.compile(
    r"(?<!\w)(?:qual|quais|quem|como|onde|quando|porque|que|oq|"
    r"vc|você|tu|meu|minha|n)(?!\w)"
)


def is_pergunta_curta(texto: str, limite: Optional[int] = None) -> bool:
    """Tell whether ``texto`` is a short question.

    A text qualifies when it has at most ``limite`` whitespace-separated words
    and either contains a ``?`` or contains one of the interrogative words as
    a whole word (case-insensitive).

    Args:
        texto: Text to check.
        limite: Maximum word count; defaults to ``PERGUNTA_CURTA_LIMITE``.

    Returns:
        True when the text is a question with few words, False otherwise
        (including for empty/None input).
    """
    if not texto:
        return False
    if limite is None:
        limite = PERGUNTA_CURTA_LIMITE

    # Same counting rule as contar_palavras(): whitespace-separated tokens.
    if len(texto.split()) > limite:
        return False

    if '?' in texto:
        return True
    return _INTERROGATIVE_RE.search(texto.strip().lower()) is not None
+def is_mensagem_vazia_ou_reconhecimento(texto: str) -> bool:
+    """
+    Tell whether a message is empty, bare punctuation, or a tiny acknowledgement.
+    Used to keep the bot from treating such a message as substance and
+    ending up replying to its own quoted text.
+    Returns True for: a falsy text; a stripped text of at most two characters
+    containing no ASCII letter or digit; or one of a few fixed short tokens
+    (compared case-insensitively).
+    """
+    import re
+    if not texto:
+        return True
+    stripped = texto.strip()
+    # Up to two characters with no ASCII alphanumeric, e.g. ".", "..", "!"
+    lacks_ascii_alnum = re.search(r'[a-zA-Z0-9]', stripped) is None
+    if len(stripped) <= 2 and lacks_ascii_alnum:
+        return True
+    # Short closed tokens that read as acknowledgement rather than content
+    return stripped.lower() in (".", "vc", "ah", "ok", "hm", "ta")
+def extrair_context_hint(quoted_text: str, mensagem_atual: str) -> str:
+    """
+    Derive a context hint from the quoted text.
+    Each rule below contributes its label when any of its terms occurs as a
+    substring of the lower-cased quoted text. ``mensagem_atual`` is accepted
+    but not consulted.
+    Args:
+        quoted_text: Original quoted text
+        mensagem_atual: Current user message (unused)
+    Returns:
+        Matching labels joined by " | ", or "contexto_geral" when none match
+    """
+    citado = (quoted_text or "").lower()
+    # (label, trigger terms), evaluated in this order
+    regras = (
+        ("pergunta_sobre_akira", ('akira', 'bot', 'você', 'vc', 'tu')),
+        ("pergunta_factual", ('oq', 'o que', 'qual', 'quanto', 'onde', 'quando')),
+        ("tom_irreverente", ('kkk', 'haha', '😂', '🤣', 'eita')),
+        ("expressao_opiniao", ('acho', 'penso', 'creio', 'imagino')),
+    )
+    encontrados = [
+        rotulo
+        for rotulo, termos in regras
+        if any(termo in citado for termo in termos)
+    ]
+    if not encontrados:
+        return "contexto_geral"
+    return " | ".join(encontrados)
+def calcular_prioridade(
+    is_reply: bool,
+    reply_to_bot: bool,
+    mensagem: str,
+    quoted_text: str = ""
+) -> Tuple[int, float]:
+    """
+    Compute the priority level and importance of a message.
+    Args:
+        is_reply: Whether the message is a reply
+        reply_to_bot: Whether the reply targets the bot
+        mensagem: Current message
+        quoted_text: Quoted text (accepted but not consulted)
+    Returns:
+        Tuple (priority_level, importancia)
+    """
+    if is_reply and reply_to_bot:
+        # A short question aimed at the bot gets the top priority
+        if is_pergunta_curta(mensagem):
+            return PRIORITY_REPLY_TO_BOT_SHORT_QUESTION, IMPORTANCIA_PERGUNTA_CURTA_REPLY
+        return PRIORITY_REPLY_TO_BOT, IMPORTANCIA_REPLY_TO_BOT
+    if is_reply:
+        # Reply addressed to another participant
+        return PRIORITY_REPLY, IMPORTANCIA_REPLY
+    return PRIORITY_NORMAL, 1.0
+# ============================================================
+# CLASSE PRINCIPAL
+# ============================================================
+class ReplyContextHandler:
+    """
+    Handler dedicado para processar e priorizar contexto de replies.
+    Funcionalidades:
+    - Extração de metadados de reply do payload
+    - Cálculo automático de prioridade
+    - Construção de seções de prompt otimizadas
+    - Integração com ShortTermMemory
+    - Ajuste adaptativo baseado em tamanho da pergunta
+    """
+    def __init__(self, short_term_memory: Optional[ShortTermMemory] = None):
+        """
+        Set up the handler.
+        Args:
+            short_term_memory: ShortTermMemory instance (optional)
+        """
+        # No LSTM extension until enable_lstm() provides one
+        self.lstm_extension = None
+        self.short_term_memory = short_term_memory
+        logger.debug("✅ ReplyContextHandler inicializado")
+    def enable_lstm(self, lstm_ext: Any) -> None:
+        """Store ``lstm_ext`` as this handler's LSTM extension and log that it was enabled."""
+        self.lstm_extension = lstm_ext
+        logger.debug("✅ LSTM enabled em ReplyContextHandler")
+    def process_reply(
+        self,
+        mensagem: str,
+        reply_metadata: Dict[str, Any],
+        historico_geral: Optional[List[Dict[str, Any]]] = None
+    ) -> ProcessedReplyContext:
+        """
+        Turn raw reply metadata into a ProcessedReplyContext.
+        Resolves who was quoted (forcing the bot identity when the reply
+        targets the bot), computes priority and importance, builds the prompt
+        section and, when a short-term memory is attached and the message is
+        a reply, records the message there.
+        Args:
+            mensagem: Current user message
+            reply_metadata: Reply metadata taken from the request payload
+            historico_geral: General history (optional); only its truthiness
+                is used, to label an unknown author as "mensagem_anterior"
+        Returns:
+            ProcessedReplyContext ready for use
+        """
+        # Raw fields from the payload
+        is_reply = reply_metadata.get('is_reply', False)
+        reply_to_bot = reply_metadata.get('reply_to_bot', False)
+        quoted_author_name = reply_metadata.get('quoted_author_name', '')
+        quoted_author_numero = reply_metadata.get('quoted_author_numero', '')
+        quoted_text_original = reply_metadata.get('quoted_text_original', '')
+        mensagem_citada = reply_metadata.get('mensagem_citada', '') or quoted_text_original
+        def extract_pure_number(id_str: str) -> str:
+            """Return the bare number from 'lid_123456' or '123456' ('' when empty)."""
+            if not id_str:
+                return ''
+            if isinstance(id_str, str) and id_str.startswith('lid_'):
+                return id_str[4:]
+            return str(id_str)
+        # Resolve the bot number once, with a single fallback, so a config
+        # without BOT_NUMERO cannot raise AttributeError further down.
+        bot_numero = getattr(config, 'BOT_NUMERO', '37839265886398')
+        # Self-reply recognition: is the quoted author the bot itself?
+        quoted_author_pure = extract_pure_number(quoted_author_numero)
+        bot_id_pure = extract_pure_number(bot_numero)
+        is_quoted_from_bot = bool(quoted_author_pure) and quoted_author_pure == bot_id_pure
+        if is_quoted_from_bot and is_reply:
+            logger.info(f"🔄 [REPLY AO BOT] Usuário está respondendo a uma mensagem da Akira ({quoted_author_pure}).")
+            reply_to_bot = True
+        if is_reply and reply_to_bot:
+            # A reply to the bot always carries the bot identity, whatever
+            # name/number came in the payload.
+            quoted_author_name = "Akira (você mesmo)"
+            quoted_author_numero = bot_numero
+        elif not quoted_author_name or quoted_author_name.lower() in ['desconhecido', 'unknown', '']:
+            # Unknown author: try to infer it from the quoted content
+            quoted_lower = quoted_text_original.lower() if quoted_text_original else ""
+            bot_patterns = ['akira:', 'eu sou', 'eu sou a akira', 'sou um bot', 'oi!', 'eae!']
+            if any(p in quoted_lower for p in bot_patterns):
+                quoted_author_name = "Akira (você mesmo)"
+                quoted_author_numero = bot_numero
+                reply_to_bot = True
+            elif mensagem_citada and historico_geral:
+                # There is a quote and some history: attribute to the previous message
+                quoted_author_name = "mensagem_anterior"
+                quoted_author_numero = "unknown"
+        # Still no author on a reply: every path that sets reply_to_bot above
+        # also sets the author name, so only the non-bot case can reach here.
+        if is_reply and (not quoted_author_name or quoted_author_name == 'desconhecido'):
+            quoted_author_name = "participante_desconhecido"
+        # Priority and importance
+        priority_level, importancia = calcular_prioridade(
+            is_reply=is_reply,
+            reply_to_bot=reply_to_bot,
+            mensagem=mensagem,
+            quoted_text=quoted_text_original
+        )
+        # Context hint
+        context_hint = extrair_context_hint(quoted_text_original, mensagem)
+        # Adaptive multiplier
+        adaptive_multiplier = self._calculate_adaptive_multiplier(
+            mensagem=mensagem,
+            is_reply=is_reply,
+            priority_level=priority_level
+        )
+        # Whether the reply should be prioritised in the prompt
+        should_prioritize = is_reply and priority_level >= PRIORITY_REPLY
+        # Prompt section
+        prompt_section = self._build_reply_prompt_section(
+            mensagem=mensagem,
+            mensagem_citada=mensagem_citada,
+            quoted_author_name=quoted_author_name,
+            reply_to_bot=reply_to_bot,
+            context_hint=context_hint,
+            priority_level=priority_level
+        )
+        reply_context = ProcessedReplyContext(
+            is_reply=is_reply,
+            reply_to_bot=reply_to_bot,
+            priority_level=priority_level,
+            quoted_author_name=quoted_author_name,
+            quoted_author_numero=quoted_author_numero,
+            quoted_text_original=quoted_text_original,
+            mensagem_citada=mensagem_citada,
+            context_hint=context_hint,
+            importancia=importancia * adaptive_multiplier,
+            prompt_section=prompt_section,
+            should_prioritize_reply=should_prioritize,
+            adaptive_multiplier=adaptive_multiplier
+        )
+        # Record the message in short-term memory when one is attached
+        if self.short_term_memory and is_reply:
+            self.short_term_memory.add_message(
+                role="user",
+                content=mensagem,
+                importancia=reply_context.importancia,
+                reply_info={
+                    "is_reply": True,
+                    "reply_to_bot": reply_to_bot,
+                    "quoted_text_original": quoted_text_original,
+                    "priority_level": priority_level
+                }
+            )
+        return reply_context
+    def _calculate_adaptive_multiplier(
+        self,
+        mensagem: str,
+        is_reply: bool,
+        priority_level: int
+    ) -> float:
+        """
+        Compute the adaptive importance multiplier from the message length.
+        Short replies raise the weight of the quoted context so the LLM has
+        enough to go on; an empty/acknowledgement-only reply lowers it.
+        Args:
+            mensagem: Current message
+            is_reply: Whether the message is a reply
+            priority_level: Priority level
+        Returns:
+            One of 0.5, 1.0, 1.1, 1.2, 1.3 or 1.5
+        """
+        if not is_reply:
+            return 1.0
+        total_palavras = contar_palavras(mensagem)
+        if total_palavras <= 2:
+            # Guard against self-reply: bare acknowledgements get less weight
+            return 0.5 if is_mensagem_vazia_ou_reconhecimento(mensagem) else 1.5
+        if total_palavras <= PERGUNTA_CURTA_LIMITE:
+            return 1.3
+        # Longer message: fall back to a priority-based multiplier
+        if priority_level == PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            return 1.2
+        return 1.1 if priority_level == PRIORITY_REPLY_TO_BOT else 1.0
+    def _build_reply_prompt_section(
+        self,
+        mensagem: str,
+        mensagem_citada: str,
+        quoted_author_name: str,
+        reply_to_bot: bool,
+        context_hint: str,
+        priority_level: int
+    ) -> str:
+        """
+        Build the formatted prompt section describing a reply.
+        Args:
+            mensagem: Current message
+            mensagem_citada: Quoted text
+            quoted_author_name: Name of the quoted author
+            reply_to_bot: Whether the reply targets the bot
+            context_hint: Context hint
+            priority_level: Priority level
+        Returns:
+            Newline-joined section, or "" when there is no quoted text
+        """
+        if not mensagem_citada:
+            return ""
+        linhas = []
+        # Header reflecting the priority level
+        if priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            linhas.append("[🔴 REPLY CRÍTICO - PERGUNTA CURTA]")
+        elif priority_level == PRIORITY_REPLY_TO_BOT:
+            linhas.append("[🟡 REPLY AO BOT]")
+        elif priority_level == PRIORITY_REPLY:
+            linhas.append("[🟢 REPLY]")
+        # Quoted content: 200-char preview for replies to the bot, 150 otherwise
+        limite = 200 if reply_to_bot else 150
+        sufixo = "..." if len(mensagem_citada) > limite else ""
+        quoted_preview = mensagem_citada[:limite] + sufixo
+        if reply_to_bot:
+            linhas.append("⚠️ O USUÁRIO ESTÁ RESPONDENDO DIRETAMENTE A VOCÊ!")
+            linhas.append(f"O usuário citou sua mensagem anterior: \"{quoted_preview}\"")
+        else:
+            linhas.append(f"Respondendo a: {quoted_author_name}")
+            linhas.append(f"Msg citada: \"{quoted_preview}\"")
+        # Context hint, unless it is the generic one
+        if context_hint and context_hint != "contexto_geral":
+            linhas.append(f"Contexto: {context_hint}")
+        # Answering instruction, chosen by situation
+        if reply_to_bot and is_mensagem_vazia_ou_reconhecimento(mensagem):
+            linhas.append(f"🛑 [ALERTA DE SELF-REPLY]: O usuário respondeu APENAS com '{mensagem}' à sua mensagem. Isso é apenas um reconhecimento ou sinal. NÃO responda a si mesma, NÃO repita a sua mensagem. Reaja com sarcasmo, naturalidade ou pergunte o que o usuário quer dizer.")
+        elif priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            linhas.append("💡 RESPONSE: Responda SILENCIOSAMENTE ao contexto. Jamais use prefixos de IA ou narre que está respondendo. Vá direto ao ponto.")
+        elif reply_to_bot:
+            linhas.append("💡 RESPONSE: Você foi citada. NUNCA comece com 'Ah', 'Então', 'Vejo' ou narre o reply. Mate o prefixo e responda direto.")
+        return "\n".join(linhas)
+    def prioritize_reply_context(
+        self,
+        prompt: str,
+        reply_context: ProcessedReplyContext,
+        historico_geral: Optional[List[Dict[str, Any]]] = None
+    ) -> str:
+        """
+        Inject the reply context into the prompt with high priority.
+        The reply block goes right after the "[/SYSTEM]" closing tag (and the
+        single newline following it, if any) when the prompt has a "[SYSTEM]"
+        section; otherwise it is prepended to the prompt.
+        Args:
+            prompt: Original prompt
+            reply_context: Processed reply context
+            historico_geral: General history (optional, not consulted)
+        Returns:
+            Prompt enriched with the reply context, or the prompt unchanged
+            when there is no reply or no prompt section
+        """
+        if not reply_context.is_reply or not reply_context.prompt_section:
+            return prompt
+        separador = '=' * 60
+        reply_block = f"\n{separador}\n{reply_context.prompt_section}\n{separador}\n"
+        closing_tag = "[/SYSTEM]"
+        if "[SYSTEM]" in prompt:
+            tag_start = prompt.find(closing_tag)
+            if tag_start != -1:
+                # Insert after the tag itself; step over one trailing newline
+                # only when it is really there, so no following text ends up
+                # on the wrong side of the block.
+                insert_at = tag_start + len(closing_tag)
+                if prompt.startswith("\n", insert_at):
+                    insert_at += 1
+                return prompt[:insert_at] + reply_block + prompt[insert_at:]
+        # No SYSTEM section: put the block at the very beginning
+        return reply_block + "\n" + prompt
+    def get_reply_summary_for_llm(self, reply_context: ProcessedReplyContext) -> str:
+        """
+        Produce a one-line summary of the reply for the LLM context.
+        Args:
+            reply_context: Processed reply context
+        Returns:
+            Parts joined by " | ", or "" when the message is not a reply
+        """
+        if not reply_context.is_reply:
+            return ""
+        if reply_context.reply_to_bot:
+            alvo = "REPLY DIRETO AO BOT"
+        else:
+            alvo = f"REPLY a {reply_context.quoted_author_name}"
+        partes = [alvo]
+        citada = reply_context.mensagem_citada
+        if citada:
+            # Only the first 100 characters of the quote are shown
+            trecho = citada[:100]
+            partes.append(f"Citando: \"{trecho}\"")
+        if reply_context.priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            partes.append("PERGUNTA CURTA - Prioridade Alta")
+        return " | ".join(partes)
+    def merge_reply_into_history(
+        self,
+        reply_context: ProcessedReplyContext,
+        history: List[Dict[str, str]]
+    ) -> List[Dict[str, str]]:
+        """
+        Merge the reply context into the history handed to the LLM.
+        Args:
+            reply_context: Processed reply context
+            history: History already formatted for the LLM
+        Returns:
+            A new list with the reply entry first, or ``history`` itself when
+            the message is not a reply
+        """
+        if not reply_context.is_reply:
+            return history
+        # The summary helper lives on this handler, not on the context object
+        content = f"[REPLY] {self.get_reply_summary_for_llm(reply_context)}"
+        # Append the full quoted text when available
+        if reply_context.mensagem_citada:
+            content += f"\n\nMensagem citada:\n{reply_context.mensagem_citada}"
+        reply_entry = {"role": "user", "content": content}
+        return [reply_entry] + history
+    def calculate_token_budget(
+        self,
+        reply_context: ProcessedReplyContext,
+        total_budget: int = 8000
+    ) -> Tuple[int, int]:
+        """
+        Split the token budget between the reply and the general context.
+        Args:
+            reply_context: Reply context
+            total_budget: Total tokens available
+        Returns:
+            Tuple (tokens_for_reply, tokens_for_context)
+        """
+        if not reply_context.is_reply:
+            return 0, total_budget
+        # (absolute cap, share of the budget) per situation
+        if reply_context.priority_level >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            teto, fracao = 1500, 0.25
+        elif reply_context.reply_to_bot:
+            teto, fracao = 1000, 0.15
+        else:
+            teto, fracao = 800, 0.10
+        reply_tokens = min(teto, int(total_budget * fracao))
+        return reply_tokens, total_budget - reply_tokens
+    # ============================================================
+    # HELPERS PARA API
+    # ============================================================
+    @staticmethod
+    def extract_reply_metadata_from_request(data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Extract reply metadata from an API request payload.
+        Uses ``data['reply_metadata']`` when present and non-empty; otherwise
+        falls back to a top-level ``mensagem_citada`` field.
+        Args:
+            data: Request payload
+        Returns:
+            Dict with the reply metadata, or {'is_reply': False} when the
+            payload carries neither
+        """
+        meta = data.get('reply_metadata', {})
+        if not meta:
+            citada = data.get('mensagem_citada', '')
+            if not citada:
+                return {'is_reply': False}
+            meta = {
+                'is_reply': True,
+                'quoted_text_original': citada,
+                'mensagem_citada': citada
+            }
+        # Every expected field, with the default used when it is missing
+        campos = (
+            ('is_reply', False),
+            ('reply_to_bot', False),
+            ('quoted_author_name', ''),
+            ('quoted_author_numero', ''),
+            ('quoted_type', 'texto'),
+            ('quoted_text_original', ''),
+            ('context_hint', ''),
+            ('mensagem_citada', ''),
+        )
+        return {campo: meta.get(campo, padrao) for campo, padrao in campos}
+    def validate_reply_priority(
+        self,
+        reply_context: ProcessedReplyContext,
+        mensagem: Optional[str] = None
+    ) -> bool:
+        """
+        Check that the priority stored in ``reply_context`` is consistent.
+        The short-question rule of ``calcular_prioridade`` is based on the
+        user's current message, which the context does not store. Pass it via
+        ``mensagem`` for an exact check; without it, any reply-to-bot level
+        (normal or short-question) is accepted.
+        Args:
+            reply_context: Context to validate
+            mensagem: Current user message the priority was computed from (optional)
+        Returns:
+            True if valid
+        """
+        nivel = reply_context.priority_level
+        if not reply_context.is_reply:
+            return nivel == PRIORITY_NORMAL
+        if reply_context.reply_to_bot:
+            if mensagem is None:
+                # Cannot tell which of the two bot levels applies
+                return nivel >= PRIORITY_REPLY_TO_BOT
+            if is_pergunta_curta(mensagem):
+                return nivel == PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
+            return nivel == PRIORITY_REPLY_TO_BOT
+        # Plain reply to another participant
+        return nivel >= PRIORITY_REPLY
+    def __repr__(self) -> str:
+        """Return a short description showing whether a short-term memory is attached."""
+        if self.short_term_memory:
+            mem_status = "com STM"
+        else:
+            mem_status = "sem STM"
+        return "ReplyContextHandler(" + mem_status + ")"
+# ============================================================
+# FUNÇÕES DE FÁBRICA
+# ============================================================
+def criar_reply_handler(
+    short_term_memory: Optional[ShortTermMemory] = None
+) -> ReplyContextHandler:
+    """
+    Factory for ReplyContextHandler.
+    Args:
+        short_term_memory: ShortTermMemory instance (optional)
+    Returns:
+        A new ReplyContextHandler
+    """
+    handler = ReplyContextHandler(short_term_memory=short_term_memory)
+    return handler
+def processar_reply_request(
+    mensagem: str,
+    request_data: Dict[str, Any],
+    short_term_memory: Optional[ShortTermMemory] = None
+) -> ProcessedReplyContext:
+    """
+    Convenience helper: build a handler, extract the reply metadata from the
+    request and process it in one call.
+    Args:
+        mensagem: Current message
+        request_data: Request payload
+        short_term_memory: ShortTermMemory instance (optional)
+    Returns:
+        ProcessedReplyContext
+    """
+    handler = criar_reply_handler(short_term_memory)
+    return handler.process_reply(
+        mensagem,
+        handler.extract_reply_metadata_from_request(request_data)
+    )
+# ============================================================
+# COMPATIBILIDADE — aliases para imports legados
+# ============================================================
+_reply_handler_singleton = None
+def get_context_handler(short_term_memory=None) -> ReplyContextHandler:
+    """
+    Legacy alias returning the module-wide ReplyContextHandler singleton.
+    The singleton is created on the first call; ``short_term_memory`` is only
+    used at that moment and is ignored on later calls.
+    """
+    global _reply_handler_singleton
+    if _reply_handler_singleton is not None:
+        return _reply_handler_singleton
+    _reply_handler_singleton = ReplyContextHandler(short_term_memory=short_term_memory)
+    return _reply_handler_singleton
+# type: ignore

modules/self_awareness.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""
+Self-Awareness Module - Permite IA reconhecer erros e responder a crítica.
+Criado como parte da Fase 3: Self-Aware Correction
+Data: 2026-05-15
+"""
+import re
+from datetime import datetime
+from typing import Dict, Optional, Tuple
+from loguru import logger
+class SelfAwarenessEngine:
+    """Detects criticism of a previous answer and builds self-correction replies."""
+    def __init__(self):
+        """Set up the regex patterns, acknowledgement phrases and the per-user error log."""
+        self.logger = logger
+        # user_id -> list of {"original", "correction", "timestamp"} records
+        self.error_memory = {}
+        # Regexes (applied to the lower-cased message) that signal direct criticism
+        self.criticism_patterns = [
+            r"(?:isso|isso que|que)\s+(?:você\s+)?(?:disse|falou|escreveu)\s+(?:é\s+)?(?:errado|falso|mentira)",
+            r"(?:você\s+)?(?:errou|enganou|enganaste)",
+            r"(?:tá|está)\s+(?:errado|mal|falso)",
+            r"(?:não|n[ã\/]o)\s+(?:é|foi)\s+(?:assim|verdade|correto)",
+        ]
+        # Opening sentences; one is picked at random for each correction
+        self.error_acknowledgment = [
+            "Você tem razão, cometi erro.",
+            "Admito que estava errado.",
+            "Obrigado pela correção, você está certo.",
+            "Eu me equivoquei naquilo.",
+        ]
+    def detect_criticism(self, mensagem: str) -> Tuple[bool, Optional[str]]:
+        """
+        Detect whether a message criticises a previous answer.
+        Args:
+            mensagem: User message (empty or None is treated as no criticism)
+        Returns:
+            (has_criticism, kind) where kind is "direct_criticism",
+            "implicit_correction", or None when nothing was detected
+        """
+        if not mensagem:
+            return False, None
+        mensagem_lower = mensagem.lower()
+        if any(re.search(pattern, mensagem_lower) for pattern in self.criticism_patterns):
+            return True, "direct_criticism"
+        # Softer wording that still implies the previous answer was off
+        if any(phrase in mensagem_lower for phrase in ("na verdade", "corrigindo", "melhor seria")):
+            return True, "implicit_correction"
+        return False, None
+    def generate_self_correction_response(
+        self,
+        original_response: str,
+        correction: str,
+        user_id: str
+    ) -> str:
+        """
+        Build a reply that acknowledges the mistake and states the correction.
+        Also appends a record of the error to ``error_memory[user_id]``.
+        Args:
+            original_response: The answer that was wrong
+            correction: The corrected content
+            user_id: Key under which the error is recorded
+        Returns:
+            The acknowledgement text
+        """
+        import random
+        ack = random.choice(self.error_acknowledgment)
+        response = (
+            f"{ack}\n\n"
+            f"Então ficaria: {correction}\n\n"
+            f"Obrigado por me manter preciso. É assim que melhoro."
+        )
+        self.error_memory.setdefault(user_id, []).append({
+            "original": original_response,
+            "correction": correction,
+            "timestamp": datetime.now().isoformat()
+        })
+        self.logger.info(f"📝 [SELF-AWARE] Erro registrado para {user_id}")
+        return response
+# Instância global
+self_awareness_engine = SelfAwarenessEngine()

modules/sender_attribution_fix.py ADDED Viewed

	@@ -0,0 +1,55 @@

+"""
+Monkey-patch for sender attribution bug fix in modules/api.py
+This module patches the akira_endpoint to properly validate and reconstruct sender names
+"""
+import sys
+from functools import wraps
+def patch_akira_api():
+    """
+    Monkey-patch ``modules.api.get_blueprint`` so the 'akira_endpoint' view is wrapped.
+    The wrapper currently just delegates to the original endpoint; it is the
+    hook where sender validation is meant to live. Patching is idempotent:
+    neither ``get_blueprint`` nor the endpoint is wrapped twice.
+    Returns:
+        True when the patch is in place, False when it could not be applied
+    """
+    try:
+        from modules import api
+        original_get_blueprint = api.get_blueprint
+        # Already patched (e.g. module imported and function called again)
+        if getattr(original_get_blueprint, '_sender_attribution_patched', False):
+            return True
+        def patched_get_blueprint():
+            """Return the original blueprint with 'akira_endpoint' wrapped when possible."""
+            bp = original_get_blueprint()
+            # This runs later, outside patch_akira_api's try block, so it must
+            # never raise: a failure here would break get_blueprint() itself.
+            # NOTE(review): the previous code iterated `bp.defsurl_map`, which
+            # looks like a typo and raised AttributeError on every call; only
+            # `view_functions` is needed, and it is looked up defensively
+            # because the object returned by get_blueprint may not expose it.
+            try:
+                view_functions = getattr(bp, 'view_functions', None)
+                original_endpoint = view_functions.get('akira_endpoint') if view_functions else None
+                if original_endpoint and not getattr(original_endpoint, '_sender_attribution_patched', False):
+                    @wraps(original_endpoint)
+                    def patched_akira_endpoint(*args, **kwargs):
+                        return original_endpoint(*args, **kwargs)
+                    patched_akira_endpoint._sender_attribution_patched = True
+                    view_functions['akira_endpoint'] = patched_akira_endpoint
+            except Exception as e:
+                print(f"⚠️ Failed to patch akira_endpoint: {e}")
+            return bp
+        patched_get_blueprint._sender_attribution_patched = True
+        # Replace the function
+        api.get_blueprint = patched_get_blueprint
+        print("✅ Sender attribution fix monkey-patch applied to modules.api")
+        return True
+    except Exception as e:
+        print(f"⚠️ Failed to apply monkey-patch: {e}")
+        return False
+# Import-time side effect: importing this module applies the patch immediately.
+# patch_akira_api() reports its own failures; this outer guard only exists so
+# that an unexpected error can never make the import itself fail.
+try:
+    patch_akira_api()
+except Exception as e:
+    print(f"Error during auto-patch: {e}")

modules/short_term_memory.py CHANGED Viewed

@@ -71,6 +71,7 @@ class MessageWithContext:
         emocao: Emoção detectada
         reply_info: Info sobre reply (se aplicável)
         conversation_id: ID da conversa isolada
         token_count: Contagem aproximada de tokens
     """
     role: str
@@ -80,6 +81,7 @@ class MessageWithContext:
     emocao: str = "neutro"
     reply_info: Dict[str, Any] = field(default_factory=dict)
     conversation_id: str = ""
     token_count: int = 0
     def to_dict(self) -> Dict[str, Any]:
@@ -92,6 +94,7 @@ class MessageWithContext:
             "emocao": self.emocao,
             "reply_info": self.reply_info,
             "conversation_id": self.conversation_id,
             "token_count": self.token_count
         }
@@ -106,6 +109,7 @@ class MessageWithContext:
             emocao=data.get("emocao", "neutral"),
             reply_info=data.get("reply_info", {}),
             conversation_id=data.get("conversation_id", ""),
             token_count=data.get("token_count", 0)
         )
@@ -274,6 +278,7 @@ class ShortTermMemory:
         importancia: float = IMPORTANCIA_NORMAL,
         emocao: str = "neutro",
         reply_info: Optional[Dict[str, Any]] = None,
         metadata: Optional[Dict[str, Any]] = None
     ) -> MessageWithContext:
         """
@@ -298,6 +303,7 @@ class ShortTermMemory:
             emocao=emocao,
             reply_info=reply_info or {},
             conversation_id=self.conversation_id,
             token_count=estimar_tokens(content)
         )
@@ -323,6 +329,7 @@ class ShortTermMemory:
     def add_user_message(
         self,
         content: str,
         emocao: str = "neutral",
         reply_info: Optional[Dict[str, Any]] = None,
         importancia: float = None
@@ -350,6 +357,7 @@ class ShortTermMemory:
         return self.add_message(
             role="user",
             content=content,
             importancia=importancia,
             emocao=emocao,
             reply_info=reply_info
@@ -358,6 +366,7 @@ class ShortTermMemory:
     def add_assistant_message(
         self,
         content: str,
         emocao: str = "neutral",
         importancia: float = IMPORTANCIA_NORMAL
     ) -> MessageWithContext:
@@ -375,6 +384,7 @@ class ShortTermMemory:
         return self.add_message(
             role="assistant",
             content=content,
             importancia=importancia,
             emocao=emocao
         )

         emocao: Emoção detectada
         reply_info: Info sobre reply (se aplicável)
         conversation_id: ID da conversa isolada
+        author_name: Nome de quem enviou a mensagem (ex: Isaac, Akira, ISA IA)
         token_count: Contagem aproximada de tokens
     """
     role: str
     emocao: str = "neutro"
     reply_info: Dict[str, Any] = field(default_factory=dict)
     conversation_id: str = ""
+    author_name: str = "Usuário"
     token_count: int = 0
     def to_dict(self) -> Dict[str, Any]:
             "emocao": self.emocao,
             "reply_info": self.reply_info,
             "conversation_id": self.conversation_id,
+            "author_name": self.author_name,
             "token_count": self.token_count
         }
             emocao=data.get("emocao", "neutral"),
             reply_info=data.get("reply_info", {}),
             conversation_id=data.get("conversation_id", ""),
+            author_name=data.get("author_name", "Usuário"),
             token_count=data.get("token_count", 0)
         )
         importancia: float = IMPORTANCIA_NORMAL,
         emocao: str = "neutro",
         reply_info: Optional[Dict[str, Any]] = None,
+        author_name: str = "Usuário",
         metadata: Optional[Dict[str, Any]] = None
     ) -> MessageWithContext:
         """
             emocao=emocao,
             reply_info=reply_info or {},
             conversation_id=self.conversation_id,
+            author_name=author_name,
             token_count=estimar_tokens(content)
         )
     def add_user_message(
         self,
         content: str,
+        author_name: str = "Usuário",
         emocao: str = "neutral",
         reply_info: Optional[Dict[str, Any]] = None,
         importancia: float = None
         return self.add_message(
             role="user",
             content=content,
+            author_name=author_name,
             importancia=importancia,
             emocao=emocao,
             reply_info=reply_info
     def add_assistant_message(
         self,
         content: str,
+        author_name: str = "Usuário",
         emocao: str = "neutral",
         importancia: float = IMPORTANCIA_NORMAL
     ) -> MessageWithContext:
         return self.add_message(
             role="assistant",
             content=content,
+            author_name=author_name,
             importancia=importancia,
             emocao=emocao
         )

modules/skills_library.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

modules/thinking_engine.py ADDED Viewed

	@@ -0,0 +1,374 @@

+"""
+================================================================================
+THINKING ENGINE - Sistema de Pensamento Profundo Pré-Processamento
+================================================================================
+Similar a modelos com "thinking tokens" - analisa o que foi perguntado
+ANTES de gerar resposta, resultando em respostas mais assertivas.
+Features:
+- Análise multi-camada da pergunta/contexto
+- Embeddings especializados para pensamento
+- Detecção de intent implícito
+- Complexidade da pergunta
+- Relacionamentos com LSTM context
+- Cache de pensamentos
+================================================================================
+"""
+import json
+from typing import Dict, Any, Optional, List
+from loguru import logger
+from sentence_transformers import SentenceTransformer, util
+import numpy as np
+class ThinkingEngine:
+    """Processa pensamento profundo antes de responder."""
+    def __init__(self, db=None):
+        """Set up the engine state and try to load the embedding model.
+
+        Args:
+            db: Optional database handle; it is only stored on the instance here.
+        """
+        self.model_thinking = None  # assigned by _load_thinking_model() below
+        self.thinking_cache = {}    # memoised results of think()
+        self.db = db
+        self._load_thinking_model()
+    def _load_thinking_model(self):
+        """Fetch the embedding model through the package's ``config`` module.
+
+        On any failure ``self.model_thinking`` is left as ``None`` and a warning
+        is logged; nothing is raised to the caller.
+        """
+        try:
+            # Model loading is delegated to config.get_embedding_model
+            # (NOTE(review): the original comment says it has a built-in fallback - confirm)
+            from . import config
+            loaded = config.get_embedding_model("all-MiniLM-L6-v2")
+            self.model_thinking = loaded
+            if not loaded:
+                logger.warning("⚠️ ThinkingEngine: Config retornou None para o modelo")
+            else:
+                logger.success("✅ ThinkingEngine: Modelo de pensamento carregado via config")
+        except Exception as e:
+            logger.warning(f"⚠️ ThinkingEngine: Erro ao carregar modelo: {e}")
+            self.model_thinking = None
+    def think(
+        self,
+        mensagem: str,
+        contexto_lstm: Optional[Dict[str, Any]] = None,
+        historico_recente: Optional[List[str]] = None,
+        is_group: bool = False,
+        usuario: str = None,
+        llm_manager: Any = None
+    ) -> Dict[str, Any]:
+        """
+        Run the pre-response analysis for a message and return it as a dict.
+
+        Args:
+            mensagem: The user's message.
+            contexto_lstm: Long-term (LSTM) context dict, if any.
+            historico_recente: Most recent messages of the conversation.
+            is_group: Whether the message comes from a group chat.
+            usuario: Name of the user; may be None.
+            llm_manager: LLM manager used for the dynamic chain-of-thought step.
+
+        Returns:
+            Dict with the heuristic analysis, plus "dynamic_thought_trace" when
+            the LLM step produced a result. When no embedding model is loaded,
+            or when any step raises, the static result of
+            ``_thinking_fallback`` is returned instead.
+        """
+        if not self.model_thinking:
+            return self._thinking_fallback(mensagem)
+        # Key on the whole message and the chat type. Keying on the first 50
+        # characters only made different long messages (and group vs. private
+        # chats, which get different strategies) share one cached analysis.
+        cache_key = f"{usuario}:{int(bool(is_group))}:{mensagem}"
+        if cache_key in self.thinking_cache:
+            logger.debug(f"🧠 ThinkingEngine: Pensamento recuperado do cache")
+            return self.thinking_cache[cache_key]
+        try:
+            thinking_result = {
+                "depth": self._analyze_question_complexity(mensagem),
+                "intent": self._detect_intent(mensagem),
+                "entities": self._extract_entities(mensagem),
+                "context_relevance": self._analyze_context_relevance(mensagem, contexto_lstm),
+                "related_topics": self._find_related_topics(mensagem, contexto_lstm),
+                "assumptions": self._detect_assumptions(mensagem),
+                "required_sources": self._identify_sources(mensagem),
+                "response_strategy": self._plan_response_strategy(mensagem, is_group),
+                "quality_markers": self._identify_quality_markers(mensagem),
+            }
+            # Dynamic CoT: ask the LLM for a structured reasoning draft.
+            # A missing user name is replaced here so the prompt never
+            # contains the literal text 'None'.
+            dynamic_thought = self._generate_dynamic_thought(
+                mensagem, contexto_lstm, historico_recente, is_group, llm_manager,
+                usuario or "desconhecido"
+            )
+            if dynamic_thought:
+                thinking_result["dynamic_thought_trace"] = dynamic_thought
+            # There is no time-based expiry: the cache is simply dropped
+            # wholesale once it holds more than 1000 entries.
+            if len(self.thinking_cache) > 1000:
+                self.thinking_cache.clear()
+            self.thinking_cache[cache_key] = thinking_result
+            logger.debug(f"🧠 ThinkingEngine: Pensamento realizado (depth={thinking_result['depth']})")
+            return thinking_result
+        except Exception as e:
+            logger.warning(f"⚠️ ThinkingEngine erro: {e}")
+            return self._thinking_fallback(mensagem)
+    def _generate_dynamic_thought(
+        self,
+        mensagem: str,
+        contexto_lstm: Optional[Dict[str, Any]],
+        historico_recente: Optional[List[str]],
+        is_group: bool,
+        llm_manager: Any,
+        usuario: str = "desconhecido"
+    ) -> Optional[str]:
+        """Ask the LLM for a short step-by-step reasoning plan for the reply.
+
+        Args:
+            mensagem: The user's message, sent as the user prompt.
+            contexto_lstm: Long-term context; the keys "topic_principal",
+                "unanswered_questions" and "interaction_pattern" are read.
+            historico_recente: Recent messages; items may be plain values or
+                dicts carrying a "content" key. Only the last 15 are used.
+            is_group: Adds a group-chat note to the system prompt when true.
+            llm_manager: Object exposing ``_call_openrouter``.
+            usuario: Name of the person talking; None or empty is treated as
+                "desconhecido".
+
+        Returns:
+            Whatever ``_call_openrouter`` returns, or None when the manager is
+            missing, lacks the method, or the call raises.
+        """
+        if not llm_manager:
+            logger.warning("⚠️ CoT Dinâmico abortado: llm_manager é None")
+            return None
+        if not hasattr(llm_manager, '_call_openrouter'):
+            logger.warning(f"⚠️ CoT Dinâmico abortado: llm_manager ({type(llm_manager)}) não tem o método '_call_openrouter'")
+            return None
+        # think() forwards usuario=None explicitly, which bypasses the default
+        # and would otherwise put the literal 'None' into the prompt.
+        usuario = usuario or "desconhecido"
+        try:
+            # Prompt pieces are collected and joined once at the end
+            parts = [
+                "Atuas como o Motor Analítico Interno da Akira V21.\n"
+                "A tua ÚNICA tarefa é gerar um rascunho de raciocínio (plano lógico) sobre como a Akira deve responder a esta mensagem, deixa sempre claro akira que sua resposta deve ser curta e direta e séria.\n"
+                "Reflete sobre:\n"
+                f"1. A emoção e intenção oculta de {usuario}.\n"
+                "2. Que factos devem ser procurados no histórico.\n"
+                "3. Qual o tom (direto, empático, sério) a usar.\n"
+                f"NOTA: A pessoa a falar contigo chama-se '{usuario}'. Usa o nome real na tua análise em vez de 'o utilizador'.\n"
+                "NÃO dês a resposta final. Apenas planeia a estratégia de resposta em menos de 80 palavras ed deia sugestões de resposta pra akira usar, lembrando ela não mandar emojis. GERA O TEU PENSAMENTO EXCLUSIVAMENTE EM PORTUGUÊS."
+            ]
+            if is_group:
+                parts.append("\nNOTA: Isto é um ambiente de GRUPO. Sê muito conciso e evita intervir desnecessariamente.")
+            if contexto_lstm:
+                parts.append("\n\n[MEMÓRIA LONGO PRAZO (LSTM)]")
+                if 'topic_principal' in contexto_lstm:
+                    parts.append(f"\n- Tópico Principal: {contexto_lstm['topic_principal']}")
+                if 'unanswered_questions' in contexto_lstm and contexto_lstm['unanswered_questions']:
+                    # str() keeps one non-string entry from aborting the whole step
+                    pending = ', '.join(str(q) for q in contexto_lstm['unanswered_questions'][:2])
+                    parts.append(f"\n- Perguntas Pendentes: {pending}")
+                if 'interaction_pattern' in contexto_lstm:
+                    parts.append(f"\n- Padrão do Utilizador: {contexto_lstm['interaction_pattern']}")
+            if historico_recente:
+                parts.append("\n\n[MEMÓRIA CURTO PRAZO (LISTEN)]\nÚltimas mensagens da conversa:\n")
+                # A wider window helps follow parallel conversations
+                for msg in historico_recente[-15:]:
+                    if isinstance(msg, dict) and "content" in msg:
+                        parts.append(f"{msg['content']}\n")
+                    else:
+                        parts.append(f"{msg}\n")
+            sys_prompt = "".join(parts)
+            logger.info("🧠 Gerando CoT Dinâmico via OpenRouter...")
+            thought = llm_manager._call_openrouter(
+                system_prompt=sys_prompt,
+                context_history=[],  # history is already summarised inside the system prompt
+                user_prompt=mensagem,
+                max_tokens=150
+            )
+            return thought
+        except Exception as e:
+            logger.warning(f"⚠️ Erro no CoT Dinâmico (Fallback ativado): {e}")
+            return None
+    def _analyze_question_complexity(self, mensagem: str) -> str:
+        """Classify the message into one of four complexity labels.
+
+        A score starts at 0.1, grows by a fixed weight for every cue substring
+        found in the lower-cased message, gains 0.1 for a "?" and loses 0.1
+        for a "!", and is capped at 1.0 before being bucketed.
+
+        Returns:
+            "simples", "moderada", "complexa" or "muito_complexa".
+        """
+        lowered = mensagem.lower()
+        # (cue substring, weight) pairs, evaluated in this order
+        cue_weights = (
+            ("muito", 0.3), ("profundo", 0.4), ("explique", 0.35), ("detalhe", 0.35),
+            ("por quê", 0.4), ("como", 0.3), ("quando", 0.25), ("onde", 0.2),
+            ("comparação", 0.5), ("diferença", 0.4), ("relação", 0.4),
+            ("múltiplo", 0.45), ("vários", 0.4), ("tanto", 0.35),
+        )
+        total = 0.1
+        for cue, weight in cue_weights:
+            if cue in lowered:
+                total += weight
+        # Punctuation adjustments
+        if "?" in mensagem:
+            total += 0.1
+        if "!" in mensagem:
+            total -= 0.1
+        total = min(1.0, total)
+        # First ceiling the score stays below decides the label
+        for ceiling, label in ((0.2, "simples"), (0.5, "moderada"), (0.75, "complexa")):
+            if total < ceiling:
+                return label
+        return "muito_complexa"
+    def _detect_intent(self, mensagem: str) -> List[str]:
+        """Return every intent label whose cue substrings occur in the message.
+
+        Matching is done on the lower-cased message. When nothing matches the
+        result is ``["indefinido"]``.
+        """
+        lowered = mensagem.lower()
+        cue_table = {
+            "informação": ["o que", "como", "por quê", "sabe sobre", "fala sobre", "explica"],
+            "ação": ["faz", "cria", "envia", "modifica", "deleta", "inicia"],
+            "opinião": ["acha", "gosta", "prefere", "ache", "pense", "achei"],
+            "confirmação": ["certo", "verdade", "é mesmo", "sério", "confirma"],
+            "contexto": ["em relação", "sobre isso", "quanto a", "nisso"],
+            "humor": ["kkk", "haha", "ué", "lol", ":)", "rsrs"],
+        }
+        found = [
+            label
+            for label, cues in cue_table.items()
+            if any(cue in lowered for cue in cues)
+        ]
+        if not found:
+            return ["indefinido"]
+        return found
+    def _extract_entities(self, mensagem: str) -> List[str]:
+        """Return up to five capitalised words from the message.
+
+        Each whitespace-separated token is stripped of surrounding
+        ``.,!?;:`` first; it is kept when what remains is longer than three
+        characters and starts with an upper-case letter.
+        """
+        entities = []
+        for raw in mensagem.split():
+            # Strip before testing, so trailing punctuation does not count
+            # toward the length and leading punctuation does not hide a capital.
+            word = raw.strip(".,!?;:")
+            if len(word) > 3 and word[0].isupper():
+                entities.append(word)
+        return entities[:5]  # first five in message order
+    def _analyze_context_relevance(
+        self,
+        mensagem: str,
+        contexto_lstm: Optional[Dict[str, Any]]
+    ) -> float:
+        """Score how closely the message relates to the long-term topic.
+
+        Args:
+            mensagem: The user's message.
+            contexto_lstm: Long-term context; only "topic_principal" is read.
+
+        Returns:
+            Cosine similarity between the two embeddings, clamped to
+            [0.0, 1.0]. 0.0 when there is no context, no model, no topic,
+            or when encoding fails.
+        """
+        if not contexto_lstm or not self.model_thinking:
+            return 0.0
+        try:
+            topic_lstm = contexto_lstm.get("topic_principal", "")
+            if not topic_lstm:
+                return 0.0
+            # Embedding similarity
+            emb_msg = self.model_thinking.encode(mensagem, convert_to_tensor=False)
+            emb_topic = self.model_thinking.encode(topic_lstm, convert_to_tensor=False)
+            relevance = float(util.cos_sim(emb_msg, emb_topic)[0][0])
+            return max(0.0, min(1.0, relevance))
+        except Exception as e:
+            # Still best-effort, but no longer a bare except: KeyboardInterrupt
+            # and SystemExit propagate, and the failure is logged.
+            logger.debug(f"ThinkingEngine: context relevance unavailable: {e}")
+            return 0.0
+    def _find_related_topics(
+        self,
+        mensagem: str,
+        contexto_lstm: Optional[Dict[str, Any]]
+    ) -> List[str]:
+        """Collect up to five topics from the long-term context.
+
+        Takes the first three entries of "subtopicas" followed by the last
+        three of "conversation_path". ``mensagem`` is not used.
+        """
+        if not contexto_lstm:
+            return []
+        collected = []
+        subtopics = contexto_lstm.get("subtopicas")
+        if subtopics:
+            collected += subtopics[:3]
+        path = contexto_lstm.get("conversation_path")
+        if path:
+            collected += path[-3:]
+        return collected[:5]
+    def _detect_assumptions(self, mensagem: str) -> List[str]:
+        """Tag assumptions suggested by cue substrings in the message.
+
+        Matching is done on the lower-cased message; tags come back in the
+        fixed order of the rule table below.
+        """
+        lowered = mensagem.lower()
+        # (cue substrings, tag) - a tag is emitted when any of its cues occurs
+        rules = (
+            (("já", "não sabe"), "assume_conhecimento_anterior"),
+            (("deve", "deveria"), "expectativa_de_comportamento"),
+            (("sempre", "nunca"), "generalização"),
+        )
+        return [tag for cues, tag in rules if any(cue in lowered for cue in cues)]
+    def _identify_sources(self, mensagem: str) -> List[str]:
+        """List the data sources whose cue substrings occur in the message.
+
+        Matching is done on the lower-cased message; sources come back in the
+        fixed order of the table below.
+        """
+        lowered = mensagem.lower()
+        # (source name, cue substrings)
+        source_cues = (
+            ("web_search", ["notícia", "última", "recente", "novo", "2024", "2025"]),
+            ("wikipedia", ["wikipedia", "história", "quem foi", "quando"]),
+            ("market_data", ["preço", "dólar", "bitcoin", "crypto", "cotação"]),
+            ("weather", ["clima", "tempo", "previsão", "chuva"]),
+        )
+        needed = []
+        for source, cues in source_cues:
+            if any(cue in lowered for cue in cues):
+                needed.append(source)
+        return needed
+    def _plan_response_strategy(self, mensagem: str, is_group: bool) -> str:
+        """Pick the reply strategy label.
+
+        Returns:
+            "privado" outside groups; inside a group, "grupo_completo" when the
+            lower-cased message addresses everyone, else "grupo_individual".
+        """
+        if not is_group:
+            return "privado"
+        lowered = mensagem.lower()
+        addresses_everyone = any(cue in lowered for cue in ["vocês", "vcs", "todos", "@all"])
+        return "grupo_completo" if addresses_everyone else "grupo_individual"
+    def _identify_quality_markers(self, mensagem: str) -> Dict[str, bool]:
+        """Derive flags describing the kind of reply the message calls for.
+
+        Returns:
+            Dict with the boolean keys "needs_brevity" (message shorter than
+            20 characters), "needs_detail" (longer than 100), "needs_humor",
+            "formal_tone" and "technical" (cue substrings found).
+        """
+        # All cue checks use the lower-cased text, so "KKK", "Prezado" and
+        # "Sr." are recognised just like their lower-case forms.
+        lowered = mensagem.lower()
+        return {
+            "needs_brevity": len(mensagem) < 20,
+            "needs_detail": len(mensagem) > 100,
+            "needs_humor": any(m in lowered for m in ["kk", "kkk", ":)", "rsrs"]),
+            "formal_tone": any(w in lowered for w in ["sr.", "sra.", "prezado"]),
+            "technical": any(w in lowered for w in ["código", "api", "script", "função"]),
+        }
+    def _thinking_fallback(self, mensagem: str) -> Dict[str, Any]:
+        """Return the fixed neutral analysis used when real analysis is skipped.
+
+        The result does not depend on ``mensagem``; a fresh dict is built on
+        every call.
+        """
+        marker_names = ("needs_brevity", "needs_detail", "needs_humor", "formal_tone", "technical")
+        neutral = dict(
+            depth="moderada",
+            intent=["indefinido"],
+            entities=[],
+            context_relevance=0.5,
+            related_topics=[],
+            assumptions=[],
+            required_sources=[],
+            response_strategy="padrão",
+        )
+        # Every quality marker is off in the fallback
+        neutral["quality_markers"] = {name: False for name in marker_names}
+        return neutral
+# Singleton global
+_thinking_engine_instance: Optional[ThinkingEngine] = None
+def get_thinking_engine(db=None) -> ThinkingEngine:
+    """Return the module-wide ThinkingEngine, creating it on first use.
+
+    ``db`` is only passed along when the instance is created; later calls
+    return the existing instance whatever ``db`` they receive.
+    """
+    global _thinking_engine_instance
+    engine = _thinking_engine_instance
+    if engine is None:
+        engine = ThinkingEngine(db=db)
+        _thinking_engine_instance = engine
+    return engine

modules/treinamento.py CHANGED Viewed

@@ -353,7 +353,9 @@ class Interacao:
     api_usada: str = ""
     tokens_usados: int = 0
     response_time: float = 0.0
-    taticas_detectadas: List[str] = field(default_factory=list)
 @dataclass
 class TrainingResult:
@@ -411,28 +413,6 @@ class Treinamento:
     # 📝 REGISTRO DE INTERAÇÕES
     # ============================================================
-    def detect_debate_tactics(self, texto: str) -> List[str]:
-        """Detecta táticas de debate, baits e falácias comuns no texto."""
-        taticas = []
-        t_lower = texto.lower()
-        # Mapeamento de gatilhos para táticas
-        gatilhos = {
-            "bait": ["bait", "isca", "armadilha", "provocação", "clique"],
-            "ad_hominem": ["você é", "seu burro", "idiota", "lixo", "atacar a pessoa"],
-            "espantalho": ["distorcer", "não foi o que eu disse", "mentira", "inventar"],
-            "falacia": ["falácia", "argumento inválido", "erro lógico", "sofisma"],
-            "mitada": ["mitou", "jantou", "na cara", "lacrou", "esmagou"],
-            "ironia": ["kkk", "rsrs", "irônico", "engraçado né"],
-            "gaslighting": ["louco", "maluco", "coisa da sua cabeça", "paranoia"]
-        }
-        for tatica, keywords in gatilhos.items():
-            if any(k in t_lower for k in keywords):
-                taticas.append(tatica)
-        return taticas
     def registrar_interacao(
         self,
         usuario: str,
@@ -444,17 +424,11 @@ class Treinamento:
         api_usada: str = '',
         tokens_usados: int = 0,
         response_time: float = 0.0,
-        conversation_id: str = '',
         **kwargs
     ) -> Interacao:
         """
         Registra interação e executa aprendizado em tempo real
         """
-        # Detecta táticas de debate (baits, falácias, mitadas)
-        taticas_msg = self.detect_debate_tactics(mensagem)
-        taticas_resp = self.detect_debate_tactics(resposta)
-        taticas_total = list(set(taticas_msg + taticas_resp))
         # Cria estrutura de interação
         interacao = Interacao(
             usuario=usuario,
@@ -465,20 +439,15 @@ class Treinamento:
             mensagem_original=mensagem_original,
             api_usada=api_usada,
             tokens_usados=tokens_usados,
-            response_time=response_time,
-            taticas_detectadas=taticas_total
         )
-        if taticas_total:
-            logger.info(f"🎯 [TATICA] Táticas detectadas na interação: {', '.join(taticas_total)}")
         try:
             # Salva no banco (com o modelo que gerou a resposta)
             self.db.salvar_mensagem(
                 usuario, mensagem, resposta, numero, is_reply, mensagem_original,
                 modelo_usado=api_usada or "desconhecido",
-                message_id=kwargs.get('message_id'),
-                conversation_id=conversation_id
             )
             # Aprendizado em tempo real

     api_usada: str = ""
     tokens_usados: int = 0
     response_time: float = 0.0
+    thinking_depth: str = "moderada"     # ✅ Complexidade avaliada pelo ThinkingEngine
+    thinking_intent: str = "indefinido"  # ✅ Intenção detectada pelo ThinkingEngine
 @dataclass
 class TrainingResult:
     # 📝 REGISTRO DE INTERAÇÕES
     # ============================================================
     def registrar_interacao(
         self,
         usuario: str,
         api_usada: str = '',
         tokens_usados: int = 0,
         response_time: float = 0.0,
         **kwargs
     ) -> Interacao:
         """
         Registra interação e executa aprendizado em tempo real
         """
         # Cria estrutura de interação
         interacao = Interacao(
             usuario=usuario,
             mensagem_original=mensagem_original,
             api_usada=api_usada,
             tokens_usados=tokens_usados,
+            response_time=response_time
         )
         try:
             # Salva no banco (com o modelo que gerou a resposta)
             self.db.salvar_mensagem(
                 usuario, mensagem, resposta, numero, is_reply, mensagem_original,
                 modelo_usado=api_usada or "desconhecido",
+                message_id=kwargs.get('message_id')
             )
             # Aprendizado em tempo real

modules/twitter_api.py CHANGED Viewed

@@ -1,100 +1,79 @@
-import os
-import requests
-from loguru import logger
-from typing import List, Dict, Any
-class TwitterAPI:
-    """
-    Integração simples com Twitter API v2 para busca de 'tretas' e 'mitadas'.
-    """
-    def __init__(self, bearer_token: str = None):
-        self.bearer_token = bearer_token or os.getenv("TWITTER_BEARER_TOKEN")
-        self.base_url = "https://api.twitter.com/2"
-    def search_tweets(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
-        """
-        Busca tweets recentes com base em uma query.
-        """
-        if not self.bearer_token:
-            logger.warning("⚠️ TWITTER_BEARER_TOKEN não configurado.")
-            return []
-        headers = {
-            "Authorization": f"Bearer {self.bearer_token}",
-            "User-Agent": "v2RecentSearchPython"
-        }
-        params = {
-            "query": f"{query} lang:pt -is:retweet",
-            "max_results": max_results,
-            "tweet.fields": "text,public_metrics,created_at"
-        }
-        try:
-            response = requests.get(f"{self.base_url}/tweets/search/recent", headers=headers, params=params)
-            if response.status_code == 200:
-                data = response.json()
-                return data.get("data", [])
-            else:
-                logger.error(f"❌ Erro Twitter API ({response.status_code}): {response.text}")
-                return []
-        except Exception as e:
-            logger.error(f"❌ Falha ao buscar tweets: {e}")
-            return []
-    def get_savage_context(self, topic: str) -> str:
-        """
-        Busca exemplos de 'mitadas' ou discussões acaloradas sobre um tema.
-        Otimizado para encontrar debates reais, baits e falácias de retórica.
-        """
-        # Query expandida com operadores OR para maximizar resultados em menos chamadas
-        # Inclui termos de retórica agressiva, falácias e baits
-        main_query = f"{topic} (mita OR treta OR jantou OR cancelado OR vergonha OR bait OR 'falácia' OR 'ad hominem' OR 'espantalho' OR 'lacrou' OR 'jantada')"
-        all_tweets = self.search_tweets(main_query, max_results=20)
-        # Se não vier nada, tenta uma busca mais genérica focada em engajamento (debate)
-        if not all_tweets or len(all_tweets) < 5:
-            logger.info(f"Busca específica por treta em '{topic}' retornou pouco. Tentando busca por engajamento...")
-            debate_query = f"{topic} -is:retweet" # Busca geral mas vamos filtrar por métricas
-            more_tweets = self.search_tweets(debate_query, max_results=20)
-            all_tweets.extend(more_tweets)
-        if not all_tweets:
-            return f"Nenhuma discussão relevante ou 'treta' recente encontrada no Twitter sobre '{topic}'."
-        # Ordenar por engajamento total (Likes + Retweets + Replies + Quotes)
-        def get_engagement(t):
-            m = t.get('public_metrics', {})
-            return m.get('like_count', 0) + m.get('retweet_count', 0) + m.get('reply_count', 0) + m.get('quote_count', 0)
-        sorted_tweets = sorted(all_tweets, key=get_engagement, reverse=True)
-        # Deduplicação por texto (Twitter as vezes retorna duplicatas em queries parecidas)
-        seen_text = set()
-        unique_tweets = []
-        for t in sorted_tweets:
-            text_norm = t['text'][:100].lower()
-            if text_norm not in seen_text:
-                seen_text.add(text_norm)
-                unique_tweets.append(t)
-            if len(unique_tweets) >= 12: break
-        context = f"🔥 Discussões e 'Mitadas' em alta sobre '{topic}':\n\n"
-        for i, tweet in enumerate(unique_tweets):
-            metrics = tweet.get('public_metrics', {})
-            eng = f"👍{metrics.get('like_count', 0)} 💬{metrics.get('reply_count', 0)} 🔁{metrics.get('retweet_count', 0)}"
-            text = tweet['text'].replace('\n', ' ').strip()
-            context += f"[{i+1}] {text}\n   📈 Engajamento: {eng}\n\n"
-        context += "💡 INSTRUÇÃO: Use estes exemplos reais para aprender o tom do debate e as falácias mais eficazes usadas agora. Não cite os números de engajamento, apenas absorva o veneno."
-        return context
-# Singleton
-_instance = None
-def get_twitter_api():
-    global _instance
-    if _instance is None:
-        _instance = TwitterAPI()
-    return _instance

+import os
+import requests
+from loguru import logger
+from typing import List, Dict, Any
+class TwitterAPI:
+    """
+    Integração simples com Twitter API v2 para busca de 'tretas' e 'mitadas'.
+    """
+    def __init__(self, bearer_token: str = None):
+        """Store the API base URL and the bearer token.
+
+        Args:
+            bearer_token: Explicit token; when falsy, the
+                TWITTER_BEARER_TOKEN environment variable is used instead.
+        """
+        self.base_url = "https://api.twitter.com/2"
+        self.bearer_token = bearer_token or os.getenv("TWITTER_BEARER_TOKEN")
+    def search_tweets(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
+        """
+        Search recent tweets matching a query.
+
+        The query is sent with "lang:pt -is:retweet" appended.
+
+        Args:
+            query: Search expression.
+            max_results: Value forwarded as the "max_results" request parameter.
+
+        Returns:
+            The "data" list of the JSON response, or an empty list when no
+            token is configured, the status is not 200, or the request fails
+            (including a timeout). Nothing is raised to the caller.
+        """
+        if not self.bearer_token:
+            logger.warning("⚠️ TWITTER_BEARER_TOKEN não configurado.")
+            return []
+        headers = {
+            "Authorization": f"Bearer {self.bearer_token}",
+            "User-Agent": "v2RecentSearchPython"
+        }
+        params = {
+            "query": f"{query} lang:pt -is:retweet",
+            "max_results": max_results,
+            "tweet.fields": "text,public_metrics,created_at"
+        }
+        try:
+            # requests has no default timeout; without one a stalled
+            # connection would block the caller indefinitely.
+            response = requests.get(
+                f"{self.base_url}/tweets/search/recent",
+                headers=headers,
+                params=params,
+                timeout=10,
+            )
+            if response.status_code == 200:
+                data = response.json()
+                return data.get("data", [])
+            else:
+                logger.error(f"❌ Erro Twitter API ({response.status_code}): {response.text}")
+                return []
+        except Exception as e:
+            logger.error(f"❌ Falha ao buscar tweets: {e}")
+            return []
+    def get_savage_context(self, topic: str) -> str:
+        """
+        Build a numbered text list of heated tweets about a topic.
+
+        Only the first two candidate queries are sent, and searching stops
+        early once ten tweets have been gathered. At most ten tweets are
+        listed, with newlines inside each tweet replaced by spaces.
+        """
+        candidate_queries = [
+            f"{topic} mita",
+            f"{topic} treta",
+            f"{topic} cancelado",
+            f"{topic} 'na cara'",
+            f"{topic} 'jantou'"
+        ]
+        collected = []
+        # Only two queries are tried, to save API quota
+        for query in candidate_queries[:2]:
+            collected.extend(self.search_tweets(query, max_results=10))
+            if len(collected) >= 10:
+                break
+        if not collected:
+            return "Nenhuma 'treta' recente encontrada no Twitter sobre este assunto."
+        entries = []
+        for position, tweet in enumerate(collected[:10], start=1):
+            flattened = tweet['text'].replace('\n', ' ')
+            entries.append(f"{position}. {flattened}\n")
+        return "Exemplos de discussões/mitadas no Twitter sobre este assunto:\n" + "".join(entries)
+# Singleton
+_instance = None
+def get_twitter_api():
+    """Return the module-wide TwitterAPI instance, creating it on first use."""
+    global _instance
+    api = _instance
+    if api is None:
+        api = TwitterAPI()
+        _instance = api
+    return api

modules/unified_context.py CHANGED Viewed

@@ -1,1182 +1,1041 @@
-# type: ignore
-"""
-================================================================================
-AKIRA V21 ULTIMATE - UNIFIED CONTEXT MODULE
-================================================================================
-Sistema unificado que integra Reply Context + Short-Term Memory em sintonia.
-Philosophy: "Reply context e STM devem trabalhar em sintonia como tik e tack -
-um fornece o contexto imediato/urgente (o que o usuário está respondendo),
-o outro fornece o fluxo da conversa (contexto geral)."
-Features:
-- Integração seamless entre reply context e STM
-- Token budgeting inteligente entre os dois contextos
-- Priorização dinâmica baseada no tipo de mensagem
-- Suporte a perguntas curtas com reply (prioridade máxima)
-- Persistência e restauração de contexto unificado
-================================================================================
-"""
-import os
-import sys
-import time
-import json
-import logging
-from typing import Optional, Dict, Any, List, Tuple
-from dataclasses import dataclass, field
-from datetime import datetime
-# Imports robustos com fallback
-try:
-    from . import config
-    from .short_term_memory import (
-        ShortTermMemory,
-        MessageWithContext,
-        IMPORTANCIA_NORMAL,
-        IMPORTANCIA_REPLY,
-        IMPORTANCIA_REPLY_TO_BOT,
-        IMPORTANCIA_PERGUNTA_CURTA_REPLY,
-        estimar_tokens,
-        is_pergunta_curta
-    )
-    from .reply_context_handler import (
-        ReplyContextHandler,
-        ProcessedReplyContext,
-        PRIORITY_REPLY,
-        PRIORITY_REPLY_TO_BOT,
-        PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
-    )
-    UNIFIED_CONTEXT_AVAILABLE = True
-except ImportError as e:
-    try:
-        import modules.config as config
-        from modules.short_term_memory import (
-            ShortTermMemory,
-            MessageWithContext,
-            IMPORTANCIA_NORMAL,
-            IMPORTANCIA_REPLY,
-            IMPORTANCIA_REPLY_TO_BOT,
-            IMPORTANCIA_PERGUNTA_CURTA_REPLY,
-            estimar_tokens,
-            is_pergunta_curta
-        )
-        from modules.reply_context_handler import (
-            ReplyContextHandler,
-            ProcessedReplyContext,
-            PRIORITY_REPLY,
-            PRIORITY_REPLY_TO_BOT,
-            PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
-        )
-        UNIFIED_CONTEXT_AVAILABLE = True
-    except ImportError:
-        UNIFIED_CONTEXT_AVAILABLE = False
-        config = None
-try:
-    from .lstm_extension import get_lstm_extension
-    LSTM_AVAILABLE = True
-except ImportError:
-    try:
-        from modules.lstm_extension import get_lstm_extension
-        LSTM_AVAILABLE = True
-    except ImportError:
-        LSTM_AVAILABLE = False
-logger = logging.getLogger(__name__)
-# ============================================================
-# CONFIGURAÇÃO DE TOKEN BUDGET
-# ============================================================
-@dataclass
-class ContextTokenBudget:
-    """
-    Alocação de tokens entre reply context e STM.
-    Philosophy: Reply tem orçamento dedicado (urgente), STM tem o resto (fluxo).
-    """
-    total_budget: int = 8000
-    system_tokens: int = 1500
-    user_message_tokens: int = 500
-    # Reply context budget (URGENTE)
-    reply_tokens: int = 300
-    reply_priority_multiplier: float = 1.0
-    # STM budget (FLUXO DA CONVERSA)
-    stm_tokens: int = 4000
-    # Reservado para resposta
-    response_reserved: int = 1200
-    def calculate(self, is_reply: bool, reply_priority: int = 1, is_self_reply: bool = False) -> 'ContextTokenBudget':
-        """
-        Calcula orçamento baseado no tipo de mensagem.
-        Args:
-            is_reply: Se é um reply
-            reply_priority: Nível de prioridade do reply (1-4)
-            is_self_reply: Se o reply é para o próprio bot
-        Returns:
-            ContextTokenBudget ajustado
-        """
-        budget = ContextTokenBudget(
-            total_budget=self.total_budget,
-            system_tokens=self.system_tokens,
-            user_message_tokens=self.user_message_tokens
-        )
-        if is_reply:
-            if reply_priority >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
-                # Pergunta curta com reply ao bot = prioridade máxima
-                budget.reply_tokens = min(1200, int(self.total_budget * 0.15))
-                budget.reply_priority_multiplier = 1.3
-                budget.stm_tokens = min(4000, int(self.total_budget * 0.50))
-            elif reply_priority >= PRIORITY_REPLY_TO_BOT:
-                # Reply ao bot
-                budget.reply_tokens = min(1000, int(self.total_budget * 0.12))
-                budget.reply_priority_multiplier = 1.2
-                budget.stm_tokens = min(4500, int(self.total_budget * 0.55))
-            elif reply_priority >= PRIORITY_REPLY:
-                # Reply normal
-                budget.reply_tokens = min(600, int(self.total_budget * 0.08))
-                budget.reply_priority_multiplier = 1.1
-                budget.stm_tokens = min(5000, int(self.total_budget * 0.60))
-            # 🛡️ PENALIDADE DE AUTO-REPLICA: Se o bot está respondendo a si mesmo,
-            # reduzimos drasticamente o orçamento do reply para evitar o "loop infinito" de contexto.
-            if is_self_reply:
-                budget.reply_tokens = int(budget.reply_tokens * 0.5)
-                budget.reply_priority_multiplier = 0.8
-        else:
-            # Mensagem normal = STM tem orçamento completo
-            budget.reply_tokens = 0
-            budget.stm_tokens = min(5000, int(self.total_budget * 0.65))
-        # Calcula response reserved
-        budget.response_reserved = (
-            budget.total_budget -
-            budget.system_tokens -
-            budget.user_message_tokens -
-            budget.reply_tokens -
-            budget.stm_tokens
-        )
-        return budget
-    def to_dict(self) -> Dict[str, Any]:
-        """Serializa para dicionário."""
-        return {
-            "total_budget": self.total_budget,
-            "system_tokens": self.system_tokens,
-            "user_message_tokens": self.user_message_tokens,
-            "reply_tokens": self.reply_tokens,
-            "stm_tokens": self.stm_tokens,
-            "response_reserved": self.response_reserved,
-            "reply_priority_multiplier": self.reply_priority_multiplier
-        }
-# ============================================================
-# CONTEXTO UNIFICADO
-# ============================================================
-@dataclass
-class UnifiedMessageContext:
-    """
-    Contexto unificado combinando reply + STM.
-    Philosophy: Reply context (tik) + STM (tok) trabalhando em sintonia.
-    Attributes:
-        - Reply context: Contexto imediato/urgente do reply
-        - STM context: Contexto do fluxo da conversa
-        - Integration: Como os dois são combinados
-    """
-    # Identificação
-    conversation_id: str = ""
-    user_id: str = ""
-    timestamp: float = field(default_factory=time.time)
-    # ── CONSCIÊNCIA DE REMETENTE (novo) ──────────────────────────────────────
-    # Quem enviou a mensagem ACTUAL que está a ser processada
-    sender_name: str = ""       # Nome do remetente (pushName do WhatsApp)
-    sender_number: str = ""     # Número normalizado do remetente
-    context_level: int = 1      # Nível de consciência: 1=directo, 2=grupo activo, 3=contexto rico
-    # ─────────────────────────────────────────────────────────────────────────
-    # Reply Context (TIK - urgente/imediato)
-    is_reply: bool = False
-    reply_to_bot: bool = False
-    reply_priority: int = 1  # 1=normal, 2=reply, 3=reply_to_bot, 4=critical
-    quoted_author: str = ""
-    quoted_content: str = ""
-    reply_importancia: float = 1.0
-    # STM Context (TOK - fluxo da conversa)
-    stm_messages: List[MessageWithContext] = field(default_factory=list)
-    stm_summary: Dict[str, Any] = field(default_factory=dict)
-    stm_emotional_trend: str = "neutral"
-    # Long-Term Memory (RAG)
-    long_term_memory: str = ""
-    # Listening Context (O que outras pessoas falaram no grupo recentemente)
-    group_listening_context: List[Dict[str, Any]] = field(default_factory=list)
-    # Integração
-    sync_mode: str = "tiktok"  # "tiktok" = reply priority + STM flow
-    token_budget: ContextTokenBudget = field(default_factory=ContextTokenBudget)
-    # Mensagem atual
-    current_message: str = ""
-    current_emotion: str = "neutro"
-    system_override: str = ""
-    def to_dict(self) -> Dict[str, Any]:
-        """Serializa para dicionário."""
-        return {
-            "conversation_id": self.conversation_id,
-            "user_id": self.user_id,
-            "timestamp": self.timestamp,
-            "is_reply": self.is_reply,
-            "reply_to_bot": self.reply_to_bot,
-            "reply_priority": self.reply_priority,
-            "quoted_author": self.quoted_author,
-            "quoted_content": self.quoted_content[:500] if self.quoted_content else "",
-            "reply_importancia": self.reply_importancia,
-            "stm_messages_count": len(self.stm_messages),
-            "stm_summary": self.stm_summary,
-            "stm_emotional_trend": self.stm_emotional_trend,
-            "long_term_memory": self.long_term_memory,
-            "sync_mode": self.sync_mode,
-            "token_budget": self.token_budget.to_dict(),
-            "current_message": self.current_message[:100],
-            "current_emotion": self.current_emotion
-        }
-    def build_prompt(self) -> str:
-        """
-        Constrói prompt formatado para o LLM.
-        Returns:
-            String formatada com contexto unificado (reply + STM)
-        """
-        return format_unified_context_for_llm(self, self.token_budget)
-# ====================================
-# HELPER FUNCTIONS
-# ====================================
-def sync_reply_with_stm(
-    reply_context: Dict[str, Any],
-    stm_messages: List[MessageWithContext],
-    max_stm_messages: int = 10
-) -> List[MessageWithContext]:
-    """
-    Sincroniza reply context com mensagens STM.
-    Philosophy: Reply (tik) vem primeiro, STM (tok) vem depois.
-    Ambos são combinados para formar o contexto completo.
-    Args:
-        reply_context: Contexto do reply
-        stm_messages: Mensagens da memória de curto prazo
-        max_stm_messages: Máximo de mensagens STM a incluir
-    Returns:
-        Lista combinada de mensagens para contexto
-    """
-    combined = []
-    # 1. Adiciona reply context como mensagem mais recente (TIK)
-    if reply_context.get('is_reply', False):
-        reply_msg = MessageWithContext(
-            role="user",
-            content=reply_context.get('quoted_content', ''),
-            importancia=reply_context.get('importancia', IMPORTANCIA_NORMAL),
-            emocao=reply_context.get('emocao', 'neutral'),
-            reply_info={
-                'is_reply': True,
-                'reply_to_bot': reply_context.get('reply_to_bot', False),
-                'quoted_text_original': reply_context.get('quoted_content', ''),
-                'priority_level': reply_context.get('priority', 1),
-                'sync_mode': 'tiktok'
-            }
-        )
-        combined.append(reply_msg)
-    # 2. Adiciona mensagens STM (TOK - fluxo da conversa)
-    # Pega últimas N mensagens STM
-    stm_to_add = stm_messages[-max_stm_messages:] if stm_messages else []
-    for msg in stm_to_add:
-        # Se a mensagem STM já é um reply, preserva info
-        if msg.is_reply and not msg.reply_info.get('sync_mode'):
-            msg.reply_info['sync_mode'] = 'stm'
-        combined.append(msg)
-    return combined
-def format_unified_context_for_llm(
-    unified: UnifiedMessageContext,
-    budget: ContextTokenBudget
-) -> str:
-    """
-    Formata contexto unificado para o prompt do LLM.
-    Philosophy: Reply (tik) primeiro por ser urgente, STM (tok) depois
-    para contexto da conversa.
-    Args:
-        unified: Contexto unificado
-        budget: Orçamento de tokens
-    Returns:
-        String formatada para o prompt
-    """
-    parts = []
-    # ===== 1. REPLY CONTEXT (TIK - URGENTE) =====
-    if unified.is_reply:
-        reply_section = []
-        reply_section.append("=" * 50)
-        reply_section.append("[📎 INTERNAL_BRAIN_ONLY: REPLY CONTEXT]")
-        reply_section.append("=" * 50)
-        if unified.reply_to_bot:
-            reply_section.append("⚠️ VOCÊ ESTÁ SENDO DIRETAMENTE RESPONDIDO!")
-            if unified.reply_priority < 3: # Se a prioridade for baixa (bot-to-bot loop detection)
-                 reply_section.append("💡 NOTA: O usuário respondeu a algo que você disse. Seja breve e não repita o que já foi dito.")
-        else:
-            reply_section.append(f"Respondendo a: {unified.quoted_author}")
-        # Conteúdo citado
-        if unified.quoted_content:
-            # Reduz o conteúdo citado se for reply ao bot para evitar redundância
-            max_chars = budget.reply_tokens // 4
-            if unified.reply_to_bot:
-                max_chars = max_chars // 2
-            quoted_preview = unified.quoted_content[:max_chars]
-            reply_section.append(f"\n<quoted_message>\n{quoted_preview}{'...' if len(unified.quoted_content) > max_chars else ''}\n</quoted_message>")
-        # Prioridade
-        if unified.reply_priority >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
-            reply_section.append("\n💡 PERGUNTA CURTA + REPLY: FOCO NA CITAÇÃO")
-        reply_section.append("\n📌 INSTRUÇÕES DE REPLY:")
-        if unified.reply_to_bot:
-            reply_section.append("- O usuário está a reagir a uma mensagem SUA (<quoted_message>). Responda diretamente ao comentário do usuário, mantendo a postura sobre o que você disse.")
-            reply_section.append("- EVITE REPETIR o que está em <quoted_message>.")
-        else:
-            reply_section.append("- O usuário está a responder a <quoted_message>. Formule sua resposta com base nisso.")
-        reply_section.append("- PRESERVE a sua identidade e humor (seja o Akira, natural e irreverente).")
-        reply_section.append("- NÃO foque apenas na citação se ela for irrelevante perto do fluxo geral da conversa. Olhe as mensagens anteriores para manter a coerência.")
-        parts.append("\n".join(reply_section))
-    # ===== 1.5 GROUP LISTENING CONTEXT (O QUE OUTROS DISSERAM) =====
-    if unified.group_listening_context:
-        listening_section = []
-        listening_section.append("\n" + "=" * 50)
-        listening_section.append("[👂 INTERNAL_BRAIN_ONLY: RECENT GROUP LISTENING (Fofoca & Trends)]")
-        listening_section.append("=" * 50)
-        listening_section.append("(O que outras pessoas no grupo estavam discutindo antes desta mensagem)")
-        for msg in unified.group_listening_context:
-            author = msg.get('usuario', '')
-            text = msg.get('mensagem', '')[:250]
-            if text:
-                if author and author not in ["Grupo", "Membro", ""]:
-                    listening_section.append(f"• {author}: {text}")
-                else:
-                    listening_section.append(f"• {text}")
-        listening_section.append("\n📌 INSTRUÇÃO DE TRENDS/FOFOCA:")
-        listening_section.append("- Use este histórico para saber o que as pessoas estão a conversar (a trend).")
-        listening_section.append("- Se for relevante, CITE NOMINALMENTE pessoas que deram comentários interessantes no grupo ou as mencione diretamente na sua resposta.")
-        listening_section.append("- Comente sobre a 'fofoca' ou o assunto em alta no grupo. Mostre que você está prestando atenção em todos e sabe quem falou com quem.")
-        listening_section.append("- Não alucine! Confie estritamente neste histórico. Você pode usar isso a favor ou contra os membros, como uma observadora atenta do grupo.")
-        parts.append("\n".join(listening_section))
-    # ===== RAG CONTEXT (MEMÓRIA DE LONGO PRAZO) =====
-    if unified.long_term_memory:
-        rag_section = []
-        rag_section.append("\n" + "=" * 50)
-        rag_section.append("[📖 INTERNAL_BRAIN_ONLY: LONG-TERM MEMORY]")
-        rag_section.append("=" * 50)
-        rag_section.append("(Informações previamente aprendidas sobre o usuário)")
-        rag_section.append(unified.long_term_memory)
-        parts.append("\n".join(rag_section))
-    # ===== 2. STM CONTEXT (METADADOS DE FLUXO) =====
-    if unified.stm_messages:
-        stm_section = []
-        # Não adicionamos as mensagens como texto aqui para evitar duplicação e truncagem,
-        # pois elas já são injetadas nativamente no array context_history da API.
-        # emotional trend
-        if unified.stm_emotional_trend != "neutral":
-            stm_section.append(f"\n📊 Tendência emocional do chat: {unified.stm_emotional_trend}")
-        if stm_section:
-            parts.append("\n".join(stm_section))
-    # ===== 3. SYSTEM OVERRIDE (REMETENTE & AMBIENTE) =====
-    if getattr(unified, 'system_override', None):
-        parts.append(unified.system_override)
-    return "\n".join(parts)
-# ====================================
-# GROUP LISTEN FEED HELPER
-# ====================================
-def get_group_listen_feed(
-    grupo_id: str,
-    limit: int = 6,
-    db_instance=None
-) -> List[Dict[str, Any]]:
-    """
-    Recupera as últimas mensagens ouvidas passivamente num grupo via STM de Grupo.
-    Usado para alimentar o campo group_listening_context do UnifiedMessageContext,
-    permitindo que a IA saiba o que outros membros disseram mesmo sem ser mencionada.
-    Args:
-        grupo_id: ID do grupo (JID completo ou context_id)
-        limit: Máximo de mensagens a recuperar (padrão 6)
-        db_instance: Instância do Database (opcional)
-    Returns:
-        Lista de dicts: [{"usuario": str, "mensagem": str, "timestamp": float}]
-    """
-    feed: List[Dict[str, Any]] = []
-    if not grupo_id:
-        return feed
-    try:
-        # Import local para evitar dependência circular
-        from .unified_context import get_stm_manager
-        stm_mgr = get_stm_manager()
-        group_stm_id = f"group_feed_{grupo_id}"
-        # Recupera as mensagens do cache STM compartilhado do grupo
-        if stm_mgr:
-            msgs = stm_mgr.get_messages(group_stm_id, limit=limit, include_replies=True)
-            for msg in msgs:
-                # O msg.content já está formatado como "[Author]: msg" ou "[Author] em resposta a [Target]: msg"
-                # A API adiciona dessa forma
-                feed.append({
-                    "usuario": "", # Deixamos vazio pois o nome já está no content
-                    "mensagem": (msg.content or "")[:250],
-                    "timestamp": msg.timestamp
-                })
-    except Exception as e:
-        logger.debug(f"[LISTEN_FEED] Erro ao obter feed STM do grupo {grupo_id[:12]}: {e}")
-    return feed
-def build_sender_context_level(
-    sender_name: str,
-    sender_number: str,
-    listen_feed: List[Dict[str, Any]],
-    stm_count: int
-) -> int:
-    """
-    Calcula o nível de consciência contextual (1, 2 ou 3).
-    Nível 1 — Apenas o remetente actual é conhecido
-    Nível 2 — Grupo activo: temos feed de outras mensagens ouvidas
-    Nível 3 — Contexto rico: STM consolidado + feed denso
-    """
-    if not sender_name and not sender_number:
-        return 1
-    if len(listen_feed) >= 3 and stm_count >= 5:
-        return 3
-    if len(listen_feed) >= 1 or stm_count >= 3:
-        return 2
-    return 1
-# ====================================
-# SHORT-TERM MEMORY MANAGER
-# ====================================
-class ShortTermMemoryManager:
-    """
-    Gerenciador de instâncias STM por conversa.
-    Philosophy: Cada conversa tem sua própria STM isolada,
-    mas todas compartilham o mesmo manager.
-    """
-    _instance = None
-    _lock = None
-    def __new__(cls):
-        if cls._instance is None:
-            cls._lock = __import__('threading').Lock()
-            with cls._lock:
-                if cls._instance is None:
-                    cls._instance = super().__new__(cls)
-                    cls._instance._initialized = False
-        return cls._instance
-    def __init__(self):
-        if self._initialized:
-            return
-        self._instances: Dict[str, ShortTermMemory] = {}
-        # Path centralizado via config
-        if config and hasattr(config, "DATA_DIR"):
-            self._storage_path: str = str(config.DATA_DIR / "stm_cache")
-        else:
-            self._storage_path: str = os.path.join(
-                os.path.dirname(os.path.abspath(__file__)),
-                '..', 'data', 'stm_cache'
-            )
-        os.makedirs(self._storage_path, exist_ok=True)
-        self._initialized = True
-        self._load_all()
-        logger.debug(f"✅ ShortTermMemoryManager inicializado (persistência: {self._storage_path})")
-    # ============================================================
-    # PERSISTÊNCIA EM DISCO
-    # ============================================================
-    def _stm_file_path(self, conversation_id: str) -> str:
-        """Retorna caminho do arquivo de persistência de uma STM."""
-        safe_id = conversation_id.replace('/', '_').replace('\\', '_')[:128]
-        return os.path.join(self._storage_path, f"{safe_id}.json")
-    def _load_stm(self, conversation_id: str) -> Optional[ShortTermMemory]:
-        """Carrega STM de disco se existir."""
-        fpath = self._stm_file_path(conversation_id)
-        if os.path.exists(fpath):
-            try:
-                stm = ShortTermMemory.load_from_file(fpath)
-                self._instances[conversation_id] = stm
-                return stm
-            except Exception as e:
-                logger.warning(f"Falha ao carregar STM {conversation_id[:8]}: {e}")
-        return None
-    def _load_all(self) -> None:
-        """Carrega todas as STMs persistidas do disco."""
-        if not os.path.isdir(self._storage_path):
-            return
-        for fname in os.listdir(self._storage_path):
-            if fname.endswith('.json'):
-                cid = fname[:-5]
-                self._load_stm(cid)
-        logger.info(f"📦 {len(self._instances)} STM(s) carregadas do disco")
-    def _save_stm(self, conversation_id: str) -> None:
-        """Salva STM de uma conversa em disco."""
-        if conversation_id in self._instances:
-            fpath = self._stm_file_path(conversation_id)
-            self._instances[conversation_id].save_to_file(fpath)
-    def get_or_create_stm(
-        self,
-        conversation_id: str,
-        user_id: str = "",
-        max_messages: int = 100
-    ) -> ShortTermMemory:
-        """
-        Obtém ou cria STM para uma conversa.
-        Args:
-            conversation_id: ID único da conversa
-            user_id: ID do usuário
-            max_messages: Máximo de mensagens na STM
-        Returns:
-            Instância de ShortTermMemory
-        """
-        if conversation_id not in self._instances:
-            self._instances[conversation_id] = ShortTermMemory(
-                conversation_id=conversation_id,
-                max_messages=max_messages
-            )
-            logger.debug(f"🧠 STM criada: {conversation_id[:8]}...")
-        return self._instances[conversation_id]
-    def add_message(
-        self,
-        conversation_id: str,
-        role: str,
-        content: str,
-        emocao: str = "neutral",
-        reply_info: Optional[Dict] = None,
-        importancia: Optional[float] = None
-    ) -> MessageWithContext:
-        """
-        Adiciona mensagem à STM de uma conversa.
-        Args:
-            conversation_id: ID da conversa
-            role: "user" ou "assistant"
-            content: Texto da mensagem
-            emocao: Emoção detectada
-            reply_info: Info de reply (se aplicável)
-            importancia: Importância customizada
-        Returns:
-            MessageWithContext criada
-        """
-        stm = self.get_or_create_stm(conversation_id)
-        # Calcula importância automaticamente se não fornecida
-        if importancia is None:
-            from .short_term_memory import calcular_importancia
-            importancia = calcular_importancia(
-                is_reply=bool(reply_info and reply_info.get("is_reply")),
-                reply_to_bot=bool(reply_info and reply_info.get("reply_to_bot")),
-                mensagem=content,
-                emocao=emocao
-            )
-        msg = stm.add_message(
-            role=role,
-            content=content,
-            importancia=importancia,
-            emocao=emocao,
-            reply_info=reply_info
-        )
-        # Persiste em disco (salva a cada mensagem para garantir durability)
-        self._save_stm(conversation_id)
-        return msg
-    def get_context(
-        self,
-        conversation_id: str,
-        include_replies: bool = True,
-        prioritize_replies: bool = True,
-        max_messages: int = 30,
-        max_tokens: int = 4000
-    ) -> List[MessageWithContext]:
-        """
-        Obtém contexto da STM de uma conversa.
-        Args:
-            conversation_id: ID da conversa
-            include_replies: Se inclui replies
-            prioritize_replies: Se prioriza replies
-            max_messages: Máximo de mensagens
-            max_tokens: Máximo de tokens
-        Returns:
-            Lista de mensagens
-        """
-        if conversation_id not in self._instances:
-            return []
-        stm = self._instances[conversation_id]
-        return stm.get_context_window(
-            include_replies=include_replies,
-            prioritize_replies=prioritize_replies,
-            max_messages=max_messages,
-            max_tokens=max_tokens
-        )
-    def get_summary(self, conversation_id: str) -> Dict[str, Any]:
-        """
-        Obtém resumo da STM de uma conversa.
-        Args:
-            conversation_id: ID da conversa
-        Returns:
-            Dicionário com resumo
-        """
-        if conversation_id not in self._instances:
-            return {}
-        stm = self._instances[conversation_id]
-        return stm.get_conversation_summary()
-    def clear(self, conversation_id: str) -> bool:
-        """
-        Limpa STM de uma conversa, inclusive persistência em disco.
-        Args:
-            conversation_id: ID da conversa
-        Returns:
-            True se limpou
-        """
-        if conversation_id in self._instances:
-            self._instances[conversation_id].clear()
-            del self._instances[conversation_id]
-        # Remove arquivo de persistência
-        fpath = self._stm_file_path(conversation_id)
-        if hasattr(self, 'fpath') or True:
-            try:
-                fpath = self._stm_file_path(conversation_id)
-                if os.path.exists(fpath):
-                    os.remove(fpath)
-            except Exception:
-                pass
-        return True
-    def clear_messages(self, conversation_id: str) -> None:
-        """Alias de compatibilidade para clear()."""
-        self.clear(conversation_id)
-    def get_messages(
-        self,
-        conversation_id: str,
-        limit: int = 30,
-        include_replies: bool = True
-    ) -> list:
-        """
-        Alias de compatibilidade para get_context().
-        Retorna lista de MessageWithContext para a conversa.
-        Args:
-            conversation_id: ID da conversa
-            limit: Quantidade máxima de mensagens
-            include_replies: Se inclui replies
-        Returns:
-            Lista de MessageWithContext
-        """
-        if conversation_id not in self._instances:
-            return []
-        stm = self._instances[conversation_id]
-        result = stm.get_context_window(
-            include_replies=include_replies,
-            prioritize_replies=True,
-            max_messages=limit
-        )
-        return result if result else []
-# ====================================
-# UNIFIED CONTEXT BUILDER
-# ====================================
-class UnifiedContextBuilder:
-    """
-    Constrói contexto unificado combinando reply + STM.
-    Philosophy: "Reply context e STM devem trabalhar em sintonia como tik e tack"
-    Usage:
-        builder = UnifiedContextBuilder()
-        context = builder.build(
-            conversation_id="...",
-            reply_metadata={...},
-            current_message="..."
-        )
-        prompt_section = builder.format_for_llm(context)
-    """
-    def __init__(self, context_manager=None, stm_manager=None, db_instance=None):
-        self.stm_manager = stm_manager if stm_manager else ShortTermMemoryManager()
-        self.context_manager = context_manager
-        self.db = db_instance
-        self.reply_handler = None
-        self._initialized = False
-    def _ensure_initialized(self):
-        """Garante inicialização do reply handler."""
-        if not self._initialized and UNIFIED_CONTEXT_AVAILABLE:
-            try:
-                self.reply_handler = ReplyContextHandler()
-                self._initialized = True
-            except Exception as e:
-                logger.warning(f"UnifiedContextBuilder: falha ao init reply handler: {e}")
-    def build(
-        self,
-        conversation_id: str,
-        user_id: str = "",
-        reply_metadata: Optional[Dict[str, Any]] = None,
-        current_message: str = "",
-        current_emotion: str = "neutro",
-        stm_messages: Optional[List[MessageWithContext]] = None
-    ) -> UnifiedMessageContext:
-        """
-        Constrói contexto unificado.
-        Args:
-            conversation_id: ID único da conversa
-            user_id: ID do usuário
-            reply_metadata: Metadados do reply
-            current_message: Mensagem atual
-            current_emotion: Emoção atual
-            stm_messages: Mensagens STM (usa manager se None)
-        Returns:
-            UnifiedMessageContext pronto para uso
-        """
-        self._ensure_initialized()
-        # ===== 1. PROCESSA REPLY CONTEXT (TIK) =====
-        is_reply = reply_metadata.get('is_reply', False) if reply_metadata else False
-        reply_context = {
-            'is_reply': is_reply,
-            'reply_to_bot': reply_metadata.get('reply_to_bot', False) if reply_metadata else False,
-            'quoted_author': reply_metadata.get('quoted_author_name', '') if reply_metadata else '',
-            'quoted_content': reply_metadata.get('quoted_text_original', '') or
-                             reply_metadata.get('mensagem_citada', '') if reply_metadata else '',
-            'importancia': IMPORTANCIA_NORMAL,
-            'emocao': current_emotion,
-            'priority': 1
-        }
-        # Calcula prioridade do reply
-        if is_reply and reply_metadata:
-            reply_context['priority'] = self._calculate_reply_priority(
-                reply_metadata.get('reply_to_bot', False),
-                current_message,
-                reply_metadata.get('quoted_text_original', '')
-            )
-            # Calcula importância baseada em prioridade
-            if reply_context['priority'] >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
-                reply_context['importancia'] = IMPORTANCIA_PERGUNTA_CURTA_REPLY
-            elif reply_context['priority'] >= PRIORITY_REPLY_TO_BOT:
-                reply_context['importancia'] = IMPORTANCIA_REPLY_TO_BOT
-            elif reply_context['priority'] >= PRIORITY_REPLY:
-                reply_context['importancia'] = IMPORTANCIA_REPLY
-        # ===== 2. OBTÉM STM (TOK) =====
-        if stm_messages is None:
-            stm_messages = self.stm_manager.get_context(
-                conversation_id,
-                include_replies=True,
-                prioritize_replies=True,
-                max_messages=30,
-                max_tokens=4000
-            )
-        # ===== 3. CALCULA TOKEN BUDGET =====
-        # Detecta se é self-reply (reply para o próprio bot)
-        is_self_reply = False
-        if is_reply and reply_metadata:
-            bot_num = str(config.BOT_NUMERO if hasattr(config, 'BOT_NUMERO') else '37839265886398')
-            quoted_num = str(reply_metadata.get('quoted_author_numero', ''))
-            if bot_num in quoted_num or (reply_metadata.get('reply_to_bot') and 'você' in str(reply_metadata.get('quoted_author_name', '')).lower()):
-                is_self_reply = True
-        budget = ContextTokenBudget().calculate(
-            is_reply=is_reply,
-            reply_priority=reply_context['priority'],
-            is_self_reply=is_self_reply
-        )
-        # ===== 4. FETCH LONG-TERM MEMORY (DB) =====
-        long_term_memory_string = ""
-        if self.db and user_id:
-            try:
-                # Recuperar aprendizados e gírias
-                ltm_facts = self.db.recuperar_aprendizado_detalhado(user_id)
-                ltm_girias = self.db.recuperar_girias_usuario(user_id)
-                ltm_tom = self.db.obter_tom_predominante(user_id)
-                persona_ltm = self.db.recuperar_persona(user_id) if hasattr(self.db, 'recuperar_persona') else None
-                ltm_lines = []
-                # --- PERSONA DO USUÁRIO (Rastreador) ---
-                if persona_ltm:
-                    ltm_lines.append("=== PERFIL ANALISADO DO USUÁRIO ===")
-                    if persona_ltm.get('personalidade') and persona_ltm['personalidade'] != "None":
-                        ltm_lines.append(f"• Personalidade: {persona_ltm['personalidade']}")
-                    if persona_ltm.get('gostos') and persona_ltm['gostos'] != "None":
-                        ltm_lines.append(f"• Tópicos de Interesse: {persona_ltm['gostos']}")
-                    if persona_ltm.get('desgostos') and persona_ltm['desgostos'] != "None":
-                        ltm_lines.append(f"• Desgostos/Gatilhos: {persona_ltm['desgostos']}")
-                    if persona_ltm.get('vicios_linguagem') and persona_ltm['vicios_linguagem'] != "None":
-                        ltm_lines.append(f"• Padrões de Linguagem: {persona_ltm['vicios_linguagem']}")
-                    if persona_ltm.get('emocional') and persona_ltm['emocional'] != "None":
-                        ltm_lines.append(f"• Perfil Emocional: {persona_ltm['emocional']}")
-                if ltm_tom:
-                    ltm_lines.append(f"• Seu tom de conversa predominante é: {ltm_tom}")
-                if ltm_facts and isinstance(ltm_facts, dict):
-                    # Ignorar chaves puramente técnicas como 'emocao_atual' ou strings de timestamp longas
-                    fatos_filtrados = {k: v for k, v in ltm_facts.items() if not k.startswith("emocao_")}
-                    if fatos_filtrados:
-                        ltm_lines.append("• Fatos Relevantes Aprendidos:")
-                        for k, v in list(fatos_filtrados.items())[:5]: # limita 5
-                            ltm_lines.append(f"  - {k}: {v}")
-                if ltm_girias:
-                    ltm_lines.append("• Expressões Específicas Recentes:")
-                    for g in ltm_girias[:5]:
-                        ltm_lines.append(f"  - {g['giria']} ({g['significado']})")
-                if ltm_lines:
-                    long_term_memory_string = "\n".join(ltm_lines)
-            except Exception as e:
-                logger.warning(f"Erro ao recuperar memória de longo prazo: {e}")
-        # [INTEGRAÇÃO LSTM MENTAL CONTEXT]
-        if LSTM_AVAILABLE and self.db and conversation_id:
-            try:
-                lstm_ext = get_lstm_extension(self.db)
-                lstm_data = lstm_ext.get_context_for_prompt(conversation_id, user_id)
-                if lstm_data:
-                    lstm_lines = ["\n[INTERNAL_BRAIN_ONLY: COMPLETE CONVERSATION SUMMARY]"]
-                    if lstm_data.get('topic_principal'):
-                        lstm_lines.append(f"• Tópico Atual: {lstm_data['topic_principal']}")
-                    if lstm_data.get('subtopicas'):
-                        lstm_lines.append(f"• Subtópicos: {', '.join(lstm_data['subtopicas'])}")
-                    if lstm_data.get('unanswered_questions'):
-                        lstm_lines.append(f"• Perguntas pendentes: {'; '.join(lstm_data['unanswered_questions'])}")
-                    if lstm_data.get('interaction_pattern'):
-                        lstm_lines.append(f"• Padrão do usuário: {lstm_data['interaction_pattern']}")
-                    if lstm_data.get('assumed_knowledge'):
-                        lstm_lines.append(f"• Usuário sabe sobre: {', '.join(lstm_data['assumed_knowledge'])}")
-                    lstm_lines.append("NOTA MENTAL MÁXIMA: Este resumo é estritamente para seu conhecimento interno. NUNCA mencione que você leu um resumo ou narre o histórico. Apenas aja como se você lembrasse de tudo naturalmente.")
-                    if long_term_memory_string:
-                        long_term_memory_string += "\n" + "\n".join(lstm_lines)
-                    else:
-                        long_term_memory_string = "\n".join(lstm_lines)
-            except Exception as e:
-                logger.warning(f"Erro ao recuperar contexto LSTM: {e}")
-        # ===== 4.5 FETCH GROUP LISTENING CONTEXT =====
-        group_listening = []
-        if self.db and conversation_id and "@g.us" in conversation_id:
-            try:
-                # Recuperar últimas 15 mensagens de outras pessoas no grupo
-                # Exclui a mensagem atual e mensagens do bot para focar no que o grupo fala
-                bot_num = str(config.BOT_NUMERO if hasattr(config, 'BOT_NUMERO') else '37839265886398')
-                rows = self.db._execute_with_retry("""
-                    SELECT usuario, mensagem FROM mensagens
-                    WHERE conversation_id = ?
-                    AND numero != ?
-                    AND mensagem != ?
-                    ORDER BY id DESC LIMIT 15
-                """, (conversation_id, bot_num, current_message))
-                if rows:
-                    for r in reversed(rows):
-                        group_listening.append({'usuario': r[0], 'mensagem': r[1]})
-            except Exception as ge:
-                logger.warning(f"Erro ao recuperar group listening context: {ge}")
-        # ===== 5. CRIA CONTEXTO UNIFICADO =====
-        unified = UnifiedMessageContext(
-            conversation_id=conversation_id,
-            user_id=user_id,
-            timestamp=time.time(),
-            is_reply=is_reply,
-            reply_to_bot=reply_context['reply_to_bot'],
-            reply_priority=reply_context['priority'],
-            quoted_author=reply_context['quoted_author'],
-            quoted_content=reply_context['quoted_content'],
-            reply_importancia=reply_context['importancia'],
-            stm_messages=stm_messages,
-            stm_summary=self.stm_manager.get_summary(conversation_id),
-            stm_emotional_trend=self._get_stm_emotional_trend(stm_messages),
-            long_term_memory=long_term_memory_string,
-            group_listening_context=group_listening,
-            sync_mode="tiktok",
-            token_budget=budget,
-            current_message=current_message,
-            current_emotion=current_emotion
-        )
-        return unified
-    def _calculate_reply_priority(
-        self,
-        reply_to_bot: bool,
-        current_message: str,
-        quoted_content: str
-    ) -> int:
-        """
-        Calcula nível de prioridade do reply.
-        Returns:
-            1=normal, 2=reply, 3=reply_to_bot, 4=critical
-        """
-        if not reply_to_bot:
-            return PRIORITY_REPLY
-        if is_pergunta_curta(current_message):
-            return PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
-        return PRIORITY_REPLY_TO_BOT
-    def _get_stm_emotional_trend(
-        self,
-        stm_messages: List[MessageWithContext]
-    ) -> str:
-        """Obtém tendência emocional da STM."""
-        if not stm_messages:
-            return "neutral"
-        emocoes = {}
-        for msg in stm_messages[-10:]:  # Últimas 10
-            emocao = msg.emocao or "neutral"
-            emocoes[emocao] = emocoes.get(emocao, 0) + 1
-        if not emocoes:
-            return "neutral"
-        return max(emocoes, key=emocoes.get)
-    def format_for_llm(
-        self,
-        unified: UnifiedMessageContext,
-        include_header: bool = True
-    ) -> str:
-        """
-        Formata contexto unificado para o prompt do LLM.
-        Args:
-            unified: Contexto unificado
-            include_header: Se inclui cabeçalho
-        Returns:
-            String formatada para o prompt
-        """
-        return format_unified_context_for_llm(unified, unified.token_budget)
-    def add_to_stm(
-        self,
-        conversation_id: str,
-        role: str,
-        content: str,
-        emocao: str = "neutral",
-        reply_info: Optional[Dict] = None,
-        resposta: str = ""
-    ) -> MessageWithContext:
-        """
-        Adiciona mensagem (user ou bot) à STM.
-        Args:
-            conversation_id: ID da conversa
-            role: "user" ou "assistant"
-            content: Conteúdo da mensagem
-            emocao: Emoção
-            reply_info: Info de reply (se aplicável)
-            resposta: Resposta do bot (se for assistant)
-        Returns:
-            MessageWithContext criada
-        """
-        # Para mensagens do bot, usa a resposta gerada
-        if role == "assistant" and resposta:
-            content = resposta
-        return self.stm_manager.add_message(
-            conversation_id=conversation_id,
-            role=role,
-            content=content,
-            emocao=emocao,
-            reply_info=reply_info
-        )
-    def merge_reply_with_stm(
-        self,
-        reply_context: Dict[str, Any],
-        stm_messages: List[MessageWithContext],
-        max_stm: int = 30
-    ) -> List[MessageWithContext]:
-        """
-        Mescla reply context com STM para contexto do LLM.
-        Args:
-            reply_context: Contexto do reply
-            stm_messages: Mensagens STM
-            max_stm: Máximo de mensagens STM
-        Returns:
-            Lista combinada
-        """
-        return sync_reply_with_stm(reply_context, stm_messages, max_stm)
-# ====================================
-# FACTORY FUNCTIONS
-# ====================================
-_unified_builder: Optional[UnifiedContextBuilder] = None
-def get_unified_context_builder() -> UnifiedContextBuilder:
-    """Obtém instância singleton do builder."""
-    global _unified_builder
-    if _unified_builder is None:
-        _unified_builder = UnifiedContextBuilder()
-    return _unified_builder
-def get_stm_manager() -> ShortTermMemoryManager:
-    """Obtém instância singleton do manager de STM."""
-    return ShortTermMemoryManager()
-def build_unified_context(
-    conversation_id: str,
-    user_id: str = "",
-    reply_metadata: Optional[Dict[str, Any]] = None,
-    current_message: str = "",
-    current_emotion: str = "neutral"
-) -> UnifiedMessageContext:
-    """
-    Factory function para construir contexto unificado.
-    Usage:
-        context = build_unified_context(
-            conversation_id="pv:2449...",
-            reply_metadata={...},
-            current_message="."
-        )
-    """
-    builder = get_unified_context_builder()
-    return builder.build(
-        conversation_id=conversation_id,
-        user_id=user_id,
-        reply_metadata=reply_metadata,
-        current_message=current_message,
-        current_emotion=current_emotion
-    )
-# ====================================
-# COMPATIBILITY HELPERS
-# ====================================
-def gerar_id_conversao(
-    numero: str,
-    tipo_conversa: str = "pv",
-    grupo_id: Optional[str] = None
-) -> str:
-    """
-    Gera ID de conversa para STM isolada.
-    Args:
-        numero: Número do usuário
-        tipo_conversa: "pv" ou "grupo"
-        grupo_id: ID do grupo (para conversas em grupo)
-    Returns:
-        ID único da conversa
-    """
-    from .context_isolation import generate_context_id
-    return generate_context_id(numero, tipo_conversa, grupo_id)
-# type: ignore

+# type: ignore
+"""
+================================================================================
+AKIRA V21 ULTIMATE - UNIFIED CONTEXT MODULE
+================================================================================
+Sistema unificado que integra Reply Context + Short-Term Memory em sintonia.
+Philosophy: "Reply context e STM devem trabalhar em sintonia como tik e tok -
+um fornece o contexto imediato/urgente (o que o usuário está respondendo),
+o outro fornece o fluxo da conversa (contexto geral)."
+Features:
+- Integração seamless entre reply context e STM
+- Token budgeting inteligente entre os dois contextos
+- Priorização dinâmica baseada no tipo de mensagem
+- Suporte a perguntas curtas com reply (prioridade máxima)
+- Persistência e restauração de contexto unificado
+================================================================================
+"""
+import os
+import sys
+import time
+import json
+import logging
+from typing import Optional, Dict, Any, List, Tuple
+from dataclasses import dataclass, field
+from datetime import datetime
+# Imports robustos com fallback
+try:
+    from . import config
+    from .short_term_memory import (
+        ShortTermMemory,
+        MessageWithContext,
+        IMPORTANCIA_NORMAL,
+        IMPORTANCIA_REPLY,
+        IMPORTANCIA_REPLY_TO_BOT,
+        IMPORTANCIA_PERGUNTA_CURTA_REPLY,
+        estimar_tokens,
+        is_pergunta_curta
+    )
+    from .reply_context_handler import (
+        ReplyContextHandler,
+        ProcessedReplyContext,
+        PRIORITY_REPLY,
+        PRIORITY_REPLY_TO_BOT,
+        PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
+    )
+    UNIFIED_CONTEXT_AVAILABLE = True
+except ImportError as e:
+    try:
+        import modules.config as config
+        from modules.short_term_memory import (
+            ShortTermMemory,
+            MessageWithContext,
+            IMPORTANCIA_NORMAL,
+            IMPORTANCIA_REPLY,
+            IMPORTANCIA_REPLY_TO_BOT,
+            IMPORTANCIA_PERGUNTA_CURTA_REPLY,
+            estimar_tokens,
+            is_pergunta_curta
+        )
+        from modules.reply_context_handler import (
+            ReplyContextHandler,
+            ProcessedReplyContext,
+            PRIORITY_REPLY,
+            PRIORITY_REPLY_TO_BOT,
+            PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
+        )
+        UNIFIED_CONTEXT_AVAILABLE = True
+    except ImportError:
+        UNIFIED_CONTEXT_AVAILABLE = False
+        config = None
+try:
+    from .lstm_extension import get_lstm_extension
+    LSTM_AVAILABLE = True
+except ImportError:
+    try:
+        from modules.lstm_extension import get_lstm_extension
+        LSTM_AVAILABLE = True
+    except ImportError:
+        LSTM_AVAILABLE = False
+logger = logging.getLogger(__name__)
+# ============================================================
+# CONFIGURAÇÃO DE TOKEN BUDGET
+# ============================================================
+@dataclass
+class ContextTokenBudget:
+    """
+    Token allocation between the reply context and the short-term memory (STM).
+    The reply context gets a dedicated slice, the STM gets the conversation-flow
+    slice, and whatever remains of the total is reserved for the response.
+    """
+    total_budget: int = 8000
+    system_tokens: int = 1500
+    user_message_tokens: int = 500
+    # Reply-context budget (urgent part of the prompt).
+    reply_tokens: int = 300
+    reply_priority_multiplier: float = 1.0
+    # STM budget (conversation flow).
+    stm_tokens: int = 4000
+    # Tokens left over for the generated response.
+    response_reserved: int = 1200
+    def calculate(self, is_reply: bool, reply_priority: int = 1) -> 'ContextTokenBudget':
+        """
+        Build a new budget adjusted to the kind of message.
+        Args:
+            is_reply: Whether the current message is a reply.
+            reply_priority: Reply priority level, compared against the
+                PRIORITY_* constants imported by this module.
+        Returns:
+            A fresh ContextTokenBudget; ``self`` is left untouched.
+        """
+        total = self.total_budget
+        budget = ContextTokenBudget(
+            total_budget=total,
+            system_tokens=self.system_tokens,
+            user_message_tokens=self.user_message_tokens
+        )
+        if not is_reply:
+            # Plain message: no reply slice, the STM takes the larger share.
+            budget.reply_tokens = 0
+            budget.stm_tokens = min(5000, int(total * 0.65))
+        elif reply_priority >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            # Short question replying to the bot: largest reply slice.
+            budget.reply_tokens = min(1500, int(total * 0.20))
+            budget.reply_priority_multiplier = 1.5
+            budget.stm_tokens = min(3500, int(total * 0.45))
+        elif reply_priority >= PRIORITY_REPLY_TO_BOT:
+            # Reply to the bot.
+            budget.reply_tokens = min(1200, int(total * 0.15))
+            budget.reply_priority_multiplier = 1.3
+            budget.stm_tokens = min(4000, int(total * 0.50))
+        elif reply_priority >= PRIORITY_REPLY:
+            # Ordinary reply.
+            budget.reply_tokens = min(800, int(total * 0.10))
+            budget.reply_priority_multiplier = 1.1
+            budget.stm_tokens = min(4500, int(total * 0.55))
+        # A reply below PRIORITY_REPLY keeps the dataclass defaults set above.
+        leftover = (
+            total -
+            budget.system_tokens -
+            budget.user_message_tokens -
+            budget.reply_tokens -
+            budget.stm_tokens
+        )
+        # With a small total_budget the fixed allocations can exceed the total;
+        # a negative reservation is meaningless, so clamp it at zero.
+        budget.response_reserved = max(0, leftover)
+        return budget
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize every budget field into a plain dictionary."""
+        return {
+            "total_budget": self.total_budget,
+            "system_tokens": self.system_tokens,
+            "user_message_tokens": self.user_message_tokens,
+            "reply_tokens": self.reply_tokens,
+            "stm_tokens": self.stm_tokens,
+            "response_reserved": self.response_reserved,
+            "reply_priority_multiplier": self.reply_priority_multiplier
+        }
+# ============================================================
+# CONTEXTO UNIFICADO
+# ============================================================
+@dataclass
+class UnifiedMessageContext:
+    """
+    Unified context that carries the reply context and the STM context together.
+    Field groups:
+        - Reply context ("tik"): what the user is replying to.
+        - STM context ("tok"): recent conversation flow.
+        - Integration: sync mode and token budget used when building the prompt.
+    """
+    # Identification
+    conversation_id: str = ""
+    user_id: str = ""
+    timestamp: float = field(default_factory=time.time)
+    # Reply context (TIK - urgent/immediate)
+    is_reply: bool = False
+    reply_to_bot: bool = False
+    reply_priority: int = 1  # 1=normal, 2=reply, 3=reply_to_bot, 4=critical
+    quoted_author: str = ""
+    quoted_content: str = ""
+    reply_importancia: float = 1.0
+    replied_to_author: str = ""
+    replied_to_content: str = ""
+    # STM context (TOK - conversation flow)
+    stm_messages: List[MessageWithContext] = field(default_factory=list)
+    stm_summary: Dict[str, Any] = field(default_factory=dict)
+    stm_emotional_trend: str = "neutral"
+    # Long-term memory text injected into the prompt
+    long_term_memory: str = ""
+    # Integration
+    sync_mode: str = "tiktok"  # "tiktok" = reply priority + STM flow
+    token_budget: ContextTokenBudget = field(default_factory=ContextTokenBudget)
+    # Current message
+    current_message: str = ""
+    # NOTE(review): this default is "neutro" while the other emotion defaults in
+    # this file use "neutral" - confirm which spelling downstream code expects.
+    current_emotion: str = "neutro"
+    system_override: str = ""
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize the context into a plain dictionary.
+        Long text fields are truncated, and the STM messages are reported
+        only as a count.
+        """
+        return {
+            "conversation_id": self.conversation_id,
+            "user_id": self.user_id,
+            "timestamp": self.timestamp,
+            "is_reply": self.is_reply,
+            "reply_to_bot": self.reply_to_bot,
+            "reply_priority": self.reply_priority,
+            "quoted_author": self.quoted_author,
+            "quoted_content": self.quoted_content[:500] if self.quoted_content else "",
+            "reply_importancia": self.reply_importancia,
+            "stm_messages_count": len(self.stm_messages),
+            "stm_summary": self.stm_summary,
+            "stm_emotional_trend": self.stm_emotional_trend,
+            "long_term_memory": self.long_term_memory,
+            "sync_mode": self.sync_mode,
+            "token_budget": self.token_budget.to_dict(),
+            # Guarded like the other truncated fields so a None message does not raise.
+            "current_message": self.current_message[:100] if self.current_message else "",
+            "current_emotion": self.current_emotion,
+            "replied_to_author": self.replied_to_author,
+            "replied_to_content": self.replied_to_content[:200] if self.replied_to_content else ""
+        }
+    def build_prompt(self) -> str:
+        """
+        Build the LLM prompt text for this context.
+        Returns:
+            The string produced by format_unified_context_for_llm using this
+            context's own token budget.
+        """
+        return format_unified_context_for_llm(self, self.token_budget)
+# ====================================
+# HELPER FUNCTIONS
+# ====================================
+def sync_reply_with_stm(
+    reply_context: Dict[str, Any],
+    stm_messages: List[MessageWithContext],
+    max_stm_messages: int = 10
+) -> List[MessageWithContext]:
+    """
+    Combine the reply context with the STM messages into one list.
+    The reply (when present) is placed first, followed by the last
+    ``max_stm_messages`` STM messages in their original order.
+    Args:
+        reply_context: Reply data; read through the keys 'is_reply',
+            'quoted_content', 'importancia', 'emocao', 'reply_to_bot' and 'priority'.
+        stm_messages: Short-term memory messages.
+        max_stm_messages: Maximum number of STM messages to include;
+            zero or a negative value includes none.
+    Returns:
+        The combined list of messages.
+    """
+    combined = []
+    # 1. Reply context goes first (TIK).
+    if reply_context.get('is_reply', False):
+        quoted = reply_context.get('quoted_content', '')
+        combined.append(MessageWithContext(
+            role="user",
+            content=quoted,
+            importancia=reply_context.get('importancia', IMPORTANCIA_NORMAL),
+            emocao=reply_context.get('emocao', 'neutral'),
+            reply_info={
+                'is_reply': True,
+                'reply_to_bot': reply_context.get('reply_to_bot', False),
+                'quoted_text_original': quoted,
+                'priority_level': reply_context.get('priority', 1),
+                'sync_mode': 'tiktok'
+            }
+        ))
+    # 2. STM messages follow (TOK - conversation flow).
+    # A slice like seq[-0:] returns the WHOLE sequence, so a non-positive limit
+    # must be handled explicitly instead of being fed to the slice.
+    if stm_messages and max_stm_messages > 0:
+        stm_to_add = stm_messages[-max_stm_messages:]
+    else:
+        stm_to_add = []
+    for msg in stm_to_add:
+        # STM messages that are replies are tagged (in place) unless already tagged.
+        if msg.is_reply and not msg.reply_info.get('sync_mode'):
+            msg.reply_info['sync_mode'] = 'stm'
+        combined.append(msg)
+    return combined
+def format_unified_context_for_llm(
+    unified: UnifiedMessageContext,
+    budget: ContextTokenBudget
+) -> str:
+    """
+    Render the unified context as text for the LLM prompt.
+    Sections are emitted in this order, each only when it has content:
+    reply context, long-term memory, STM emotional trend.
+    Args:
+        unified: The unified context to render.
+        budget: Token budget; ``reply_tokens // 4`` caps the quoted text length
+            in characters.
+    Returns:
+        The formatted prompt fragment (empty string when no section applies).
+    """
+    parts = []
+    # ===== 1. REPLY CONTEXT (TIK - urgent) =====
+    if unified.is_reply:
+        reply_section = []
+        reply_section.append("=" * 50)
+        reply_section.append("[📎 INTERNAL_BRAIN_ONLY: REPLY CONTEXT]")
+        reply_section.append("=" * 50)
+        if unified.reply_to_bot:
+            reply_section.append("⚠️ VOCÊ ESTÁ SENDO DIRETAMENTE RESPONDIDO!")
+        else:
+            reply_section.append(f"Respondendo a: {unified.quoted_author}")
+        # Quoted content, cut to the reply budget.
+        if unified.quoted_content:
+            quote_limit = budget.reply_tokens // 4
+            quoted_preview = unified.quoted_content[:quote_limit]
+            # Only mark the quote as truncated when something was actually cut;
+            # an unconditional "..." tells the model a complete quote is partial.
+            quoted_suffix = "..." if len(unified.quoted_content) > quote_limit else ""
+            reply_section.append(f"\n<quoted_message>\n{quoted_preview}{quoted_suffix}\n</quoted_message>")
+        # Priority hint
+        if unified.reply_priority >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+            reply_section.append("\n💡 PERGUNTA CURTA + REPLY: FOCO NA CITAÇÃO")
+        reply_section.append("\n📌 INSTRUÇÕES DE REPLY:")
+        if unified.reply_to_bot:
+            thread_info = ""
+            if unified.replied_to_author:
+                replied_preview = unified.replied_to_content[:200]
+                # Same rule as above: ellipsis only when the text was cut.
+                replied_suffix = "..." if len(unified.replied_to_content) > 200 else ""
+                thread_info = f" (Esta sua mensagem citada foi enviada originalmente para {unified.replied_to_author} em resposta a: \"{replied_preview}{replied_suffix}\")"
+            reply_section.append(f"- O usuário está a reagir a uma mensagem SUA (<quoted_message>){thread_info}. Responda diretamente ao comentário do usuário, mantendo a postura sobre o que você disse.")
+        else:
+            reply_section.append("- O usuário está a responder a <quoted_message>. Formule sua resposta com base nisso.")
+        reply_section.append("- PRESERVE a sua identidade e humor (seja o Akira, natural e irreverente).")
+        reply_section.append("- Nunca perca o fio da meada. Olhe as mensagens anteriores para entender o contexto real.")
+        parts.append("\n".join(reply_section))
+    # ===== LONG-TERM MEMORY =====
+    if unified.long_term_memory:
+        rag_section = []
+        rag_section.append("\n" + "=" * 50)
+        rag_section.append("[📖 INTERNAL_BRAIN_ONLY: LONG-TERM MEMORY]")
+        rag_section.append("=" * 50)
+        rag_section.append("(Informações previamente aprendidas sobre o usuário)")
+        rag_section.append(unified.long_term_memory)
+        parts.append("\n".join(rag_section))
+    # ===== 2. STM CONTEXT (flow metadata only) =====
+    if unified.stm_messages:
+        stm_section = []
+        # The messages themselves are not rendered here; per the original note
+        # they are already passed to the API through the context_history array,
+        # so repeating them would duplicate and truncate them.
+        if unified.stm_emotional_trend != "neutral":
+            stm_section.append(f"\n📊 Tendência emocional do chat: {unified.stm_emotional_trend}")
+        if stm_section:
+            parts.append("\n".join(stm_section))
+    return "\n".join(parts)
+# ====================================
+# SHORT-TERM MEMORY MANAGER
+# ====================================
+class ShortTermMemoryManager:
+    """
+    Gerenciador de instâncias STM por conversa.
+    Philosophy: Cada conversa tem sua própria STM isolada,
+    mas todas compartilham o mesmo manager.
+    """
+    _instance = None
+    _lock = None
+    def __new__(cls):
+        # Singleton constructor: every call returns the one shared instance.
+        if cls._instance is None:
+            # NOTE(review): the lock is created here, after the unsynchronized
+            # check above, so two threads racing on first use could each install
+            # a different lock object - consider creating the lock when the class
+            # is defined. TODO confirm whether concurrent first use can happen.
+            cls._lock = __import__('threading').Lock()
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+                    # Flag read by __init__ so its setup runs only once.
+                    cls._instance._initialized = False
+        return cls._instance
+    def __init__(self):
+        """Run the one-time setup: pick the storage directory and load cached STMs."""
+        if self._initialized:
+            return
+        self._instances: Dict[str, ShortTermMemory] = {}
+        # The storage location comes from config.DATA_DIR when it is available;
+        # otherwise it falls back to ../data/stm_cache next to this module.
+        if config and hasattr(config, "DATA_DIR"):
+            cache_dir = str(config.DATA_DIR / "stm_cache")
+        else:
+            module_dir = os.path.dirname(os.path.abspath(__file__))
+            cache_dir = os.path.join(module_dir, '..', 'data', 'stm_cache')
+        self._storage_path: str = cache_dir
+        os.makedirs(cache_dir, exist_ok=True)
+        self._initialized = True
+        self._load_all()
+        logger.debug(f"✅ ShortTermMemoryManager inicializado (persistência: {self._storage_path})")
+    # ============================================================
+    # PERSISTÊNCIA EM DISCO
+    # ============================================================
+    def _stm_file_path(self, conversation_id: str) -> str:
+        """Build the JSON file path used to persist one conversation's STM."""
+        # Path separators are flattened to "_" and the name is capped at 128 characters.
+        flattened = conversation_id.replace('/', '_').replace('\\', '_')
+        file_name = f"{flattened[:128]}.json"
+        return os.path.join(self._storage_path, file_name)
+    def _load_stm(self, conversation_id: str) -> Optional[ShortTermMemory]:
+        """Load a persisted STM into the cache; return None when missing or unreadable."""
+        stm_file = self._stm_file_path(conversation_id)
+        if not os.path.exists(stm_file):
+            return None
+        try:
+            loaded = ShortTermMemory.load_from_file(stm_file)
+            self._instances[conversation_id] = loaded
+        except Exception as e:
+            # Best effort: a broken cache file is logged and skipped.
+            logger.warning(f"Falha ao carregar STM {conversation_id[:8]}: {e}")
+            return None
+        return loaded
+    def _load_all(self) -> None:
+        """Load every persisted STM found in the storage directory."""
+        if os.path.isdir(self._storage_path):
+            json_files = [f for f in os.listdir(self._storage_path) if f.endswith('.json')]
+            for json_name in json_files:
+                # The file name without its ".json" suffix is used as the conversation id.
+                self._load_stm(json_name[:-5])
+            logger.info(f"📦 {len(self._instances)} STM(s) carregadas do disco")
+    def _save_stm(self, conversation_id: str) -> None:
+        """Write one conversation's STM to its file; unknown conversations are ignored."""
+        if conversation_id not in self._instances:
+            return
+        target = self._stm_file_path(conversation_id)
+        self._instances[conversation_id].save_to_file(target)
+    def get_or_create_stm(
+        self,
+        conversation_id: str,
+        user_id: str = "",
+        max_messages: int = 100
+    ) -> ShortTermMemory:
+        """
+        Return the STM cached for a conversation, creating it on first use.
+        Args:
+            conversation_id: Unique conversation id (cache key).
+            user_id: Accepted but not used by this method.
+            max_messages: Capacity passed to a newly created STM.
+        Returns:
+            The ShortTermMemory instance for the conversation.
+        """
+        try:
+            return self._instances[conversation_id]
+        except KeyError:
+            pass
+        created = ShortTermMemory(
+            conversation_id=conversation_id,
+            max_messages=max_messages
+        )
+        self._instances[conversation_id] = created
+        logger.debug(f"🧠 STM criada: {conversation_id[:8]}...")
+        return created
+    def add_message(
+        self,
+        conversation_id: str,
+        role: str,
+        content: str,
+        author_name: str = "Usuário",
+        emocao: str = "neutral",
+        reply_info: Optional[Dict] = None,
+        importancia: Optional[float] = None
+    ) -> MessageWithContext:
+        """
+        Append a message to a conversation's STM and persist it to disk.
+        Args:
+            conversation_id: Conversation id (the STM is created if missing).
+            role: "user" or "assistant".
+            content: Message text.
+            author_name: Display name forwarded to the STM entry.
+            emocao: Detected emotion.
+            reply_info: Reply metadata, when the message is a reply.
+            importancia: Explicit importance; computed automatically when None.
+        Returns:
+            The MessageWithContext created by the STM.
+        """
+        stm = self.get_or_create_stm(conversation_id)
+        if importancia is None:
+            # Mirror the relative-then-absolute fallback used by the module-level
+            # imports, so this also works when the module was loaded via the
+            # ``modules.`` path instead of as part of the package.
+            try:
+                from .short_term_memory import calcular_importancia
+            except ImportError:
+                from modules.short_term_memory import calcular_importancia
+            importancia = calcular_importancia(
+                is_reply=bool(reply_info and reply_info.get("is_reply")),
+                reply_to_bot=bool(reply_info and reply_info.get("reply_to_bot")),
+                mensagem=content,
+                emocao=emocao
+            )
+        msg = stm.add_message(
+            role=role,
+            content=content,
+            author_name=author_name,
+            importancia=importancia,
+            emocao=emocao,
+            reply_info=reply_info
+        )
+        # Saved after every message so the cache on disk never lags behind.
+        self._save_stm(conversation_id)
+        return msg
+    def get_context(
+        self,
+        conversation_id: str,
+        include_replies: bool = True,
+        prioritize_replies: bool = True,
+        max_messages: int = 10,
+        max_tokens: int = 4000
+    ) -> List[MessageWithContext]:
+        """
+        Obtém contexto da STM de uma conversa.
+        Args:
+            conversation_id: ID da conversa
+            include_replies: Se inclui replies
+            prioritize_replies: Se prioriza replies
+            max_messages: Máximo de mensagens
+            max_tokens: Máximo de tokens
+        Returns:
+            Lista de mensagens
+        """
+        if conversation_id not in self._instances:
+            return []
+        stm = self._instances[conversation_id]
+        return stm.get_context_window(
+            include_replies=include_replies,
+            prioritize_replies=prioritize_replies,
+            max_messages=max_messages,
+            max_tokens=max_tokens
+        )
+    def get_summary(self, conversation_id: str) -> Dict[str, Any]:
+        """
+        Obtém resumo da STM de uma conversa.
+        Args:
+            conversation_id: ID da conversa
+        Returns:
+            Dicionário com resumo
+        """
+        if conversation_id not in self._instances:
+            return {}
+        stm = self._instances[conversation_id]
+        return stm.get_conversation_summary()
+    def clear(self, conversation_id: str) -> bool:
+        """
+        Limpa STM de uma conversa, inclusive persistência em disco.
+        Args:
+            conversation_id: ID da conversa
+        Returns:
+            True se limpou
+        """
+        if conversation_id in self._instances:
+            self._instances[conversation_id].clear()
+            del self._instances[conversation_id]
+        # Remove arquivo de persistência
+        fpath = self._stm_file_path(conversation_id)
+        if hasattr(self, 'fpath') or True:
+            try:
+                fpath = self._stm_file_path(conversation_id)
+                if os.path.exists(fpath):
+                    os.remove(fpath)
+            except Exception:
+                pass
+        return True
+    def clear_messages(self, conversation_id: str) -> None:
+        """Alias de compatibilidade para clear()."""
+        self.clear(conversation_id)
+    def get_messages(
+        self,
+        conversation_id: str,
+        limit: int = 10,
+        include_replies: bool = True
+    ) -> list:
+        """
+        Alias de compatibilidade para get_context().
+        Retorna lista de MessageWithContext para a conversa.
+        Args:
+            conversation_id: ID da conversa
+            limit: Quantidade máxima de mensagens
+            include_replies: Se inclui replies
+        Returns:
+            Lista de MessageWithContext
+        """
+        if conversation_id not in self._instances:
+            return []
+        stm = self._instances[conversation_id]
+        result = stm.get_context_window(
+            include_replies=include_replies,
+            prioritize_replies=True,
+            max_messages=limit
+        )
+        return result if result else []
+# ====================================
+# UNIFIED CONTEXT BUILDER
+# ====================================
+class UnifiedContextBuilder:
+    """
+    Constrói contexto unificado combinando reply + STM.
+    Philosophy: "Reply context e STM devem trabalhar em sintonia como tik e tack"
+    Usage:
+        builder = UnifiedContextBuilder()
+        context = builder.build(
+            conversation_id="...",
+            reply_metadata={...},
+            current_message="..."
+        )
+        prompt_section = builder.format_for_llm(context)
+    """
+    def __init__(self, context_manager=None, stm_manager=None, db_instance=None):
+        self.stm_manager = stm_manager if stm_manager else ShortTermMemoryManager()
+        self.context_manager = context_manager
+        self.db = db_instance
+        self.reply_handler = None
+        self._initialized = False
+    def _ensure_initialized(self):
+        """Garante inicialização do reply handler."""
+        if not self._initialized and UNIFIED_CONTEXT_AVAILABLE:
+            try:
+                self.reply_handler = ReplyContextHandler()
+                self._initialized = True
+            except Exception as e:
+                logger.warning(f"UnifiedContextBuilder: falha ao init reply handler: {e}")
+    def build(
+        self,
+        conversation_id: str,
+        user_id: str = "",
+        reply_metadata: Optional[Dict[str, Any]] = None,
+        current_message: str = "",
+        current_emotion: str = "neutro",
+        stm_messages: Optional[List[MessageWithContext]] = None
+    ) -> UnifiedMessageContext:
+        """
+        Constrói contexto unificado.
+        Args:
+            conversation_id: ID único da conversa
+            user_id: ID do usuário
+            reply_metadata: Metadados do reply
+            current_message: Mensagem atual
+            current_emotion: Emoção atual
+            stm_messages: Mensagens STM (usa manager se None)
+        Returns:
+            UnifiedMessageContext pronto para uso
+        """
+        self._ensure_initialized()
+        # ===== 1. PROCESSA REPLY CONTEXT (TIK) =====
+        is_reply = reply_metadata.get('is_reply', False) if reply_metadata else False
+        reply_context = {
+            'is_reply': is_reply,
+            'reply_to_bot': reply_metadata.get('reply_to_bot', False) if reply_metadata else False,
+            'quoted_author': reply_metadata.get('quoted_author_name', '') if reply_metadata else '',
+            'quoted_content': reply_metadata.get('quoted_text_original', '') or
+                             reply_metadata.get('mensagem_citada', '') if reply_metadata else '',
+            'importancia': IMPORTANCIA_NORMAL,
+            'emocao': current_emotion,
+            'priority': 1,
+            'replied_to_author': reply_metadata.get('replied_to_author', '') if reply_metadata else '',
+            'replied_to_content': reply_metadata.get('replied_to_content', '') if reply_metadata else ''
+        }
+        # Calcula prioridade do reply
+        if is_reply and reply_metadata:
+            reply_context['priority'] = self._calculate_reply_priority(
+                reply_metadata.get('reply_to_bot', False),
+                current_message,
+                reply_metadata.get('quoted_text_original', '')
+            )
+            # Calcula importância baseada em prioridade
+            if reply_context['priority'] >= PRIORITY_REPLY_TO_BOT_SHORT_QUESTION:
+                reply_context['importancia'] = IMPORTANCIA_PERGUNTA_CURTA_REPLY
+            elif reply_context['priority'] >= PRIORITY_REPLY_TO_BOT:
+                reply_context['importancia'] = IMPORTANCIA_REPLY_TO_BOT
+            elif reply_context['priority'] >= PRIORITY_REPLY:
+                reply_context['importancia'] = IMPORTANCIA_REPLY
+        # ===== 2. OBTÉM STM (TOK) =====
+        if stm_messages is None:
+            stm_messages = self.stm_manager.get_context(
+                conversation_id,
+                include_replies=True,
+                prioritize_replies=True,
+                max_messages=10,
+                max_tokens=4000
+            )
+        # ===== 3. CALCULA TOKEN BUDGET =====
+        budget = ContextTokenBudget().calculate(
+            is_reply=is_reply,
+            reply_priority=reply_context['priority']
+        )
+        # ===== 4. FETCH LONG-TERM MEMORY (DB) =====
+        long_term_memory_string = ""
+        if self.db and user_id:
+            try:
+                # Recuperar aprendizados e gírias
+                ltm_facts = self.db.recuperar_aprendizado_detalhado(user_id)
+                ltm_girias = self.db.recuperar_girias_usuario(user_id)
+                ltm_tom = self.db.obter_tom_predominante(user_id)
+                persona_ltm = self.db.recuperar_persona(user_id) if hasattr(self.db, 'recuperar_persona') else None
+                ltm_lines = []
+                # --- PERSONA DO USUÁRIO (Rastreador) ---
+                if persona_ltm:
+                    ltm_lines.append("=== PERFIL ANALISADO DO USUÁRIO ===")
+                    if persona_ltm.get('personalidade') and persona_ltm['personalidade'] != "None":
+                        ltm_lines.append(f"• Personalidade: {persona_ltm['personalidade']}")
+                    if persona_ltm.get('gostos') and persona_ltm['gostos'] != "None":
+                        ltm_lines.append(f"• Tópicos de Interesse: {persona_ltm['gostos']}")
+                    if persona_ltm.get('desgostos') and persona_ltm['desgostos'] != "None":
+                        ltm_lines.append(f"• Desgostos/Gatilhos: {persona_ltm['desgostos']}")
+                    if persona_ltm.get('vicios_linguagem') and persona_ltm['vicios_linguagem'] != "None":
+                        ltm_lines.append(f"• Padrões de Linguagem: {persona_ltm['vicios_linguagem']}")
+                    if persona_ltm.get('emocional') and persona_ltm['emocional'] != "None":
+                        ltm_lines.append(f"• Perfil Emocional: {persona_ltm['emocional']}")
+                if ltm_tom:
+                    ltm_lines.append(f"• Seu tom de conversa predominante é: {ltm_tom}")
+                if ltm_facts and isinstance(ltm_facts, dict):
+                    # Ignorar chaves puramente técnicas como 'emocao_atual' ou strings de timestamp longas
+                    fatos_filtrados = {k: v for k, v in ltm_facts.items() if not k.startswith("emocao_")}
+                    if fatos_filtrados:
+                        ltm_lines.append("• Fatos Relevantes Aprendidos:")
+                        for k, v in list(fatos_filtrados.items())[:5]: # limita 5
+                            ltm_lines.append(f"  - {k}: {v}")
+                if ltm_girias:
+                    ltm_lines.append("• Expressões Específicas Recentes:")
+                    for g in ltm_girias[:5]:
+                        ltm_lines.append(f"  - {g['giria']} ({g['significado']})")
+                if ltm_lines:
+                    long_term_memory_string = "\n".join(ltm_lines)
+            except Exception as e:
+                logger.warning(f"Erro ao recuperar memória de longo prazo: {e}")
+        # [INTEGRAÇÃO LSTM MENTAL CONTEXT]
+        if LSTM_AVAILABLE and self.db and conversation_id:
+            try:
+                lstm_ext = get_lstm_extension(self.db)
+                lstm_data = lstm_ext.get_context_for_prompt(conversation_id, user_id)
+                if lstm_data:
+                    lstm_lines = ["\n[INTERNAL_BRAIN_ONLY: COMPLETE CONVERSATION SUMMARY]"]
+                    if lstm_data.get('topic_principal'):
+                        lstm_lines.append(f"• Tópico Atual: {lstm_data['topic_principal']}")
+                    if lstm_data.get('subtopicas'):
+                        lstm_lines.append(f"• Subtópicos: {', '.join(lstm_data['subtopicas'])}")
+                    if lstm_data.get('unanswered_questions'):
+                        lstm_lines.append(f"• Perguntas pendentes: {'; '.join(lstm_data['unanswered_questions'])}")
+                    if lstm_data.get('interaction_pattern'):
+                        lstm_lines.append(f"• Padrão do usuário: {lstm_data['interaction_pattern']}")
+                    if lstm_data.get('assumed_knowledge'):
+                        lstm_lines.append(f"• Usuário sabe sobre: {', '.join(lstm_data['assumed_knowledge'])}")
+                    lstm_lines.append("NOTA MENTAL MÁXIMA: Este resumo é estritamente para seu conhecimento interno. NUNCA mencione que você leu um resumo ou narre o histórico. Apenas aja como se você lembrasse de tudo naturalmente.")
+                    if long_term_memory_string:
+                        long_term_memory_string += "\n" + "\n".join(lstm_lines)
+                    else:
+                        long_term_memory_string = "\n".join(lstm_lines)
+            except Exception as e:
+                logger.warning(f"Erro ao recuperar contexto LSTM: {e}")
+        # ===== 5. CRIA CONTEXTO UNIFICADO =====
+        unified = UnifiedMessageContext(
+            conversation_id=conversation_id,
+            user_id=user_id,
+            timestamp=time.time(),
+            is_reply=is_reply,
+            reply_to_bot=reply_context['reply_to_bot'],
+            reply_priority=reply_context['priority'],
+            quoted_author=reply_context['quoted_author'],
+            quoted_content=reply_context['quoted_content'],
+            reply_importancia=reply_context['importancia'],
+            stm_messages=stm_messages,
+            stm_summary=self.stm_manager.get_summary(conversation_id),
+            stm_emotional_trend=self._get_stm_emotional_trend(stm_messages),
+            long_term_memory=long_term_memory_string,
+            sync_mode="tiktok",
+            token_budget=budget,
+            current_message=current_message,
+            current_emotion=current_emotion,
+            replied_to_author=reply_context['replied_to_author'],
+            replied_to_content=reply_context['replied_to_content']
+        )
+        return unified
+    def _calculate_reply_priority(
+        self,
+        reply_to_bot: bool,
+        current_message: str,
+        quoted_content: str
+    ) -> int:
+        """
+        Calcula nível de prioridade do reply.
+        Returns:
+            1=normal, 2=reply, 3=reply_to_bot, 4=critical
+        """
+        if not reply_to_bot:
+            return PRIORITY_REPLY
+        if is_pergunta_curta(current_message):
+            return PRIORITY_REPLY_TO_BOT_SHORT_QUESTION
+        return PRIORITY_REPLY_TO_BOT
+    def _get_stm_emotional_trend(
+        self,
+        stm_messages: List[MessageWithContext]
+    ) -> str:
+        """Obtém tendência emocional da STM."""
+        if not stm_messages:
+            return "neutral"
+        emocoes = {}
+        for msg in stm_messages[-10:]:  # Últimas 10
+            emocao = msg.emocao or "neutral"
+            emocoes[emocao] = emocoes.get(emocao, 0) + 1
+        if not emocoes:
+            return "neutral"
+        return max(emocoes, key=emocoes.get)
+    def format_for_llm(
+        self,
+        unified: UnifiedMessageContext,
+        include_header: bool = True
+    ) -> str:
+        """
+        Formata contexto unificado para o prompt do LLM.
+        Args:
+            unified: Contexto unificado
+            include_header: Se inclui cabeçalho
+        Returns:
+            String formatada para o prompt
+        """
+        return format_unified_context_for_llm(unified, unified.token_budget)
+    def add_to_stm(
+        self,
+        conversation_id: str,
+        role: str,
+        content: str,
+        author_name: str = "Usuário",
+        emocao: str = "neutral",
+        reply_info: Optional[Dict] = None,
+        resposta: str = ""
+    ) -> MessageWithContext:
+        """
+        Adiciona mensagem (user ou bot) à STM.
+        Args:
+            conversation_id: ID da conversa
+            role: "user" ou "assistant"
+            content: Conteúdo da mensagem
+            emocao: Emoção
+            reply_info: Info de reply (se aplicável)
+            resposta: Resposta do bot (se for assistant)
+        Returns:
+            MessageWithContext criada
+        """
+        # Para mensagens do bot, usa a resposta gerada
+        if role == "assistant" and resposta:
+            content = resposta
+        return self.stm_manager.add_message(
+            conversation_id=conversation_id,
+            role=role,
+            content=content,
+            author_name=author_name,
+            emocao=emocao,
+            reply_info=reply_info
+        )
+    def merge_reply_with_stm(
+        self,
+        reply_context: Dict[str, Any],
+        stm_messages: List[MessageWithContext],
+        max_stm: int = 10
+    ) -> List[MessageWithContext]:
+        """
+        Mescla reply context com STM para contexto do LLM.
+        Args:
+            reply_context: Contexto do reply
+            stm_messages: Mensagens STM
+            max_stm: Máximo de mensagens STM
+        Returns:
+            Lista combinada
+        """
+        return sync_reply_with_stm(reply_context, stm_messages, max_stm)
+# ====================================
+# FACTORY FUNCTIONS
+# ====================================
+_unified_builder: Optional[UnifiedContextBuilder] = None
+def get_unified_context_builder() -> UnifiedContextBuilder:
+    """Obtém instância singleton do builder."""
+    global _unified_builder
+    if _unified_builder is None:
+        _unified_builder = UnifiedContextBuilder()
+    return _unified_builder
+def get_stm_manager() -> ShortTermMemoryManager:
+    """Obtém instância singleton do manager de STM."""
+    return ShortTermMemoryManager()
+def build_unified_context(
+    conversation_id: str,
+    user_id: str = "",
+    reply_metadata: Optional[Dict[str, Any]] = None,
+    current_message: str = "",
+    current_emotion: str = "neutral"
+) -> UnifiedMessageContext:
+    """
+    Factory function para construir contexto unificado.
+    Usage:
+        context = build_unified_context(
+            conversation_id="pv:2449...",
+            reply_metadata={...},
+            current_message="."
+        )
+    """
+    builder = get_unified_context_builder()
+    return builder.build(
+        conversation_id=conversation_id,
+        user_id=user_id,
+        reply_metadata=reply_metadata,
+        current_message=current_message,
+        current_emotion=current_emotion
+    )
+# ====================================
+# COMPATIBILITY HELPERS
+# ====================================
+def gerar_id_conversao(
+    numero: str,
+    tipo_conversa: str = "pv",
+    grupo_id: Optional[str] = None
+) -> str:
+    """
+    Gera ID de conversa para STM isolada.
+    Args:
+        numero: Número do usuário
+        tipo_conversa: "pv" ou "grupo"
+        grupo_id: ID do grupo (para conversas em grupo)
+    Returns:
+        ID único da conversa
+    """
+    from .context_isolation import generate_context_id
+    return generate_context_id(numero, tipo_conversa, grupo_id)
+# type: ignore