# PARA.AI/core/base_specialist.py
# Source: Carlex22 / ParaAIV3.6 (commit 2b9d72c)
"""
Base Specialist - Classe base refatorada para todos os especialistas V13.6
"""
import asyncio
import json
import logging
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Dict, Optional

import yaml
# Module-level logger shared by BaseSpecialist and its subclasses.
logger = logging.getLogger(__name__)
class BaseSpecialist(ABC):
    """Base class for all V13.6 specialists.

    Responsibilities:
      - load this specialist's configuration section from a YAML file;
      - load a prompt template and fill it with input data plus the context
        produced by previously-run specialists ("context injection");
      - call the LLM through the injected ``llm_manager`` with retry logic;
      - parse the LLM response as JSON and optionally validate it against
        a partial schema.

    Subclasses must implement :meth:`_get_empty_structure`, the fallback
    value returned when every attempt fails.
    """

    def __init__(
        self,
        specialist_id: int,
        config_path: str,
        llm_manager,
        schema_validator=None
    ):
        """Initialize the specialist.

        Args:
            specialist_id: numeric key under ``specialists`` in the YAML file.
            config_path: path to the YAML configuration file.
            llm_manager: object exposing an async ``generate(...)`` coroutine.
            schema_validator: optional object exposing ``validate_partial``.

        Raises:
            ValueError: if the config file has no entry for ``specialist_id``.
        """
        self.id = specialist_id
        self.llm_manager = llm_manager
        self.schema_validator = schema_validator
        self.config = self._load_config(specialist_id, config_path)
        self.name = self.config.get('name', f'Specialist_{specialist_id}')
        self.prompt_template = self._load_prompt_template()
        self.llm_config = self.config.get('llm_config', {})
        self.max_retries = self.config.get('max_retries', 3)
        self.retry_delay = self.config.get('retry_delay', 2)
        logger.info(f"βœ… {self.name} (ID {self.id}) inicializado")

    def _load_config(self, specialist_id: int, config_path: str) -> Dict[str, Any]:
        """Load this specialist's section from the YAML config file.

        Raises:
            ValueError: if no section exists for ``specialist_id``.
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                # safe_load returns None for an empty file; normalize to {}
                # so the lookup below raises ValueError, not AttributeError.
                full_config = yaml.safe_load(f) or {}
            specialist_config = full_config.get('specialists', {}).get(specialist_id, {})
            if not specialist_config:
                raise ValueError(f"ConfiguraΓ§Γ£o nΓ£o encontrada para especialista {specialist_id}")
            return specialist_config
        except Exception as e:
            logger.error(f"❌ Erro ao carregar config: {e}")
            raise

    def _load_prompt_template(self) -> str:
        """Load the prompt template from the configured .txt file.

        Returns an empty string when no file is configured or the file
        cannot be read — a missing template is deliberately non-fatal.
        """
        prompt_file = self.config.get('prompt_file', '')
        if not prompt_file:
            return ""
        try:
            with open(prompt_file, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            logger.error(f"❌ Erro ao carregar prompt: {e}")
            return ""

    async def process(
        self,
        input_data: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Process the input with context injection and retry logic.

        Builds the prompt, calls the LLM, parses the JSON response and
        (when a validator is configured) validates it. Retries up to
        ``max_retries`` times, sleeping ``retry_delay`` seconds between
        attempts; returns the subclass's empty structure on total failure.
        """
        logger.info(f"πŸ€– {self.name} processando...")
        for attempt in range(self.max_retries):
            try:
                prompt = self._build_prompt(input_data, context)
                response = await self._call_llm(prompt)
                result = self._parse_response(response)
                if self.schema_validator:
                    is_valid, errors = self._validate_output(result)
                    # Retry on invalid output; the final attempt's result is
                    # returned as-is (best-effort, preserving prior behavior).
                    if not is_valid and attempt < self.max_retries - 1:
                        await asyncio.sleep(self.retry_delay)
                        continue
                logger.info(f" βœ… {self.name} completou: {len(result)} campos")
                return result
            except Exception as e:
                logger.error(f" ❌ Tentativa {attempt + 1}/{self.max_retries} falhou: {e}")
                if attempt == self.max_retries - 1:
                    return self._get_empty_structure()
                # Back off before the next attempt (retry_delay was loaded
                # from config but previously never used).
                await asyncio.sleep(self.retry_delay)
        return self._get_empty_structure()

    def _build_prompt(
        self,
        input_data: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> str:
        """Build the final prompt: variable substitution + context injection.

        ``None`` values in ``input_data``/section entries are treated as
        empty strings so applying the truncation slices never raises.
        """
        prompt = self.prompt_template
        # Basic variables (long texts truncated to keep the prompt bounded).
        prompt = prompt.replace('{ementa}', input_data.get('ementa') or '')
        prompt = prompt.replace('{inteiro_teor}', (input_data.get('inteiro_teor') or '')[:10000])
        # Section-specific variables from the original document.
        if context:
            secoes = context.get('secoes_originais', {})
            prompt = prompt.replace('{RELATORIO_texto}', (secoes.get('RELATORIO_texto_completo') or '')[:5000])
            prompt = prompt.replace('{FUNDAMENTACAO_texto}', (secoes.get('FUNDAMENTACAO_texto_completo') or '')[:5000])
            prompt = prompt.replace('{DISPOSITIVO_texto}', (secoes.get('DISPOSITIVO_texto_completo') or '')[:3000])
            if 'metadados' in context:
                metadados_str = json.dumps(context['metadados'], indent=2, ensure_ascii=False)
                prompt = prompt.replace('{metadados}', metadados_str[:1000])
        # CONTEXT INJECTION: append the JSON produced by earlier specialists.
        if context:
            context_compact = self._compact_context(context)
            context_str = json.dumps(context_compact, indent=2, ensure_ascii=False)
            prompt += f"""
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
JSON JÁ PREENCHIDO (contexto dos especialistas anteriores):
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
{context_str}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
IMPORTANTE:
- NÃO repita os campos acima
- Complete APENAS os campos NOVOS da sua responsabilidade
- Retorne SOMENTE JSON vΓ‘lido
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return prompt

    def _compact_context(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``context`` small enough to embed in a prompt.

        Drops the bulky 'secoes_originais' entry and truncates any string
        value longer than 500 characters.
        """
        compact: Dict[str, Any] = {}
        for key, value in context.items():
            if key in ['secoes_originais']:
                continue
            if isinstance(value, str) and len(value) > 500:
                compact[key] = value[:500] + "..."
            else:
                compact[key] = value
        return compact

    async def _call_llm(self, prompt: str) -> str:
        """Call the LLM via the injected LLMManager and return raw text."""
        try:
            response = await self.llm_manager.generate(
                system_prompt=self.config.get('system_prompt', ''),
                user_prompt=prompt,
                temperature=self.llm_config.get('temperature', 0.3),
                max_tokens=self.llm_config.get('max_tokens', 2000),
                model=self.llm_config.get('model', 'groq/llama-3-70b')
            )
            return response
        except Exception as e:
            logger.error(f"❌ Erro ao chamar LLM: {e}")
            raise

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse the LLM response as JSON, stripping Markdown code fences.

        Handles ``` / ```json fences with or without a closing fence. The
        previous implementation always dropped the last line, destroying
        real JSON content whenever the closing fence was missing.

        Raises:
            json.JSONDecodeError: if the cleaned text is not valid JSON.
        """
        try:
            response_clean = response.strip()
            if response_clean.startswith('```'):
                # Drop the opening fence line (e.g. "```json").
                _, _, response_clean = response_clean.partition('\n')
                response_clean = response_clean.strip()
                # Remove the closing fence only if it is actually present.
                if response_clean.endswith('```'):
                    response_clean = response_clean[:-3].rstrip()
            result = json.loads(response_clean)
            return result
        except json.JSONDecodeError as e:
            logger.error(f"❌ Erro ao parsear JSON: {e}")
            raise

    def _validate_output(self, result: Dict[str, Any]) -> tuple[bool, list]:
        """Validate ``result`` against the configured partial schema.

        Returns ``(True, [])`` when no validator/schema is configured;
        validator exceptions are mapped to ``(False, [message])``.
        """
        if not self.schema_validator:
            return True, []
        schema_file = self.config.get('schema_file', '')
        if not schema_file:
            return True, []
        try:
            return self.schema_validator.validate_partial(result, schema_file)
        except Exception as e:
            return False, [str(e)]

    @abstractmethod
    def _get_empty_structure(self) -> Dict[str, Any]:
        """Return the empty output structure used when all attempts fail."""

    def get_info(self) -> Dict[str, Any]:
        """Return descriptive metadata about this specialist."""
        return {
            'id': self.id,
            'name': self.name,
            'enabled': self.config.get('enabled', True),
            'llm_model': self.llm_config.get('model', 'unknown'),
            'max_retries': self.max_retries
        }