Spaces:

caarleexx
/

PARA.AI

Runtime error

App Files Files Community

PARA.AI / llm /schema_loader.py

Carlex22

Revert "ParaAIV3.1"

a7f77a7 28 days ago

raw

history blame contribute delete

7.09 kB

	"""Schema Loader - Carrega e injeta schemas JSON nos system prompts."""

	import os
	import json
	import logging
	from pathlib import Path
	from typing import Dict, Optional

	logger = logging.getLogger(__name__)


	class SchemaLoader:
	    """Load per-specialist JSON schemas from disk and inject them into system prompts.

	    Schemas are resolved through ``SCHEMA_MAP``, read from ``schemas_dir`` and
	    memoised per instance in ``_cache`` (keyed by the lower-cased specialist
	    name).
	    """

	    # Specialist name -> schema file name (resolved relative to ``schemas_dir``).
	    SCHEMA_MAP = {
	        "metadados": "metadados_schema.json",
	        "segmentacao": "segmentacao_schema.json",
	        "transcricao": "transcricao_schema.json",
	        "contexto": "contexto_schema.json",
	        "fundamentacao": "fundamentacao_schema.json",
	        "decisao": "decisao_schema.json",
	        "arquivo": "arquivo_schema.json",
	        "relatorio": "relatorio_schema.json",
	        "auditoria": "auditoria_schema.json",
	    }

	    def __init__(self, schemas_dir: Optional[str] = None):
	        """Initialise the loader.

	        Args:
	            schemas_dir: Directory holding the schema files. When ``None``,
	                defaults to ``prompts/schemas`` under the parent of the
	                directory that contains this module.
	        """
	        if schemas_dir is None:
	            # Derive the default location from this module's own path.
	            base_dir = Path(__file__).parent.parent
	            schemas_dir = base_dir / "prompts" / "schemas"

	        self.schemas_dir = Path(schemas_dir)

	        # A missing directory is only logged here; each later lookup then
	        # reports the individual missing file and returns None.
	        if not self.schemas_dir.exists():
	            logger.warning(f"⚠️ Diretório de schemas não encontrado: {self.schemas_dir}")
	        else:
	            logger.info(f"✅ SchemaLoader inicializado: {self.schemas_dir}")

	        # Parsed schemas, keyed by lower-cased specialist name.
	        self._cache: Dict[str, Dict] = {}

	    def load_schema(self, specialist_name: str) -> Optional[Dict]:
	        """Load the JSON schema of a specialist, using the instance cache.

	        The name is matched case-insensitively, and the cache uses the same
	        normalised key, so ``"Decisao"`` and ``"decisao"`` share one entry.

	        Args:
	            specialist_name: Specialist name (e.g. ``"decisao"``, ``"metadados"``).

	        Returns:
	            The parsed schema (the cached object itself, not a copy), or
	            ``None`` when the name is not a string, is not mapped, the file
	            does not exist, or reading/parsing it fails.
	        """
	        # Normalise once so the SCHEMA_MAP lookup and the cache agree on the key.
	        key = specialist_name.lower() if isinstance(specialist_name, str) else None

	        if key is not None and key in self._cache:
	            return self._cache[key]

	        schema_filename = self.SCHEMA_MAP.get(key) if key is not None else None
	        if not schema_filename:
	            logger.warning(f"⚠️ Schema não mapeado para especialista: {specialist_name}")
	            return None

	        schema_path = self.schemas_dir / schema_filename

	        if not schema_path.exists():
	            logger.warning(f"⚠️ Arquivo schema não encontrado: {schema_path}")
	            return None

	        try:
	            with open(schema_path, 'r', encoding='utf-8') as f:
	                schema = json.load(f)
	        except Exception as e:
	            # Deliberate best-effort boundary: any read/parse failure is
	            # logged and reported to the caller as "no schema".
	            logger.error(f"❌ Erro ao carregar schema {schema_path}: {e}")
	            return None

	        self._cache[key] = schema
	        logger.debug(f"✅ Schema carregado: {specialist_name}")
	        return schema

	    def inject_schema_in_prompt(self,
	                                system_prompt: str,
	                                specialist_name: str,
	                                format_style: str = "json") -> str:
	        """Append the specialist's schema and output rules to a system prompt.

	        Args:
	            system_prompt: Original system prompt.
	            specialist_name: Specialist whose schema should be injected.
	            format_style: ``"json"`` (indented), ``"compact"`` (single line)
	                or ``"markdown"`` (field list). Any other value is treated
	                like ``"json"``.

	        Returns:
	            The prompt followed by the schema section, or ``system_prompt``
	            unchanged when no (non-empty) schema could be loaded.
	        """
	        schema = self.load_schema(specialist_name)

	        if not schema:
	            logger.warning(f"⚠️ Schema não disponível para {specialist_name}, retornando prompt original")
	            return system_prompt

	        if format_style == "compact":
	            schema_text = json.dumps(schema, ensure_ascii=False)
	        elif format_style == "markdown":
	            # NOTE(review): the markdown rendering is still wrapped in the
	            # ```json fence below - confirm that this is intended.
	            schema_text = self._format_schema_markdown(schema)
	        else:
	            # "json" and every unrecognised style: indented JSON.
	            schema_text = json.dumps(schema, indent=2, ensure_ascii=False)

	        # The literal below is emitted verbatim at runtime; keep it unchanged.
	        enhanced_prompt = f"""{system_prompt}

	# JSON SCHEMA OBRIGATÓRIO

	Você DEVE retornar sua resposta seguindo EXATAMENTE este JSON Schema:

	```json
	{schema_text}
	```

	IMPORTANTE:
	- Retorne APENAS JSON válido
	- Siga TODOS os campos required do schema
	- Respeite os tipos de dados (string, integer, array, object)
	- Valide enums e patterns quando especificados
	- NÃO adicione comentários ou texto fora do JSON
	- NÃO invente campos que não estão no schema
	"""

	        return enhanced_prompt

	    def _format_schema_markdown(self, schema: Dict) -> str:
	        """Render the top level of a schema as a Markdown field list.

	        Only ``title``, ``description``, ``properties`` and ``required`` of
	        the top-level schema are used; nested properties are not expanded.
	        Property definitions that are not objects (e.g. boolean sub-schemas)
	        are listed with type ``any`` and an empty description.
	        """
	        lines = []

	        if 'title' in schema:
	            lines.append(f"## {schema['title']}")
	        if 'description' in schema:
	            lines.append(f"{schema['description']}\n")

	        if 'properties' in schema:
	            lines.append("### Campos:")
	            # Looked up once; tolerate a missing or null "required" entry.
	            required_fields = schema.get('required') or []
	            for field, props in schema['properties'].items():
	                if not isinstance(props, dict):
	                    props = {}
	                field_type = props.get('type', 'any')
	                desc = props.get('description', '')
	                required = '(obrigatório)' if field in required_fields else '(opcional)'
	                lines.append(f"- {field} ({field_type}) {required}: {desc}")

	        return '\n'.join(lines)

	    def get_available_specialists(self) -> list:
	        """Return the specialist names that have an entry in ``SCHEMA_MAP``."""
	        return list(self.SCHEMA_MAP)

	    def validate_response(self, response: str, specialist_name: str) -> tuple:
	        """Validate a JSON response string against the specialist's schema.

	        Args:
	            response: Response text expected to contain JSON.
	            specialist_name: Specialist whose schema is used.

	        Returns:
	            ``(is_valid, errors)``. When ``jsonschema`` cannot be imported the
	            check is skipped and ``(True, [])`` is returned. When no schema is
	            available the result is ``(True, ["Schema não encontrado"])``.
	        """
	        try:
	            # Optional dependency, imported lazily so the module works without it.
	            from jsonschema import validate, ValidationError
	        except ImportError:
	            logger.warning("jsonschema não instalado, validação desabilitada")
	            return True, []

	        schema = self.load_schema(specialist_name)
	        if not schema:
	            # NOTE(review): reported as valid although an error message is
	            # attached - confirm callers rely on this permissive result.
	            return True, ["Schema não encontrado"]

	        try:
	            data = json.loads(response)
	            validate(instance=data, schema=schema)
	        except json.JSONDecodeError as e:
	            return False, [f"JSON inválido: {e}"]
	        except ValidationError as e:
	            return False, [f"Validação falhou: {e.message}"]
	        except Exception as e:
	            # E.g. an invalid schema or a non-string response.
	            return False, [f"Erro na validação: {e}"]
	        return True, []


	# ============================================================================
	# SINGLETON GLOBAL (Opcional)
	# ============================================================================

	# Module-level shared instance; stays None until get_schema_loader() is first called.
	_global_loader: Optional[SchemaLoader] = None


	def get_schema_loader() -> SchemaLoader:
	    """Return the shared module-level SchemaLoader, creating it on first use.

	    The instance is built with the default constructor arguments and then
	    reused by every later call.
	    """
	    global _global_loader
	    if _global_loader is not None:
	        return _global_loader
	    _global_loader = SchemaLoader()
	    return _global_loader