Spaces:

caarleexx
/

PARA.AI

Runtime error

App Files Files Community

PARA.AI / core /segmenter.py

Carlex22

ParaAIV3.6

e6027de 28 days ago

raw

history blame contribute delete

3.06 kB

	##PARA.AI/core/segmenter.py
	"""
	Segmenter V13.6 - Fase 2.2 (Regex, sem LLM)
	Segmenta inteiro_teor em 3 blocos: RELATORIO, FUNDAMENTACAO, DECISAO
	"""
	import re
	from typing import Dict, Optional, Tuple

	class Segmenter:
	    """Split a decision's full text into three logical blocks using regex.

	    The blocks are returned under the keys ``bloco_1`` (report),
	    ``bloco_2`` (reasoning) and ``bloco_3`` (ruling). Boundaries come from
	    the trigger phrases below; a boundary whose trigger is not found falls
	    back to a fixed proportion of the text length.

	    Within each trigger list, order is priority: the first pattern that
	    matches anywhere wins, even if a later pattern matches earlier in the
	    text. All patterns are matched case-insensitively.
	    """

	    # Triggers for the report block (start of the text).
	    # NOTE: no method of this class reads this list; it is kept so that
	    # external code referencing the attribute keeps working.
	    TRIGGERS_RELATORIO = [
	        r"RELATÓRIO",
	        r"Trata-se de",
	        r"Cuida a espécie",
	    ]

	    # Triggers for the reasoning block (middle of the text).
	    # The alternation uses unescaped "|" (an escaped "\|" only matches a
	    # literal pipe), and the heading triggers are anchored on a preceding
	    # newline so they match at the start of a line.
	    TRIGGERS_FUNDAMENTACAO = [
	        r"É o (relatório|síntese|resumo|histórico)",
	        r"\nPresentes",
	        r"\nDecido",
	        r"\nVOTO",
	        r"\nFUNDAMENTAÇÃO",
	    ]

	    # Triggers for the ruling block (end of the text).
	    TRIGGERS_DECISAO = [
	        r"Diante do exposto",
	        r"DECISÃO",
	        r"DISPOSITIVO",
	        r"Por todo o exposto",
	    ]

	    # Fractions of the text length used as boundaries when a trigger is
	    # missing: reasoning starts at 30 %, ruling starts at 70 %.
	    FALLBACK_RATIO_FUNDAMENTACAO = 0.3
	    FALLBACK_RATIO_DECISAO = 0.7

	    def segment(self, inteiro_teor: str) -> Dict[str, Optional[str]]:
	        """Split ``inteiro_teor`` into three contiguous, non-overlapping blocks.

	        Args:
	            inteiro_teor: Full text to segment.

	        Returns:
	            A dict with keys ``"bloco_1"``, ``"bloco_2"`` and ``"bloco_3"``.
	            All three values are ``None`` when the input is empty or
	            ``None``; otherwise each value is the stripped text of the
	            block (possibly an empty string).
	        """
	        if not inteiro_teor:
	            return {"bloco_1": None, "bloco_2": None, "bloco_3": None}

	        # Normalize line endings so the "\n"-anchored triggers can match.
	        text = inteiro_teor.replace("\r\n", "\n").replace("\r", "\n")

	        fund_start = self._find_fundamentacao_start(text)
	        # Compare against None explicitly: a match at index 0 is a valid hit.
	        if fund_start is not None:
	            boundary_1 = fund_start
	        else:
	            boundary_1 = int(len(text) * self.FALLBACK_RATIO_FUNDAMENTACAO)

	        # Look for the ruling only after the first boundary. A trigger that
	        # appears earlier (e.g. a "DECISÃO" heading at the top) would
	        # otherwise empty the reasoning block and swallow it into block 3.
	        decisao_offset = self._find_decisao_start(text[boundary_1:])

	        if fund_start is None and decisao_offset is None:
	            return self._split_proportional(text)

	        if decisao_offset is not None:
	            boundary_2 = boundary_1 + decisao_offset
	        else:
	            # Never let the fallback fall before the first boundary, so the
	            # blocks cannot overlap.
	            boundary_2 = max(
	                boundary_1, int(len(text) * self.FALLBACK_RATIO_DECISAO)
	            )

	        return {
	            "bloco_1": text[:boundary_1].strip(),
	            "bloco_2": text[boundary_1:boundary_2].strip(),
	            "bloco_3": text[boundary_2:].strip(),
	        }

	    @staticmethod
	    def _first_trigger_start(triggers: list, text: str) -> Optional[int]:
	        """Return the start index of the first pattern in ``triggers`` that matches.

	        Patterns are tried in list order and matched case-insensitively.
	        Returns ``None`` when no pattern matches.
	        """
	        for trigger in triggers:
	            match = re.search(trigger, text, re.IGNORECASE)
	            if match:
	                return match.start()
	        return None

	    def _find_fundamentacao_start(self, text: str) -> Optional[int]:
	        """Return the index where the reasoning block starts, or ``None``."""
	        return self._first_trigger_start(self.TRIGGERS_FUNDAMENTACAO, text)

	    def _find_decisao_start(self, text: str) -> Optional[int]:
	        """Return the index where the ruling block starts, or ``None``."""
	        return self._first_trigger_start(self.TRIGGERS_DECISAO, text)

	    def _split_proportional(self, text: str) -> Dict[str, str]:
	        """Split ``text`` at 30 % and 70 % of its length when no trigger matched."""
	        length = len(text)
	        pos_1 = int(length * self.FALLBACK_RATIO_FUNDAMENTACAO)
	        pos_2 = int(length * self.FALLBACK_RATIO_DECISAO)

	        return {
	            "bloco_1": text[:pos_1].strip(),
	            "bloco_2": text[pos_1:pos_2].strip(),
	            "bloco_3": text[pos_2:].strip(),
	        }