caarleexx commited on
Commit
9a6aa0c
verified
1 Parent(s): e2a60cf

Upload 2 files

Browse files
api/routers/processor_llm_base.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Processor Base com integração LLM REAL
3
+ Substitui processamento MOCK por chamadas ao Groq
4
+ """
5
+ from typing import Dict, Any, Optional, List
6
+ from datetime import datetime
7
+ import logging
8
+ from abc import ABC, abstractmethod
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ProcessorLLMBase(ABC):
    """
    Base processor integrating with a real LLM (Groq).

    Replaces hardcoded/mock processing with prompt-driven LLM calls.
    Subclasses implement process(), get_prompt() and validate(); this base
    accumulates errors, warnings, a confidence score and total LLM time.
    """

    def __init__(
        self,
        specialist_id: int,
        specialist_name: str,
        llm_client=None
    ):
        """
        Args:
            specialist_id: Specialist ID (1-9).
            specialist_name: Descriptive name.
            llm_client: Configured LLM client (GroqClient). When None, a
                warning is recorded and call_llm() degrades to returning "".
        """
        self.specialist_id = specialist_id
        self.specialist_name = specialist_name
        self.llm_client = llm_client
        self.execution_time = 0.0  # seconds accumulated over LLM calls (float, not int)
        self.confidence_score = 0  # 0-100, set via set_confidence()
        self.errors: List[str] = []
        self.warnings: List[str] = []

        if not llm_client:
            # Mojibake fixed in the original message ("n茫o" -> "não").
            self.add_warning("LLM client não configurado - usando fallback mock")

    @abstractmethod
    def process(self, acordao_data: Dict[str, Any]) -> Dict[str, Any]:
        """Process a court decision (acórdão) using the real LLM."""

    @abstractmethod
    def get_prompt(self, acordao_data: Dict[str, Any]) -> str:
        """Return the prompt to send to the LLM."""

    @abstractmethod
    def validate(self, result: Dict[str, Any]) -> bool:
        """Validate the processing result."""

    def call_llm(
        self,
        prompt: str,
        max_tokens: int = 2048,
        temperature: float = 0.3
    ) -> str:
        """
        Perform a call to the real LLM.

        Args:
            prompt: Prompt to send.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature (0-1).

        Returns:
            The LLM response text, or "" when no client is configured or
            the call fails (the failure is recorded in self.errors).
        """
        if not self.llm_client:
            # Mojibake fixed in the original error message.
            self.add_error("LLM client não disponível")
            return ""

        try:
            start_time = datetime.now()

            # Lazy %-style args: no string formatting when INFO is disabled.
            logger.info(
                "[%s] Chamando LLM... (max_tokens=%s, temp=%s)",
                self.specialist_name, max_tokens, temperature
            )

            # Real call to the LLM client.
            response = self.llm_client.generate(
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature
            )

            elapsed = (datetime.now() - start_time).total_seconds()
            self.execution_time += elapsed

            logger.info(
                "[%s] LLM respondeu em %.2fs (%d chars)",
                self.specialist_name, elapsed, len(response)
            )

            return response

        except Exception as e:
            # Best-effort boundary: record the failure and return "" so the
            # caller can fall back instead of crashing.
            self.add_error(f"Erro ao chamar LLM: {e}")
            # logger.exception logs at ERROR with the traceback attached
            # (equivalent to error(..., exc_info=True)).
            logger.exception("[%s] Erro LLM: %s", self.specialist_name, e)
            return ""

    def add_error(self, error_msg: str):
        """Record an error message."""
        self.errors.append(error_msg)

    def add_warning(self, warning_msg: str):
        """Record a warning message."""
        self.warnings.append(warning_msg)

    def set_confidence(self, score: int):
        """Set the confidence score (0-100); out-of-range values are ignored."""
        if 0 <= score <= 100:
            self.confidence_score = score

    def postprocess(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Wrap a result with specialist identity, timing and diagnostics."""
        return {
            "specialist_id": self.specialist_id,
            "specialist_name": self.specialist_name,
            "result": result,
            "execution_time": self.execution_time,
            "confidence_score": self.confidence_score,
            "errors": self.errors,
            "warnings": self.warnings,
            "timestamp": datetime.now().isoformat()
        }
api/routers/processor_metadados_llm.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Especialista 1: Metadados com LLM REAL
3
+ """
4
+ from typing import Dict, Any
5
+ import json
6
+ import logging
7
+ import re
8
+ from api.processors.processor_llm_base import ProcessorLLMBase
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ProcessorMetadadosLLM(ProcessorLLMBase):
    """Specialist 1: metadata extraction through a real LLM."""

    def __init__(self, llm_client=None):
        super().__init__(
            specialist_id=1,
            specialist_name="Metadados (LLM)",
            llm_client=llm_client
        )

    def process(self, acordao_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract metadata via the LLM.

        Falls back to mock data when the LLM is unavailable, the response
        cannot be parsed, or validation fails; never raises.
        """
        try:
            prompt = self.get_prompt(acordao_data)
            llm_response = self.call_llm(prompt, max_tokens=1024, temperature=0.2)

            if not llm_response:
                return self._fallback_mock(acordao_data)

            metadados = self._parse_llm_response(llm_response)

            if self.validate(metadados):
                self.set_confidence(90)
                return metadados
            return self._fallback_mock(acordao_data)

        except Exception as e:
            self.add_error(f"Erro: {e}")
            return self._fallback_mock(acordao_data)

    def get_prompt(self, acordao_data: Dict[str, Any]) -> str:
        """Build the extraction prompt (ementa capped at 1000 chars, íntegra at 2000)."""
        # `or ""` also guards against explicit None values, which would
        # otherwise make the slice raise TypeError.
        ementa = (acordao_data.get("ementa") or "")[:1000]
        integra = (acordao_data.get("integra") or "")[:2000]

        # Mojibake fixed in the prompt text (acórdão / válido / ÍNTEGRA) —
        # the corrupted bytes were being sent verbatim to the LLM.
        return f"""Extraia metadados deste acórdão. Retorne JSON válido:

EMENTA: {ementa}
ÍNTEGRA (trecho): {integra}

JSON esperado:
{{
  "tribunal": "TJPR",
  "relator": "Nome do Relator",
  "ramo_especializado": "Direito do Consumidor"
}}

JSON:"""

    def validate(self, result: Dict[str, Any]) -> bool:
        """A result is valid when it names a relator longer than 3 chars."""
        return "relator" in result and len(result.get("relator", "")) > 3

    def _parse_llm_response(self, response: str) -> Dict[str, Any]:
        """Parse the LLM response as JSON, tolerating surrounding prose."""
        try:
            # Try the whole response as JSON first.
            return json.loads(response)
        except (TypeError, ValueError):
            # Narrow clauses replace the original bare `except:` (which also
            # swallowed KeyboardInterrupt/SystemExit). ValueError covers
            # json.JSONDecodeError; TypeError covers non-str input.
            match = re.search(r'\{[^{}]+\}', response)
            if match:
                try:
                    return json.loads(match.group())
                except ValueError:
                    pass
            return {}

    def _fallback_mock(self, acordao_data: Dict[str, Any]) -> Dict[str, Any]:
        """Static fallback used when LLM extraction fails (mojibake fixed)."""
        return {
            "tribunal": "TJPR",
            "relator": "RELATOR NÃO IDENTIFICADO",
            "ramo_especializado": "DIREITO CÍVEL"
        }