caarleexx committed on
Commit
17f01f8
·
verified ·
1 Parent(s): a255057

Upload 3 files

Browse files
api/routers/debug.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Router de debug - informações de sistema e tasks"""
2
+ from fastapi import APIRouter
3
+ from api.config import get_settings
4
+ from api.utils.logger import setup_logger
5
+ import sys, os
6
+
7
+ router = APIRouter()
8
+ logger = setup_logger(__name__)
9
+ settings = get_settings()
10
+
11
@router.get("/info")
async def debug_info():
    """Return a full snapshot of runtime configuration for debugging."""
    # Filesystem locations the API reads from / writes to.
    paths = {
        "upload": settings.UPLOAD_PATH,
        "output": settings.OUTPUT_PATH,
        "files": settings.FILES_BASE_PATH,
    }
    # A provider counts as "available" when its API key is configured.
    providers_available = {
        "groq": bool(settings.GROQ_API_KEY),
        "openai": bool(settings.OPENAI_API_KEY),
        "anthropic": bool(settings.ANTHROPIC_API_KEY),
    }
    return {
        "python_version": sys.version,
        "environment": settings.APP_ENV,
        "debug_mode": settings.DEBUG,
        "paths": paths,
        "llm_config": {
            "default_provider": settings.DEFAULT_LLM_PROVIDER,
            "providers_available": providers_available,
        },
    }
32
+
33
@router.get("/tasks")
async def list_tasks():
    """List the ids of every known processing task."""
    # Imported lazily to avoid a circular import at module load time.
    from api.routers.processing import processing_tasks

    task_ids = list(processing_tasks.keys())
    return {"total": len(task_ids), "tasks": task_ids}
api/routers/health.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Router de health check e status do sistema
3
+ """
4
+ from fastapi import APIRouter
5
+ from datetime import datetime
6
+ import psutil
7
+ import os
8
+
9
+ from api.models.responses import HealthResponse
10
+ from api.config import get_settings
11
+ from api.utils.logger import setup_logger
12
+
13
+ router = APIRouter()
14
+ logger = setup_logger(__name__)
15
+ settings = get_settings()
16
+
17
+ app_start_time = datetime.now()
18
+
19
+
20
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """
    **Health check completo do sistema.**

    Verifica:
    - ✅ Database connectivity
    - ✅ LLM providers availability
    - ✅ File system access
    - ✅ System metrics
    """
    services = {}

    # Database connectivity. The import is deferred so a broken DB layer
    # cannot prevent this module (and the whole router) from loading.
    try:
        from database.db_manager import get_db_manager
        db = get_db_manager()
        services['database'] = db.health_check()
    except Exception as e:
        logger.error(f"DB health check failed: {e}")
        services['database'] = False

    # LLM providers: "available" here only means an API key is configured,
    # not that the remote service is reachable.
    try:
        services['llm_groq'] = bool(settings.GROQ_API_KEY)
        services['llm_openai'] = bool(settings.OPENAI_API_KEY)
        services['llm_anthropic'] = bool(settings.ANTHROPIC_API_KEY)
    except Exception as e:
        logger.error(f"LLM health check failed: {e}")
        services.update({
            'llm_groq': False,
            'llm_openai': False,
            'llm_anthropic': False
        })

    # The working directories must exist for uploads/processing to succeed.
    from pathlib import Path
    services['files_upload'] = Path(settings.UPLOAD_PATH).exists()
    services['files_output'] = Path(settings.OUTPUT_PATH).exists()
    services['files_base'] = Path(settings.FILES_BASE_PATH).exists()

    # System metrics are best-effort. Fixed: the previous bare `except:`
    # also swallowed KeyboardInterrupt/SystemExit and hid the failure cause.
    try:
        metrics = {
            'cpu_percent': psutil.cpu_percent(),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
            'process_count': len(psutil.pids())
        }
    except Exception as e:
        logger.warning(f"System metrics collection failed: {e}")
        metrics = {}

    # Seconds since this module was imported (approximates process uptime).
    uptime = (datetime.now() - app_start_time).total_seconds()

    # "healthy" only when every individual check passed.
    status = "healthy" if all(services.values()) else "degraded"

    return HealthResponse(
        status=status,
        version=settings.APP_VERSION,
        environment=settings.APP_ENV,
        timestamp=datetime.now(),
        uptime_seconds=uptime,
        services=services,
        metrics=metrics
    )
87
+
88
+
89
@router.get("/ping")
async def ping():
    """Simple ping endpoint."""
    now = datetime.now()
    return {"status": "pong", "timestamp": now.isoformat()}
api/routers/processing.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Router de processamento de acórdãos
3
+ ENDPOINT PRINCIPAL: Upload JSONL → Processamento → Download TAR.GZ
4
+ """
5
+ from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException, Query
6
+ from fastapi.responses import FileResponse
7
+ import uuid
8
+ import json
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ import hashlib
12
+
13
+ from api.models.requests import ProcessingOptionsRequest
14
+ from api.models.responses import ProcessingResponse, ProcessingStatus, FileInfoResponse
15
+ from api.services.processing_service import ProcessingService
16
+ from api.utils.logger import setup_logger
17
+ from api.config import get_settings
18
+
19
+ router = APIRouter()
20
+ logger = setup_logger(__name__)
21
+ settings = get_settings()
22
+
23
+ # Storage de tasks (em produção usar Redis ou Database)
24
+ processing_tasks = {}
25
+
26
+
27
@router.post("/process/upload", response_model=ProcessingResponse, status_code=202)
async def upload_and_process(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Arquivo JSONL com acórdãos"),
    llm_provider: str = Query("groq", description="Provedor LLM (groq/openai/anthropic)"),
    model_type: str = Query("balanced", description="Tipo de modelo (fast/balanced/quality)"),
    enable_parallel: bool = Query(True, description="Processar em paralelo"),
    max_workers: int = Query(3, ge=1, le=10, description="Workers paralelos"),
    save_to_db: bool = Query(False, description="Salvar resultados no banco")
):
    """
    **Upload de arquivo JSONL e início do processamento em background.**

    ## Fluxo:
    1. Upload do arquivo JSONL
    2. Validação do formato
    3. Criação de task de processamento
    4. Processamento em background (9 especialistas)
    5. Geração de arquivo TAR.GZ com resultados

    ## Formato JSONL esperado:
    ```json
    {"acordao_id": "001", "tribunal": "TJPR", "ementa": "...", "integra": "..."}
    {"acordao_id": "002", "tribunal": "TJSP", "ementa": "...", "integra": "..."}
    ```

    ## Response:
    - **task_id**: ID único para consultar status
    - **status**: Status inicial (pending)
    - Use `/process/status/{task_id}` para acompanhar
    - Use `/process/download/{task_id}` para baixar resultados
    """

    # Reject anything that does not look like a JSON/JSONL upload.
    if not file.filename.endswith(('.jsonl', '.json')):
        raise HTTPException(
            status_code=400,
            detail="Arquivo deve ser .jsonl ou .json"
        )

    # Enforce the configured size limit. NOTE(review): the whole file is
    # read into memory here; fine for the configured limit, but large
    # limits would warrant streaming.
    content = await file.read()
    size_mb = len(content) / (1024 * 1024)
    if size_mb > settings.MAX_UPLOAD_SIZE_MB:
        raise HTTPException(
            status_code=413,
            detail=f"Arquivo muito grande: {size_mb:.2f}MB (máx: {settings.MAX_UPLOAD_SIZE_MB}MB)"
        )

    # Unique id used for the task entry, the stored file and the result archive.
    task_id = f"task-{uuid.uuid4()}"

    upload_dir = Path(settings.UPLOAD_PATH)
    upload_dir.mkdir(parents=True, exist_ok=True)

    # Persist the upload so the background worker can read it later.
    file_path = upload_dir / f"{task_id}_{file.filename}"
    file_path.write_bytes(content)

    # Integrity fingerprint stored in the task metadata.
    file_hash = hashlib.sha256(content).hexdigest()

    # Validate every line up-front so the client gets an immediate 422
    # instead of a delayed background failure.
    # FIX: the previous split used a string literal broken across lines
    # (invalid syntax); splitlines() also copes with \r\n endings.
    total_records = 0
    try:
        for line_num, line in enumerate(content.decode('utf-8').splitlines(), 1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
                # Both fields are required by the specialist pipeline.
                if 'ementa' not in record or 'integra' not in record:
                    raise HTTPException(
                        status_code=422,
                        detail=f"Linha {line_num}: campos obrigatórios ausentes (ementa, integra)"
                    )
                total_records += 1
            except json.JSONDecodeError as e:
                raise HTTPException(
                    status_code=422,
                    detail=f"JSONL inválido na linha {line_num}: {str(e)}"
                )
    except UnicodeDecodeError:
        raise HTTPException(
            status_code=422,
            detail="Arquivo deve estar em UTF-8"
        )

    # Initial (pending) response returned to the client.
    response = ProcessingResponse(
        task_id=task_id,
        status=ProcessingStatus.PENDING,
        message=f"Processamento agendado para {total_records} registros",
        total_records=total_records,
        processed_records=0,
        failed_records=0,
        started_at=datetime.now()
    )

    # Register the task in the in-memory store (see module-level note:
    # production should use Redis or a database).
    processing_tasks[task_id] = response.dict()

    # Extra bookkeeping not exposed in the response model.
    processing_tasks[task_id]['metadata'] = {
        'filename': file.filename,
        'size_bytes': len(content),
        'hash_sha256': file_hash,
        'llm_provider': llm_provider,
        'model_type': model_type,
        'enable_parallel': enable_parallel,
        'max_workers': max_workers,
        'save_to_db': save_to_db
    }

    # Kick off the actual processing after the response is sent.
    background_tasks.add_task(
        process_acordaos_background,
        task_id=task_id,
        file_path=str(file_path),
        llm_provider=llm_provider,
        model_type=model_type,
        enable_parallel=enable_parallel,
        max_workers=max_workers,
        save_to_db=save_to_db
    )

    logger.info(f"✅ Task {task_id} criada - {total_records} registros - {size_mb:.2f}MB")

    return response
158
+
159
+
160
@router.get("/process/status/{task_id}", response_model=ProcessingResponse)
async def get_processing_status(task_id: str):
    """
    **Consulta status de processamento.**

    Retorna informações atualizadas sobre a task:
    - Status atual (pending/processing/completed/error)
    - Progresso (processados/total)
    - Tempo estimado restante
    - URL de download (quando concluído)
    """
    task_data = processing_tasks.get(task_id)
    if task_data is None:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    return ProcessingResponse(**task_data)
178
+
179
+
180
@router.get("/process/list")
async def list_all_tasks():
    """
    **Lista todas as tasks de processamento.**

    Útil para debug e monitoramento.
    """
    # Build a compact summary per task instead of dumping the full records.
    summaries = []
    for tid, data in processing_tasks.items():
        summaries.append({
            "task_id": tid,
            "status": data["status"],
            "progress": f"{data['processed_records']}/{data['total_records']}",
            "started_at": data.get("started_at")
        })

    return {"total": len(processing_tasks), "tasks": summaries}
199
+
200
+
201
@router.get("/process/download/{task_id}")
async def download_result(task_id: str):
    """
    **Download do arquivo TAR.GZ com resultados.**

    Disponível apenas quando status = "completed".

    ## Conteúdo do arquivo:
    - `{task_id}_results.json`: Resultados completos
    - Análises de cada especialista
    - Metadados do processamento
    - Logs e métricas
    """
    # Guard: unknown task.
    task = processing_tasks.get(task_id)
    if task is None:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    # Guard: results only exist once processing finished successfully.
    if task['status'] != ProcessingStatus.COMPLETED:
        raise HTTPException(
            status_code=400,
            detail=f"Processamento ainda não concluído. Status atual: {task['status']}"
        )

    # The background worker writes the archive under OUTPUT_PATH/archives.
    archive = Path(settings.OUTPUT_PATH) / "archives" / f"{task_id}.tar.gz"
    if not archive.exists():
        raise HTTPException(
            status_code=404,
            detail="Arquivo de resultado não encontrado"
        )

    logger.info(f"📦 Download iniciado: {task_id}")

    download_name = f"para_ai_resultado_{task_id}.tar.gz"
    return FileResponse(
        path=str(archive),
        filename=download_name,
        media_type="application/gzip",
        headers={
            "Content-Disposition": f"attachment; filename={download_name}"
        }
    )
247
+
248
+
249
@router.delete("/process/{task_id}")
async def delete_task(task_id: str):
    """
    **Deleta uma task e seus arquivos.**

    Útil para limpeza de tasks antigas.
    """
    if task_id not in processing_tasks:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    upload_dir = Path(settings.UPLOAD_PATH)
    output_dir = Path(settings.OUTPUT_PATH)

    # Remove the stored upload(s) for this task.
    for path in upload_dir.glob(f"{task_id}_*"):
        path.unlink()

    # Remove result files at the top level of the output directory.
    for path in output_dir.glob(f"{task_id}*"):
        path.unlink()

    # FIX: the result TAR.GZ is written to OUTPUT_PATH/archives (see the
    # download endpoint), so clean that subdirectory too — previously the
    # archive was orphaned on delete.
    archives_dir = output_dir / "archives"
    if archives_dir.exists():
        for path in archives_dir.glob(f"{task_id}*"):
            path.unlink()

    # Drop the in-memory task entry.
    del processing_tasks[task_id]

    logger.info(f"🗑️ Task deletada: {task_id}")

    return {"message": f"Task {task_id} deletada com sucesso"}
278
+
279
+
280
+ # ============================================================================
281
+ # FUNÇÃO DE BACKGROUND
282
+ # ============================================================================
283
+
284
async def process_acordaos_background(
    task_id: str,
    file_path: str,
    llm_provider: str,
    model_type: str,
    enable_parallel: bool,
    max_workers: int,
    save_to_db: bool
):
    """
    Background worker for one uploaded JSONL file.

    Runs the processing service and mutates the shared
    `processing_tasks[task_id]` entry as the work progresses.
    """
    try:
        # Flag the task as in-progress (the entry dict is shared with
        # the status endpoints, so updates are visible immediately).
        entry = processing_tasks[task_id]
        entry['status'] = ProcessingStatus.PROCESSING
        entry['message'] = "Processamento em andamento..."

        logger.info(f"🚀 Iniciando processamento background: {task_id}")

        # Delegate the heavy lifting to the processing service.
        service = ProcessingService()
        result = await service.process_jsonl_file(
            file_path=file_path,
            task_id=task_id,
            llm_provider=llm_provider,
            model_type=model_type,
            enable_parallel=enable_parallel,
            max_workers=max_workers
        )

        # Success: record counters, completion time and the download URL.
        entry.update({
            'status': ProcessingStatus.COMPLETED,
            'message': f"Processamento concluído com sucesso em {result['elapsed_seconds']:.2f}s",
            'processed_records': result['processed'],
            'failed_records': result['failed'],
            'completed_at': datetime.now(),
            'download_url': f"/api/v1/process/download/{task_id}",
            'result_metadata': {
                'archive_path': result['archive_path'],
                'hash_sha256': result['hash'],
                'elapsed_seconds': result['elapsed_seconds']
            }
        })

        logger.info(f"✅ Task {task_id} concluída - {result['processed']} processados, {result['failed']} falhas")

    except Exception as e:
        # Failure: surface the error message through the status endpoint.
        logger.error(f"❌ Erro na task {task_id}: {str(e)}", exc_info=True)
        processing_tasks[task_id].update({
            'status': ProcessingStatus.ERROR,
            'message': f"Erro no processamento: {str(e)}",
            'completed_at': datetime.now(),
            'errors': [str(e)]
        })