caarleexx committed on
Commit
ae2014a
·
verified ·
1 Parent(s): 124d299

Update api/services/processing_service.py

Browse files
Files changed (1) hide show
  1. api/services/processing_service.py +85 -65
api/services/processing_service.py CHANGED
@@ -1,74 +1,94 @@
1
- """Router de testes de processadores"""
2
- from fastapi import APIRouter, HTTPException
3
- from api.models.requests import ProcessorTestRequest
4
- from api.models.responses import ProcessorTestResponse
5
- from api.utils.logger import setup_logger
6
- import time
 
7
 
8
- router = APIRouter()
9
- logger = setup_logger(__name__)
10
-
11
@router.post("/run", response_model=ProcessorTestResponse)
async def test_processor(request: ProcessorTestRequest):
    """Run a single processor against the supplied decision text.

    Resolves numeric and short-name aliases to the canonical processor
    module name, invokes the processor, and returns the outcome together
    with the wall-clock duration. Failures are reported in the response
    body rather than raised, so the endpoint always answers 200.
    """
    started = time.time()

    try:
        # Aliases (numeric ids and short names) -> canonical processor names.
        alias_to_name = {
            "1": "processor_metadados",
            "2": "processor_relatorio",
            "metadados": "processor_metadados",
            "relatorio": "processor_relatorio"
        }
        resolved_name = alias_to_name.get(request.processor_name, request.processor_name)

        # Resolve the processor implementation lazily.
        from processors import base_processor
        processor = base_processor.get_processor(resolved_name)

        outcome = await processor.process(
            ementa=request.ementa,
            integra=request.integra,
            tribunal=request.tribunal
        )

        return ProcessorTestResponse(
            processor_name=resolved_name,
            success=True,
            result=outcome,
            duration_ms=(time.time() - started) * 1000
        )

    except Exception as e:
        logger.error(f"Processor test error: {e}")
        # Report the name exactly as the caller sent it, since resolution
        # itself may have been the failing step.
        return ProcessorTestResponse(
            processor_name=request.processor_name,
            success=False,
            duration_ms=(time.time() - started) * 1000,
            error=str(e)
        )
56
-
57
@router.get("/list")
async def list_processors():
    """Return the catalogue of available processors with their numeric ids."""
    catalogue = [
        {"id": 1, "name": "Metadados"},
        {"id": 2, "name": "Relatório"},
        {"id": 3, "name": "Fundamentação"},
        {"id": 4, "name": "Decisão"},
        {"id": 5, "name": "Auditoria"},
        {"id": 6, "name": "Arquivo Relacional"},
        {"id": 7, "name": "Segmentação Sintática"},
        {"id": 8, "name": "Análise Contextual"},
        {"id": 9, "name": "Transcrição 3-Partite"}
    ]
    return {"processors": catalogue, "total": len(catalogue)}
74
-
 
1
+ """Serviço de processamento de acórdãos"""
2
+ from typing import Dict, Any
3
+ import json
4
+ import tarfile
5
+ import hashlib
6
+ from pathlib import Path
7
+ from datetime import datetime
8
 
9
class ProcessingService:
    """Service that processes JSONL files of court decisions (acórdãos)."""

    async def process_jsonl_file(
        self,
        file_path: str,
        task_id: str,
        llm_provider: str = "groq",
        model_type: str = "balanced",
        enable_parallel: bool = True,
        max_workers: int = 3
    ) -> Dict[str, Any]:
        """
        Process a JSONL file of acórdãos and archive the results.

        Each non-empty line is parsed as a JSON record; per-record outcomes
        are collected and written as ``<task_id>_results.json`` inside a
        ``<task_id>.tar.gz`` archive under ``settings.OUTPUT_PATH/archives``.

        Args:
            file_path: Path to the input JSONL file.
            task_id: Identifier used to name the output archive.
            llm_provider: LLM backend name (currently unused; reserved for
                the real 9-specialist pipeline).
            model_type: Model quality tier (currently unused).
            enable_parallel: Whether to parallelize processing (currently
                unused).
            max_workers: Parallel worker count (currently unused).

        Returns:
            Dict with ``processed``/``failed`` counts, ``archive_path``,
            the archive's SHA-256 ``hash``, and ``elapsed_seconds``.
        """
        import time
        from api.config import get_settings

        start = time.perf_counter()
        settings = get_settings()
        processed = 0
        failed = 0
        results = []

        # Read the JSONL input, one record per non-empty line.
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue

                # Parse in its own try so a bad line cannot leave `record`
                # unbound (or stale from the previous iteration) in the
                # error path — the original read `record` in its handler,
                # which raised NameError if the very first line was bad.
                try:
                    record = json.loads(line)
                except Exception as e:
                    failed += 1
                    results.append({
                        "acordao_id": "unknown",
                        "status": "error",
                        "error": str(e)
                    })
                    continue

                try:
                    # Placeholder for the real processing with the 9
                    # specialists; for now records are only acknowledged.
                    results.append({
                        "acordao_id": record.get("acordao_id"),
                        "status": "processed",
                        "timestamp": datetime.now().isoformat()
                    })
                    processed += 1
                except Exception as e:
                    failed += 1
                    results.append({
                        "acordao_id": record.get("acordao_id", "unknown"),
                        "status": "error",
                        "error": str(e)
                    })

        # Prepare the output directory and archive path.
        output_dir = Path(settings.OUTPUT_PATH) / "archives"
        output_dir.mkdir(parents=True, exist_ok=True)

        archive_path = output_dir / f"{task_id}.tar.gz"

        # Aggregate per-record outcomes into a single result document.
        result_json = {
            "task_id": task_id,
            "processed": processed,
            "failed": failed,
            "results": results,
            "completed_at": datetime.now().isoformat()
        }

        temp_json = output_dir / f"{task_id}_results.json"
        with open(temp_json, 'w', encoding='utf-8') as f:
            json.dump(result_json, f, ensure_ascii=False, indent=2)

        # Pack into a tar.gz; always remove the temporary JSON, even if
        # archiving fails (the original leaked it on error).
        try:
            with tarfile.open(archive_path, 'w:gz') as tar:
                tar.add(temp_json, arcname=f"{task_id}_results.json")
        finally:
            temp_json.unlink(missing_ok=True)

        # SHA-256 of the archive for downstream integrity checks.
        with open(archive_path, 'rb') as f:
            file_hash = hashlib.sha256(f.read()).hexdigest()

        return {
            "processed": processed,
            "failed": failed,
            "archive_path": str(archive_path),
            "hash": file_hash,
            # Real wall-clock duration instead of the previous 1.5 mock.
            "elapsed_seconds": time.perf_counter() - start
        }