Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- api/routers/debug.py +37 -0
- api/routers/health.py +92 -0
- api/routers/processing.py +343 -0
api/routers/debug.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Debug router - system information and task introspection."""
from fastapi import APIRouter
from api.config import get_settings
from api.utils.logger import setup_logger
import sys, os  # NOTE(review): `os` appears unused in this module — confirm before removing

router = APIRouter()
logger = setup_logger(__name__)
settings = get_settings()
|
| 10 |
+
|
| 11 |
+
@router.get("/info")
async def debug_info():
    """Return a full snapshot of runtime configuration for debugging.

    Covers the interpreter version, environment flags, configured
    file-system paths, and which LLM providers have an API key set.
    """
    configured_paths = {
        "upload": settings.UPLOAD_PATH,
        "output": settings.OUTPUT_PATH,
        "files": settings.FILES_BASE_PATH,
    }
    # "Available" means only that an API key is configured, not that the
    # provider was actually reached.
    providers = {
        "groq": bool(settings.GROQ_API_KEY),
        "openai": bool(settings.OPENAI_API_KEY),
        "anthropic": bool(settings.ANTHROPIC_API_KEY),
    }
    return {
        "python_version": sys.version,
        "environment": settings.APP_ENV,
        "debug_mode": settings.DEBUG,
        "paths": configured_paths,
        "llm_config": {
            "default_provider": settings.DEFAULT_LLM_PROVIDER,
            "providers_available": providers,
        },
    }
|
| 32 |
+
|
| 33 |
+
@router.get("/tasks")
async def list_tasks():
    """List the ids of all in-memory processing tasks."""
    # Imported lazily to avoid a circular import between routers.
    from api.routers.processing import processing_tasks

    task_ids = list(processing_tasks.keys())
    return {"total": len(task_ids), "tasks": task_ids}
|
api/routers/health.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Health check and system status router.
"""
from fastapi import APIRouter
from datetime import datetime
import psutil
import os

from api.models.responses import HealthResponse
from api.config import get_settings
from api.utils.logger import setup_logger

router = APIRouter()
logger = setup_logger(__name__)
settings = get_settings()

# Captured at import time; used by /health to report process uptime.
app_start_time = datetime.now()
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """
    **Complete system health check.**

    Verifies:
    - ✅ Database connectivity
    - ✅ LLM providers availability
    - ✅ File system access
    - ✅ System metrics

    Returns a HealthResponse whose overall status is "healthy" only when
    every individual service check passed, otherwise "degraded".
    """
    services = {}

    # Database: a failed import or a failed health query marks it down.
    try:
        from database.db_manager import get_db_manager
        db = get_db_manager()
        services['database'] = db.health_check()
    except Exception as e:
        logger.error(f"DB health check failed: {e}")
        services['database'] = False

    # LLM providers: "available" means only that an API key is configured,
    # not that the provider actually responds.
    try:
        services['llm_groq'] = bool(settings.GROQ_API_KEY)
        services['llm_openai'] = bool(settings.OPENAI_API_KEY)
        services['llm_anthropic'] = bool(settings.ANTHROPIC_API_KEY)
    except Exception as e:
        logger.error(f"LLM health check failed: {e}")
        services.update({
            'llm_groq': False,
            'llm_openai': False,
            'llm_anthropic': False
        })

    # File-system paths the pipeline depends on.
    from pathlib import Path
    services['files_upload'] = Path(settings.UPLOAD_PATH).exists()
    services['files_output'] = Path(settings.OUTPUT_PATH).exists()
    services['files_base'] = Path(settings.FILES_BASE_PATH).exists()

    # System metrics are best-effort; psutil may fail on restricted
    # platforms, in which case an empty metrics dict is returned.
    # (Was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
    # and hid the failure cause.)
    try:
        metrics = {
            'cpu_percent': psutil.cpu_percent(),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
            'process_count': len(psutil.pids())
        }
    except Exception as e:
        logger.error(f"System metrics unavailable: {e}")
        metrics = {}

    # Uptime measured from module import (process start).
    uptime = (datetime.now() - app_start_time).total_seconds()

    # Overall status: healthy only if every check passed.
    status = "healthy" if all(services.values()) else "degraded"

    return HealthResponse(
        status=status,
        version=settings.APP_VERSION,
        environment=settings.APP_ENV,
        timestamp=datetime.now(),
        uptime_seconds=uptime,
        services=services,
        metrics=metrics
    )
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
@router.get("/ping")
async def ping():
    """Lightweight liveness probe."""
    now = datetime.now()
    return {"status": "pong", "timestamp": now.isoformat()}
|
api/routers/processing.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Processing router for acórdãos (court rulings).

MAIN ENDPOINT: Upload JSONL → Processing → Download TAR.GZ
"""
from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException, Query
from fastapi.responses import FileResponse
import uuid
import json
from pathlib import Path
from datetime import datetime
import hashlib

from api.models.requests import ProcessingOptionsRequest
from api.models.responses import ProcessingResponse, ProcessingStatus, FileInfoResponse
from api.services.processing_service import ProcessingService
from api.utils.logger import setup_logger
from api.config import get_settings

router = APIRouter()
logger = setup_logger(__name__)
settings = get_settings()

# In-memory task storage (in production, use Redis or a database).
# Shared with the debug router, which imports this dict directly.
processing_tasks = {}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _validate_jsonl_content(content: bytes) -> int:
    """Validate UTF-8 JSONL *content* and return the number of records.

    Raises HTTPException(422) on encoding errors, malformed JSON lines,
    or records missing the mandatory 'ementa'/'integra' fields.
    """
    try:
        # splitlines() replaces the broken split-on-literal-newline and also
        # copes with \r\n line endings.
        lines = content.decode('utf-8').splitlines()
    except UnicodeDecodeError:
        raise HTTPException(
            status_code=422,
            detail="Arquivo deve estar em UTF-8"
        )

    total_records = 0
    for line_num, line in enumerate(lines, 1):
        if not line.strip():
            continue  # tolerate blank lines between records
        try:
            record = json.loads(line)
        except json.JSONDecodeError as e:
            raise HTTPException(
                status_code=422,
                detail=f"JSONL inválido na linha {line_num}: {str(e)}"
            )
        if 'ementa' not in record or 'integra' not in record:
            raise HTTPException(
                status_code=422,
                detail=f"Linha {line_num}: campos obrigatórios ausentes (ementa, integra)"
            )
        total_records += 1
    return total_records


@router.post("/process/upload", response_model=ProcessingResponse, status_code=202)
async def upload_and_process(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Arquivo JSONL com acórdãos"),
    llm_provider: str = Query("groq", description="Provedor LLM (groq/openai/anthropic)"),
    model_type: str = Query("balanced", description="Tipo de modelo (fast/balanced/quality)"),
    enable_parallel: bool = Query(True, description="Processar em paralelo"),
    max_workers: int = Query(3, ge=1, le=10, description="Workers paralelos"),
    save_to_db: bool = Query(False, description="Salvar resultados no banco")
):
    """
    **Upload a JSONL file and start background processing.**

    ## Flow:
    1. Upload of the JSONL file
    2. Format validation
    3. Creation of a processing task
    4. Background processing (9 specialists)
    5. Generation of a TAR.GZ file with the results

    ## Expected JSONL format:
    ```json
    {"acordao_id": "001", "tribunal": "TJPR", "ementa": "...", "integra": "..."}
    {"acordao_id": "002", "tribunal": "TJSP", "ementa": "...", "integra": "..."}
    ```

    ## Response:
    - **task_id**: unique id to query status
    - **status**: initial status (pending)
    - Use `/process/status/{task_id}` to follow progress
    - Use `/process/download/{task_id}` to download results
    """

    # Reject anything that is not a JSON/JSONL file.
    if not file.filename.endswith(('.jsonl', '.json')):
        raise HTTPException(
            status_code=400,
            detail="Arquivo deve ser .jsonl ou .json"
        )

    # Enforce the configured upload size limit.
    content = await file.read()
    size_mb = len(content) / (1024 * 1024)
    if size_mb > settings.MAX_UPLOAD_SIZE_MB:
        raise HTTPException(
            status_code=413,
            detail=f"Arquivo muito grande: {size_mb:.2f}MB (máx: {settings.MAX_UPLOAD_SIZE_MB}MB)"
        )

    # Unique task id used for storage keys and on-disk file names.
    task_id = f"task-{uuid.uuid4()}"

    upload_dir = Path(settings.UPLOAD_PATH)
    upload_dir.mkdir(parents=True, exist_ok=True)

    # Persist the raw upload; the background worker reads it from disk.
    file_path = upload_dir / f"{task_id}_{file.filename}"
    file_path.write_bytes(content)

    # Integrity hash, stored with the task metadata.
    file_hash = hashlib.sha256(content).hexdigest()

    # Validate format and count records before accepting the task.
    total_records = _validate_jsonl_content(content)

    response = ProcessingResponse(
        task_id=task_id,
        status=ProcessingStatus.PENDING,
        message=f"Processamento agendado para {total_records} registros",
        total_records=total_records,
        processed_records=0,
        failed_records=0,
        started_at=datetime.now()
    )

    # Store the task plus request metadata for later inspection.
    processing_tasks[task_id] = response.dict()
    processing_tasks[task_id]['metadata'] = {
        'filename': file.filename,
        'size_bytes': len(content),
        'hash_sha256': file_hash,
        'llm_provider': llm_provider,
        'model_type': model_type,
        'enable_parallel': enable_parallel,
        'max_workers': max_workers,
        'save_to_db': save_to_db
    }

    # Kick off the actual processing after the response is sent.
    background_tasks.add_task(
        process_acordaos_background,
        task_id=task_id,
        file_path=str(file_path),
        llm_provider=llm_provider,
        model_type=model_type,
        enable_parallel=enable_parallel,
        max_workers=max_workers,
        save_to_db=save_to_db
    )

    logger.info(f"✅ Task {task_id} criada - {total_records} registros - {size_mb:.2f}MB")

    return response
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
@router.get("/process/status/{task_id}", response_model=ProcessingResponse)
async def get_processing_status(task_id: str):
    """
    **Query the status of a processing task.**

    Returns up-to-date information about the task:
    - Current status (pending/processing/completed/error)
    - Progress (processed/total)
    - Estimated remaining time
    - Download URL (once finished)
    """
    task_data = processing_tasks.get(task_id)
    if task_data is None:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )
    return ProcessingResponse(**task_data)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@router.get("/process/list")
async def list_all_tasks():
    """
    **List every processing task.**

    Useful for debugging and monitoring.
    """
    summaries = []
    for task_id, data in processing_tasks.items():
        summaries.append({
            "task_id": task_id,
            "status": data["status"],
            "progress": f"{data['processed_records']}/{data['total_records']}",
            "started_at": data.get("started_at"),
        })
    return {"total": len(processing_tasks), "tasks": summaries}
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
@router.get("/process/download/{task_id}")
|
| 202 |
+
async def download_result(task_id: str):
|
| 203 |
+
"""
|
| 204 |
+
**Download do arquivo TAR.GZ com resultados.**
|
| 205 |
+
|
| 206 |
+
Disponível apenas quando status = "completed".
|
| 207 |
+
|
| 208 |
+
## Conteúdo do arquivo:
|
| 209 |
+
- `{task_id}_results.json`: Resultados completos
|
| 210 |
+
- Análises de cada especialista
|
| 211 |
+
- Metadados do processamento
|
| 212 |
+
- Logs e métricas
|
| 213 |
+
"""
|
| 214 |
+
if task_id not in processing_tasks:
|
| 215 |
+
raise HTTPException(
|
| 216 |
+
status_code=404,
|
| 217 |
+
detail=f"Task '{task_id}' não encontrada"
|
| 218 |
+
)
|
| 219 |
+
|
| 220 |
+
task = processing_tasks[task_id]
|
| 221 |
+
|
| 222 |
+
if task['status'] != ProcessingStatus.COMPLETED:
|
| 223 |
+
raise HTTPException(
|
| 224 |
+
status_code=400,
|
| 225 |
+
detail=f"Processamento ainda não concluído. Status atual: {task['status']}"
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
# Procurar arquivo
|
| 229 |
+
output_file = Path(settings.OUTPUT_PATH) / "archives" / f"{task_id}.tar.gz"
|
| 230 |
+
|
| 231 |
+
if not output_file.exists():
|
| 232 |
+
raise HTTPException(
|
| 233 |
+
status_code=404,
|
| 234 |
+
detail="Arquivo de resultado não encontrado"
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
logger.info(f"📦 Download iniciado: {task_id}")
|
| 238 |
+
|
| 239 |
+
return FileResponse(
|
| 240 |
+
path=str(output_file),
|
| 241 |
+
filename=f"para_ai_resultado_{task_id}.tar.gz",
|
| 242 |
+
media_type="application/gzip",
|
| 243 |
+
headers={
|
| 244 |
+
"Content-Disposition": f"attachment; filename=para_ai_resultado_{task_id}.tar.gz"
|
| 245 |
+
}
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
@router.delete("/process/{task_id}")
async def delete_task(task_id: str):
    """
    **Delete a task and its files.**

    Removes the uploaded source file, any generated outputs (including the
    result archive under OUTPUT_PATH/archives/), and the in-memory task entry.

    Useful for cleaning up old tasks.
    """
    if task_id not in processing_tasks:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    upload_dir = Path(settings.UPLOAD_PATH)
    output_dir = Path(settings.OUTPUT_PATH)

    # Remove the uploaded source file(s).
    for path in upload_dir.glob(f"{task_id}_*"):
        if path.is_file():
            path.unlink()

    # Remove generated outputs. The result archive lives in
    # OUTPUT_PATH/archives/ (see download_result); globbing only the output
    # root — as before — left the .tar.gz behind. The is_file() guard also
    # prevents unlink() from failing on a matching directory.
    candidates = list(output_dir.glob(f"{task_id}*"))
    candidates += list((output_dir / "archives").glob(f"{task_id}*"))
    for path in candidates:
        if path.is_file():
            path.unlink()

    # Drop the in-memory entry.
    del processing_tasks[task_id]

    logger.info(f"🗑️ Task deletada: {task_id}")

    return {"message": f"Task {task_id} deletada com sucesso"}
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
# ============================================================================
|
| 281 |
+
# FUNÇÃO DE BACKGROUND
|
| 282 |
+
# ============================================================================
|
| 283 |
+
|
| 284 |
+
async def process_acordaos_background(
    task_id: str,
    file_path: str,
    llm_provider: str,
    model_type: str,
    enable_parallel: bool,
    max_workers: int,
    save_to_db: bool
):
    """
    Background worker that processes one uploaded JSONL file.

    Updates the in-memory task entry as it progresses and records either
    the completed result metadata or the error that aborted the run.
    """
    try:
        # Mark the task as running.
        entry = processing_tasks[task_id]
        entry['status'] = ProcessingStatus.PROCESSING
        entry['message'] = "Processamento em andamento..."

        logger.info(f"🚀 Iniciando processamento background: {task_id}")

        # Delegate the heavy lifting to the processing service.
        # NOTE(review): save_to_db is received but not forwarded here —
        # confirm whether the service should persist results.
        service = ProcessingService()
        result = await service.process_jsonl_file(
            file_path=file_path,
            task_id=task_id,
            llm_provider=llm_provider,
            model_type=model_type,
            enable_parallel=enable_parallel,
            max_workers=max_workers
        )

        # Record success along with result metadata and download URL.
        entry.update({
            'status': ProcessingStatus.COMPLETED,
            'message': f"Processamento concluído com sucesso em {result['elapsed_seconds']:.2f}s",
            'processed_records': result['processed'],
            'failed_records': result['failed'],
            'completed_at': datetime.now(),
            'download_url': f"/api/v1/process/download/{task_id}",
            'result_metadata': {
                'archive_path': result['archive_path'],
                'hash_sha256': result['hash'],
                'elapsed_seconds': result['elapsed_seconds']
            }
        })

        logger.info(f"✅ Task {task_id} concluída - {result['processed']} processados, {result['failed']} falhas")

    except Exception as e:
        # Record failure so the status endpoint can surface it.
        logger.error(f"❌ Erro na task {task_id}: {str(e)}", exc_info=True)
        processing_tasks[task_id].update({
            'status': ProcessingStatus.ERROR,
            'message': f"Erro no processamento: {str(e)}",
            'completed_at': datetime.now(),
            'errors': [str(e)]
        })
|