PARA.AI / api /services /processing_service.py
caarleexx's picture
Update api/services/processing_service.py
ae2014a verified
raw
history blame
3.06 kB
"""Serviço de processamento de acórdãos"""
import hashlib
import json
import tarfile
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
class ProcessingService:
    """Service that processes JSONL files of court rulings (acórdãos)."""

    async def process_jsonl_file(
        self,
        file_path: str,
        task_id: str,
        llm_provider: str = "groq",
        model_type: str = "balanced",
        enable_parallel: bool = True,
        max_workers: int = 3,
        output_dir: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Process a JSONL file of rulings and archive the per-record results.

        Each non-blank line is parsed as one JSON object. A line that cannot
        be parsed, or that is not a JSON object, is counted as failed and
        recorded with ``acordao_id`` set to ``"unknown"``; it never aborts
        the run. The collected results are written to
        ``<output_dir>/archives/<task_id>.tar.gz``.

        Args:
            file_path: Path of the UTF-8 encoded JSONL input file.
            task_id: Identifier used to name the archive and its JSON member.
            llm_provider: Currently unused (the real processing is mocked).
            model_type: Currently unused (the real processing is mocked).
            enable_parallel: Currently unused (the real processing is mocked).
            max_workers: Currently unused (the real processing is mocked).
            output_dir: Base directory for the archive. When ``None`` the
                value of ``get_settings().OUTPUT_PATH`` from ``api.config``
                is used.

        Returns:
            Dict with the keys ``processed``, ``failed``, ``archive_path``,
            ``hash`` (SHA-256 hex digest of the archive) and
            ``elapsed_seconds`` (measured wall-clock duration of this call).

        Raises:
            OSError: If the input file cannot be read or the archive cannot
                be written.
        """
        started = time.perf_counter()

        if output_dir is None:
            # Imported lazily so the settings module is only required when
            # the caller does not supply an explicit output directory.
            from api.config import get_settings
            output_dir = get_settings().OUTPUT_PATH

        processed = 0
        failed = 0
        results = []

        # Read the JSONL file line by line.
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue
                # Reset per line so a failure never reports the id of a
                # previously parsed record (or hits an unbound name).
                record = None
                try:
                    record = json.loads(line)
                    if not isinstance(record, dict):
                        raise ValueError(
                            "expected a JSON object, got "
                            f"{type(record).__name__}"
                        )
                    # The real processing with the 9 specialists would go
                    # here; for now a mock result is returned.
                    results.append({
                        "acordao_id": record.get("acordao_id"),
                        "status": "processed",
                        "timestamp": datetime.now().isoformat()
                    })
                    processed += 1
                except Exception as e:
                    # Per-record boundary: one bad line must not abort the
                    # whole file, so the error is recorded and we move on.
                    failed += 1
                    if isinstance(record, dict):
                        acordao_id = record.get("acordao_id", "unknown")
                    else:
                        acordao_id = "unknown"
                    results.append({
                        "acordao_id": acordao_id,
                        "status": "error",
                        "error": str(e)
                    })

        # Prepare the archive location.
        archive_dir = Path(output_dir) / "archives"
        archive_dir.mkdir(parents=True, exist_ok=True)
        archive_path = archive_dir / f"{task_id}.tar.gz"

        result_json = {
            "task_id": task_id,
            "processed": processed,
            "failed": failed,
            "results": results,
            "completed_at": datetime.now().isoformat()
        }

        temp_json = archive_dir / f"{task_id}_results.json"
        try:
            with open(temp_json, 'w', encoding='utf-8') as f:
                json.dump(result_json, f, ensure_ascii=False, indent=2)

            with tarfile.open(archive_path, 'w:gz') as tar:
                tar.add(temp_json, arcname=f"{task_id}_results.json")
        finally:
            # Always remove the temporary JSON, even if archiving failed.
            if temp_json.exists():
                temp_json.unlink()

        # Hash the archive in chunks to avoid loading it fully into memory.
        digest = hashlib.sha256()
        with open(archive_path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)

        return {
            "processed": processed,
            "failed": failed,
            "archive_path": str(archive_path),
            "hash": digest.hexdigest(),
            "elapsed_seconds": time.perf_counter() - started
        }