Spaces:
Runtime error
Runtime error
File size: 3,059 Bytes
ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a 124d299 ae2014a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
"""Serviço de processamento de acórdãos"""
from typing import Dict, Any
import json
import tarfile
import hashlib
from pathlib import Path
from datetime import datetime
class ProcessingService:
    """Service that processes JSONL files of court rulings (acórdãos).

    Reads one JSON record per line, runs (currently mock) processing for
    each record, packages the results into a ``.tar.gz`` archive and
    returns a summary with counters, the archive path and its SHA-256.
    """

    async def process_jsonl_file(
        self,
        file_path: str,
        task_id: str,
        llm_provider: str = "groq",
        model_type: str = "balanced",
        enable_parallel: bool = True,
        max_workers: int = 3
    ) -> Dict[str, Any]:
        """Process a JSONL file containing rulings.

        Args:
            file_path: Path to the input JSONL file (UTF-8, one JSON
                object per line; blank lines are skipped).
            task_id: Identifier used to name the output archive.
            llm_provider: Reserved for the real pipeline (unused in the
                current mock implementation).
            model_type: Reserved for the real pipeline (unused).
            enable_parallel: Reserved for the real pipeline (unused).
            max_workers: Reserved for the real pipeline (unused).

        Returns:
            Dict with ``processed``/``failed`` counters, the archive
            path, its SHA-256 hash and a (mock) elapsed time.
        """
        # Local import: avoids a circular import with the API package.
        from api.config import get_settings
        settings = get_settings()

        processed, failed, results = self._read_records(file_path)

        output_dir = Path(settings.OUTPUT_PATH) / "archives"
        output_dir.mkdir(parents=True, exist_ok=True)
        archive_path = output_dir / f"{task_id}.tar.gz"

        result_json = {
            "task_id": task_id,
            "processed": processed,
            "failed": failed,
            "results": results,
            "completed_at": datetime.now().isoformat()
        }
        self._create_archive(archive_path, output_dir, task_id, result_json)

        return {
            "processed": processed,
            "failed": failed,
            "archive_path": str(archive_path),
            "hash": self._sha256(archive_path),
            "elapsed_seconds": 1.5  # mock
        }

    @staticmethod
    def _read_records(file_path: str):
        """Parse a JSONL file; return ``(processed, failed, results)``.

        Each well-formed line yields a "processed" entry; a malformed
        line yields an "error" entry attributed to ``"unknown"``.
        BUGFIX: the original referenced ``record`` inside ``except`` —
        unbound when the first line fails to parse (NameError) and stale
        (the previous record's id) on later failures.
        """
        processed = 0
        failed = 0
        results = []
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue  # skip blank lines
                try:
                    record = json.loads(line)
                    # Real processing with the 9 specialists would go
                    # here; for now emit a mock result.
                    results.append({
                        "acordao_id": record.get("acordao_id"),
                        "status": "processed",
                        "timestamp": datetime.now().isoformat()
                    })
                    processed += 1
                except Exception as e:
                    failed += 1
                    results.append({
                        "acordao_id": "unknown",
                        "status": "error",
                        "error": str(e)
                    })
        return processed, failed, results

    @staticmethod
    def _create_archive(
        archive_path: Path,
        output_dir: Path,
        task_id: str,
        result_json: Dict[str, Any]
    ) -> None:
        """Serialize *result_json* and pack it into a ``.tar.gz``."""
        temp_json = output_dir / f"{task_id}_results.json"
        with open(temp_json, 'w', encoding='utf-8') as f:
            json.dump(result_json, f, ensure_ascii=False, indent=2)
        try:
            with tarfile.open(archive_path, 'w:gz') as tar:
                tar.add(temp_json, arcname=f"{task_id}_results.json")
        finally:
            # Remove the temp JSON even if archiving fails (the
            # original leaked it on a tarfile error).
            temp_json.unlink()

    @staticmethod
    def _sha256(path: Path) -> str:
        """SHA-256 of a file, read in chunks to bound memory use."""
        digest = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
        return digest.hexdigest()
|