Spaces:
Runtime error
Runtime error
"""Service for processing acordaos (court rulings)."""
import hashlib
import json
import tarfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict
class ProcessingService:
    """Service for processing JSONL files of acordaos (court rulings)."""

    async def process_jsonl_file(
        self,
        file_path: str,
        task_id: str,
        llm_provider: str = "groq",
        model_type: str = "balanced",
        enable_parallel: bool = True,
        max_workers: int = 3
    ) -> Dict[str, Any]:
        """
        Process a JSONL file containing acordao records.

        Each non-empty line is parsed as a JSON object; per-record results
        are collected, written to a JSON file, bundled into a ``.tar.gz``
        archive named after ``task_id``, and hashed with SHA-256.

        Args:
            file_path: Path to the input JSONL file.
            task_id: Unique identifier used to name the output artifacts.
            llm_provider: LLM backend name (unused by the current mock pipeline).
            model_type: Model tier selector (unused by the current mock pipeline).
            enable_parallel: Whether to parallelize (unused by the current mock pipeline).
            max_workers: Worker count for parallel mode (unused by the current mock pipeline).

        Returns:
            Dict with ``processed``/``failed`` counts, ``archive_path``,
            the archive's SHA-256 ``hash``, and (mock) ``elapsed_seconds``.
        """
        # Local import: settings module may not be importable at module load time.
        from api.config import get_settings
        settings = get_settings()

        processed = 0
        failed = 0
        results = []

        # Read the JSONL file, one record per non-empty line.
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as e:
                    # Malformed JSON: the record id is unrecoverable from this line.
                    # (Previously this path crashed with NameError when the very
                    # first line failed to parse, because `record` was unbound.)
                    failed += 1
                    results.append({
                        "acordao_id": "unknown",
                        "status": "error",
                        "error": str(e)
                    })
                    continue
                if not isinstance(record, dict):
                    # Valid JSON but not an object (e.g. a bare number/string);
                    # `record.get` would raise AttributeError otherwise.
                    failed += 1
                    results.append({
                        "acordao_id": "unknown",
                        "status": "error",
                        "error": f"expected JSON object, got {type(record).__name__}"
                    })
                    continue
                try:
                    # Here the real processing with the 9 specialists would run;
                    # for now a mock result is returned.
                    results.append({
                        "acordao_id": record.get("acordao_id"),
                        "status": "processed",
                        "timestamp": datetime.now().isoformat()
                    })
                    processed += 1
                except Exception as e:
                    failed += 1
                    results.append({
                        "acordao_id": record.get("acordao_id", "unknown"),
                        "status": "error",
                        "error": str(e)
                    })

        # Create the tar.gz archive with the results.
        output_dir = Path(settings.OUTPUT_PATH) / "archives"
        output_dir.mkdir(parents=True, exist_ok=True)
        archive_path = output_dir / f"{task_id}.tar.gz"

        # Summary JSON that goes inside the archive.
        result_json = {
            "task_id": task_id,
            "processed": processed,
            "failed": failed,
            "results": results,
            "completed_at": datetime.now().isoformat()
        }

        temp_json = output_dir / f"{task_id}_results.json"
        try:
            with open(temp_json, 'w', encoding='utf-8') as f:
                json.dump(result_json, f, ensure_ascii=False, indent=2)
            with tarfile.open(archive_path, 'w:gz') as tar:
                tar.add(temp_json, arcname=f"{task_id}_results.json")
        finally:
            # Remove the temporary JSON even if archiving fails (was leaked before).
            temp_json.unlink(missing_ok=True)

        # SHA-256 of the archive so the caller can verify integrity.
        with open(archive_path, 'rb') as f:
            file_hash = hashlib.sha256(f.read()).hexdigest()

        return {
            "processed": processed,
            "failed": failed,
            "archive_path": str(archive_path),
            "hash": file_hash,
            "elapsed_seconds": 1.5  # mock
        }