caarleexx committed on
Commit
17f01f8
·
verified ·
1 Parent(s): a255057

Upload 3 files

Browse files
api/routers/debug.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Router de debug - informações de sistema e tasks"""
2
+ from fastapi import APIRouter
3
+ from api.config import get_settings
4
+ from api.utils.logger import setup_logger
5
+ import sys, os
6
+
7
+ router = APIRouter()
8
+ logger = setup_logger(__name__)
9
+ settings = get_settings()
10
+
11
@router.get("/info")
async def debug_info():
    """Return a full snapshot of runtime configuration for debugging."""
    # Filesystem locations the API reads from / writes to.
    paths = {
        "upload": settings.UPLOAD_PATH,
        "output": settings.OUTPUT_PATH,
        "files": settings.FILES_BASE_PATH,
    }
    # A provider counts as "available" when its API key is configured.
    providers_available = {
        "groq": bool(settings.GROQ_API_KEY),
        "openai": bool(settings.OPENAI_API_KEY),
        "anthropic": bool(settings.ANTHROPIC_API_KEY),
    }
    return {
        "python_version": sys.version,
        "environment": settings.APP_ENV,
        "debug_mode": settings.DEBUG,
        "paths": paths,
        "llm_config": {
            "default_provider": settings.DEFAULT_LLM_PROVIDER,
            "providers_available": providers_available,
        },
    }
32
+
33
@router.get("/tasks")
async def list_tasks():
    """List the ids of every known processing task."""
    # Imported lazily to avoid a circular import at module load time.
    from api.routers.processing import processing_tasks

    task_ids = list(processing_tasks.keys())
    return {"total": len(task_ids), "tasks": task_ids}
api/routers/health.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Router de health check e status do sistema
3
+ """
4
+ from fastapi import APIRouter
5
+ from datetime import datetime
6
+ import psutil
7
+ import os
8
+
9
+ from api.models.responses import HealthResponse
10
+ from api.config import get_settings
11
+ from api.utils.logger import setup_logger
12
+
13
+ router = APIRouter()
14
+ logger = setup_logger(__name__)
15
+ settings = get_settings()
16
+
17
+ app_start_time = datetime.now()
18
+
19
+
20
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """
    **Health check completo do sistema.**

    Verifica:
    - ✅ Database connectivity
    - ✅ LLM providers availability
    - ✅ File system access
    - ✅ System metrics
    """
    services = {}

    # Database connectivity. The import is deferred so a broken DB layer
    # cannot prevent this module (and the whole router) from loading.
    try:
        from database.db_manager import get_db_manager
        db = get_db_manager()
        services['database'] = db.health_check()
    except Exception as e:
        logger.error(f"DB health check failed: {e}")
        services['database'] = False

    # LLM providers: "available" here only means an API key is configured,
    # not that the remote service is reachable.
    try:
        services['llm_groq'] = bool(settings.GROQ_API_KEY)
        services['llm_openai'] = bool(settings.OPENAI_API_KEY)
        services['llm_anthropic'] = bool(settings.ANTHROPIC_API_KEY)
    except Exception as e:
        logger.error(f"LLM health check failed: {e}")
        services.update({
            'llm_groq': False,
            'llm_openai': False,
            'llm_anthropic': False
        })

    # The working directories must exist for uploads/processing to succeed.
    from pathlib import Path
    services['files_upload'] = Path(settings.UPLOAD_PATH).exists()
    services['files_output'] = Path(settings.OUTPUT_PATH).exists()
    services['files_base'] = Path(settings.FILES_BASE_PATH).exists()

    # System metrics are best-effort. Fixed: the previous bare `except:`
    # also swallowed KeyboardInterrupt/SystemExit and hid the failure cause.
    try:
        metrics = {
            'cpu_percent': psutil.cpu_percent(),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
            'process_count': len(psutil.pids())
        }
    except Exception as e:
        logger.warning(f"System metrics collection failed: {e}")
        metrics = {}

    # Seconds since this module was imported (approximates process uptime).
    uptime = (datetime.now() - app_start_time).total_seconds()

    # "healthy" only when every individual check passed.
    status = "healthy" if all(services.values()) else "degraded"

    return HealthResponse(
        status=status,
        version=settings.APP_VERSION,
        environment=settings.APP_ENV,
        timestamp=datetime.now(),
        uptime_seconds=uptime,
        services=services,
        metrics=metrics
    )
87
+
88
+
89
@router.get("/ping")
async def ping():
    """Simple ping endpoint."""
    now = datetime.now()
    return {"status": "pong", "timestamp": now.isoformat()}
api/routers/processing.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Router de processamento de acórdãos
3
+ ENDPOINT PRINCIPAL: Upload JSONL → Processamento → Download TAR.GZ
4
+ """
5
+ from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException, Query
6
+ from fastapi.responses import FileResponse
7
+ import uuid
8
+ import json
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ import hashlib
12
+
13
+ from api.models.requests import ProcessingOptionsRequest
14
+ from api.models.responses import ProcessingResponse, ProcessingStatus, FileInfoResponse
15
+ from api.services.processing_service import ProcessingService
16
+ from api.utils.logger import setup_logger
17
+ from api.config import get_settings
18
+
19
+ router = APIRouter()
20
+ logger = setup_logger(__name__)
21
+ settings = get_settings()
22
+
23
+ # Storage de tasks (em produção usar Redis ou Database)
24
+ processing_tasks = {}
25
+
26
+
27
@router.post("/process/upload", response_model=ProcessingResponse, status_code=202)
async def upload_and_process(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Arquivo JSONL com acórdãos"),
    llm_provider: str = Query("groq", description="Provedor LLM (groq/openai/anthropic)"),
    model_type: str = Query("balanced", description="Tipo de modelo (fast/balanced/quality)"),
    enable_parallel: bool = Query(True, description="Processar em paralelo"),
    max_workers: int = Query(3, ge=1, le=10, description="Workers paralelos"),
    save_to_db: bool = Query(False, description="Salvar resultados no banco")
):
    """
    **Upload de arquivo JSONL e início do processamento em background.**

    ## Fluxo:
    1. Upload do arquivo JSONL
    2. Validação do formato
    3. Criação de task de processamento
    4. Processamento em background (9 especialistas)
    5. Geração de arquivo TAR.GZ com resultados

    ## Formato JSONL esperado:
    ```json
    {"acordao_id": "001", "tribunal": "TJPR", "ementa": "...", "integra": "..."}
    {"acordao_id": "002", "tribunal": "TJSP", "ementa": "...", "integra": "..."}
    ```

    ## Response:
    - **task_id**: ID único para consultar status
    - **status**: Status inicial (pending)
    - Use `/process/status/{task_id}` para acompanhar
    - Use `/process/download/{task_id}` para baixar resultados
    """

    # Reject anything that does not look like a JSON/JSONL upload.
    if not file.filename.endswith(('.jsonl', '.json')):
        raise HTTPException(
            status_code=400,
            detail="Arquivo deve ser .jsonl ou .json"
        )

    # Enforce the configured size limit. NOTE(review): the whole file is
    # read into memory here; fine for the configured limit, but large
    # limits would warrant streaming.
    content = await file.read()
    size_mb = len(content) / (1024 * 1024)
    if size_mb > settings.MAX_UPLOAD_SIZE_MB:
        raise HTTPException(
            status_code=413,
            detail=f"Arquivo muito grande: {size_mb:.2f}MB (máx: {settings.MAX_UPLOAD_SIZE_MB}MB)"
        )

    # Unique id used for the task entry, the stored file and the result archive.
    task_id = f"task-{uuid.uuid4()}"

    upload_dir = Path(settings.UPLOAD_PATH)
    upload_dir.mkdir(parents=True, exist_ok=True)

    # Persist the upload so the background worker can read it later.
    file_path = upload_dir / f"{task_id}_{file.filename}"
    file_path.write_bytes(content)

    # Integrity fingerprint stored in the task metadata.
    file_hash = hashlib.sha256(content).hexdigest()

    # Validate every line up-front so the client gets an immediate 422
    # instead of a delayed background failure.
    # FIX: the previous split used a string literal broken across lines
    # (invalid syntax); splitlines() also copes with \r\n endings.
    total_records = 0
    try:
        for line_num, line in enumerate(content.decode('utf-8').splitlines(), 1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
                # Both fields are required by the specialist pipeline.
                if 'ementa' not in record or 'integra' not in record:
                    raise HTTPException(
                        status_code=422,
                        detail=f"Linha {line_num}: campos obrigatórios ausentes (ementa, integra)"
                    )
                total_records += 1
            except json.JSONDecodeError as e:
                raise HTTPException(
                    status_code=422,
                    detail=f"JSONL inválido na linha {line_num}: {str(e)}"
                )
    except UnicodeDecodeError:
        raise HTTPException(
            status_code=422,
            detail="Arquivo deve estar em UTF-8"
        )

    # Initial (pending) response returned to the client.
    response = ProcessingResponse(
        task_id=task_id,
        status=ProcessingStatus.PENDING,
        message=f"Processamento agendado para {total_records} registros",
        total_records=total_records,
        processed_records=0,
        failed_records=0,
        started_at=datetime.now()
    )

    # Register the task in the in-memory store (see module-level note:
    # production should use Redis or a database).
    processing_tasks[task_id] = response.dict()

    # Extra bookkeeping not exposed in the response model.
    processing_tasks[task_id]['metadata'] = {
        'filename': file.filename,
        'size_bytes': len(content),
        'hash_sha256': file_hash,
        'llm_provider': llm_provider,
        'model_type': model_type,
        'enable_parallel': enable_parallel,
        'max_workers': max_workers,
        'save_to_db': save_to_db
    }

    # Kick off the actual processing after the response is sent.
    background_tasks.add_task(
        process_acordaos_background,
        task_id=task_id,
        file_path=str(file_path),
        llm_provider=llm_provider,
        model_type=model_type,
        enable_parallel=enable_parallel,
        max_workers=max_workers,
        save_to_db=save_to_db
    )

    logger.info(f"✅ Task {task_id} criada - {total_records} registros - {size_mb:.2f}MB")

    return response
158
+
159
+
160
@router.get("/process/status/{task_id}", response_model=ProcessingResponse)
async def get_processing_status(task_id: str):
    """
    **Consulta status de processamento.**

    Retorna informações atualizadas sobre a task:
    - Status atual (pending/processing/completed/error)
    - Progresso (processados/total)
    - Tempo estimado restante
    - URL de download (quando concluído)
    """
    task_data = processing_tasks.get(task_id)
    if task_data is None:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    return ProcessingResponse(**task_data)
178
+
179
+
180
@router.get("/process/list")
async def list_all_tasks():
    """
    **Lista todas as tasks de processamento.**

    Útil para debug e monitoramento.
    """
    # Build a compact summary per task instead of dumping the full records.
    summaries = []
    for tid, data in processing_tasks.items():
        summaries.append({
            "task_id": tid,
            "status": data["status"],
            "progress": f"{data['processed_records']}/{data['total_records']}",
            "started_at": data.get("started_at")
        })

    return {"total": len(processing_tasks), "tasks": summaries}
199
+
200
+
201
@router.get("/process/download/{task_id}")
async def download_result(task_id: str):
    """
    **Download do arquivo TAR.GZ com resultados.**

    Disponível apenas quando status = "completed".

    ## Conteúdo do arquivo:
    - `{task_id}_results.json`: Resultados completos
    - Análises de cada especialista
    - Metadados do processamento
    - Logs e métricas
    """
    # Guard: unknown task.
    task = processing_tasks.get(task_id)
    if task is None:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    # Guard: results only exist once processing finished successfully.
    if task['status'] != ProcessingStatus.COMPLETED:
        raise HTTPException(
            status_code=400,
            detail=f"Processamento ainda não concluído. Status atual: {task['status']}"
        )

    # The background worker writes the archive under OUTPUT_PATH/archives.
    archive = Path(settings.OUTPUT_PATH) / "archives" / f"{task_id}.tar.gz"
    if not archive.exists():
        raise HTTPException(
            status_code=404,
            detail="Arquivo de resultado não encontrado"
        )

    logger.info(f"📦 Download iniciado: {task_id}")

    download_name = f"para_ai_resultado_{task_id}.tar.gz"
    return FileResponse(
        path=str(archive),
        filename=download_name,
        media_type="application/gzip",
        headers={
            "Content-Disposition": f"attachment; filename={download_name}"
        }
    )
247
+
248
+
249
@router.delete("/process/{task_id}")
async def delete_task(task_id: str):
    """
    **Deleta uma task e seus arquivos.**

    Útil para limpeza de tasks antigas.
    """
    if task_id not in processing_tasks:
        raise HTTPException(
            status_code=404,
            detail=f"Task '{task_id}' não encontrada"
        )

    upload_dir = Path(settings.UPLOAD_PATH)
    output_dir = Path(settings.OUTPUT_PATH)

    # Remove the stored upload(s) for this task.
    for path in upload_dir.glob(f"{task_id}_*"):
        path.unlink()

    # Remove result files at the top level of the output directory.
    for path in output_dir.glob(f"{task_id}*"):
        path.unlink()

    # FIX: the result TAR.GZ is written to OUTPUT_PATH/archives (see the
    # download endpoint), so clean that subdirectory too — previously the
    # archive was orphaned on delete.
    archives_dir = output_dir / "archives"
    if archives_dir.exists():
        for path in archives_dir.glob(f"{task_id}*"):
            path.unlink()

    # Drop the in-memory task entry.
    del processing_tasks[task_id]

    logger.info(f"🗑️ Task deletada: {task_id}")

    return {"message": f"Task {task_id} deletada com sucesso"}
278
+
279
+
280
+ # ============================================================================
281
+ # FUNÇÃO DE BACKGROUND
282
+ # ============================================================================
283
+
284
async def process_acordaos_background(
    task_id: str,
    file_path: str,
    llm_provider: str,
    model_type: str,
    enable_parallel: bool,
    max_workers: int,
    save_to_db: bool
):
    """
    Background worker for one uploaded JSONL file.

    Runs the processing service and mutates the shared
    `processing_tasks[task_id]` entry as the work progresses.
    """
    try:
        # Flag the task as in-progress (the entry dict is shared with
        # the status endpoints, so updates are visible immediately).
        entry = processing_tasks[task_id]
        entry['status'] = ProcessingStatus.PROCESSING
        entry['message'] = "Processamento em andamento..."

        logger.info(f"🚀 Iniciando processamento background: {task_id}")

        # Delegate the heavy lifting to the processing service.
        service = ProcessingService()
        result = await service.process_jsonl_file(
            file_path=file_path,
            task_id=task_id,
            llm_provider=llm_provider,
            model_type=model_type,
            enable_parallel=enable_parallel,
            max_workers=max_workers
        )

        # Success: record counters, completion time and the download URL.
        entry.update({
            'status': ProcessingStatus.COMPLETED,
            'message': f"Processamento concluído com sucesso em {result['elapsed_seconds']:.2f}s",
            'processed_records': result['processed'],
            'failed_records': result['failed'],
            'completed_at': datetime.now(),
            'download_url': f"/api/v1/process/download/{task_id}",
            'result_metadata': {
                'archive_path': result['archive_path'],
                'hash_sha256': result['hash'],
                'elapsed_seconds': result['elapsed_seconds']
            }
        })

        logger.info(f"✅ Task {task_id} concluída - {result['processed']} processados, {result['failed']} falhas")

    except Exception as e:
        # Failure: surface the error message through the status endpoint.
        logger.error(f"❌ Erro na task {task_id}: {str(e)}", exc_info=True)
        processing_tasks[task_id].update({
            'status': ProcessingStatus.ERROR,
            'message': f"Erro no processamento: {str(e)}",
            'completed_at': datetime.now(),
            'errors': [str(e)]
        })