Spaces:

caarleexx
/

PARA.AI

Runtime error

App Files Community

Carlex22 committed 30 days ago

Commit

3233c37

1 Parent(s): e895ba5

ParaAIV3.0

Browse files

Files changed (23) hide show

api/config.py +0 -58
api/core/__init__.py +0 -8
api/core/file_handler.py +0 -35
api/core/init.py +0 -6
api/core/processor_manager.py +0 -82
api/core/tar_genetator.py +0 -34
api/env.py +0 -52
api/main.py +206 -33
api/routes/__init__.py +3 -3
api/routes/debug.py +37 -0
api/routes/debug_routes.py +0 -83
api/routes/download.py +0 -30
api/routes/health.py +85 -28
api/routes/process.py +0 -98
api/routes/process_routes.py +0 -135
api/routes/processing.py +343 -0
api/routes/status_routes.py +0 -44
api/schemas.py +0 -31
api/utils/__init__.py +0 -1
api/utils/logger.py +0 -20
api/utils/validators.py +0 -16
code_copy.py +0 -71
copy_code.py +91 -0

api/config.py DELETED Viewed

@@ -1,58 +0,0 @@
-"""
-API Configuration
-Environment variables and settings
-"""
-from pydantic_settings import BaseSettings
-from pydantic import Field
-from typing import Optional
-from pathlib import Path
-import os
-class Settings(BaseSettings):
-    """API Configuration Settings"""
-    APP_NAME: str = "para.AI API"
-    APP_VERSION: str = "3.0.0"
-    DEBUG: bool = os.getenv("DEBUG", "true").lower() == "true"
-    API_HOST: str = os.getenv("API_HOST", "0.0.0.0")
-    API_PORT: int = int(os.getenv("API_PORT", "7860"))
-    # Paths
-    BASE_DIR: Path = Path(__file__).parent
-    STORAGE_DIR: Path = BASE_DIR / "storage"
-    TEMP_DIR: Path = STORAGE_DIR / "temp"
-    OUTPUT_DIR: Path = STORAGE_DIR / "output"
-    DOWNLOADS_DIR: Path = STORAGE_DIR / "downloads"
-    DATABASE_URL: str = os.getenv("DATABASE_URL", "postgresql://user:pass@localhost:5432/paraai")
-    REDIS_URL: str = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-    LLM_MODEL: str = os.getenv("LLM_MODEL", "gemini-1.5-pro-001")
-    LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "1.3"))
-    LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "14000"))
-    MAX_WORKERS: int = int(os.getenv("MAX_WORKERS", "10"))
-    BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "100"))
-    CORS_ORIGINS: list[str] = [
-        "http://localhost:7860",
-        "https://*.hf.space",
-        "*"
-    ]
-    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "DEBUG")
-    model_config = {
-        "env_file": ".env",
-        "case_sensitive": True
-    }
-settings = Settings()
-# Criar diretórios de storage
-for directory in [settings.TEMP_DIR, settings.OUTPUT_DIR, settings.DOWNLOADS_DIR]:
-    directory.mkdir(parents=True, exist_ok=True)

api/core/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-"""Core processing modules"""
-"""Core processing modules"""
-from api.core.processor_manager import ProcessorManager
-from api.core.file_handler import FileHandler
-from api.core.tar_generator import TARGenerator
-__all__ = ['ProcessorManager', 'FileHandler', 'TARGenerator']

api/core/file_handler.py DELETED Viewed

@@ -1,35 +0,0 @@
-"""Manipulação de arquivos"""
-from pathlib import Path
-from api.config import settings
-import json
-from datetime import datetime
-class FileHandler:
-    """Gerenciador de arquivos"""
-    def save_temp_jsonl(self, batch_id: str, content: bytes) -> Path:
-        """Salva JSONL temporário"""
-        file_path = settings.TEMP_DIR / f"{batch_id}.jsonl"
-        with open(file_path, 'wb') as f:
-            f.write(content)
-        return file_path
-    def save_processed_results(self, batch_id: str, results: list) -> Path:
-        """Salva resultados processados"""
-        output_path = settings.OUTPUT_DIR / f"{batch_id}_processed.jsonl"
-        with open(output_path, 'w', encoding='utf-8') as f:
-            for result in results:
-                f.write(json.dumps(result, ensure_ascii=False) + '
-')
-        return output_path
-    def cleanup_temp(self, batch_id: str):
-        """Limpa arquivos temporários"""
-        temp_file = settings.TEMP_DIR / f"{batch_id}.jsonl"
-        if temp_file.exists():
-            temp_file.unlink()
-    def get_temp_file(self, batch_id: str) -> Path:
-        """Retorna caminho do arquivo temporário"""
-        return settings.TEMP_DIR / f"{batch_id}.jsonl"

api/core/init.py DELETED Viewed

@@ -1,6 +0,0 @@
-"""Core processing modules"""
-from api.core.processor_manager import ProcessorManager
-from api.core.file_handler import FileHandler
-from api.core.tar_generator import TARGenerator
-__all__ = ['ProcessorManager', 'FileHandler', 'TARGenerator']

api/core/processor_manager.py DELETED Viewed

@@ -1,82 +0,0 @@
-"""Orquestrador de 9 especialistas"""
-import asyncio
-from typing import Dict, List, Any, Optional
-from api.processors.metadados import ProcessorMetadados
-from api.processors.relatorio import ProcessorRelatorio
-from api.processors.fundamentacao import ProcessorFundamentacao
-from api.processors.decisao import ProcessorDecisao
-from api.processors.auditoria import ProcessorAuditoria
-from api.processors.arquivo_relacional import ProcessorArquivoRelacional
-from api.processors.segmentacao import ProcessorSegmentacao
-from api.processors.contexto import ProcessorContexto
-from api.processors.transcricao import ProcessorTranscricao
-import logging
-logger = logging.getLogger("para_ai")
-class ProcessorManager:
-    """Gerenciador dos 9 especialistas"""
-    def __init__(self):
-        self.specialists = {
-            1: ProcessorMetadados(),
-            2: ProcessorRelatorio(),
-            3: ProcessorFundamentacao(),
-            4: ProcessorDecisao(),
-            5: ProcessorAuditoria(),
-            6: ProcessorArquivoRelacional(),
-            7: ProcessorSegmentacao(),
-            8: ProcessorContexto(),
-            9: ProcessorTranscricao(),
-        }
-        logger.info("✅ ProcessorManager inicializado com 9 especialistas")
-    async def process_acordao_sequential(
-        self,
-        acordao_data: Dict[str, Any],
-        specialist_ids: List[int]
-    ) -> Dict[str, Any]:
-        """Processa sequencialmente"""
-        results = {}
-        for spec_id in specialist_ids:
-            specialist = self.specialists[spec_id]
-            result = specialist.process(acordao_data)
-            results[f"specialist_{spec_id}"] = result
-        return {"status": "completed", "results": results}
-    async def process_acordao_parallel(
-        self,
-        acordao_data: Dict[str, Any],
-        specialist_ids: List[int]
-    ) -> Dict[str, Any]:
-        """Processa em paralelo"""
-        tasks = []
-        for spec_id in specialist_ids:
-            specialist = self.specialists[spec_id]
-            task = asyncio.to_thread(specialist.process, acordao_data)
-            tasks.append(task)
-        results_list = await asyncio.gather(*tasks)
-        results = {}
-        for spec_id, result in zip(specialist_ids, results_list):
-            results[f"specialist_{spec_id}"] = result
-        return {"status": "completed", "results": results}
-    def get_specialist(self, spec_id: int):
-        """Retorna especialista específico"""
-        return self.specialists.get(spec_id)
-    def get_specialists_info(self) -> List[Dict]:
-        """Info de todos especialistas"""
-        return [
-            {
-                "id": sid,
-                "name": s.specialist_name,
-                "description": s.__class__.__doc__
-            }
-            for sid, s in self.specialists.items()
-        ]

api/core/tar_genetator.py DELETED Viewed

@@ -1,34 +0,0 @@
-"""Gerador de arquivos TAR.GZ"""
-import tarfile
-from pathlib import Path
-from api.config import settings
-from datetime import datetime
-class TARGenerator:
-    """Gerador de TAR.GZ para download"""
-    def create_tar_gz(self, batch_id: str, source_file: Path) -> Path:
-        """Cria TAR.GZ com resultados"""
-        tar_path = settings.DOWNLOADS_DIR / f"{batch_id}.tar.gz"
-        with tarfile.open(tar_path, 'w:gz') as tar:
-            tar.add(source_file, arcname=source_file.name)
-        return tar_path
-    def get_tar_path(self, batch_id: str) -> Path:
-        """Retorna caminho do TAR.GZ"""
-        return settings.DOWNLOADS_DIR / f"{batch_id}.tar.gz"
-    def list_available_downloads(self) -> list:
-        """Lista downloads disponíveis"""
-        downloads = []
-        for tar_file in settings.DOWNLOADS_DIR.glob('*.tar.gz'):
-            stat = tar_file.stat()
-            downloads.append({
-                'batch_id': tar_file.stem,
-                'filename': tar_file.name,
-                'size_bytes': stat.st_size,
-                'created_at': datetime.fromtimestamp(stat.st_ctime).isoformat()
-            })
-        return downloads

api/env.py DELETED Viewed

@@ -1,52 +0,0 @@
-from logging.config import fileConfig
-from sqlalchemy import engine_from_config
-from sqlalchemy import pool
-from alembic import context
-import os
-from database.models import Base
-config = context.config
-if config.config_file_name is not None:
-    fileConfig(config.config_file_name)
-target_metadata = Base.metadata
-def run_migrations_offline() -> None:
-    sqlalchemy_url = os.getenv('DATABASE_URL', 'postgresql://user:password@localhost:5432/para_ai')
-    context.configure(
-        url=sqlalchemy_url,
-        target_metadata=target_metadata,
-        literal_binds=True,
-        dialect_opts={"paramstyle": "named"},
-    )
-    with context.begin_transaction():
-        context.run_migrations()
-def run_migrations_online() -> None:
-    configuration = config.get_section(config.config_ini_section)
-    configuration["sqlalchemy.url"] = os.getenv('DATABASE_URL', 'postgresql://user:password@localhost:5432/para_ai')
-    connectable = engine_from_config(
-        configuration,
-        prefix="sqlalchemy.",
-        poolclass=pool.NullPool,
-    )
-    with connectable.connect() as connection:
-        context.configure(
-            connection=connection, target_metadata=target_metadata
-        )
-        with context.begin_transaction():
-            context.run_migrations()
-if context.is_offline_mode():
-    run_migrations_offline()
-else:
-    run_migrations_online()

api/main.py CHANGED Viewed

@@ -1,29 +1,89 @@
 """
-FastAPI Application Principal
 """
-from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from api.config import settings
-from api.routes import health_routes, debug_routes, status_routes
-from api.routes import test_routes, process_routes
-import logging
-# Setup logging
-logging.basicConfig(
-    level=settings.LOG_LEVEL,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger("para_ai")
-# Criar app
 app = FastAPI(
     title=settings.APP_NAME,
     version=settings.APP_VERSION,
-    description="Sistema para.AI - 9 Especialistas LLM para análise jurisprudencial",
-    docs_url="/docs" if settings.DEBUG else None
 )
-# CORS
 app.add_middleware(
     CORSMiddleware,
     allow_origins=settings.CORS_ORIGINS,
@@ -31,28 +91,141 @@ app.add_middleware(
     allow_methods=["*"],
     allow_headers=["*"],
 )
-# Rotas
-app.include_router(health_routes.router, prefix="/health", tags=["Health"])
-app.include_router(test_routes.router, prefix="/test", tags=["Tests"])
-app.include_router(process_routes.router, prefix="/process", tags=["Process"])
-app.include_router(status_routes.router, prefix="/status", tags=["Status"])
-if settings.DEBUG:
-    app.include_router(debug_routes.router, prefix="/debug", tags=["Debug"])
-@app.get("/")
 async def root():
     return {
-        "app": settings.APP_NAME,
         "version": settings.APP_VERSION,
-        "status": "running",
         "endpoints": {
-            "docs": "/docs",
-            "health": "/health",
-            "test_specialists": "/test/specialists",
-            "process_jsonl": "/process/jsonl",
-            "download": "/download/{batch_id}"
         }
-    }

 """
+FastAPI Application - para.AI v3.0
+Endpoint principal para processamento de acórdãos jurisprudenciais
 """
+from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.gzip import GZipMiddleware
+from fastapi.responses import JSONResponse
+from contextlib import asynccontextmanager
+import time
+from datetime import datetime
+from api.config import get_settings
+from api.utils.logger import api_logger
+from api.utils.exceptions import ParaAIException
+settings = get_settings()
+# Variável global para tracking de uptime
+app_start_time = datetime.now()
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifecycle events - startup e shutdown."""
+    # Startup
+    api_logger.info("=" * 70)
+    api_logger.info(f"🚀 Starting {settings.APP_NAME} v{settings.APP_VERSION}")
+    api_logger.info(f"📍 Environment: {settings.APP_ENV}")
+    api_logger.info(f"🐛 Debug mode: {settings.DEBUG}")
+    api_logger.info(f"🗄️  Database: {settings.DATABASE_URL.split('@')[-1] if '@' in settings.DATABASE_URL else 'N/A'}")
+    api_logger.info(f"📂 Files path: {settings.FILES_BASE_PATH}")
+    api_logger.info(f"🤖 LLM Providers:")
+    api_logger.info(f"   - Groq: {'✅' if settings.GROQ_API_KEY else '❌'}")
+    api_logger.info(f"   - OpenAI: {'✅' if settings.OPENAI_API_KEY else '❌'}")
+    api_logger.info(f"   - Anthropic: {'✅' if settings.ANTHROPIC_API_KEY else '❌'}")
+    api_logger.info("=" * 70)
+    yield
+    # Shutdown
+    api_logger.info("🛑 Shutting down para.AI API")
 app = FastAPI(
     title=settings.APP_NAME,
     version=settings.APP_VERSION,
+    description="""
+    # para.AI - Análise Jurisprudencial com IA
+    Sistema completo para processamento automatizado de acórdãos com 9 especialistas IA.
+    ## Funcionalidades Principais
+    * 📤 **Upload JSONL**: Envie lotes de acórdãos para processamento
+    * 🤖 **9 Especialistas**: Análise por múltiplos processadores especializados
+    * 📦 **Download TAR.GZ**: Receba resultados compactados
+    * 🔍 **Debug Completo**: Teste cada componente isoladamente
+    * 📊 **Métricas**: Acompanhe performance e custos
+    ## Endpoints Principais
+    * `/api/v1/process/upload` - Upload e processamento
+    * `/api/v1/process/status/{task_id}` - Status da tarefa
+    * `/api/v1/process/download/{task_id}` - Download de resultados
+    * `/api/v1/health` - Health check
+    ## Debug e Testes
+    * `/api/v1/debug/*` - Informações de sistema
+    * `/api/v1/test/llm/*` - Testar LLMs
+    * `/api/v1/test/processors/*` - Testar processadores
+    * `/api/v1/test/database/*` - Testar banco de dados
+    * `/api/v1/test/files/*` - Testar gestão de arquivos
+    """,
+    docs_url="/api/docs",
+    redoc_url="/api/redoc",
+    openapi_url="/api/openapi.json",
+    lifespan=lifespan,
+    swagger_ui_parameters={"defaultModelsExpandDepth": -1}
 )
+# ============================================================================
+# MIDDLEWARES
+# ============================================================================
 app.add_middleware(
     CORSMiddleware,
     allow_origins=settings.CORS_ORIGINS,
     allow_methods=["*"],
     allow_headers=["*"],
 )
+app.add_middleware(GZipMiddleware, minimum_size=1000)
+@app.middleware("http")
+async def log_requests(request: Request, call_next):
+    """Middleware para logging de todas as requisições."""
+    start_time = time.time()
+    # Log request
+    api_logger.info(f"➡️  {request.method} {request.url.path}")
+    # Process
+    response = await call_next(request)
+    # Log response
+    process_time = (time.time() - start_time) * 1000
+    api_logger.info(
+        f"⬅️  {request.method} {request.url.path} - "
+        f"Status: {response.status_code} - "
+        f"Time: {process_time:.2f}ms"
+    )
+    # Add headers
+    response.headers["X-Process-Time"] = f"{process_time:.2f}"
+    response.headers["X-API-Version"] = settings.APP_VERSION
+    return response
+@app.middleware("http")
+async def add_security_headers(request: Request, call_next):
+    """Middleware para adicionar headers de segurança."""
+    response = await call_next(request)
+    response.headers["X-Content-Type-Options"] = "nosniff"
+    response.headers["X-Frame-Options"] = "DENY"
+    response.headers["X-XSS-Protection"] = "1; mode=block"
+    return response
+# ============================================================================
+# EXCEPTION HANDLERS
+# ============================================================================
+@app.exception_handler(ParaAIException)
+async def para_ai_exception_handler(request: Request, exc: ParaAIException):
+    """Handler para exceções customizadas do para.AI."""
+    api_logger.error(f"ParaAIException: {exc.message} - Details: {exc.details}")
+    return JSONResponse(
+        status_code=500,
+        content={
+            "error": exc.message,
+            "details": exc.details,
+            "type": exc.__class__.__name__,
+            "timestamp": datetime.now().isoformat()
+        }
+    )
+@app.exception_handler(Exception)
+async def global_exception_handler(request: Request, exc: Exception):
+    """Handler global para exceções não tratadas."""
+    api_logger.error(f"Unhandled exception: {str(exc)}", exc_info=True)
+    return JSONResponse(
+        status_code=500,
+        content={
+            "error": "Internal server error",
+            "detail": str(exc) if settings.DEBUG else "An unexpected error occurred",
+            "timestamp": datetime.now().isoformat()
+        }
+    )
+# ============================================================================
+# ROUTERS
+# ============================================================================
+# Import routers (fazemos lazy import para evitar dependências circulares)
+from api.routers import health, processing, debug, llm, database, files, processors
+app.include_router(health.router, prefix="/api/v1", tags=["🏥 Health"])
+app.include_router(processing.router, prefix="/api/v1", tags=["🚀 Processing"])
+app.include_router(debug.router, prefix="/api/v1/debug", tags=["🐛 Debug"])
+app.include_router(llm.router, prefix="/api/v1/test/llm", tags=["🤖 LLM Testing"])
+app.include_router(database.router, prefix="/api/v1/test/database", tags=["🗄️ Database Testing"])
+app.include_router(files.router, prefix="/api/v1/test/files", tags=["📁 Files Testing"])
+app.include_router(processors.router, prefix="/api/v1/test/processors", tags=["⚙️ Processors Testing"])
+# ============================================================================
+# ROOT ENDPOINTS
+# ============================================================================
+@app.get("/", include_in_schema=False)
 async def root():
+    """Root endpoint - informações básicas da API."""
+    uptime = (datetime.now() - app_start_time).total_seconds()
     return {
+        "name": settings.APP_NAME,
         "version": settings.APP_VERSION,
+        "environment": settings.APP_ENV,
+        "status": "online",
+        "uptime_seconds": uptime,
+        "docs": "/api/docs",
+        "health": "/api/v1/health",
+        "timestamp": datetime.now().isoformat()
+    }
+@app.get("/api", include_in_schema=False)
+async def api_root():
+    """API root - redireciona para docs."""
+    return {
+        "message": "para.AI API v3.0 - Acesse /api/docs para documentação completa",
+        "docs": "/api/docs",
         "endpoints": {
+            "health": "/api/v1/health",
+            "upload": "/api/v1/process/upload",
+            "debug": "/api/v1/debug/info"
         }
+    }
+# ============================================================================
+# MAIN (para execução direta)
+# ============================================================================
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "api.main:app",
+        host=settings.HOST,
+        port=settings.PORT,
+        reload=settings.DEBUG,
+        workers=1 if settings.DEBUG else settings.WORKERS,
+        log_level=settings.LOG_LEVEL.lower(),
+        access_log=True
+    )

api/routes/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Routes package"""
-from api.routes import health, test, process, download
-__all__ = ['health', 'test', 'process', 'download']

+"""Routers package"""
+from . import health, processing, debug, llm, database, files, processors
+__all__ = ["health", "processing", "debug", "llm", "database", "files", "processors"]

api/routes/debug.py ADDED Viewed

@@ -0,0 +1,37 @@

+"""Router de debug - informações de sistema e tasks"""
+from fastapi import APIRouter
+from api.config import get_settings
+from api.utils.logger import setup_logger
+import sys, os
+router = APIRouter()
+logger = setup_logger(__name__)
+settings = get_settings()
+@router.get("/info")
+async def debug_info():
+    """Informações completas do sistema"""
+    return {
+        "python_version": sys.version,
+        "environment": settings.APP_ENV,
+        "debug_mode": settings.DEBUG,
+        "paths": {
+            "upload": settings.UPLOAD_PATH,
+            "output": settings.OUTPUT_PATH,
+            "files": settings.FILES_BASE_PATH
+        },
+        "llm_config": {
+            "default_provider": settings.DEFAULT_LLM_PROVIDER,
+            "providers_available": {
+                "groq": bool(settings.GROQ_API_KEY),
+                "openai": bool(settings.OPENAI_API_KEY),
+                "anthropic": bool(settings.ANTHROPIC_API_KEY)
+            }
+        }
+    }
+@router.get("/tasks")
+async def list_tasks():
+    """Lista todas as tasks"""
+    from api.routers.processing import processing_tasks
+    return {"total": len(processing_tasks), "tasks": list(processing_tasks.keys())}

api/routes/debug_routes.py DELETED Viewed

@@ -1,83 +0,0 @@
-#!/usr/bin/env python3
-"""
-ParaAi - Debug Routes
-GET /debug/* - Endpoints de debug para testes
-"""
-from fastapi import APIRouter, HTTPException, Query
-from datetime import datetime
-def create_debug_router(llm_client, db_manager, context_engine, files_manager, workers):
-    """Factory para criar router de debug"""
-    router = APIRouter()
-    @router.get("/health")
-    async def debug_health() -> Dict:
-        """Health check detalhado"""
-        return {
-            "status": "healthy",
-            "llm_client": "OK" if llm_client else "FAIL",
-            "db_manager": "OK" if db_manager else "FAIL",
-            "context_engine": "OK" if context_engine else "FAIL",
-            "files_manager": "OK" if files_manager else "FAIL",
-            "workers_count": len(workers),
-            "timestamp": datetime.now().isoformat()
-        }
-    @router.get("/stats")
-    async def debug_stats() -> Dict:
-        """Estatísticas globais de todos os componentes"""
-        return {
-            "llm_client": llm_client.get_stats() if llm_client else {},
-            "context_engine": context_engine.obter_stats() if context_engine else {},
-            "files_manager": files_manager.obter_stats() if files_manager else {},
-            "workers_count": len(workers)
-        }
-    @router.get("/llm-config")
-    async def debug_llm_config() -> Dict:
-        """Retorna configuração de especialistas LLM"""
-        if not llm_client:
-            raise HTTPException(status_code=503, detail="LLM Client não inicializado")
-        return {
-            "especialistas": llm_client.obter_config_especialistas()
-        }
-    @router.get("/test-llm-api")
-    async def debug_test_llm_api(
-        especialista: str = "metadados",
-        systemprompt: str = "Você é um assistente útil."
-    ) -> Dict:
-        """Testa chamada de API LLM"""
-        try:
-            resultado = llm_client.processar_requisicao(
-                especialista=especialista,
-                systemprompt=systemprompt,
-                userprompt="Qual é a capital do Brasil?",
-                cache=False
-            )
-            return resultado
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    @router.get("/test-especialista/{especialista}")
-    async def debug_test_especialista(especialista: str) -> Dict:
-        """Testa um especialista específico"""
-        try:
-            if especialista not in llm_client.especialistas:
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Especialista {especialista} não encontrado"
-                )
-            resultado = llm_client.processar_requisicao(
-                especialista=especialista,
-                systemprompt=f"Você é um especialista em {especialista}.",
-                userprompt="Analise este texto jurídico simples.",
-                cache=False
-            )
-            return resultado
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    return router

api/routes/download.py DELETED Viewed

@@ -1,30 +0,0 @@
-"""Endpoints de download"""
-from fastapi import APIRouter, HTTPException
-from fastapi.responses import FileResponse
-from api.core.tar_generator import TARGenerator
-from pathlib import Path
-router = APIRouter()
-tar_gen = TARGenerator()
-@router.get("/{batch_id}")
-async def download_results(batch_id: str):
-    """Download do TAR.GZ processado"""
-    tar_path = tar_gen.get_tar_path(batch_id)
-    if not tar_path.exists():
-        raise HTTPException(
-            status_code=404,
-            detail=f"Arquivo não encontrado para batch_id: {batch_id}"
-        )
-    return FileResponse(
-        path=str(tar_path),
-        filename=tar_path.name,
-        media_type='application/gzip'
-    )
-@router.get("/list/all")
-async def list_downloads():
-    """Lista todos os downloads disponíveis"""
-    return tar_gen.list_available_downloads()

api/routes/health.py CHANGED Viewed

@@ -1,35 +1,92 @@
-"""Health check endpoints"""
 from fastapi import APIRouter
 from datetime import datetime
-from api.config import settings
 router = APIRouter()
-@router.get("/")
-async def health_check():
-    """Health check básico"""
-    return {
-        "status": "healthy",
-        "timestamp": datetime.now().isoformat(),
-        "version": settings.APP_VERSION,
-        "app": settings.APP_NAME
-    }
-@router.get("/detailed")
-async def detailed_health():
-    """Health check detalhado"""
-    return {
-        "status": "healthy",
-        "timestamp": datetime.now().isoformat(),
-        "version": settings.APP_VERSION,
-        "config": {
-            "max_workers": settings.MAX_WORKERS,
-            "batch_size": settings.BATCH_SIZE,
-            "debug": settings.DEBUG
-        },
-        "storage": {
-            "temp_dir": str(settings.TEMP_DIR),
-            "output_dir": str(settings.OUTPUT_DIR),
-            "downloads_dir": str(settings.DOWNLOADS_DIR)
         }
-    }

+"""
+Router de health check e status do sistema
+"""
 from fastapi import APIRouter
 from datetime import datetime
+import psutil
+import os
+from api.models.responses import HealthResponse
+from api.config import get_settings
+from api.utils.logger import setup_logger
 router = APIRouter()
+logger = setup_logger(__name__)
+settings = get_settings()
+app_start_time = datetime.now()
+@router.get("/health", response_model=HealthResponse)
+async def health_check():
+    """
+    **Health check completo do sistema.**
+    Verifica:
+    - ✅ Database connectivity
+    - ✅ LLM providers availability
+    - ✅ File system access
+    - ✅ System metrics
+    """
+    services = {}
+    # Check Database
+    try:
+        from database.db_manager import get_db_manager
+        db = get_db_manager()
+        services['database'] = db.health_check()
+    except Exception as e:
+        logger.error(f"DB health check failed: {e}")
+        services['database'] = False
+    # Check LLM Providers
+    try:
+        services['llm_groq'] = bool(settings.GROQ_API_KEY)
+        services['llm_openai'] = bool(settings.OPENAI_API_KEY)
+        services['llm_anthropic'] = bool(settings.ANTHROPIC_API_KEY)
+    except Exception as e:
+        logger.error(f"LLM health check failed: {e}")
+        services.update({
+            'llm_groq': False,
+            'llm_openai': False,
+            'llm_anthropic': False
+        })
+    # Check Files
+    from pathlib import Path
+    services['files_upload'] = Path(settings.UPLOAD_PATH).exists()
+    services['files_output'] = Path(settings.OUTPUT_PATH).exists()
+    services['files_base'] = Path(settings.FILES_BASE_PATH).exists()
+    # System metrics
+    try:
+        metrics = {
+            'cpu_percent': psutil.cpu_percent(),
+            'memory_percent': psutil.virtual_memory().percent,
+            'disk_percent': psutil.disk_usage('/').percent,
+            'process_count': len(psutil.pids())
         }
+    except:
+        metrics = {}
+    # Calcular uptime
+    uptime = (datetime.now() - app_start_time).total_seconds()
+    # Status geral
+    status = "healthy" if all(services.values()) else "degraded"
+    return HealthResponse(
+        status=status,
+        version=settings.APP_VERSION,
+        environment=settings.APP_ENV,
+        timestamp=datetime.now(),
+        uptime_seconds=uptime,
+        services=services,
+        metrics=metrics
+    )
+@router.get("/ping")
+async def ping():
+    """Simple ping endpoint."""
+    return {"status": "pong", "timestamp": datetime.now().isoformat()}

api/routes/process.py DELETED Viewed

@@ -1,98 +0,0 @@
-"""Endpoints de processamento JSONL"""
-from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException
-from api.api.schemas import ProcessResponse
-from api.core.processor_manager import ProcessorManager
-from api.core.file_handler import FileHandler
-from api.core.tar_generator import TARGenerator
-from datetime import datetime
-import json
-import uuid
-router = APIRouter()
-manager = ProcessorManager()
-file_handler = FileHandler()
-tar_gen = TARGenerator()
-@router.post("/jsonl", response_model=ProcessResponse)
-async def process_jsonl(
-    file: UploadFile = File(...),
-    background_tasks: BackgroundTasks = None,
-    parallel: bool = True
-):
-    """
-    Processa arquivo JSONL e gera TAR.GZ
-    Args:
-        file: Arquivo JSONL com acórdãos
-        parallel: Processar em paralelo?
-    Returns:
-        Status do processamento com batch_id
-    """
-    if not file.filename.endswith('.jsonl'):
-        raise HTTPException(status_code=400, detail="Arquivo deve ser .jsonl")
-    # Gerar batch_id
-    batch_id = f"batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
-    # Ler arquivo
-    content = await file.read()
-    lines = content.decode('utf-8').split('\n')
-    lines = [l.strip() for l in lines if l.strip()]
-    total_records = len(lines)
-    # Salvar arquivo temporário
-    temp_file = file_handler.save_temp_jsonl(batch_id, content)
-    # Processar em background
-    background_tasks.add_task(
-        process_batch_task,
-        batch_id=batch_id,
-        temp_file=temp_file,
-        total_records=total_records,
-        parallel=parallel
-    )
-    return ProcessResponse(
-        batch_id=batch_id,
-        status="processing",
-        total_records=total_records,
-        timestamp=datetime.now().isoformat(),
-        message=f"Processamento iniciado. Use /download/{batch_id} após conclusão."
-    )
-async def process_batch_task(batch_id: str, temp_file: str, total_records: int, parallel: bool):
-    """Task de processamento em background"""
-    import logging
-    logger = logging.getLogger("para_ai")
-    logger.info(f"🚀 Iniciando processamento batch {batch_id} ({total_records} registros)")
-    try:
-        # Ler registros
-        with open(temp_file, 'r', encoding='utf-8') as f:
-            registros = [json.loads(line) for line in f if line.strip()]
-        # Processar cada registro
-        resultados = []
-        for idx, registro in enumerate(registros):
-            logger.info(f"📄 Processando {idx+1}/{total_records}")
-            if parallel:
-                resultado = await manager.process_acordao_parallel(registro, [1,2,3,4,5,6,7,8,9])
-            else:
-                resultado = await manager.process_acordao_sequential(registro, [1,2,3,4,5,6,7,8,9])
-            resultados.append(resultado)
-        # Salvar resultados processados
-        output_file = file_handler.save_processed_results(batch_id, resultados)
-        # Gerar TAR.GZ
-        tar_path = tar_gen.create_tar_gz(batch_id, output_file)
-        logger.info(f"✅ Batch {batch_id} concluído: {tar_path}")
-    except Exception as e:
-        logger.error(f"❌ Erro no batch {batch_id}: {e}", exc_info=True)

api/routes/process_routes.py DELETED Viewed

@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-"""
-ParaAi - Process Routes
-POST /process - Inicia processamento de lote
-"""
-import json
-import logging
-from datetime import datetime
-from typing import Dict, List, Optional
-from pathlib import Path
-from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, File
-from pydantic import BaseModel
-logger = logging.getLogger(__name__)
-class ProcessRequest(BaseModel):
-    """Request body accepted by the ``/start`` endpoint."""
-    # Path of the JSONL file; the handler checks that it exists on the server
-    jsonl_path: str
-    # Optional batch identifier; the handler generates a timestamped one when omitted
-    batch_id: Optional[str] = None
-    # Used as the modulus for round-robin worker selection in process_batch_task
-    num_workers: int = 10
-    # Forwarded to process_batch_task, which does not read it in the visible code
-    enable_cache: bool = True
-class ProcessResponse(BaseModel):
-    """Response schema carrying status, batch id and timestamp.
-
-    NOTE(review): the ``/start`` handler in this file returns a plain dict and
-    does not reference this model in the visible code.
-    """
-    status: str
-    batch_id: str
-    timestamp: str
-def create_process_router(llm_client, db_manager, context_engine, files_manager, workers):
-    """Factory that builds the processing router.
-
-    The dependencies received here are captured by the ``/start`` handler and
-    forwarded unchanged to ``process_batch_task``.
-    """
-    router = APIRouter()
-    @router.post("/start")
-    async def process_start(
-        request: ProcessRequest,
-        background_tasks: BackgroundTasks
-    ) -> Dict:
-        """
-        Start processing a batch of court rulings (acórdãos)
-        Args:
-            jsonl_path: Path to the JSONL file
-            batch_id: Unique batch ID (generated when not provided)
-            num_workers: Number of parallel workers
-            enable_cache: Whether the LLM cache should be used
-        Returns:
-            {batch_id, status, timestamp, total_records}
-        """
-        try:
-            # Validate the path
-            jsonl_file = Path(request.jsonl_path)
-            if not jsonl_file.exists():
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Arquivo JSONL não encontrado: {request.jsonl_path}"
-                )
-            # Generate the batch_id (second-resolution timestamp when none is supplied)
-            batch_id = request.batch_id or f"batch_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-            # Count records: every line is counted, blank ones included, and the
-            # file handle is not closed explicitly
-            total_records = sum(1 for _ in open(jsonl_file))
-            logger.info(f"🚀 Iniciando processamento: batch_id={batch_id}, registros={total_records}")
-            # Schedule the processing in the background
-            background_tasks.add_task(
-                process_batch_task,
-                batch_id=batch_id,
-                jsonl_path=str(jsonl_file),
-                num_workers=request.num_workers,
-                enable_cache=request.enable_cache,
-                llm_client=llm_client,
-                db_manager=db_manager,
-                context_engine=context_engine,
-                files_manager=files_manager,
-                workers=workers
-            )
-            return {
-                "status": "accepted",
-                "batch_id": batch_id,
-                "total_records": total_records,
-                "timestamp": datetime.now().isoformat()
-            }
-        # NOTE(review): the 404 HTTPException raised above is also caught here and
-        # re-raised as a 500 — presumably unintended
-        except Exception as e:
-            logger.error(f"❌ Erro ao iniciar processamento: {e}")
-            raise HTTPException(status_code=500, detail=str(e))
-    return router
-async def process_batch_task(
-    batch_id: str,
-    jsonl_path: str,
-    num_workers: int,
-    enable_cache: bool,
-    llm_client,
-    db_manager,
-    context_engine,
-    files_manager,
-    workers
-):
-    """Background task that runs the processing of one batch.
-
-    Loads every line of ``jsonl_path`` as JSON and hands each record to a
-    worker chosen round-robin from ``workers``. Exceptions are logged and
-    swallowed.
-
-    NOTE(review): ``enable_cache``, ``llm_client``, ``db_manager``,
-    ``context_engine`` and ``files_manager`` are accepted but not used in this
-    body.
-    """
-    logger.info(f"📊 Iniciando processamento de batch {batch_id}")
-    try:
-        # Read the JSONL file; lines are not filtered, so a blank line reaches
-        # json.loads as-is
-        registros = []
-        with open(jsonl_path, 'r', encoding='utf-8') as f:
-            for linha in f:
-                registros.append(json.loads(linha))
-        logger.info(f"📋 {len(registros)} registros carregados")
-        # Distribute among workers (round-robin)
-        for idx, registro in enumerate(registros):
-            # assumes len(workers) >= num_workers — TODO confirm against the caller
-            worker_idx = idx % num_workers
-            worker = workers[worker_idx]
-            # Process (called synchronously, one record after another)
-            resultado = worker.processar_tarefa({
-                'id': f"{batch_id}_{idx}",
-                'chunk_id': f"chunk_{idx}",
-                'dados_originais': registro
-            })
-            logger.info(f"✅ Registro {idx} processado: {resultado.get('status')}")
-        logger.info(f"🎉 Batch {batch_id} concluído")
-    except Exception as e:
-        logger.error(f"❌ Erro no processamento de batch {batch_id}: {e}")

api/routes/processing.py ADDED Viewed

	@@ -0,0 +1,343 @@

+"""
+Router de processamento de acórdãos
+ENDPOINT PRINCIPAL: Upload JSONL → Processamento → Download TAR.GZ
+"""
+from fastapi import APIRouter, UploadFile, File, BackgroundTasks, HTTPException, Query
+from fastapi.responses import FileResponse
+import uuid
+import json
+from pathlib import Path
+from datetime import datetime
+import hashlib
+from api.models.requests import ProcessingOptionsRequest
+from api.models.responses import ProcessingResponse, ProcessingStatus, FileInfoResponse
+from api.services.processing_service import ProcessingService
+from api.utils.logger import setup_logger
+from api.config import get_settings
+router = APIRouter()
+logger = setup_logger(__name__)
+settings = get_settings()
+# Task storage keyed by task_id (in production use Redis or a database).
+# It is a plain module-level dict, so its contents live only in this process.
+processing_tasks = {}
+@router.post("/process/upload", response_model=ProcessingResponse, status_code=202)
+async def upload_and_process(
+    background_tasks: BackgroundTasks,
+    file: UploadFile = File(..., description="Arquivo JSONL com acórdãos"),
+    llm_provider: str = Query("groq", description="Provedor LLM (groq/openai/anthropic)"),
+    model_type: str = Query("balanced", description="Tipo de modelo (fast/balanced/quality)"),
+    enable_parallel: bool = Query(True, description="Processar em paralelo"),
+    max_workers: int = Query(3, ge=1, le=10, description="Workers paralelos"),
+    save_to_db: bool = Query(False, description="Salvar resultados no banco")
+):
+    """
+    **Upload a JSONL file and start processing it in the background.**
+    ## Flow:
+    1. Upload of the JSONL file
+    2. Format validation
+    3. Creation of the processing task
+    4. Background processing (9 specialists)
+    5. Generation of a TAR.GZ archive with the results
+    ## Expected JSONL format:
+    ```json
+    {"acordao_id": "001", "tribunal": "TJPR", "ementa": "...", "integra": "..."}
+    {"acordao_id": "002", "tribunal": "TJSP", "ementa": "...", "integra": "..."}
+    ```
+    ## Response:
+    - **task_id**: unique ID used to query the status
+    - **status**: initial status (pending)
+    - Use `/process/status/{task_id}` to follow progress
+    - Use `/process/download/{task_id}` to download the results
+    ## Errors:
+    - **400**: file name does not end in .jsonl or .json
+    - **413**: file larger than the configured maximum
+    - **422**: not UTF-8, invalid JSON line, or required fields missing
+    """
+    # The client controls the file name: it may be absent, and it may contain
+    # directory components. Keep only the final component so the saved file
+    # cannot land outside the upload directory.
+    safe_name = Path(file.filename or "").name
+    # Validate the extension
+    if not safe_name.endswith(('.jsonl', '.json')):
+        raise HTTPException(
+            status_code=400,
+            detail="Arquivo deve ser .jsonl ou .json"
+        )
+    # Validate the size
+    content = await file.read()
+    size_mb = len(content) / (1024 * 1024)
+    if size_mb > settings.MAX_UPLOAD_SIZE_MB:
+        raise HTTPException(
+            status_code=413,
+            detail=f"Arquivo muito grande: {size_mb:.2f}MB (máx: {settings.MAX_UPLOAD_SIZE_MB}MB)"
+        )
+    # Decode once, up front, so an encoding problem is reported before any
+    # line is parsed
+    try:
+        text = content.decode('utf-8')
+    except UnicodeDecodeError:
+        raise HTTPException(
+            status_code=422,
+            detail="Arquivo deve estar em UTF-8"
+        )
+    # Validate and count the records BEFORE touching the disk, so a rejected
+    # upload does not leave an orphan file behind. Lines are split on '\n'
+    # only: JSON strings may legally contain other Unicode line separators.
+    total_records = 0
+    for line_num, line in enumerate(text.strip().split('\n'), 1):
+        if not line.strip():
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError as e:
+            raise HTTPException(
+                status_code=422,
+                detail=f"JSONL inválido na linha {line_num}: {str(e)}"
+            )
+        # Validate required fields; a line holding a non-object JSON value
+        # (number, list, ...) cannot carry them either
+        if not isinstance(record, dict) or 'ementa' not in record or 'integra' not in record:
+            raise HTTPException(
+                status_code=422,
+                detail=f"Linha {line_num}: campos obrigatórios ausentes (ementa, integra)"
+            )
+        total_records += 1
+    # Create the task ID
+    task_id = f"task-{uuid.uuid4()}"
+    # Create directories
+    upload_dir = Path(settings.UPLOAD_PATH)
+    upload_dir.mkdir(parents=True, exist_ok=True)
+    # Save the file
+    file_path = upload_dir / f"{task_id}_{safe_name}"
+    file_path.write_bytes(content)
+    # Compute the hash
+    file_hash = hashlib.sha256(content).hexdigest()
+    # Build the initial response
+    response = ProcessingResponse(
+        task_id=task_id,
+        status=ProcessingStatus.PENDING,
+        message=f"Processamento agendado para {total_records} registros",
+        total_records=total_records,
+        processed_records=0,
+        failed_records=0,
+        started_at=datetime.now()
+    )
+    # Store the task
+    processing_tasks[task_id] = response.dict()
+    # Attach metadata
+    processing_tasks[task_id]['metadata'] = {
+        'filename': file.filename,
+        'size_bytes': len(content),
+        'hash_sha256': file_hash,
+        'llm_provider': llm_provider,
+        'model_type': model_type,
+        'enable_parallel': enable_parallel,
+        'max_workers': max_workers,
+        'save_to_db': save_to_db
+    }
+    # Start the background processing
+    background_tasks.add_task(
+        process_acordaos_background,
+        task_id=task_id,
+        file_path=str(file_path),
+        llm_provider=llm_provider,
+        model_type=model_type,
+        enable_parallel=enable_parallel,
+        max_workers=max_workers,
+        save_to_db=save_to_db
+    )
+    logger.info(f"✅ Task {task_id} criada - {total_records} registros - {size_mb:.2f}MB")
+    return response
+@router.get("/process/status/{task_id}", response_model=ProcessingResponse)
+async def get_processing_status(task_id: str):
+    """
+    **Query the processing status of a task.**
+    Builds the response from the record stored for the task at upload time
+    and updated by the background job (status, counters, message).
+    Responds with 404 when the task ID is unknown.
+    """
+    task_data = processing_tasks.get(task_id)
+    # Stored records are always dicts, so None can only mean "unknown task"
+    if task_data is None:
+        not_found = f"Task '{task_id}' não encontrada"
+        raise HTTPException(status_code=404, detail=not_found)
+    return ProcessingResponse(**task_data)
+@router.get("/process/list")
+async def list_all_tasks():
+    """
+    **List every processing task.**
+    Useful for debugging and monitoring. Each entry carries the task ID, its
+    status, a "processed/total" progress string and the start time.
+    """
+    summaries = []
+    for task_id, data in processing_tasks.items():
+        done = data['processed_records']
+        expected = data['total_records']
+        summaries.append({
+            "task_id": task_id,
+            "status": data["status"],
+            "progress": f"{done}/{expected}",
+            "started_at": data.get("started_at")
+        })
+    return {"total": len(processing_tasks), "tasks": summaries}
+@router.get("/process/download/{task_id}")
+async def download_result(task_id: str):
+    """
+    **Download the TAR.GZ archive with the results.**
+    Available only when status = "completed".
+    Responds with 404 for an unknown task or a missing archive and with 400
+    while the task has not completed.
+    ## Archive contents:
+    - `{task_id}_results.json`: full results
+    - Analyses from each specialist
+    - Processing metadata
+    - Logs and metrics
+    """
+    task = processing_tasks.get(task_id)
+    if task is None:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Task '{task_id}' não encontrada"
+        )
+    current_status = task['status']
+    if current_status != ProcessingStatus.COMPLETED:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Processamento ainda não concluído. Status atual: {current_status}"
+        )
+    # Archives are looked up under <OUTPUT_PATH>/archives/<task_id>.tar.gz
+    archive = Path(settings.OUTPUT_PATH) / "archives" / f"{task_id}.tar.gz"
+    if not archive.exists():
+        raise HTTPException(
+            status_code=404,
+            detail="Arquivo de resultado não encontrado"
+        )
+    logger.info(f"📦 Download iniciado: {task_id}")
+    download_name = f"para_ai_resultado_{task_id}.tar.gz"
+    return FileResponse(
+        path=str(archive),
+        filename=download_name,
+        media_type="application/gzip",
+        headers={"Content-Disposition": f"attachment; filename={download_name}"}
+    )
+@router.delete("/process/{task_id}")
+async def delete_task(task_id: str):
+    """
+    **Delete a task and its files.**
+    Useful for cleaning up old tasks. Removes the uploaded file(s), every
+    output file whose name starts with the task ID (searched recursively, so
+    the archive under `archives/` is included) and the in-memory record.
+    Responds with 404 when the task ID is unknown.
+    """
+    if task_id not in processing_tasks:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Task '{task_id}' não encontrada"
+        )
+    upload_dir = Path(settings.UPLOAD_PATH)
+    output_dir = Path(settings.OUTPUT_PATH)
+    # The download endpoint serves <OUTPUT_PATH>/archives/<task_id>.tar.gz, so
+    # the output tree has to be searched recursively; a flat glob on
+    # OUTPUT_PATH never reaches the archive.
+    candidates = list(upload_dir.glob(f"{task_id}_*"))
+    candidates.extend(output_dir.rglob(f"{task_id}*"))
+    for path in candidates:
+        # Only regular files are removed; unlink() on a directory would raise
+        if path.is_file():
+            path.unlink()
+    # Remove from memory
+    del processing_tasks[task_id]
+    logger.info(f"🗑️ Task deletada: {task_id}")
+    return {"message": f"Task {task_id} deletada com sucesso"}
+# ============================================================================
+# FUNÇÃO DE BACKGROUND
+# ============================================================================
+async def process_acordaos_background(
+    task_id: str,
+    file_path: str,
+    llm_provider: str,
+    model_type: str,
+    enable_parallel: bool,
+    max_workers: int,
+    save_to_db: bool
+):
+    """
+    Background job that processes the uploaded court rulings.
+
+    Marks the task as processing, delegates the work to
+    ``ProcessingService.process_jsonl_file`` and then records either the
+    result (counters, download URL, archive metadata) or the error on the
+    task record. Exceptions from the service are logged and stored on the
+    task; they are not re-raised.
+
+    NOTE(review): ``save_to_db`` is accepted but not forwarded to the service
+    in this body.
+    """
+    # Keep a reference to the record itself. The task can be removed from
+    # ``processing_tasks`` (DELETE endpoint) while this job is running; going
+    # through the registry on every update would then raise KeyError, even
+    # inside the error handler below.
+    task = processing_tasks.get(task_id)
+    if task is None:
+        logger.warning(f"⚠️ Task {task_id} não encontrada; processamento cancelado")
+        return
+    try:
+        # Update the status
+        task['status'] = ProcessingStatus.PROCESSING
+        task['message'] = "Processamento em andamento..."
+        logger.info(f"🚀 Iniciando processamento background: {task_id}")
+        # Initialise the service
+        service = ProcessingService()
+        # Process
+        result = await service.process_jsonl_file(
+            file_path=file_path,
+            task_id=task_id,
+            llm_provider=llm_provider,
+            model_type=model_type,
+            enable_parallel=enable_parallel,
+            max_workers=max_workers
+        )
+        # Record the success on the task
+        task.update({
+            'status': ProcessingStatus.COMPLETED,
+            'message': f"Processamento concluído com sucesso em {result['elapsed_seconds']:.2f}s",
+            'processed_records': result['processed'],
+            'failed_records': result['failed'],
+            'completed_at': datetime.now(),
+            'download_url': f"/api/v1/process/download/{task_id}",
+            'result_metadata': {
+                'archive_path': result['archive_path'],
+                'hash_sha256': result['hash'],
+                'elapsed_seconds': result['elapsed_seconds']
+            }
+        })
+        logger.info(f"✅ Task {task_id} concluída - {result['processed']} processados, {result['failed']} falhas")
+    except Exception as e:
+        # Record the failure on the task
+        logger.error(f"❌ Erro na task {task_id}: {str(e)}", exc_info=True)
+        task.update({
+            'status': ProcessingStatus.ERROR,
+            'message': f"Erro no processamento: {str(e)}",
+            'completed_at': datetime.now(),
+            'errors': [str(e)]
+        })

api/routes/status_routes.py DELETED Viewed

@@ -1,44 +0,0 @@
-#!/usr/bin/env python3
-"""
-ParaAi - Status Routes
-GET /status/{batch_id} - Status do processamento
-"""
-from fastapi import APIRouter, HTTPException
-from datetime import datetime
-def create_status_router(context_engine, files_manager):
-    """Factory that builds the status router.
-
-    NOTE(review): the handlers are annotated with ``Dict``, which is not among
-    the imports visible in this file.
-    """
-    router = APIRouter()
-    @router.get("/{batch_id}")
-    async def get_status(batch_id: str) -> Dict:
-        """Return the status of a batch being processed"""
-        try:
-            status = context_engine.obter_status_processamento(batch_id)
-            if not status:
-                raise HTTPException(
-                    status_code=404,
-                    detail=f"Batch {batch_id} não encontrado"
-                )
-            return status
-        # NOTE(review): the 404 raised above is also caught here and re-raised
-        # as a 500 — presumably unintended
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    @router.get("/download/{batch_id}")
-    async def get_download_url(batch_id: str) -> Dict:
-        """Return the download URL of the TAR.GZ"""
-        try:
-            # Only the entries returned for limite=10 are searched; a file is
-            # matched when batch_id is a substring of its name
-            arquivos = files_manager.listar_arquivos_download(limite=10)
-            for arq in arquivos:
-                if batch_id in arq['nome']:
-                    return {"url": arq['url']}
-            raise HTTPException(
-                status_code=404,
-                detail=f"Arquivo para batch {batch_id} não encontrado"
-            )
-        # NOTE(review): same as above, the 404 ends up as a 500
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    return router

api/schemas.py DELETED Viewed

@@ -1,31 +0,0 @@
-"""Pydantic schemas para request/response"""
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from datetime import datetime
-class ProcessJSONLRequest(BaseModel):
-    """Request to process a JSONL file"""
-    batch_id: Optional[str] = None
-    # Specialist numbers to run; all nine by default
-    enable_specialists: List[int] = Field(default=[1, 2, 3, 4, 5, 6, 7, 8, 9])
-    parallel: bool = True
-class ProcessResponse(BaseModel):
-    """Processing response"""
-    batch_id: str
-    status: str
-    total_records: int
-    # Plain string; the visible caller fills it with datetime.now().isoformat()
-    timestamp: str
-    message: str
-class SpecialistTestRequest(BaseModel):
-    """Request to test a single specialist"""
-    # Constrained to the range 1..9 inclusive
-    specialist_id: int = Field(ge=1, le=9)
-    acordao_data: Dict[str, Any]
-class DownloadResponse(BaseModel):
-    """Download response"""
-    batch_id: str
-    filename: str
-    size_bytes: int
-    url: str
-    # Declared as a string, not a datetime
-    created_at: str

api/utils/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Utilitários"""

api/utils/logger.py DELETED Viewed

@@ -1,20 +0,0 @@
-"""Configuração de logging"""
-import logging
-import sys
-from api.config import settings
-def setup_logger():
-    """Configure and return the global "para_ai" logger.
-
-    Sets the logger and a stdout stream handler to ``settings.LOG_LEVEL`` and
-    attaches the handler.
-
-    NOTE(review): a new handler is attached on every call, so calling this
-    more than once adds more than one handler to the same logger.
-    """
-    logger = logging.getLogger("para_ai")
-    logger.setLevel(settings.LOG_LEVEL)
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(settings.LOG_LEVEL)
-    formatter = logging.Formatter(
-        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    )
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger

api/utils/validators.py DELETED Viewed

@@ -1,16 +0,0 @@
-"""Validadores de dados"""
-from typing import Dict, Any
-import json
-def validate_jsonl_line(line: str) -> bool:
-    """Return True when ``line`` parses as JSON, False otherwise"""
-    try:
-        json.loads(line)
-        return True
-    # NOTE(review): bare except — every exception is reported as "invalid",
-    # not just JSON decoding errors
-    except:
-        return False
-def validate_acordao_data(data: Dict[str, Any]) -> bool:
-    """Check the basic structure of a court ruling (acórdão).
-
-    Returns True when both 'ementa' and 'integra' are present as keys; the
-    values themselves are not inspected.
-    """
-    required_fields = ['ementa', 'integra']
-    return all(field in data for field in required_fields)

code_copy.py DELETED Viewed

@@ -1,71 +0,0 @@
-import os
-from pathlib import Path
-def consolidar_arquivos(diretorio_base='.', arquivo_saida='código.md'):
-    """
-    Walk every code file in a directory and its subdirectories and
-    consolidate them into a single Markdown file.
-    Args:
-        diretorio_base: Root directory to search (default: current directory)
-        arquivo_saida: Name of the output file (default: código.md)
-    """
-    # File extensions to process
-    extensoes_validas = {'.py', '.sql', '.md', '.json'}
-    # List holding every file found
-    arquivos_encontrados = []
-    # Walk every file in the directory and subdirectories
-    for root, dirs, files in os.walk(diretorio_base):
-        for file in files:
-            # Check whether the file extension is in the list
-            if Path(file).suffix in extensoes_validas:
-                caminho_completo = os.path.join(root, file)
-                # Avoid processing the output file itself (compared by bare
-                # file name, in any directory)
-                if file != arquivo_saida:
-                    arquivos_encontrados.append(caminho_completo)
-    # Sort the files for better organisation
-    arquivos_encontrados.sort()
-    # Write the output file
-    with open(arquivo_saida, 'w', encoding='utf-8') as f_out:
-        f_out.write(f"# Consolidação de Código\n\n")
-        f_out.write(f"Total de arquivos processados: {len(arquivos_encontrados)}\n")
-        f_out.write("---\n\n")
-        for caminho_arquivo in arquivos_encontrados:
-            try:
-                # Read the file content
-                with open(caminho_arquivo, 'r', encoding='utf-8') as f_in:
-                    conteudo = f_in.read()
-                # Extension used as the code-fence language tag
-                extensao = Path(caminho_arquivo).suffix[1:]  # Drop the dot
-                # Write in the requested format
-                f_out.write(f"\n\n### **Código do arquivo salvo em {caminho_arquivo}\n**")
-                f_out.write(f"```{extensao}\n")
-                f_out.write(conteudo)
-                # Meant to guarantee a trailing newline.
-                # NOTE(review): endswith('') is always True, so this branch never
-                # runs — presumably '\n' was intended in both literals
-                if not conteudo.endswith(''):
-                    f_out.write('')
-                f_out.write("```\n")
-                f_out.write("---\n")
-                print(f"✓ Processado: {caminho_arquivo}")
-            except Exception as e:
-                print(f"✗ Erro ao processar {caminho_arquivo}: {e}")
-    print(f"✓ Consolidação completa! Arquivo gerado: {arquivo_saida}")
-    print(f"Total de arquivos: {len(arquivos_encontrados)}")
-if __name__ == "__main__":
-    # Run the function with its defaults
-    consolidar_arquivos()
-    # Alternative usages:
-    # consolidar_arquivos('./meu_projeto', 'todos_codigos.md')  # Specify the directory
-    # consolidar_arquivos(arquivo_saida='backup_codigo.md')     # Change the output name

copy_code.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+from pathlib import Path
+from collections import defaultdict
+def normalize_path_for_filename(path):
+    """Turn a directory path into a string usable inside a file name.
+
+    Path separators and dots become underscores, colons are dropped, a single
+    leading underscore is removed, and an empty result becomes 'raiz'.
+    """
+    # One pass: separators and dots -> '_', colon -> removed
+    table = str.maketrans({os.sep: '_', '/': '_', '\\': '_', '.': '_', ':': None})
+    cleaned = path.translate(table)
+    # Only the first leading underscore is dropped
+    if cleaned[:1] == '_':
+        cleaned = cleaned[1:]
+    return cleaned if cleaned else 'raiz'
+def get_language_tag(extension):
+    """Return the Markdown code-fence language tag for a file extension.
+
+    The lookup is case-sensitive; unknown extensions give an empty string.
+    """
+    tags = {'.py': 'python', '.sql': 'sql', '.md': 'markdown', '.json': 'json'}
+    try:
+        return tags[extension]
+    except KeyError:
+        return ''
+def process_directory(root_dir='.', extensions=('.py', '.sql', '.md', '.json')):
+    """
+    Walk ``root_dir`` recursively and write one Markdown report per directory
+    containing the files whose extension is in ``extensions``.
+
+    Reports are written to the current working directory as
+    ``cópia_código_<normalized dir>.md``. Files matching that naming pattern
+    are skipped while scanning, so reports from a previous run are not copied
+    into the new ones.
+
+    Args:
+        root_dir: Directory to scan (default: current directory).
+        extensions: Lower-case extensions, dot included, to pick up.
+
+    Returns:
+        The number of directories for which a report was written.
+    """
+    output_prefix = 'cópia_código_'
+    # Group the files by directory
+    files_by_dir = defaultdict(list)
+    for dirpath, _dirnames, filenames in os.walk(root_dir):
+        for filename in filenames:
+            # '.md' is a scanned extension, so without this check every run
+            # would ingest the reports generated by the run before it
+            if filename.startswith(output_prefix) and filename.endswith('.md'):
+                continue
+            file_ext = os.path.splitext(filename)[1].lower()
+            if file_ext in extensions:
+                full_path = os.path.join(dirpath, filename)
+                files_by_dir[dirpath].append((filename, full_path, file_ext))
+    # Write one Markdown file per directory
+    for directory, files in files_by_dir.items():
+        normalized_dir = normalize_path_for_filename(directory)
+        output_filename = f'{output_prefix}{normalized_dir}.md'
+        with open(output_filename, 'w', encoding='utf-8') as output_file:
+            output_file.write(f'# Arquivos do diretório: {directory}\n\n')
+            # Sort the files by name
+            files.sort(key=lambda x: x[0])
+            for filename, full_path, file_ext in files:
+                # Header: path relative to the scanned root
+                rel_path = os.path.relpath(full_path, root_dir)
+                output_file.write(f'### {rel_path}\n')
+                try:
+                    with open(full_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                    # Fenced code block with the language tag of the extension
+                    lang_tag = get_language_tag(file_ext)
+                    output_file.write(f'```{lang_tag}\n')
+                    output_file.write(content)
+                    # The closing fence must start on its own line
+                    if not content.endswith('\n'):
+                        output_file.write('\n')
+                    output_file.write('```\n')
+                except Exception as e:
+                    # Best effort: an unreadable file is reported inside the
+                    # document instead of aborting the whole run
+                    output_file.write(f'```\n')
+                    output_file.write(f'Erro ao ler arquivo: {e}\n')
+                    output_file.write('```\n')
+                output_file.write('---\n\n')
+        print(f'✓ Criado: {output_filename} ({len(files)} arquivos)')
+    print(f'\nTotal: {len(files_by_dir)} diretórios processados')
+    return len(files_by_dir)
+# Usage example:
+# process_directory('.', ['.py', '.sql', '.md', '.json'])
+#
+# The call below scans the current directory. It sits at module level, so it
+# also runs whenever this module is imported.
+process_directory('.', ['.py', '.sql', '.md', '.json'])