diff --git a/Dockerfile b/Dockerfile index 05a1891dae3cddb81c245180f5c1c088584295ee..7f23e0d6eb0ca91cf496513fb4d0227ffac10ec9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,24 @@ -# ============================================================================== -# Dockerfile — AetherMap API (versão profissional) -# ============================================================================== +FROM python:3.11-slim -# Imagem Python robusta (não slim → evita erros de build) -FROM python:3.10 - -# Define diretório da aplicação WORKDIR /app -# --- INSTALAR TORCH CPU ANTES (CRÍTICO!) --- -# Isso garante que a versão certa (CPU) seja instalada -RUN pip install --no-cache-dir \ - torch \ - torchvision \ - torchaudio \ - --index-url https://download.pytorch.org/whl/cpu - +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* -# Copiar requirements +# Copy requirements first for better caching COPY requirements.txt . - -# Instalar dependências restantes RUN pip install --no-cache-dir -r requirements.txt -# Copiar código da aplicação +# Copy application code COPY . . 
-# Expor porta usada pelo Hugging Face Spaces +# Create data directory for SQLite +RUN mkdir -p /app/data + +# Expose port (HF Spaces uses 7860) EXPOSE 7860 -# Comando padrão para executar FastAPI -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"] +# Run the application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/README.md b/README.md index 1b0b6975ed9d1181da24ce796835935436320b98..b969ddabfb79f685dcf88fbe98f72b5d3bc1bead 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,27 @@ --- -title: AetherMap -emoji: 🦀 -colorFrom: indigo -colorTo: pink +title: Numidium +emoji: 🔮 +colorFrom: blue +colorTo: red sdk: docker pinned: false -license: apache-2.0 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Numidium API + +Backend do sistema de inteligência Numidium/VANTAGE. + +## Endpoints + +- `/docs` - Documentação Swagger +- `/api/v1/entities` - CRUD de entidades +- `/api/v1/relationships` - Conexões +- `/api/v1/events` - Eventos +- `/api/v1/search` - Busca global +- `/api/v1/ingest` - Ingestão de dados (Wikipedia, News) + +## Stack + +- FastAPI +- SQLite +- BeautifulSoup (scraping) diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5ca62e91c6b6d2fd4d3a0d2f3169941e71d37af3 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +# Numidium Backend App diff --git a/app/__pycache__/__init__.cpython-311.pyc b/app/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a44e729bba8a6e6cdf407034b3b1ec551cfb6fe Binary files /dev/null and b/app/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/__pycache__/config.cpython-311.pyc b/app/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6ba5b98d9de60400fecda19a96033ffd700d3a1 Binary files /dev/null and b/app/__pycache__/config.cpython-311.pyc differ 
diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce0a2733c6eceaf10144429177e8f20db9604545 --- /dev/null +++ b/app/api/__init__.py @@ -0,0 +1 @@ +# API module diff --git a/app/api/__pycache__/__init__.cpython-311.pyc b/app/api/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e59a223a6007cd27a3443d5ab5a26d31df7fb4ff Binary files /dev/null and b/app/api/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/api/__pycache__/deps.cpython-311.pyc b/app/api/__pycache__/deps.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07e15cf0e980065fc3e41e4e0eea81575dc514d5 Binary files /dev/null and b/app/api/__pycache__/deps.cpython-311.pyc differ diff --git a/app/api/deps.py b/app/api/deps.py new file mode 100644 index 0000000000000000000000000000000000000000..bcea9d8c46a65a9857513605150ce15591631945 --- /dev/null +++ b/app/api/deps.py @@ -0,0 +1,35 @@ +""" +API dependencies. +""" +from typing import Generator, Optional + +from fastapi import Cookie, Header +from sqlalchemy.orm import Session + +from app.core.database import get_db_for_session, get_default_session + + +def get_session_id( + x_session_id: Optional[str] = Header(None), + numidium_session: Optional[str] = Cookie(None) +) -> Optional[str]: + """Return the session id from header or cookie.""" + return x_session_id or numidium_session + + +def get_scoped_db( + x_session_id: Optional[str] = Header(None), + numidium_session: Optional[str] = Cookie(None) +) -> Generator[Session, None, None]: + """ + Provide a session-scoped DB if available, otherwise the default DB. 
+ """ + session_id = x_session_id or numidium_session + if session_id: + db = get_db_for_session(session_id) + else: + db = get_default_session() + try: + yield db + finally: + db.close() diff --git a/app/api/routes/__init__.py b/app/api/routes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e37c97a33d27ba2e879921f79996d8fdc3edbb73 --- /dev/null +++ b/app/api/routes/__init__.py @@ -0,0 +1,2 @@ +# API Routes module +from app.api.routes import entities, relationships, events, search, ingest diff --git a/app/api/routes/__pycache__/__init__.cpython-311.pyc b/app/api/routes/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e48c20bb1f744a1d1037323ce205527266cb5c7c Binary files /dev/null and b/app/api/routes/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/entities.cpython-311.pyc b/app/api/routes/__pycache__/entities.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee99ab907a18b99a588dfe960d31a7e21c7e53d6 Binary files /dev/null and b/app/api/routes/__pycache__/entities.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/events.cpython-311.pyc b/app/api/routes/__pycache__/events.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52f29ec16d10fc54bd6be7d6e32591d65d3acfcc Binary files /dev/null and b/app/api/routes/__pycache__/events.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/ingest.cpython-311.pyc b/app/api/routes/__pycache__/ingest.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e524bafc4ce081ccccb32d94f2426c10b1e79b9a Binary files /dev/null and b/app/api/routes/__pycache__/ingest.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/investigate.cpython-311.pyc b/app/api/routes/__pycache__/investigate.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..61c0e309052c422eb7d506d8623cfaed4ff4e01e Binary files /dev/null and b/app/api/routes/__pycache__/investigate.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/relationships.cpython-311.pyc b/app/api/routes/__pycache__/relationships.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73c88868d8b1ad76745a529fe05928d06408c415 Binary files /dev/null and b/app/api/routes/__pycache__/relationships.cpython-311.pyc differ diff --git a/app/api/routes/__pycache__/search.cpython-311.pyc b/app/api/routes/__pycache__/search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83951b1b069fe2d10b140852fbc85e7294cac015 Binary files /dev/null and b/app/api/routes/__pycache__/search.cpython-311.pyc differ diff --git a/app/api/routes/aethermap.py b/app/api/routes/aethermap.py new file mode 100644 index 0000000000000000000000000000000000000000..bc0535153069d293dcdbe97be9565e0a17728e3e --- /dev/null +++ b/app/api/routes/aethermap.py @@ -0,0 +1,307 @@ +""" +AetherMap Routes - Document Mapping & Semantic Search +Integrates with AetherMap API for document clustering, NER, and semantic search. 
+""" +from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Any +from sqlalchemy.orm import Session +import io + +from app.api.deps import get_scoped_db +from app.services.aethermap_client import aethermap, ProcessResult, SearchResult, EntityGraphResult + + +router = APIRouter() + + +# ============================================================================ +# Request/Response Models +# ============================================================================ + +class IndexDocumentsRequest(BaseModel): + """Request to index documents from text list""" + documents: List[str] = Field(..., description="Lista de textos para indexar") + fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)") + + +class IndexEntitiesRequest(BaseModel): + """Request to index entities from NUMIDIUM database""" + entity_types: Optional[List[str]] = Field(None, description="Filtrar por tipos de entidade") + limit: int = Field(500, description="Limite de entidades") + + +class SemanticSearchRequest(BaseModel): + """Request for semantic search""" + query: str = Field(..., description="Termo de busca") + turbo_mode: bool = Field(True, description="Modo turbo (mais rápido)") + + +class IndexResponse(BaseModel): + """Response from indexing""" + job_id: str + num_documents: int + num_clusters: int + num_noise: int + metrics: Dict[str, Any] = {} + cluster_analysis: Dict[str, Any] = {} + + +class SearchResponse(BaseModel): + """Response from search""" + summary: str + results: List[Dict[str, Any]] = [] + + +class EntityGraphResponse(BaseModel): + """Response from NER extraction""" + hubs: List[Dict[str, Any]] = [] + insights: Dict[str, Any] = {} + node_count: int = 0 + edge_count: int = 0 + + +class StatusResponse(BaseModel): + """AetherMap status""" + connected: bool + job_id: Optional[str] = None + documents_indexed: int = 0 + + +# 
============================================================================ +# Endpoints +# ============================================================================ + +@router.get("/status", response_model=StatusResponse) +async def get_status(): + """ + Get AetherMap connection status. + """ + return StatusResponse( + connected=True, + job_id=aethermap.current_job_id, + documents_indexed=0 # TODO: track this + ) + + +@router.post("/index", response_model=IndexResponse) +async def index_documents(request: IndexDocumentsRequest): + """ + Index a list of documents for semantic search. + + The documents will be: + - Embedded using sentence transformers + - Clustered using HDBSCAN + - Indexed in FAISS + BM25 for hybrid search + """ + try: + if not request.documents: + raise HTTPException(status_code=400, detail="Nenhum documento fornecido") + + result = await aethermap.process_documents( + texts=request.documents, + fast_mode=request.fast_mode + ) + + return IndexResponse( + job_id=result.job_id, + num_documents=result.num_documents, + num_clusters=result.num_clusters, + num_noise=result.num_noise, + metrics=result.metrics, + cluster_analysis=result.cluster_analysis + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/index-entities", response_model=IndexResponse) +async def index_entities( + request: IndexEntitiesRequest, + db: Session = Depends(get_scoped_db) +): + """ + Index entities from NUMIDIUM database. + + Collects entity names and descriptions, sends to AetherMap for processing. 
+ """ + from app.models.entity import Entity + + try: + query = db.query(Entity) + + if request.entity_types: + query = query.filter(Entity.type.in_(request.entity_types)) + + entities = query.limit(request.limit).all() + + if not entities: + raise HTTPException(status_code=404, detail="Nenhuma entidade encontrada") + + # Build text representations + documents = [] + for e in entities: + text = f"{e.name} ({e.type})" + if e.description: + text += f": {e.description[:1000]}" + documents.append(text) + + result = await aethermap.process_documents( + texts=documents, + fast_mode=request.fast_mode if hasattr(request, 'fast_mode') else True + ) + + return IndexResponse( + job_id=result.job_id, + num_documents=result.num_documents, + num_clusters=result.num_clusters, + num_noise=result.num_noise, + metrics=result.metrics, + cluster_analysis=result.cluster_analysis + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/upload", response_model=IndexResponse) +async def upload_documents( + file: UploadFile = File(...), + fast_mode: bool = Form(True) +): + """ + Upload a file (TXT or CSV) for indexing. 
+ + - TXT: One document per line + - CSV: Will use first text column found + """ + try: + content = await file.read() + text = content.decode('utf-8', errors='ignore') + + # Split by lines for TXT + documents = [line.strip() for line in text.splitlines() if line.strip()] + + if not documents: + raise HTTPException(status_code=400, detail="Arquivo vazio ou sem texto válido") + + result = await aethermap.process_documents( + texts=documents, + fast_mode=fast_mode + ) + + return IndexResponse( + job_id=result.job_id, + num_documents=result.num_documents, + num_clusters=result.num_clusters, + num_noise=result.num_noise, + metrics=result.metrics, + cluster_analysis=result.cluster_analysis + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/search", response_model=SearchResponse) +async def semantic_search(request: SemanticSearchRequest): + """ + Semantic search in indexed documents. + + Uses hybrid RAG (FAISS + BM25 + reranking + LLM). + Returns a summary answering the query with citations. + """ + try: + if not aethermap.current_job_id: + raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.") + + result = await aethermap.semantic_search( + query=request.query, + turbo_mode=request.turbo_mode + ) + + return SearchResponse( + summary=result.summary, + results=result.results + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/entities", response_model=EntityGraphResponse) +async def extract_entities(): + """ + Extract named entities (NER) from indexed documents. + + Returns: + - Hub entities (most connected) + - Relationship insights + - Graph metrics + """ + try: + if not aethermap.current_job_id: + raise HTTPException(status_code=400, detail="Nenhum documento indexado. 
Use /index primeiro.") + + result = await aethermap.extract_entities() + + return EntityGraphResponse( + hubs=result.hubs, + insights=result.insights, + node_count=len(result.nodes), + edge_count=len(result.edges) + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/analyze") +async def analyze_graph(): + """ + Analyze entity graph using LLM. + + Returns semantic insights about relationships and patterns. + """ + try: + if not aethermap.current_job_id: + raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.") + + result = await aethermap.analyze_graph() + + return { + "analysis": result.analysis, + "key_entities": result.key_entities, + "relationships": result.relationships + } + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/describe-clusters") +async def describe_clusters(): + """ + Get LLM descriptions for each cluster found. + """ + try: + if not aethermap.current_job_id: + raise HTTPException(status_code=400, detail="Nenhum documento indexado. 
Use /index primeiro.") + + result = await aethermap.describe_clusters() + + return result + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/app/api/routes/analyze.py b/app/api/routes/analyze.py new file mode 100644 index 0000000000000000000000000000000000000000..37b93947c0e0c9f2a5a626301007c1cf30b212d6 --- /dev/null +++ b/app/api/routes/analyze.py @@ -0,0 +1,309 @@ +""" +Analyze API Routes - LLM-based text analysis +""" +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from typing import Optional, List +from sqlalchemy.orm import Session +import traceback + +from app.api.deps import get_scoped_db +from app.services.nlp import entity_extractor +from app.services.geocoding import geocode +from app.models.entity import Entity, Relationship, Event +from app.config import settings + + +router = APIRouter(prefix="/analyze", tags=["Analysis"]) + + +class AnalyzeRequest(BaseModel): + """Request model for text analysis""" + text: str = Field(..., min_length=10, description="Text to analyze") + auto_create: bool = Field(default=False, description="Auto-create extracted entities in database") + + +class ExtractedEntityResponse(BaseModel): + """Response model for an extracted entity""" + name: str + type: str + role: Optional[str] = None + aliases: Optional[List[str]] = None + description: Optional[str] = None + created: bool = False # Whether it was created in DB + entity_id: Optional[str] = None # DB ID if created + + +class ExtractedRelationshipResponse(BaseModel): + """Response model for an extracted relationship""" + source: str + target: str + relationship_type: str + context: Optional[str] = None + created: bool = False + + +class ExtractedEventResponse(BaseModel): + """Response model for an extracted event""" + description: str + event_type: Optional[str] = None + date: Optional[str] = None + location: Optional[str] = None + participants: Optional[List[str]] = None + created: bool 
= False + event_id: Optional[str] = None + + +class AnalyzeResponse(BaseModel): + """Response model for analysis""" + entities: List[ExtractedEntityResponse] + relationships: List[ExtractedRelationshipResponse] + events: List[ExtractedEventResponse] + stats: dict + + +@router.post("", response_model=AnalyzeResponse) +async def analyze_text(request: AnalyzeRequest, db: Session = Depends(get_scoped_db)): + """ + Analyze text using LLM to extract entities, relationships, and events. + + Uses Cerebras API with Qwen 3 235B for intelligent extraction. + + Args: + text: Text to analyze (min 10 characters) + auto_create: If true, automatically creates entities in the database + + Returns: + Extracted entities, relationships, events, and statistics + """ + try: + # Extract using LLM + result = await entity_extractor.extract(request.text) + + # Prepare response + entities_response = [] + relationships_response = [] + events_response = [] + + created_entities = 0 + created_relationships = 0 + created_events = 0 + + # Helper function to parse date strings + def parse_date(date_str): + if not date_str: + return None + from datetime import datetime + try: + # Try YYYY-MM-DD format + return datetime.strptime(date_str[:10], "%Y-%m-%d") + except: + try: + # Try YYYY format + return datetime.strptime(date_str[:4], "%Y") + except: + return None + + # Process entities + for entity in result.entities: + entity_data = ExtractedEntityResponse( + name=entity.name, + type=entity.type, + role=entity.role, + aliases=entity.aliases, + description=entity.description, + created=False + ) + + if request.auto_create and entity.name: + # Check if entity already exists + existing = db.query(Entity).filter( + Entity.name.ilike(f"%{entity.name}%") + ).first() + + if not existing: + # Get coordinates for location entities + lat, lng = None, None + if entity.type == "location": + coords = await geocode(entity.name) + if coords: + lat, lng = coords + + # Parse event_date if available + event_date = 
parse_date(getattr(entity, 'event_date', None)) + + # Create new entity + new_entity = Entity( + name=entity.name, + type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person", + description=entity.description or entity.role or "", + source="llm_extraction", + latitude=lat, + longitude=lng, + event_date=event_date, + properties={"role": entity.role, "aliases": entity.aliases} + ) + db.add(new_entity) + db.commit() + db.refresh(new_entity) + + entity_data.created = True + entity_data.entity_id = new_entity.id + created_entities += 1 + else: + entity_data.entity_id = existing.id + + entities_response.append(entity_data) + + # Process relationships + for rel in result.relationships: + rel_data = ExtractedRelationshipResponse( + source=rel.source, + target=rel.target, + relationship_type=rel.relationship_type, + context=rel.context, + created=False + ) + + if request.auto_create: + # Find source and target entities + source_entity = db.query(Entity).filter( + Entity.name.ilike(f"%{rel.source}%") + ).first() + target_entity = db.query(Entity).filter( + Entity.name.ilike(f"%{rel.target}%") + ).first() + + if source_entity and target_entity: + # Check if relationship exists + existing_rel = db.query(Relationship).filter( + Relationship.source_id == source_entity.id, + Relationship.target_id == target_entity.id, + Relationship.type == rel.relationship_type + ).first() + + if not existing_rel: + # Parse event_date if available + rel_event_date = parse_date(getattr(rel, 'event_date', None)) + + new_rel = Relationship( + source_id=source_entity.id, + target_id=target_entity.id, + type=rel.relationship_type, + event_date=rel_event_date, + properties={"context": rel.context} + ) + db.add(new_rel) + db.commit() + rel_data.created = True + created_relationships += 1 + + relationships_response.append(rel_data) + + # Process events + for event in result.events: + event_data = ExtractedEventResponse( + description=event.description, + 
event_type=event.event_type, + date=event.date, + location=event.location, + participants=event.participants, + created=False + ) + + if request.auto_create and event.description: + # Create event + new_event = Event( + title=event.description[:100] if len(event.description) > 100 else event.description, + description=event.description, + type=event.event_type or "general", + source="llm_extraction" + ) + db.add(new_event) + db.commit() + db.refresh(new_event) + + event_data.created = True + event_data.event_id = new_event.id + created_events += 1 + + events_response.append(event_data) + + return AnalyzeResponse( + entities=entities_response, + relationships=relationships_response, + events=events_response, + stats={ + "total_entities": len(entities_response), + "total_relationships": len(relationships_response), + "total_events": len(events_response), + "created_entities": created_entities, + "created_relationships": created_relationships, + "created_events": created_events + } + ) + + except Exception as e: + # Log the full error with traceback + print(f"=== ANALYZE ERROR ===") + print(f"Error type: {type(e).__name__}") + print(f"Error message: {str(e)}") + print(f"Traceback:") + traceback.print_exc() + print(f"=== END ERROR ===") + raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") + + +@router.get("/debug") +async def debug_config(): + """ + Debug endpoint to check if API is configured correctly. + """ + api_key = settings.cerebras_api_key + return { + "cerebras_api_key_configured": bool(api_key), + "cerebras_api_key_length": len(api_key) if api_key else 0, + "cerebras_api_key_preview": f"{api_key[:8]}...{api_key[-4:]}" if api_key and len(api_key) > 12 else "NOT SET" + } + + +@router.post("/quick") +async def quick_analyze(request: AnalyzeRequest): + """ + Quick analysis without database operations. + Returns only extracted data without creating anything. 
+ """ + try: + result = await entity_extractor.extract(request.text) + + return { + "entities": [ + { + "name": e.name, + "type": e.type, + "role": e.role, + "aliases": e.aliases + } + for e in result.entities + ], + "relationships": [ + { + "source": r.source, + "target": r.target, + "type": r.relationship_type, + "context": r.context + } + for r in result.relationships + ], + "events": [ + { + "description": ev.description, + "type": ev.event_type, + "date": ev.date, + "participants": ev.participants + } + for ev in result.events + ] + } + except Exception as e: + raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") diff --git a/app/api/routes/chat.py b/app/api/routes/chat.py new file mode 100644 index 0000000000000000000000000000000000000000..f75b133bf9956e67eb7b1b86312192d7fa093c46 --- /dev/null +++ b/app/api/routes/chat.py @@ -0,0 +1,63 @@ +""" +Chat API Routes - Intelligent chat with RAG +""" +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from typing import Optional +from sqlalchemy.orm import Session + +from app.api.deps import get_scoped_db, get_session_id +from app.services.chat import chat_service + + +router = APIRouter(prefix="/chat", tags=["Chat"]) + + +class ChatRequest(BaseModel): + """Chat request model""" + message: str = Field(..., min_length=1, description="User message") + use_web: bool = Field(default=True, description="Include web search") + use_history: bool = Field(default=True, description="Use conversation history") + + +class ChatResponse(BaseModel): + """Chat response model""" + answer: str + local_context_used: bool + web_context_used: bool + entities_found: int + + +@router.post("", response_model=ChatResponse) +async def chat( + request: ChatRequest, + db: Session = Depends(get_scoped_db), + session_id: Optional[str] = Depends(get_session_id) +): + """ + Send a message and get an intelligent response. 
+ + Uses: + - Local NUMIDIUM knowledge (entities/relationships) + - Lancer web search (if enabled) + - Cerebras LLM for synthesis + """ + try: + result = await chat_service.chat( + message=request.message, + db=db, + use_web=request.use_web, + use_history=request.use_history, + session_id=session_id + ) + return ChatResponse(**result) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/clear") +async def clear_history(session_id: Optional[str] = Depends(get_session_id)): + """Clear conversation history""" + chat_service.clear_history(session_id=session_id) + return {"message": "Historico limpo"} diff --git a/app/api/routes/dados_publicos.py b/app/api/routes/dados_publicos.py new file mode 100644 index 0000000000000000000000000000000000000000..842e82d2d17f48687b92bb8012105eab495a8051 --- /dev/null +++ b/app/api/routes/dados_publicos.py @@ -0,0 +1,155 @@ +""" +Public Data API Routes - IBGE and TSE data access +""" +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Any + +from app.services.ibge_api import ( + listar_estados, + listar_municipios, + buscar_municipio, + enriquecer_localizacao +) +from app.services.tse_api import ( + listar_eleicoes, + buscar_candidatos, + obter_candidato_detalhes, + buscar_politico +) + + +router = APIRouter(prefix="/dados", tags=["Public Data"]) + + +# ========== IBGE Endpoints ========== + +class EstadoResponse(BaseModel): + id: int + sigla: str + nome: str + regiao: str + + +class MunicipioResponse(BaseModel): + id: int + nome: str + estado_sigla: str + estado_nome: str + regiao: str + + +@router.get("/ibge/estados", response_model=List[EstadoResponse]) +async def get_estados(): + """List all Brazilian states""" + estados = await listar_estados() + return [EstadoResponse(**e.__dict__) for e in estados] + + +@router.get("/ibge/municipios/{uf}", response_model=List[MunicipioResponse]) +async def 
get_municipios(uf: str): + """List municipalities in a state""" + municipios = await listar_municipios(uf) + return [MunicipioResponse(**m.__dict__) for m in municipios] + + +@router.get("/ibge/buscar") +async def buscar_cidade( + nome: str = Query(..., min_length=2), + uf: Optional[str] = None +): + """Search for a municipality by name""" + municipios = await buscar_municipio(nome, uf) + return [MunicipioResponse(**m.__dict__) for m in municipios] + + +@router.get("/ibge/enriquecer") +async def enriquecer_cidade( + cidade: str = Query(..., min_length=2), + uf: Optional[str] = None +): + """Enrich a location name with IBGE data""" + return await enriquecer_localizacao(cidade, uf) + + +# ========== TSE Endpoints ========== + +class EleicaoResponse(BaseModel): + id: int + ano: int + descricao: str + turno: int + + +class CandidatoResponse(BaseModel): + id: int + nome: str + nome_urna: str + numero: str + cargo: str + partido_sigla: str + uf: str + municipio: str + situacao: str + total_bens: float + + +class CandidatoDetalhadoResponse(BaseModel): + id: int + nome: str + nome_urna: str + numero: str + cargo: str + partido_sigla: str + partido_nome: str + uf: str + municipio: str + situacao: str + data_nascimento: str + genero: str + grau_instrucao: str + ocupacao: str + total_bens: float + bens: List[Dict[str, Any]] + + +@router.get("/tse/eleicoes", response_model=List[EleicaoResponse]) +async def get_eleicoes(): + """List available elections""" + eleicoes = await listar_eleicoes() + return [EleicaoResponse(**e.__dict__) for e in eleicoes] + + +@router.get("/tse/candidatos") +async def get_candidatos( + nome: str = Query(..., min_length=3), + ano: int = Query(default=2024), + uf: Optional[str] = None, + cargo: Optional[str] = None +): + """Search for candidates by name""" + candidatos = await buscar_candidatos(nome, ano=ano, uf=uf, cargo=cargo) + return [CandidatoResponse(**c.__dict__) for c in candidatos] + + +@router.get("/tse/candidato/{id_candidato}") +async def 
get_candidato_detalhes( + id_candidato: int, + ano: int = Query(default=2024) +): + """Get detailed candidate information including assets""" + candidato = await obter_candidato_detalhes(id_candidato, ano=ano) + + if not candidato: + raise HTTPException(status_code=404, detail="Candidato não encontrado") + + return CandidatoDetalhadoResponse(**candidato.__dict__) + + +@router.get("/tse/politico") +async def pesquisar_politico(nome: str = Query(..., min_length=3)): + """ + Search for a politician across multiple elections. + Returns consolidated career information. + """ + return await buscar_politico(nome) diff --git a/app/api/routes/entities.py b/app/api/routes/entities.py new file mode 100644 index 0000000000000000000000000000000000000000..2727179e0e20a58a8a5893f1821a15c24df3013f --- /dev/null +++ b/app/api/routes/entities.py @@ -0,0 +1,353 @@ +""" +Entity CRUD Routes +""" +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy.orm import Session +from sqlalchemy import or_ +from typing import List, Optional + +from app.api.deps import get_scoped_db +from app.models import Entity, Relationship +from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge + +router = APIRouter(prefix="/entities", tags=["Entities"]) + + +@router.get("", response_model=List[EntityResponse]) +def list_entities( + type: Optional[str] = None, + search: Optional[str] = None, + project_id: Optional[str] = None, + limit: int = Query(default=50, le=200), + offset: int = 0, + db: Session = Depends(get_scoped_db) +): + """Lista todas as entidades com filtros opcionais""" + query = db.query(Entity) + + if project_id: + query = query.filter(Entity.project_id == project_id) + + if type: + query = query.filter(Entity.type == type) + + if search: + query = query.filter( + or_( + Entity.name.ilike(f"%{search}%"), + Entity.description.ilike(f"%{search}%") + ) + ) + + query = query.order_by(Entity.created_at.desc()) + return 
query.offset(offset).limit(limit).all() + + +@router.get("/types") +def get_entity_types(db: Session = Depends(get_scoped_db)): + """Retorna todos os tipos de entidade únicos""" + types = db.query(Entity.type).distinct().all() + return [t[0] for t in types] + + +@router.get("/suggest-merge") +async def suggest_merge_candidates( + limit: int = Query(default=10, le=50), + db: Session = Depends(get_scoped_db) +): + """ + Use LLM to find potential duplicate entities that could be merged. + Returns pairs of entities that might be the same. + """ + import httpx + import json + import re + from app.config import settings + + # Get all entities + entities = db.query(Entity).order_by(Entity.name).limit(200).all() + + if len(entities) < 2: + return {"candidates": [], "message": "Not enough entities to compare"} + + # Build entity list for LLM + entity_list = [] + for e in entities: + aliases = (e.properties or {}).get("aliases", []) + entity_list.append({ + "id": e.id, + "name": e.name, + "type": e.type, + "aliases": aliases[:5] if aliases else [] + }) + + # Ask LLM to find duplicates + prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes). + +Entidades: +{entity_list[:100]} + +Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade: +```json +{{ + "duplicates": [ + {{ + "id1": "uuid1", + "id2": "uuid2", + "confidence": 0.95, + "reason": "Mesmo nome com variação" + }} + ] +}} +``` + +Se não houver duplicatas, retorne: {{"duplicates": []}} +""" + + try: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.post( + "https://api.cerebras.ai/v1/chat/completions", + headers={ + "Authorization": f"Bearer {settings.cerebras_api_key}", + "Content-Type": "application/json" + }, + json={ + "model": "zai-glm-4.7", + "messages": [ + {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. 
@router.get("/suggest-merge")
async def suggest_merge_candidates(
    limit: int = Query(default=10, le=50),
    db: Session = Depends(get_scoped_db)
):
    """
    Use LLM to find potential duplicate entities that could be merged.
    Returns pairs of entities that might be the same.

    Best-effort endpoint: on any LLM/network failure it returns an empty
    candidate list with an "error" field rather than raising.
    """
    # Imported locally so the module loads even if httpx/config are unavailable
    # in contexts that never call this endpoint.
    import httpx
    import json
    import re
    from app.config import settings

    # Cap the candidate pool at 200 entities, ordered by name so runs are
    # deterministic for the same data.
    entities = db.query(Entity).order_by(Entity.name).limit(200).all()

    if len(entities) < 2:
        return {"candidates": [], "message": "Not enough entities to compare"}

    # Build a compact JSON-ish listing for the prompt; aliases trimmed to 5
    # per entity to keep the prompt small.
    entity_list = []
    for e in entities:
        aliases = (e.properties or {}).get("aliases", [])
        entity_list.append({
            "id": e.id,
            "name": e.name,
            "type": e.type,
            "aliases": aliases[:5] if aliases else []
        })

    # Only the first 100 entries go into the prompt (token-budget guard).
    prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes).

Entidades:
{entity_list[:100]}

Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade:
```json
{{
  "duplicates": [
    {{
      "id1": "uuid1",
      "id2": "uuid2",
      "confidence": 0.95,
      "reason": "Mesmo nome com variação"
    }}
  ]
}}
```

Se não houver duplicatas, retorne: {{"duplicates": []}}
"""

    try:
        # Low temperature: we want deterministic classification, not creativity.
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.cerebras.ai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.cerebras_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "zai-glm-4.7",
                    "messages": [
                        {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. Responda apenas em JSON válido."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "max_tokens": 1024
                }
            )

        if response.status_code != 200:
            return {"candidates": [], "error": "LLM API error"}

        data = response.json()
        content = data["choices"][0]["message"]["content"]

        # Greedy brace match extracts the JSON object even when the model
        # wraps it in a markdown fence or surrounding prose.
        json_match = re.search(r'\{.*\}', content, re.DOTALL)
        if json_match:
            result = json.loads(json_match.group(0))

            # Resolve the model's id pairs back to real entities; pairs whose
            # ids don't resolve are silently dropped.
            candidates = []
            for dup in result.get("duplicates", [])[:limit]:
                e1 = next((e for e in entities if e.id == dup.get("id1")), None)
                e2 = next((e for e in entities if e.id == dup.get("id2")), None)
                if e1 and e2:
                    candidates.append({
                        "entity1": {"id": e1.id, "name": e1.name, "type": e1.type},
                        "entity2": {"id": e2.id, "name": e2.name, "type": e2.type},
                        "confidence": dup.get("confidence", 0.5),
                        "reason": dup.get("reason", "Possível duplicata")
                    })

            return {"candidates": candidates}

        return {"candidates": [], "message": "No duplicates found"}

    except Exception as e:
        # Best-effort: never fail the request because the LLM call failed.
        return {"candidates": [], "error": str(e)}


@router.get("/{entity_id}", response_model=EntityResponse)
def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch a single entity by id; 404 if absent."""
    entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not entity:
        raise HTTPException(status_code=404, detail="Entity not found")
    return entity


@router.post("", response_model=EntityResponse, status_code=201)
def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
    """Create a new entity from the validated request payload."""
    db_entity = Entity(**entity.model_dump())
    db.add(db_entity)
    db.commit()
    db.refresh(db_entity)
    return db_entity
@router.put("/{entity_id}", response_model=EntityResponse)
def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_scoped_db)):
    """Update an existing entity.

    Only fields actually present in the payload are applied
    (exclude_unset makes this PATCH-like despite the PUT verb).
    Raises 404 if the entity does not exist.
    """
    db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not db_entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    update_data = entity.model_dump(exclude_unset=True)
    for field, value in update_data.items():
        setattr(db_entity, field, value)

    db.commit()
    db.refresh(db_entity)
    return db_entity


@router.delete("/{entity_id}")
def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
    """Delete an entity and every relationship that references it.

    Raises 404 if the entity does not exist.
    """
    db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not db_entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    # Remove dangling relationships first. synchronize_session=False skips
    # the in-Python 'evaluate' session-sync step, which can raise on
    # compound (or_) criteria; no in-session Relationship objects need
    # syncing here since the session is committed right after.
    db.query(Relationship).filter(
        or_(
            Relationship.source_id == entity_id,
            Relationship.target_id == entity_id
        )
    ).delete(synchronize_session=False)

    db.delete(db_entity)
    db.commit()
    return {"message": "Entity deleted"}
@router.get("/{entity_id}/connections", response_model=GraphData)
def get_entity_connections(
    entity_id: str,
    depth: int = Query(default=1, le=3),
    db: Session = Depends(get_scoped_db)
):
    """
    Return the connection graph around an entity, up to `depth` hops.
    Used by the frontend network visualization. Raises 404 for unknown ids.
    """
    entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    nodes = {}
    edges = []
    visited = set()

    def explore(eid: str, current_depth: int):
        # Depth-first walk; `visited` guards against cycles.
        if current_depth > depth or eid in visited:
            return
        visited.add(eid)

        e = db.query(Entity).filter(Entity.id == eid).first()
        if not e:
            return

        nodes[e.id] = GraphNode(
            id=e.id,
            type=e.type,
            name=e.name,
            properties=e.properties or {}
        )

        # Outgoing relationships
        for rel in db.query(Relationship).filter(Relationship.source_id == eid).all():
            edges.append(GraphEdge(
                source=rel.source_id,
                target=rel.target_id,
                type=rel.type,
                confidence=rel.confidence
            ))
            explore(rel.target_id, current_depth + 1)

        # Incoming relationships
        for rel in db.query(Relationship).filter(Relationship.target_id == eid).all():
            edges.append(GraphEdge(
                source=rel.source_id,
                target=rel.target_id,
                type=rel.type,
                confidence=rel.confidence
            ))
            explore(rel.source_id, current_depth + 1)

    explore(entity_id, 0)

    return GraphData(
        nodes=list(nodes.values()),
        edges=edges
    )


@router.post("/merge")
def merge_entities(
    primary_id: str,
    secondary_id: str,
    db: Session = Depends(get_scoped_db)
):
    """
    Merge two entities into one.

    The primary entity is kept, the secondary is deleted. All relationships
    from the secondary are transferred to the primary; self-loops and exact
    duplicate relationships produced by the transfer are removed.
    """
    if primary_id == secondary_id:
        raise HTTPException(status_code=400, detail="Cannot merge entity with itself")

    primary = db.query(Entity).filter(Entity.id == primary_id).first()
    secondary = db.query(Entity).filter(Entity.id == secondary_id).first()

    if not primary:
        raise HTTPException(status_code=404, detail="Primary entity not found")
    if not secondary:
        raise HTTPException(status_code=404, detail="Secondary entity not found")

    # Snapshot the secondary's fields BEFORE deleting it: once deleted and
    # committed, the ORM instance is detached and attribute access can raise.
    secondary_name = secondary.name
    secondary_source = secondary.source
    secondary_props = secondary.properties or {}

    primary_props = primary.properties or {}

    # Fold the secondary's name and aliases into the primary's alias list,
    # preserving order and skipping duplicates.
    aliases = primary_props.get("aliases", []) or []
    if secondary_name not in aliases:
        aliases.append(secondary_name)
    for alias in secondary_props.get("aliases", []) or []:
        if alias not in aliases:
            aliases.append(alias)
    primary_props["aliases"] = aliases

    # Keep an audit trail of merged-in entities.
    merge_history = primary_props.get("merged_from", []) or []
    merge_history.append({
        "id": secondary_id,
        "name": secondary_name,
        "source": secondary_source
    })
    primary_props["merged_from"] = merge_history

    # Adopt the secondary's description only if the primary has none.
    if not primary.description and secondary.description:
        primary.description = secondary.description

    # Reassign (not mutate in place) so JSON-column change detection fires.
    primary.properties = primary_props

    # Re-point all relationships from the secondary to the primary.
    db.query(Relationship).filter(
        Relationship.source_id == secondary_id
    ).update({"source_id": primary_id}, synchronize_session=False)
    db.query(Relationship).filter(
        Relationship.target_id == secondary_id
    ).update({"target_id": primary_id}, synchronize_session=False)

    # Clean up transfer artifacts: primary<->secondary links collapse into
    # self-loops, and parallel links collapse into exact duplicates
    # (same source, target, type) — keep the first of each.
    seen = set()
    for rel in db.query(Relationship).filter(
        or_(
            Relationship.source_id == primary_id,
            Relationship.target_id == primary_id
        )
    ).all():
        if rel.source_id == rel.target_id:
            db.delete(rel)
            continue
        key = (rel.source_id, rel.target_id, rel.type)
        if key in seen:
            db.delete(rel)
        else:
            seen.add(key)

    db.delete(secondary)
    db.commit()
    db.refresh(primary)

    return {
        "message": f"Merged '{secondary_name}' into '{primary.name}'",
        "primary": {
            "id": primary.id,
            "name": primary.name,
            "aliases": aliases
        }
    }
@router.get("/", response_model=List[EventResponse])
def list_events(
    type: Optional[str] = None,
    search: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    limit: int = Query(default=50, le=200),
    offset: int = 0,
    db: Session = Depends(get_scoped_db)
):
    """List events, optionally filtered by type, free text and date window."""
    q = db.query(Event)

    if type:
        q = q.filter(Event.type == type)
    if search:
        pattern = f"%{search}%"
        q = q.filter(
            or_(
                Event.title.ilike(pattern),
                Event.description.ilike(pattern)
            )
        )
    if start_date:
        q = q.filter(Event.event_date >= start_date)
    if end_date:
        q = q.filter(Event.event_date <= end_date)

    # Newest first; undated events sort last.
    q = q.order_by(Event.event_date.desc().nullslast())
    return q.offset(offset).limit(limit).all()


@router.get("/types")
def get_event_types(db: Session = Depends(get_scoped_db)):
    """Return the distinct event types in the store."""
    return [row[0] for row in db.query(Event.type).distinct()]


@router.get("/timeline")
def get_timeline(
    entity_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    db: Session = Depends(get_scoped_db)
):
    """
    Oldest-first list of dated events in a compact timeline shape,
    optionally scoped to events referencing one entity.
    """
    q = db.query(Event).filter(Event.event_date.isnot(None))
    if entity_id:
        q = q.filter(Event.entity_ids.contains([entity_id]))

    rows = q.order_by(Event.event_date.asc()).limit(limit).all()
    return [
        {
            "id": ev.id,
            "title": ev.title,
            "date": ev.event_date.isoformat() if ev.event_date else None,
            "type": ev.type,
            "location": ev.location_name
        }
        for ev in rows
    ]


@router.get("/{event_id}", response_model=EventResponse)
def get_event(event_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch a single event by id; 404 if absent."""
    found = db.query(Event).filter(Event.id == event_id).first()
    if found is None:
        raise HTTPException(status_code=404, detail="Event not found")
    return found


@router.post("/", response_model=EventResponse, status_code=201)
def create_event(event: EventCreate, db: Session = Depends(get_scoped_db)):
    """Persist a new event from the request payload."""
    record = Event(**event.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)
    return record


@router.delete("/{event_id}")
def delete_event(event_id: str, db: Session = Depends(get_scoped_db)):
    """Delete an event by id; 404 if absent."""
    record = db.query(Event).filter(Event.id == event_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Event not found")

    db.delete(record)
    db.commit()
    return {"message": "Event deleted"}
@router.get("")
async def get_graph(
    entity_type: Optional[str] = Query(None, description="Filter by entity type"),
    limit: int = Query(100, le=500, description="Maximum number of entities"),
    db: Session = Depends(get_scoped_db)
):
    """
    Whole-graph snapshot for visualization: nodes (entities) and edges
    (relationships), shaped for Cytoscape.js.
    """
    try:
        entity_query = db.query(Entity)
        if entity_type:
            entity_query = entity_query.filter(Entity.type == entity_type)

        selected = entity_query.limit(limit).all()
        id_set = {ent.id for ent in selected}

        # Fetch any relationship touching a selected entity...
        rels = db.query(Relationship).filter(
            or_(
                Relationship.source_id.in_(id_set),
                Relationship.target_id.in_(id_set)
            )
        ).all()

        nodes = [
            {
                "data": {
                    "id": ent.id,
                    # Truncate long names for display; fullName keeps the original.
                    "label": ent.name if len(ent.name) <= 30 else ent.name[:30] + "...",
                    "fullName": ent.name,
                    "type": ent.type,
                    "description": ent.description[:100] if ent.description else "",
                    "source": ent.source or "unknown"
                }
            }
            for ent in selected
        ]

        # ...but only keep edges with BOTH endpoints in the node set,
        # so the frontend never sees a dangling edge.
        edges = [
            {
                "data": {
                    "id": rel.id,
                    "source": rel.source_id,
                    "target": rel.target_id,
                    "label": rel.type,
                    "type": rel.type
                }
            }
            for rel in rels
            if rel.source_id in id_set and rel.target_id in id_set
        ]

        return {
            "nodes": nodes,
            "edges": edges,
            "stats": {
                "total_nodes": len(nodes),
                "total_edges": len(edges)
            }
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get graph: {str(e)}")
e.name[:30] + "..." if len(e.name) > 30 else e.name, + "fullName": e.name, + "type": e.type, + "description": e.description[:100] if e.description else "", + "source": e.source or "unknown", + "isCentral": e.id == entity_id + } + }) + + edges = [] + for r in relationships: + edges.append({ + "data": { + "id": r.id, + "source": r.source_id, + "target": r.target_id, + "label": r.type, + "type": r.type + } + }) + + return { + "central": { + "id": central.id, + "name": central.name, + "type": central.type + }, + "nodes": nodes, + "edges": edges, + "stats": { + "total_nodes": len(nodes), + "total_edges": len(edges), + "depth": depth + } + } + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to get entity graph: {str(e)}") + diff --git a/app/api/routes/ingest.py b/app/api/routes/ingest.py new file mode 100644 index 0000000000000000000000000000000000000000..d2216481b8ad615180d4b4bfad5d7f24e453774a --- /dev/null +++ b/app/api/routes/ingest.py @@ -0,0 +1,341 @@ +""" +Data Ingestion Routes +Endpoints para importar dados de fontes externas +""" +from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks +from sqlalchemy.orm import Session +from typing import Optional, List +from datetime import datetime +import asyncio + +from app.api.deps import get_scoped_db +from app.models import Entity, Document, Relationship +from app.schemas import EntityResponse, DocumentResponse +from app.services.ingestion import wikipedia_scraper, news_service +from app.services.nlp import entity_extractor +from app.services.geocoding import geocode + +router = APIRouter(prefix="/ingest", tags=["Data Ingestion"]) + + +def parse_event_date(date_str): + """Parse date string to datetime object""" + if not date_str: + return None + try: + # Try YYYY-MM-DD format + return datetime.strptime(date_str[:10], "%Y-%m-%d") + except: + try: + # Try YYYY format + return datetime.strptime(date_str[:4], "%Y") + except: + return None + + +# 
# ========== Wikipedia ==========

@router.get("/wikipedia/search")
def search_wikipedia(q: str, limit: int = 10):
    """Search Wikipedia articles matching `q`; returns raw scraper results."""
    results = wikipedia_scraper.search(q, limit)
    return results


@router.post("/wikipedia/entity", response_model=EntityResponse)
async def import_from_wikipedia(
    title: str,
    entity_type: str = "person",
    project_id: Optional[str] = None,
    auto_extract: bool = True,
    db: Session = Depends(get_scoped_db)
):
    """
    Import an entity from Wikipedia.

    entity_type: person, organization, location
    project_id: project to associate the entity with
    auto_extract: if True, run LLM NER on the article text to create
    related entities and relationships as a side effect.

    Idempotent on (title, source="wikipedia"): returns the existing
    entity if one was already imported.
    """
    # Check if entity already exists
    existing = db.query(Entity).filter(
        Entity.name == title,
        Entity.source == "wikipedia"
    ).first()

    if existing:
        return existing

    # Scrape based on type; unknown types fall back to the person scraper.
    if entity_type == "person":
        data = wikipedia_scraper.scrape_person(title)
    elif entity_type == "organization":
        data = wikipedia_scraper.scrape_organization(title)
    elif entity_type == "location":
        data = wikipedia_scraper.scrape_location(title)
    else:
        data = wikipedia_scraper.scrape_person(title)  # default

    if not data:
        raise HTTPException(status_code=404, detail="Article not found on Wikipedia")

    # Create and persist the main entity first, so it exists even if the
    # optional extraction step below fails.
    entity = Entity(**data)
    entity.project_id = project_id
    db.add(entity)
    db.commit()
    db.refresh(entity)

    # Auto-extract entities and relationships using LLM (best-effort).
    if auto_extract and data.get("description"):
        try:
            # Limit text to avoid token limits
            text_to_analyze = data["description"][:3000]
            result = await entity_extractor.extract(text_to_analyze)

            # Create extracted entities, reusing existing ones when a
            # loose name match is found.
            created_entities = {}
            for ext_entity in result.entities:
                # Skip if same as main entity
                if ext_entity.name.lower() == title.lower():
                    created_entities[ext_entity.name] = entity
                    continue

                # Check if entity exists (by similar name).
                # NOTE(review): ilike "%name%" is a substring match — may
                # attach to a different entity with a containing name.
                existing_ent = db.query(Entity).filter(
                    Entity.name.ilike(f"%{ext_entity.name}%")
                ).first()

                if existing_ent:
                    created_entities[ext_entity.name] = existing_ent
                else:
                    # Get coordinates for location entities
                    lat, lng = None, None
                    if ext_entity.type == "location":
                        coords = await geocode(ext_entity.name)
                        if coords:
                            lat, lng = coords

                    # event_date is optional on extracted entities.
                    event_date = parse_event_date(getattr(ext_entity, 'event_date', None))

                    new_ent = Entity(
                        name=ext_entity.name,
                        # Constrain to known types; anything else becomes "person".
                        type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
                        description=ext_entity.description or ext_entity.role,
                        source="wikipedia_extraction",
                        latitude=lat,
                        longitude=lng,
                        event_date=event_date,
                        project_id=project_id,
                        properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "extracted_from": title}
                    )
                    db.add(new_ent)
                    # Commit per entity so ids are available for relationships.
                    db.commit()
                    db.refresh(new_ent)
                    created_entities[ext_entity.name] = new_ent

            # Create relationships, resolving endpoints first against the
            # entities created above, then by loose name match in the DB.
            for rel in result.relationships:
                source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
                target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()

                if source_ent and target_ent and source_ent.id != target_ent.id:
                    # Check if relationship exists (dedup on source/target/type).
                    existing_rel = db.query(Relationship).filter(
                        Relationship.source_id == source_ent.id,
                        Relationship.target_id == target_ent.id,
                        Relationship.type == rel.relationship_type
                    ).first()

                    if not existing_rel:
                        # Parse relationship event_date
                        rel_event_date = parse_event_date(getattr(rel, 'event_date', None))

                        new_rel = Relationship(
                            source_id=source_ent.id,
                            target_id=target_ent.id,
                            type=rel.relationship_type,
                            event_date=rel_event_date,
                            properties={"context": rel.context, "extracted_from": title}
                        )
                        db.add(new_rel)

            db.commit()

        except Exception as e:
            print(f"NER extraction error: {e}")
            # Continue without extraction if it fails

    return entity
error: {e}") + # Continue without extraction if it fails + + return entity + + +# ========== News ========== + +@router.get("/news/feeds") +def list_available_feeds(): + """Lista os feeds de notícias disponíveis""" + return list(news_service.RSS_FEEDS.keys()) + + +@router.get("/news/fetch") +def fetch_news(feed: Optional[str] = None): + """ + Busca notícias dos feeds RSS + Se feed não for especificado, busca de todos + """ + if feed: + if feed not in news_service.RSS_FEEDS: + raise HTTPException(status_code=404, detail="Feed not found") + url = news_service.RSS_FEEDS[feed] + articles = news_service.fetch_feed(url) + else: + articles = news_service.fetch_all_feeds() + + return articles + + +@router.get("/news/search") +def search_news(q: str): + """Busca notícias por palavra-chave via Google News""" + return news_service.search_news(q) + + +@router.post("/news/import") +async def import_news( + query: Optional[str] = None, + feed: Optional[str] = None, + auto_extract: bool = True, + db: Session = Depends(get_scoped_db) +): + """ + Importa notícias como documentos no sistema + auto_extract: Se True, usa LLM para extrair entidades de cada notícia + """ + if query: + articles = news_service.search_news(query) + elif feed: + if feed not in news_service.RSS_FEEDS: + raise HTTPException(status_code=404, detail="Feed not found") + articles = news_service.fetch_feed(news_service.RSS_FEEDS[feed]) + else: + articles = news_service.fetch_all_feeds() + + imported = 0 + extracted_entities = 0 + + for article in articles: + # Check if document already exists (by URL) + if article.get("url"): + existing = db.query(Document).filter( + Document.source_url == article["url"] + ).first() + if existing: + continue + + doc_data = news_service.to_document(article) + doc = Document(**doc_data) + db.add(doc) + db.commit() + imported += 1 + + # Extract entities from article content + if auto_extract: + try: + text_to_analyze = f"{article.get('title', '')} {article.get('description', 
'')}".strip() + if len(text_to_analyze) >= 20: + result = await entity_extractor.extract(text_to_analyze[:2000]) + + created_entities = {} + for ext_entity in result.entities: + # Check if entity exists + existing_ent = db.query(Entity).filter( + Entity.name.ilike(f"%{ext_entity.name}%") + ).first() + + if existing_ent: + created_entities[ext_entity.name] = existing_ent + else: + # Get coordinates for location entities + lat, lng = None, None + if ext_entity.type == "location": + coords = await geocode(ext_entity.name) + if coords: + lat, lng = coords + + new_ent = Entity( + name=ext_entity.name, + type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person", + description=ext_entity.description or ext_entity.role, + source="news_extraction", + latitude=lat, + longitude=lng, + properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "from_article": article.get('title', '')} + ) + db.add(new_ent) + db.commit() + db.refresh(new_ent) + created_entities[ext_entity.name] = new_ent + extracted_entities += 1 + + # Create relationships + for rel in result.relationships: + source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first() + target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first() + + if source_ent and target_ent and source_ent.id != target_ent.id: + existing_rel = db.query(Relationship).filter( + Relationship.source_id == source_ent.id, + Relationship.target_id == target_ent.id, + Relationship.type == rel.relationship_type + ).first() + + if not existing_rel: + new_rel = Relationship( + source_id=source_ent.id, + target_id=target_ent.id, + type=rel.relationship_type, + properties={"context": rel.context} + ) + db.add(new_rel) + + db.commit() + + except Exception as e: + print(f"NER extraction error for article: {e}") + # Continue without extraction + + return { + "message": f"Imported 
{imported} articles", + "total_found": len(articles), + "extracted_entities": extracted_entities + } + + +# ========== Manual Import ========== + +@router.post("/bulk/entities") +def bulk_import_entities( + entities: List[dict], + db: Session = Depends(get_scoped_db) +): + """ + Importa múltiplas entidades de uma vez + Útil para importar de CSV/JSON + """ + imported = 0 + for entity_data in entities: + entity = Entity( + type=entity_data.get("type", "unknown"), + name=entity_data.get("name", "Unnamed"), + description=entity_data.get("description"), + properties=entity_data.get("properties", {}), + latitude=entity_data.get("latitude"), + longitude=entity_data.get("longitude"), + source=entity_data.get("source", "manual") + ) + db.add(entity) + imported += 1 + + db.commit() + + return {"message": f"Imported {imported} entities"} diff --git a/app/api/routes/investigate.py b/app/api/routes/investigate.py new file mode 100644 index 0000000000000000000000000000000000000000..646857df8ac0eed0f99ac443367d25c7a6af1512 --- /dev/null +++ b/app/api/routes/investigate.py @@ -0,0 +1,207 @@ +""" +Investigation API Routes - Build dossiers on companies and people +""" +from fastapi import APIRouter, HTTPException, Depends +from pydantic import BaseModel, Field +from typing import Optional, List, Dict, Any +from sqlalchemy.orm import Session + +from app.services.investigation import ( + investigar_empresa, + investigar_pessoa, + dossier_to_dict +) +from app.services.brazil_apis import consultar_cnpj +from app.services.investigator_agent import investigator_agent +from app.api.deps import get_scoped_db + + +router = APIRouter(prefix="/investigate", tags=["Investigation"]) + + +class InvestigateCompanyRequest(BaseModel): + """Request to investigate a company""" + cnpj: str = Field(..., min_length=11, description="CNPJ da empresa") + + +class InvestigatePersonRequest(BaseModel): + """Request to investigate a person""" + nome: str = Field(..., min_length=2, description="Nome da pessoa") 
class DossierResponse(BaseModel):
    """Dossier response"""
    tipo: str                       # dossier kind: company or person
    alvo: str                       # investigation target (name)
    cnpj_cpf: Optional[str]         # tax id when known
    red_flags: List[str]            # human-readable warnings found
    score_risco: int                # aggregated risk score
    data_geracao: str               # generation timestamp (string)
    fonte_dados: List[str]          # data sources consulted
    secoes: Dict[str, Any]          # free-form dossier sections


class CNPJResponse(BaseModel):
    """Quick CNPJ lookup response"""
    cnpj: str
    razao_social: str
    nome_fantasia: str
    situacao: str
    data_abertura: str
    capital_social: float
    endereco: str                   # pre-formatted single-line address
    telefone: str
    email: str
    atividade: str                  # main CNAE code + description
    socios: List[Dict[str, Any]]    # partners/owners


@router.post("/company", response_model=DossierResponse)
async def investigate_company(request: InvestigateCompanyRequest):
    """
    Build a comprehensive dossier on a company.

    Collects:
    - Cadastral data from CNPJ
    - Partners/owners
    - Sanctions (CEIS, CNEP, CEPIM)
    - News and media mentions
    - Related entities

    Returns risk score and red flags.
    """
    try:
        dossier = await investigar_empresa(request.cnpj)
        return DossierResponse(**dossier_to_dict(dossier))

    except Exception as e:
        # Surface any service failure as a 500 with the original message.
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/person", response_model=DossierResponse)
async def investigate_person(request: InvestigatePersonRequest):
    """
    Build a dossier on a person.

    Note: Due to LGPD, personal data is limited.
    Mainly uses web search for public information.
    """
    try:
        dossier = await investigar_pessoa(request.nome, request.cpf)
        return DossierResponse(**dossier_to_dict(dossier))

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/cnpj/{cnpj}", response_model=CNPJResponse)
async def lookup_cnpj(cnpj: str):
    """
    Quick CNPJ lookup - returns basic company data.

    404 when the CNPJ is unknown; other service failures map to 500.
    """
    try:
        data = await consultar_cnpj(cnpj)

        if not data:
            raise HTTPException(status_code=404, detail="CNPJ não encontrado")

        return CNPJResponse(
            cnpj=data.cnpj,
            razao_social=data.razao_social,
            nome_fantasia=data.nome_fantasia,
            situacao=data.situacao,
            data_abertura=data.data_abertura,
            capital_social=data.capital_social,
            # Flatten the address components into one display string.
            endereco=f"{data.logradouro}, {data.numero} - {data.bairro}, {data.cidade}/{data.uf}",
            telefone=data.telefone,
            email=data.email,
            atividade=f"{data.cnae_principal} - {data.cnae_descricao}",
            socios=data.socios
        )

    except HTTPException:
        # Let the deliberate 404 above pass through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
class ProjectCreate(BaseModel):
    """Payload for creating or updating a project workspace."""
    name: str
    description: Optional[str] = None
    color: str = "#00d4ff"  # accent color shown in the UI
    icon: str = "folder"


class ProjectResponse(BaseModel):
    """Project plus a derived count of entities assigned to it."""
    id: str
    name: str
    description: Optional[str]
    color: str
    icon: str
    entity_count: int = 0
    created_at: datetime

    class Config:
        from_attributes = True


@router.get("", response_model=List[ProjectResponse])
def list_projects(db: Session = Depends(get_scoped_db)):
    """List all projects (newest first) with their entity counts."""
    projects = db.query(Project).order_by(Project.created_at.desc()).all()

    result = []
    for p in projects:
        # One count query per project; fine at workspace scale.
        entity_count = db.query(Entity).filter(Entity.project_id == p.id).count()
        result.append(ProjectResponse(
            id=p.id,
            name=p.name,
            description=p.description,
            color=p.color,
            icon=p.icon,
            entity_count=entity_count,
            created_at=p.created_at
        ))

    return result


@router.post("", response_model=ProjectResponse)
def create_project(project: ProjectCreate, db: Session = Depends(get_scoped_db)):
    """Create a new project workspace."""
    new_project = Project(
        name=project.name,
        description=project.description,
        color=project.color,
        icon=project.icon
    )
    db.add(new_project)
    db.commit()
    db.refresh(new_project)

    return ProjectResponse(
        id=new_project.id,
        name=new_project.name,
        description=new_project.description,
        color=new_project.color,
        icon=new_project.icon,
        entity_count=0,
        created_at=new_project.created_at
    )


@router.get("/{project_id}", response_model=ProjectResponse)
def get_project(project_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch a project by id with its entity count; 404 if absent."""
    project = db.query(Project).filter(Project.id == project_id).first()

    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    entity_count = db.query(Entity).filter(Entity.project_id == project_id).count()

    return ProjectResponse(
        id=project.id,
        name=project.name,
        description=project.description,
        color=project.color,
        icon=project.icon,
        entity_count=entity_count,
        created_at=project.created_at
    )


@router.delete("/{project_id}")
def delete_project(project_id: str, db: Session = Depends(get_scoped_db)):
    """Delete a project; its entities/relationships are detached, not deleted."""
    project = db.query(Project).filter(Project.id == project_id).first()

    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Capture the name BEFORE deleting: after db.delete() + commit the ORM
    # instance is detached and attribute access can raise.
    project_name = project.name

    # Orphan (don't delete) the project's entities and relationships.
    db.query(Entity).filter(Entity.project_id == project_id).update({"project_id": None})
    db.query(Relationship).filter(Relationship.project_id == project_id).update({"project_id": None})

    db.delete(project)
    db.commit()

    return {"message": f"Project '{project_name}' deleted"}


@router.put("/{project_id}")
def update_project(project_id: str, project: ProjectCreate, db: Session = Depends(get_scoped_db)):
    """Replace a project's editable fields; 404 if absent."""
    existing = db.query(Project).filter(Project.id == project_id).first()

    if not existing:
        raise HTTPException(status_code=404, detail="Project not found")

    existing.name = project.name
    existing.description = project.description
    existing.color = project.color
    existing.icon = project.icon
    db.commit()

    return {"message": "Project updated"}
query.filter(Relationship.target_id == target_id) + + return query.limit(limit).all() + + +@router.get("/types") +def get_relationship_types(db: Session = Depends(get_scoped_db)): + """Retorna todos os tipos de relacionamento unicos""" + types = db.query(Relationship.type).distinct().all() + return [t[0] for t in types] + + +@router.post("/", response_model=RelationshipResponse, status_code=201) +def create_relationship( + rel: RelationshipCreate, + db: Session = Depends(get_scoped_db) +): + """Cria um novo relacionamento entre entidades""" + source = db.query(Entity).filter(Entity.id == rel.source_id).first() + target = db.query(Entity).filter(Entity.id == rel.target_id).first() + + if not source: + raise HTTPException(status_code=404, detail="Source entity not found") + if not target: + raise HTTPException(status_code=404, detail="Target entity not found") + + db_rel = Relationship(**rel.model_dump()) + db.add(db_rel) + db.commit() + db.refresh(db_rel) + return db_rel + + +@router.delete("/{relationship_id}") +def delete_relationship( + relationship_id: str, + db: Session = Depends(get_scoped_db) +): + """Deleta um relacionamento""" + db_rel = db.query(Relationship).filter(Relationship.id == relationship_id).first() + if not db_rel: + raise HTTPException(status_code=404, detail="Relationship not found") + + db.delete(db_rel) + db.commit() + return {"message": "Relationship deleted"} diff --git a/app/api/routes/research.py b/app/api/routes/research.py new file mode 100644 index 0000000000000000000000000000000000000000..41eb6efdb31bbc7cb0da78df28ce780a75fc0f9b --- /dev/null +++ b/app/api/routes/research.py @@ -0,0 +1,158 @@ +""" +Research API Routes - Deep research with automatic entity extraction +""" +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from typing import Optional, List +import traceback +from sqlalchemy.orm import Session + +from app.api.deps import get_scoped_db +from app.services import lancer +from 
app.services.nlp import entity_extractor +from app.services.geocoding import geocode +from app.models.entity import Entity, Relationship + + +router = APIRouter(prefix="/research", tags=["Research"]) + + +class ResearchRequest(BaseModel): + """Request model for research""" + query: str = Field(..., min_length=3, description="Research query") + mode: str = Field(default="search", description="Research mode: search, deep, heavy") + max_results: int = Field(default=10, le=20) + auto_extract: bool = Field(default=True, description="Auto-extract entities using NER") + + +class ResearchResponse(BaseModel): + """Response model for research""" + query: str + answer: Optional[str] + sources: List[dict] + citations: List[dict] + extracted_entities: int + extracted_relationships: int + processing_time_ms: float + + +@router.post("", response_model=ResearchResponse) +async def research(request: ResearchRequest, db: Session = Depends(get_scoped_db)): + """ + Perform AI-powered research using Lancer API and optionally extract entities. 
+ + Modes: + - search: Fast search with AI synthesis + - deep: Multi-dimensional deep research (slower, more comprehensive) + - heavy: Search with full content scraping + """ + try: + # Call Lancer API based on mode + if request.mode == "deep": + result = await lancer.deep_research(request.query) + elif request.mode == "heavy": + result = await lancer.heavy_search(request.query, request.max_results) + else: + result = await lancer.search(request.query, request.max_results) + + extracted_entities = 0 + extracted_relationships = 0 + + # Extract entities if enabled + if request.auto_extract and result.raw_text: + try: + # Limit text to avoid token limits + text_to_analyze = result.raw_text[:5000] + ner_result = await entity_extractor.extract(text_to_analyze) + + created_entities = {} + + # Create entities + for entity in ner_result.entities: + # Check if exists + existing = db.query(Entity).filter( + Entity.name.ilike(f"%{entity.name}%") + ).first() + + if existing: + created_entities[entity.name] = existing + else: + # Geocode if location + lat, lng = None, None + if entity.type == "location": + coords = await geocode(entity.name) + if coords: + lat, lng = coords + + new_entity = Entity( + name=entity.name, + type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person", + description=entity.description or entity.role or "", + source="lancer_research", + latitude=lat, + longitude=lng, + properties={ + "role": entity.role, + "aliases": entity.aliases, + "research_query": request.query + } + ) + db.add(new_entity) + db.commit() + db.refresh(new_entity) + created_entities[entity.name] = new_entity + extracted_entities += 1 + + # Create relationships + for rel in ner_result.relationships: + source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first() + target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first() + + if 
source_ent and target_ent and source_ent.id != target_ent.id: + existing_rel = db.query(Relationship).filter( + Relationship.source_id == source_ent.id, + Relationship.target_id == target_ent.id, + Relationship.type == rel.relationship_type + ).first() + + if not existing_rel: + new_rel = Relationship( + source_id=source_ent.id, + target_id=target_ent.id, + type=rel.relationship_type, + properties={"context": rel.context, "research_query": request.query} + ) + db.add(new_rel) + extracted_relationships += 1 + + db.commit() + + except Exception as e: + print(f"NER extraction error: {e}") + traceback.print_exc() + + # Prepare sources for response + sources = [ + { + "title": r.title, + "url": r.url, + "content": r.content[:300] if r.content else "", + "score": r.score + } + for r in result.results[:10] + ] + + return ResearchResponse( + query=result.query, + answer=result.answer, + sources=sources, + citations=result.citations, + extracted_entities=extracted_entities, + extracted_relationships=extracted_relationships, + processing_time_ms=result.processing_time_ms + ) + + except Exception as e: + print(f"Research error: {e}") + traceback.print_exc() + raise HTTPException(status_code=500, detail=str(e)) diff --git a/app/api/routes/search.py b/app/api/routes/search.py new file mode 100644 index 0000000000000000000000000000000000000000..27ad925fb6abc0eb121ff2660bc06fd55fd322f0 --- /dev/null +++ b/app/api/routes/search.py @@ -0,0 +1,126 @@ +""" +Search and Analytics Routes +""" +from fastapi import APIRouter, Depends, Query +from sqlalchemy.orm import Session +from sqlalchemy import or_, func +from typing import Optional + +from app.api.deps import get_scoped_db +from app.models import Entity, Relationship, Event, Document +from app.schemas import SearchResult, SystemStats + +router = APIRouter(prefix="/search", tags=["Search"]) + + +@router.get("", response_model=SearchResult) +def global_search( + q: str = Query(..., min_length=2, description="Search query"), + types: 
Optional[str] = Query(None, description="Entity types (comma-separated)"), + limit: int = Query(default=20, le=100), + db: Session = Depends(get_scoped_db) +): + """ + Busca global em todas as entidades, eventos e documentos. + """ + search_term = f"%{q}%" + type_filter = types.split(",") if types else None + + entity_query = db.query(Entity).filter( + or_( + Entity.name.ilike(search_term), + Entity.description.ilike(search_term) + ) + ) + if type_filter: + entity_query = entity_query.filter(Entity.type.in_(type_filter)) + entities = entity_query.limit(limit).all() + + events = db.query(Event).filter( + or_( + Event.title.ilike(search_term), + Event.description.ilike(search_term) + ) + ).limit(limit).all() + + documents = db.query(Document).filter( + or_( + Document.title.ilike(search_term), + Document.content.ilike(search_term) + ) + ).limit(limit).all() + + return SearchResult( + entities=entities, + events=events, + documents=documents + ) + + +@router.get("/stats", response_model=SystemStats) +def get_system_stats(db: Session = Depends(get_scoped_db)): + """ + Retorna estatisticas gerais do sistema. 
+ """ + total_entities = db.query(Entity).count() + total_relationships = db.query(Relationship).count() + total_events = db.query(Event).count() + total_documents = db.query(Document).count() + + type_counts = db.query( + Entity.type, + func.count(Entity.id) + ).group_by(Entity.type).all() + + entities_by_type = {t: c for t, c in type_counts} + + recent = db.query(Entity).order_by(Entity.created_at.desc()).limit(10).all() + recent_activity = [ + { + "id": e.id, + "type": e.type, + "name": e.name, + "created_at": e.created_at.isoformat() + } + for e in recent + ] + + return SystemStats( + total_entities=total_entities, + total_relationships=total_relationships, + total_events=total_events, + total_documents=total_documents, + entities_by_type=entities_by_type, + recent_activity=recent_activity + ) + + +@router.get("/geo") +def get_geo_data( + entity_type: Optional[str] = None, + db: Session = Depends(get_scoped_db) +): + """ + Retorna entidades com geolocalizacao. + """ + query = db.query(Entity).filter( + Entity.latitude.isnot(None), + Entity.longitude.isnot(None) + ) + + if entity_type: + query = query.filter(Entity.type == entity_type) + + entities = query.all() + + return [ + { + "id": e.id, + "type": e.type, + "name": e.name, + "lat": e.latitude, + "lng": e.longitude, + "properties": e.properties + } + for e in entities + ] diff --git a/app/api/routes/session.py b/app/api/routes/session.py new file mode 100644 index 0000000000000000000000000000000000000000..c81ea29a5363a585f72aa5f0df7e2798292c189e --- /dev/null +++ b/app/api/routes/session.py @@ -0,0 +1,44 @@ +""" +Session management routes +""" +from fastapi import APIRouter, Header, Cookie, Response, Request +from typing import Optional +import uuid + +from app.core.database import create_new_session_id +from app.config import settings + +router = APIRouter(prefix="/session", tags=["Session"]) + + +@router.post("/create") +def create_session(response: Response, request: Request): + """Create a new session 
and return session_id""" + session_id = create_new_session_id() + secure = settings.cookie_secure + samesite = settings.cookie_samesite + proto = request.headers.get("x-forwarded-proto", request.url.scheme) + if proto != "https" and secure: + secure = False + samesite = "lax" + response.set_cookie( + key="numidium_session", + value=session_id, + max_age=60*60*24*365, # 1 year + httponly=True, + samesite=samesite, + secure=secure + ) + return {"session_id": session_id} + + +@router.get("/current") +def get_current_session( + numidium_session: Optional[str] = Cookie(None), + x_session_id: Optional[str] = Header(None) +): + """Get current session ID""" + session_id = x_session_id or numidium_session + if not session_id: + return {"session_id": None, "message": "No session. Call POST /session/create"} + return {"session_id": session_id} diff --git a/app/api/routes/timeline.py b/app/api/routes/timeline.py new file mode 100644 index 0000000000000000000000000000000000000000..fa45453faf038d34277ffb6a5f1481a2748a8b0e --- /dev/null +++ b/app/api/routes/timeline.py @@ -0,0 +1,165 @@ +""" +Timeline API Routes - Temporal view of entities and relationships +""" +from fastapi import APIRouter, Depends, Query +from pydantic import BaseModel +from typing import Optional, List, Dict, Any +from datetime import datetime, timedelta +from collections import defaultdict +from sqlalchemy.orm import Session + +from app.api.deps import get_scoped_db +from app.models.entity import Entity, Relationship + + +router = APIRouter(prefix="/timeline", tags=["Timeline"]) + + +class TimelineEvent(BaseModel): + id: str + type: str # "entity" or "relationship" + entity_type: Optional[str] = None + name: str + description: Optional[str] = None + date: str + icon: str + + +class TimelineGroup(BaseModel): + date: str + label: str + events: List[TimelineEvent] + + +class TimelineResponse(BaseModel): + groups: List[TimelineGroup] + total_events: int + + +@router.get("", response_model=TimelineResponse) 
+async def get_timeline( + days: int = Query(default=30, ge=1, le=365), + entity_type: Optional[str] = None, + limit: int = Query(default=100, ge=1, le=500), + db: Session = Depends(get_scoped_db) +): + """ + Get timeline of recent entities and relationships. + Groups events by date. + """ + # Calculate date range + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + events = [] + + # Get entities + query = db.query(Entity).filter( + Entity.created_at >= start_date + ) + + if entity_type: + query = query.filter(Entity.type == entity_type) + + entities = query.order_by(Entity.created_at.desc()).limit(limit).all() + + icon_map = { + "person": "👤", + "organization": "🏢", + "location": "📍", + "event": "📅", + "concept": "💡", + "product": "📦" + } + + for e in entities: + # Prefer event_date over created_at + date = e.event_date if e.event_date else e.created_at + events.append(TimelineEvent( + id=e.id, + type="entity", + entity_type=e.type, + name=e.name, + description=e.description[:100] if e.description else None, + date=date.isoformat() if date else datetime.now().isoformat(), + icon=icon_map.get(e.type, "📄") + )) + + # Get relationships + relationships = db.query(Relationship).filter( + Relationship.created_at >= start_date + ).order_by(Relationship.created_at.desc()).limit(limit // 2).all() + + for r in relationships: + source = db.query(Entity).filter(Entity.id == r.source_id).first() + target = db.query(Entity).filter(Entity.id == r.target_id).first() + + if source and target: + # Prefer event_date over created_at + date = r.event_date if r.event_date else r.created_at + events.append(TimelineEvent( + id=r.id, + type="relationship", + name=f"{source.name} → {target.name}", + description=r.type, + date=date.isoformat() if date else datetime.now().isoformat(), + icon="🔗" + )) + + # Sort by date + events.sort(key=lambda x: x.date, reverse=True) + + # Group by date + groups_dict = defaultdict(list) + for event in events: + date_key = 
event.date[:10] # YYYY-MM-DD + groups_dict[date_key].append(event) + + # Format groups + groups = [] + for date_key in sorted(groups_dict.keys(), reverse=True): + try: + dt = datetime.fromisoformat(date_key) + label = dt.strftime("%d %b %Y") + except: + label = date_key + + groups.append(TimelineGroup( + date=date_key, + label=label, + events=groups_dict[date_key] + )) + + return TimelineResponse( + groups=groups, + total_events=len(events) + ) + + +@router.get("/stats") +async def get_timeline_stats(db: Session = Depends(get_scoped_db)): + """Get statistics for timeline visualization""" + + # Count entities by type + entity_counts = {} + for entity_type in ["person", "organization", "location", "event", "concept"]: + count = db.query(Entity).filter(Entity.type == entity_type).count() + entity_counts[entity_type] = count + + # Count relationships + relationship_count = db.query(Relationship).count() + + # Recent activity (last 7 days) + week_ago = datetime.now() - timedelta(days=7) + recent_entities = db.query(Entity).filter(Entity.created_at >= week_ago).count() + recent_relationships = db.query(Relationship).filter(Relationship.created_at >= week_ago).count() + + return { + "entity_counts": entity_counts, + "relationship_count": relationship_count, + "recent_activity": { + "entities": recent_entities, + "relationships": recent_relationships, + "total": recent_entities + recent_relationships + } + } diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000000000000000000000000000000000000..23f3497193305642c7ae08e7c907009e44c489f0 --- /dev/null +++ b/app/config.py @@ -0,0 +1,47 @@ +""" +Numidium Backend Configuration +""" +from pydantic_settings import BaseSettings +from functools import lru_cache +import os + + +class Settings(BaseSettings): + """Application settings""" + + # App Info + app_name: str = "Numidium" + app_version: str = "0.1.0" + debug: bool = False + + # Database + database_url: str = "sqlite:///./data/numidium.db" + + # APIs 
(opcional - pode configurar depois) + newsapi_key: str = "" + + # Cerebras API for LLM-based entity extraction + cerebras_api_key: str = "" + + # AetherMap API for semantic search and NER + aethermap_url: str = "https://madras1-aethermap.hf.space" + + # CORS + cors_origins: list[str] = ["*"] + + # Session cookie + cookie_secure: bool = True + cookie_samesite: str = "none" + + class Config: + env_file = ".env" + env_file_encoding = "utf-8" + + +@lru_cache() +def get_settings() -> Settings: + """Get cached settings""" + return Settings() + + +settings = get_settings() diff --git a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0e8825ce5959f9f016f6f5ed46c2a54fdd15d9e8 --- /dev/null +++ b/app/core/__init__.py @@ -0,0 +1,2 @@ +# Core module +from app.core.database import get_db, init_db, Base diff --git a/app/core/__pycache__/__init__.cpython-311.pyc b/app/core/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5dc2c47dff4d25a449c31d5b491838968bd8699 Binary files /dev/null and b/app/core/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/core/__pycache__/database.cpython-311.pyc b/app/core/__pycache__/database.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d66b606dc407b3d70e7e6b1d62893eb13ff9d42 Binary files /dev/null and b/app/core/__pycache__/database.cpython-311.pyc differ diff --git a/app/core/database.py b/app/core/database.py new file mode 100644 index 0000000000000000000000000000000000000000..6fbd7f1d970d02b46df83e466a09287bfc0090be --- /dev/null +++ b/app/core/database.py @@ -0,0 +1,115 @@ +""" +Database configuration and session management +Per-session databases - each user session gets its own SQLite file +""" +from sqlalchemy import create_engine, text +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, Session +from typing import Optional 
+import os +import uuid + +# Ensure data directory exists +os.makedirs("data/sessions", exist_ok=True) + +# Base class for models +Base = declarative_base() + +# Cache for session engines +_session_engines = {} +_session_makers = {} + + +def get_session_engine(session_id: str): + """Get or create engine for a specific session""" + if session_id not in _session_engines: + db_path = f"data/sessions/{session_id}.db" + engine = create_engine( + f"sqlite:///./{db_path}", + connect_args={"check_same_thread": False} + ) + _session_engines[session_id] = engine + _session_makers[session_id] = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + # Initialize tables for this session + Base.metadata.create_all(bind=engine) + _run_migrations(engine) + + return _session_engines[session_id] + + +def get_session_db(session_id: str): + """Get database session for a specific user session""" + get_session_engine(session_id) # Ensure engine exists + SessionLocal = _session_makers[session_id] + db = SessionLocal() + try: + yield db + finally: + db.close() + + +def get_db_for_session(session_id: str) -> Session: + """Direct session getter (non-generator) for routes""" + get_session_engine(session_id) + SessionLocal = _session_makers[session_id] + return SessionLocal() + + +# Legacy - default database for backwards compatibility +from app.config import settings +engine = create_engine( + settings.database_url, + connect_args={"check_same_thread": False} +) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def get_default_session() -> Session: + """Create a new session for the default database.""" + return SessionLocal() + + +def get_db(): + """Legacy: Default database session""" + db = get_default_session() + try: + yield db + finally: + db.close() + + +def _run_migrations(eng): + """Run migrations on an engine""" + with eng.connect() as conn: + try: + conn.execute(text("ALTER TABLE entities ADD COLUMN event_date DATETIME")) + conn.commit() + 
except Exception: + pass + try: + conn.execute(text("ALTER TABLE relationships ADD COLUMN event_date DATETIME")) + conn.commit() + except Exception: + pass + try: + conn.execute(text("ALTER TABLE entities ADD COLUMN project_id VARCHAR(36)")) + conn.commit() + except Exception: + pass + try: + conn.execute(text("ALTER TABLE relationships ADD COLUMN project_id VARCHAR(36)")) + conn.commit() + except Exception: + pass + + +def init_db(): + """Initialize default database tables""" + Base.metadata.create_all(bind=engine) + _run_migrations(engine) + + +def create_new_session_id() -> str: + """Generate a new session ID""" + return str(uuid.uuid4()) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..7abfa0ca7fb7e31fc2a58e35b5528eb7b135bada --- /dev/null +++ b/app/main.py @@ -0,0 +1,99 @@ +""" +Numidium Backend - Main Application +Plataforma de Inteligência e Análise de Dados +""" +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from contextlib import asynccontextmanager + +from app.config import settings +from app.core.database import init_db +from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate, dados_publicos, timeline, session, aethermap + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Startup and shutdown events""" + # Startup: Initialize database + init_db() + print("🚀 Numidium Backend started!") + print(f"📊 Database: {settings.database_url}") + yield + # Shutdown + print("👋 Numidium Backend shutting down...") + + +# Create FastAPI app +app = FastAPI( + title="Numidium API", + description=""" + ## 🔮 Sistema de Inteligência e Análise de Dados + + Backend do VANTAGE - Uma plataforma para: + - 📥 Ingestão de dados de múltiplas fontes (Wikipedia, News, Manual) + - 🔗 Mapeamento de conexões entre entidades + - 🗺️ Visualização geográfica + - 📊 Análise de grafos e relacionamentos + - 🔍 Busca global + """, 
+ version=settings.app_version, + lifespan=lifespan +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include routers +app.include_router(entities.router, prefix="/api/v1") +app.include_router(relationships.router, prefix="/api/v1") +app.include_router(events.router, prefix="/api/v1") +app.include_router(search.router, prefix="/api/v1") +app.include_router(ingest.router, prefix="/api/v1") +app.include_router(analyze.router, prefix="/api/v1") +app.include_router(graph.router, prefix="/api/v1") +app.include_router(research.router, prefix="/api/v1") +app.include_router(chat.router, prefix="/api/v1") +app.include_router(investigate.router, prefix="/api/v1") +app.include_router(dados_publicos.router, prefix="/api/v1") +app.include_router(timeline.router, prefix="/api/v1") +app.include_router(session.router, prefix="/api/v1") +app.include_router(aethermap.router, prefix="/api/v1/aethermap", tags=["aethermap"]) + + +@app.get("/") +def root(): + """Root endpoint - API info""" + return { + "name": "Numidium", + "version": settings.app_version, + "status": "online", + "docs": "/docs", + "description": "Sistema de Inteligência e Análise de Dados" + } + + +@app.get("/health") +def health_check(): + """Health check endpoint for HF Spaces""" + return {"status": "healthy"} + + +@app.get("/api/v1") +def api_info(): + """API v1 info""" + return { + "version": "1.0.0", + "endpoints": { + "entities": "/api/v1/entities", + "relationships": "/api/v1/relationships", + "events": "/api/v1/events", + "search": "/api/v1/search", + "ingest": "/api/v1/ingest" + } + } diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dda9ada652332c2b420769a6ace731249e11cfd8 --- /dev/null +++ b/app/models/__init__.py @@ -0,0 +1,3 @@ +# Models module +from app.models.entity import Entity, Relationship, 
Event, Document +from app.models.project import Project diff --git a/app/models/__pycache__/__init__.cpython-311.pyc b/app/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5bf1933151b2e8a290dc79c4647a626d3d0500ff Binary files /dev/null and b/app/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/models/__pycache__/entity.cpython-311.pyc b/app/models/__pycache__/entity.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08def07731fef26be3662b7e40f9afca7961637f Binary files /dev/null and b/app/models/__pycache__/entity.cpython-311.pyc differ diff --git a/app/models/__pycache__/project.cpython-311.pyc b/app/models/__pycache__/project.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e11c4a8127b91fa0a97a4fc6607860af7c04d37 Binary files /dev/null and b/app/models/__pycache__/project.cpython-311.pyc differ diff --git a/app/models/entity.py b/app/models/entity.py new file mode 100644 index 0000000000000000000000000000000000000000..07f9afbd7c789db76ca4d482de4655cd99eb3bda --- /dev/null +++ b/app/models/entity.py @@ -0,0 +1,143 @@ +""" +SQLAlchemy Models for Numidium +""" +from sqlalchemy import Column, String, Text, DateTime, Float, JSON, ForeignKey, Table +from sqlalchemy.orm import relationship +from datetime import datetime +import uuid + +from app.core.database import Base + + +def generate_uuid(): + return str(uuid.uuid4()) + + +class Entity(Base): + """ + Entidade - qualquer coisa rastreável no sistema + Pode ser: pessoa, organização, local, veículo, evento, documento, etc. 
+ """ + __tablename__ = "entities" + + id = Column(String(36), primary_key=True, default=generate_uuid) + project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True) + type = Column(String(50), nullable=False, index=True) # person, organization, location, etc + name = Column(String(255), nullable=False, index=True) + description = Column(Text, nullable=True) + properties = Column(JSON, default=dict) # Dados flexíveis + + # Geolocalização (opcional) + latitude = Column(Float, nullable=True) + longitude = Column(Float, nullable=True) + + # Data histórica do evento/entidade (quando aconteceu, não quando foi adicionado) + event_date = Column(DateTime, nullable=True) + + # Fonte do dado + source = Column(String(100), nullable=True) # wikipedia, newsapi, manual, etc + source_url = Column(Text, nullable=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + # Relacionamentos + outgoing_relationships = relationship( + "Relationship", + foreign_keys="Relationship.source_id", + back_populates="source_entity" + ) + incoming_relationships = relationship( + "Relationship", + foreign_keys="Relationship.target_id", + back_populates="target_entity" + ) + + +class Relationship(Base): + """ + Relacionamento entre duas entidades + Exemplos: works_for, knows, owns, located_at, participated_in + """ + __tablename__ = "relationships" + + id = Column(String(36), primary_key=True, default=generate_uuid) + project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True) + source_id = Column(String(36), ForeignKey("entities.id"), nullable=False) + target_id = Column(String(36), ForeignKey("entities.id"), nullable=False) + type = Column(String(50), nullable=False, index=True) # works_for, knows, owns, etc + properties = Column(JSON, default=dict) + confidence = Column(Float, default=1.0) # 0-1, quão certo estamos dessa conexão + + # 
Data histórica do relacionamento (quando aconteceu) + event_date = Column(DateTime, nullable=True) + + # Fonte + source = Column(String(100), nullable=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow) + + # Relacionamentos + source_entity = relationship("Entity", foreign_keys=[source_id], back_populates="outgoing_relationships") + target_entity = relationship("Entity", foreign_keys=[target_id], back_populates="incoming_relationships") + + +class Event(Base): + """ + Evento - algo que aconteceu envolvendo entidades + """ + __tablename__ = "events" + + id = Column(String(36), primary_key=True, default=generate_uuid) + type = Column(String(50), nullable=False, index=True) + title = Column(String(255), nullable=False) + description = Column(Text, nullable=True) + + # Quando aconteceu + event_date = Column(DateTime, nullable=True) + + # Onde aconteceu + location_name = Column(String(255), nullable=True) + latitude = Column(Float, nullable=True) + longitude = Column(Float, nullable=True) + + # Entidades envolvidas (armazenado como JSON array de IDs) + entity_ids = Column(JSON, default=list) + + # Fonte + source = Column(String(100), nullable=True) + source_url = Column(Text, nullable=True) + + # Metadados + properties = Column(JSON, default=dict) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow) + + +class Document(Base): + """ + Documento - texto/arquivo para análise + """ + __tablename__ = "documents" + + id = Column(String(36), primary_key=True, default=generate_uuid) + title = Column(String(255), nullable=False) + content = Column(Text, nullable=True) + summary = Column(Text, nullable=True) # Resumo gerado por IA + + # Tipo de documento + doc_type = Column(String(50), default="text") # text, news, report, etc + + # Entidades mencionadas (extraídas por NLP) + mentioned_entities = Column(JSON, default=list) + + # Fonte + source = Column(String(100), nullable=True) + source_url = Column(Text, nullable=True) + + # 
Timestamps + published_at = Column(DateTime, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow) diff --git a/app/models/project.py b/app/models/project.py new file mode 100644 index 0000000000000000000000000000000000000000..72f601e1975770622c146cc3b1b9fb6fbd912a3c --- /dev/null +++ b/app/models/project.py @@ -0,0 +1,29 @@ +""" +Project Model - Workspaces for organizing investigations +""" +from sqlalchemy import Column, String, Text, DateTime +from datetime import datetime +import uuid + +from app.core.database import Base + + +def generate_uuid(): + return str(uuid.uuid4()) + + +class Project(Base): + """ + Projeto/Workspace - agrupa entidades e relacionamentos por investigação + """ + __tablename__ = "projects" + + id = Column(String(36), primary_key=True, default=generate_uuid) + name = Column(String(255), nullable=False) + description = Column(Text, nullable=True) + color = Column(String(7), default="#00d4ff") # Hex color for UI + icon = Column(String(50), default="folder") # Icon name + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5afaa5cdacc5762ea19abe607c7dab6309e351a8 --- /dev/null +++ b/app/schemas/__init__.py @@ -0,0 +1,10 @@ +# Schemas module +from app.schemas.schemas import ( + EntityCreate, EntityUpdate, EntityResponse, + RelationshipCreate, RelationshipResponse, + EventCreate, EventResponse, + DocumentCreate, DocumentResponse, + GraphData, GraphNode, GraphEdge, + SearchQuery, SearchResult, + SystemStats +) diff --git a/app/schemas/__pycache__/__init__.cpython-311.pyc b/app/schemas/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a0991c3a362725e79629796654b9dc0ed9c9668 Binary files /dev/null and 
# ========== Entity Schemas ==========

class EntityBase(BaseModel):
    """Fields shared by entity create/read schemas."""
    type: str = Field(..., description="Tipo da entidade: person, organization, location, etc")
    name: str = Field(..., description="Nome da entidade")
    description: Optional[str] = None
    # Free-form extra attributes (JSON object)
    properties: dict = Field(default_factory=dict)
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # Provenance: where this entity came from
    source: Optional[str] = None
    source_url: Optional[str] = None


class EntityCreate(EntityBase):
    """Payload for creating an entity (same fields as EntityBase)."""
    pass


class EntityUpdate(BaseModel):
    """Partial update: every field optional; only provided fields change."""
    type: Optional[str] = None
    name: Optional[str] = None
    description: Optional[str] = None
    properties: Optional[dict] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None


class EntityResponse(EntityBase):
    """Entity as returned by the API (adds server-generated fields)."""
    id: str
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow building from ORM objects (pydantic v2 attribute access)
        from_attributes = True


# ========== Relationship Schemas ==========

class RelationshipBase(BaseModel):
    """Directed edge between two entities (by entity id)."""
    source_id: str
    target_id: str
    type: str = Field(..., description="Tipo: works_for, knows, owns, located_at, etc")
    properties: dict = Field(default_factory=dict)
    # Confidence score constrained to [0, 1]
    confidence: float = Field(default=1.0, ge=0, le=1)
    source: Optional[str] = None


class RelationshipCreate(RelationshipBase):
    """Payload for creating a relationship."""
    pass


class RelationshipResponse(RelationshipBase):
    """Relationship as returned by the API."""
    id: str
    created_at: datetime

    class Config:
        from_attributes = True


# ========== Event Schemas ==========

class EventBase(BaseModel):
    """Fields shared by event create/read schemas."""
    type: str
    title: str
    description: Optional[str] = None
    # When it happened (may be unknown)
    event_date: Optional[datetime] = None
    # Where it happened
    location_name: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # IDs of the entities involved in this event
    entity_ids: List[str] = Field(default_factory=list)
    source: Optional[str] = None
    source_url: Optional[str] = None
    properties: dict = Field(default_factory=dict)


class EventCreate(EventBase):
    """Payload for creating an event."""
    pass


class EventResponse(EventBase):
    """Event as returned by the API."""
    id: str
    created_at: datetime

    class Config:
        from_attributes = True


# ========== Document Schemas ==========

class DocumentBase(BaseModel):
    """Fields shared by document create/read schemas."""
    title: str
    content: Optional[str] = None
    doc_type: str = "text"
    source: Optional[str] = None
    source_url: Optional[str] = None
    published_at: Optional[datetime] = None


class DocumentCreate(DocumentBase):
    """Payload for creating a document."""
    pass


class DocumentResponse(DocumentBase):
    """Document as returned by the API (adds derived fields)."""
    id: str
    # AI-generated summary, when available
    summary: Optional[str] = None
    # Entities extracted from the text by NLP
    mentioned_entities: List[str] = []
    created_at: datetime

    class Config:
        from_attributes = True


# ========== Graph Schemas ==========

class GraphNode(BaseModel):
    """Node of the knowledge graph payload served to the frontend."""
    id: str
    type: str
    name: str
    properties: dict = {}


class GraphEdge(BaseModel):
    """Edge of the knowledge graph (references node ids)."""
    source: str
    target: str
    type: str
    confidence: float = 1.0


class GraphData(BaseModel):
    """Full graph payload: node list plus edge list."""
    nodes: List[GraphNode]
    edges: List[GraphEdge]


# ========== Search Schemas ==========

class SearchQuery(BaseModel):
    """Global search request."""
    query: str
    # Restrict results to these entity types when provided
    entity_types: Optional[List[str]] = None
    limit: int = Field(default=20, le=100)


class SearchResult(BaseModel):
    """Global search response grouped by record kind."""
    entities: List[EntityResponse]
    events: List[EventResponse]
    documents: List[DocumentResponse]


# ========== Stats Schemas ==========

class SystemStats(BaseModel):
    """Dashboard counters plus a recent-activity feed."""
    total_entities: int
    total_relationships: int
    total_events: int
    total_documents: int
    entities_by_type: dict
    recent_activity: List[dict]
a/app/services/__pycache__/lancer.cpython-311.pyc b/app/services/__pycache__/lancer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0415c81ac5d96a648b50a300ce010fe32a9a53bb Binary files /dev/null and b/app/services/__pycache__/lancer.cpython-311.pyc differ diff --git a/app/services/__pycache__/transparencia_api.cpython-311.pyc b/app/services/__pycache__/transparencia_api.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8758135b5b049f71eceec09add4595fb7b11dd12 Binary files /dev/null and b/app/services/__pycache__/transparencia_api.cpython-311.pyc differ diff --git a/app/services/aethermap_client.py b/app/services/aethermap_client.py new file mode 100644 index 0000000000000000000000000000000000000000..2e9a6490f843a94391b376e528db87554c8e31cf --- /dev/null +++ b/app/services/aethermap_client.py @@ -0,0 +1,343 @@ +""" +AetherMap Client +Client para integração com AetherMap API - busca semântica, NER e análise de grafos. 
@dataclass
class ProcessResult:
    """Result of document processing (embeddings + clustering)."""
    job_id: str
    num_documents: int
    num_clusters: int
    num_noise: int  # number of "noise" points not assigned to any cluster
    metrics: Dict[str, Any] = field(default_factory=dict)
    cluster_analysis: Dict[str, Any] = field(default_factory=dict)


@dataclass
class SearchResult:
    """Result of a semantic search."""
    summary: str  # RAG answer generated by the LLM
    results: List[Dict[str, Any]] = field(default_factory=list)


@dataclass
class EntityNode:
    """Node in the entity graph."""
    entity: str
    entity_type: str
    docs: int  # number of documents the entity appears in
    degree: int = 0
    centrality: float = 0.0
    role: str = "peripheral"  # one of: hub, connector, peripheral


@dataclass
class EntityEdge:
    """Edge of the entity graph."""
    source_entity: str
    target_entity: str
    weight: int
    reason: str


@dataclass
class EntityGraphResult:
    """Result of entity extraction (NER + connection graph)."""
    nodes: List[EntityNode] = field(default_factory=list)
    edges: List[EntityEdge] = field(default_factory=list)
    hubs: List[Dict[str, Any]] = field(default_factory=list)
    insights: Dict[str, Any] = field(default_factory=dict)


@dataclass
class GraphAnalysis:
    """LLM-generated analysis of the graph."""
    analysis: str
    key_entities: List[str] = field(default_factory=list)
    relationships: List[str] = field(default_factory=list)
    async def process_documents(
        self,
        texts: List[str],
        fast_mode: bool = True,
        min_cluster_size: int = 0,
        min_samples: int = 0
    ) -> ProcessResult:
        """
        Process a list of texts, generating embeddings and clusters.

        Uploads the texts as a single in-memory TXT file to the AetherMap
        ``/process/`` endpoint and records the returned job_id as the
        client's current job.

        Args:
            texts: List of texts/documents (one per line in the upload).
            fast_mode: If True uses PCA (fast); if False uses UMAP (precise).
            min_cluster_size: Minimum cluster size (0 = auto).
            min_samples: Minimum number of samples (0 = auto).

        Returns:
            ProcessResult with the job_id and processing metrics.

        Raises:
            Exception: on non-200 responses, timeouts (the HF Space may be
                asleep) or connection errors; unexpected errors re-raise.
        """
        # Build the TXT upload in memory
        content = "\n".join(texts)
        file_bytes = content.encode('utf-8')

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                files = {
                    'file': ('documents.txt', io.BytesIO(file_bytes), 'text/plain')
                }
                data = {
                    'n_samples': str(len(texts)),
                    'fast_mode': 'true' if fast_mode else 'false',
                    'min_cluster_size': str(min_cluster_size),
                    'min_samples': str(min_samples)
                }

                logger.info(f"AetherMap: Processando {len(texts)} documentos para {self.base_url}/process/")

                response = await client.post(
                    f"{self.base_url}/process/",
                    files=files,
                    data=data
                )

                logger.info(f"AetherMap: Response status {response.status_code}")

                if response.status_code != 200:
                    error_text = response.text[:500] if response.text else "No response body"
                    logger.error(f"AetherMap error: {response.status_code} - {error_text}")
                    raise Exception(f"AetherMap error: {response.status_code} - {error_text}")

                result = response.json()

                # Remember the job id so follow-up calls can default to it
                self._current_job_id = result.get('job_id')
                metadata = result.get('metadata', {})

                logger.info(f"AetherMap: Job criado {self._current_job_id}")

                return ProcessResult(
                    job_id=self._current_job_id or "unknown",
                    num_documents=metadata.get('num_documents_processed', len(texts)),
                    num_clusters=metadata.get('num_clusters_found', 0),
                    num_noise=metadata.get('num_noise_points', 0),
                    metrics=result.get('metrics', {}),
                    cluster_analysis=result.get('cluster_analysis', {})
                )
        except httpx.TimeoutException:
            logger.error(f"AetherMap: Timeout ao conectar com {self.base_url}")
            raise Exception(f"Timeout: AetherMap Space pode estar dormindo. Tente novamente em alguns segundos.")
        except httpx.ConnectError as e:
            logger.error(f"AetherMap: Erro de conexão: {e}")
            raise Exception(f"Erro de conexão com AetherMap: {e}")
        except Exception as e:
            logger.error(f"AetherMap: Erro inesperado: {e}")
            raise
    async def extract_entities(self, job_id: str = None) -> EntityGraphResult:
        """
        Extract named entities (NER) and build the connection graph.

        Args:
            job_id: Job ID (defaults to the last processed job).

        Returns:
            EntityGraphResult with typed nodes, edges, hubs and insights.

        Raises:
            ValueError: if no job_id is available yet.
            Exception: on non-200 responses from the AetherMap API.
        """
        job_id = job_id or self._current_job_id
        if not job_id:
            raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            data = {'job_id': job_id}

            logger.info(f"AetherMap: Extraindo entidades...")

            response = await client.post(
                f"{self.base_url}/entity_graph/",
                data=data
            )

            if response.status_code != 200:
                raise Exception(f"AetherMap entity_graph error: {response.status_code} - {response.text}")

            result = response.json()

            # Convert raw JSON dicts into dataclasses
            nodes = [
                EntityNode(
                    entity=n.get('entity', ''),
                    entity_type=n.get('type', ''),
                    docs=n.get('docs', 0),
                    degree=n.get('degree', 0),
                    centrality=n.get('centrality', 0.0),
                    role=n.get('role', 'peripheral')
                )
                for n in result.get('nodes', [])
            ]

            edges = [
                EntityEdge(
                    source_entity=e.get('source_entity', ''),
                    target_entity=e.get('target_entity', ''),
                    weight=e.get('weight', 0),
                    reason=e.get('reason', '')
                )
                for e in result.get('edges', [])
            ]

            return EntityGraphResult(
                nodes=nodes,
                edges=edges,
                hubs=result.get('hubs', []),
                insights=result.get('insights', {})
            )
Processe documentos primeiro.") + + async with httpx.AsyncClient(timeout=self.timeout) as client: + data = {'job_id': job_id} + + logger.info(f"AetherMap: Descrevendo clusters...") + + response = await client.post( + f"{self.base_url}/describe_clusters/", + data=data + ) + + if response.status_code != 200: + raise Exception(f"AetherMap describe_clusters error: {response.status_code} - {response.text}") + + return response.json() + + +# Instância global do client +aethermap = AetherMapClient() diff --git a/app/services/analysis/__init__.py b/app/services/analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18e8fe19c13e9ec59fb147e63518a8ddbeef5f25 --- /dev/null +++ b/app/services/analysis/__init__.py @@ -0,0 +1 @@ +# Analysis services diff --git a/app/services/brazil_apis.py b/app/services/brazil_apis.py new file mode 100644 index 0000000000000000000000000000000000000000..3cf938529a35708355664dd05f60288519c0d7df --- /dev/null +++ b/app/services/brazil_apis.py @@ -0,0 +1,218 @@ +""" +Brazilian Data APIs Service +Consolidates access to public Brazilian data APIs for investigation +""" +import httpx +from typing import Optional, Dict, Any, List +from dataclasses import dataclass, field +import re + + +# API URLs +CNPJA_URL = "https://api.cnpja.com.br/office" +OPENCNPJ_URL = "https://api.opencnpj.org/v1/cnpj" +BRASILAPI_CNPJ = "https://brasilapi.com.br/api/cnpj/v1" +BRASILAPI_CEP = "https://brasilapi.com.br/api/cep/v2" + + +@dataclass +class CompanyData: + """Data structure for company information""" + cnpj: str + razao_social: str = "" + nome_fantasia: str = "" + situacao: str = "" + data_abertura: str = "" + natureza_juridica: str = "" + capital_social: float = 0.0 + porte: str = "" + + # Address + logradouro: str = "" + numero: str = "" + complemento: str = "" + bairro: str = "" + cidade: str = "" + uf: str = "" + cep: str = "" + + # Contact + telefone: str = "" + email: str = "" + + # Activity + cnae_principal: str = "" + 
def clean_cnpj(cnpj: str) -> str:
    """Strip formatting from a CNPJ, keeping only its ASCII digits."""
    # Equivalent to re.sub(r'[^0-9]', '', cnpj): drop every non-[0-9] char.
    return "".join(ch for ch in cnpj if "0" <= ch <= "9")
async def _query_opencnpj(cnpj: str) -> Optional[CompanyData]:
    """Query the OpenCNPJ API for company data (fallback source).

    Args:
        cnpj: CNPJ with digits only (14 characters).

    Returns:
        CompanyData on success, or None on any HTTP/parsing failure —
        errors are logged and swallowed so callers can try other sources.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{OPENCNPJ_URL}/{cnpj}")

            if response.status_code != 200:
                return None

            data = response.json()

            # Parse partners (OpenCNPJ does not expose the partner CPF/CNPJ)
            socios = []
            for socio in data.get("socios", []):
                socios.append({
                    "nome": socio.get("nome", ""),
                    "qualificacao": socio.get("qualificacao", ""),
                    "cpf_cnpj": "",
                    "data_entrada": socio.get("data_entrada", "")
                })

            # Fix: "cnae_principal" may be present but null in the payload;
            # chaining .get() on None raised AttributeError and dropped the
            # whole record. The `or {}` guard mirrors the `or 0` guard used
            # for capital_social below.
            cnae_principal = data.get("cnae_principal") or {}

            return CompanyData(
                cnpj=cnpj,
                razao_social=data.get("razao_social", ""),
                nome_fantasia=data.get("nome_fantasia", ""),
                situacao=data.get("situacao_cadastral", ""),
                data_abertura=data.get("data_inicio_atividade", ""),
                natureza_juridica=data.get("natureza_juridica", ""),
                capital_social=float(data.get("capital_social", 0) or 0),
                porte=data.get("porte", ""),
                logradouro=data.get("logradouro", ""),
                numero=data.get("numero", ""),
                complemento=data.get("complemento", ""),
                bairro=data.get("bairro", ""),
                cidade=data.get("municipio", ""),
                uf=data.get("uf", ""),
                cep=data.get("cep", ""),
                telefone=data.get("telefone", ""),
                email=data.get("email", ""),
                cnae_principal=cnae_principal.get("codigo", ""),
                cnae_descricao=cnae_principal.get("descricao", ""),
                cnaes_secundarios=[],
                socios=socios,
                fonte="OpenCNPJ"
            )

    except Exception as e:
        print(f"OpenCNPJ error: {e}")
        return None
    def _get_local_context(self, query: str, db: Session, limit: int = 5) -> str:
        """
        Build a text context block from entities in the local database.

        Lookup strategy (in order): entity name ILIKE match, then
        description ILIKE match to fill up to `limit`, then per-word name
        matches as a last resort. Each matched entity contributes one
        bullet line with up to 3 of its relationships.

        Returns:
            Newline-joined bullet lines, or "" when nothing matched.
        """
        # 1) Search entities by name
        entities = db.query(Entity).filter(
            Entity.name.ilike(f"%{query}%")
        ).limit(limit).all()

        # 2) Top up with description matches
        if len(entities) < limit:
            desc_entities = db.query(Entity).filter(
                Entity.description.ilike(f"%{query}%")
            ).limit(limit - len(entities)).all()
            entities.extend(desc_entities)

        # 3) Fallback: match individual words longer than 3 chars
        if not entities:
            words = query.split()
            for word in words:
                if len(word) > 3:
                    word_entities = db.query(Entity).filter(
                        Entity.name.ilike(f"%{word}%")
                    ).limit(2).all()
                    entities.extend(word_entities)

        if not entities:
            return ""

        context_parts = []
        # Dedupe: steps 2-3 can return entities already found earlier
        seen_ids = set()

        for entity in entities:
            if entity.id in seen_ids:
                continue
            seen_ids.add(entity.id)

            ctx = f"• {entity.name} ({entity.type})"
            if entity.description:
                ctx += f": {entity.description[:200]}"

            # Attach up to 5 relationships touching this entity
            relationships = db.query(Relationship).filter(
                (Relationship.source_id == entity.id) |
                (Relationship.target_id == entity.id)
            ).limit(5).all()

            if relationships:
                related = []
                for rel in relationships:
                    if rel.source_id == entity.id:
                        # Outgoing edge: entity --type--> target
                        target = db.query(Entity).filter(Entity.id == rel.target_id).first()
                        if target:
                            related.append(f"{rel.type} → {target.name}")
                    else:
                        # Incoming edge: source --type--> entity
                        source = db.query(Entity).filter(Entity.id == rel.source_id).first()
                        if source:
                            related.append(f"{source.name} → {rel.type}")

                if related:
                    ctx += f" | Relações: {', '.join(related[:3])}"

            context_parts.append(ctx)

        return "\n".join(context_parts)
self._get_web_context(message) + + # Build context + context_parts = [] + if local_context: + context_parts.append(f"📊 Conhecimento local:\n{local_context}") + if web_context: + context_parts.append(f"🌐 {web_context}") + + context = "\n\n".join(context_parts) if context_parts else "Nenhum contexto disponível." + + # Build messages + messages = [{"role": "system", "content": SYSTEM_PROMPT}] + + if use_history and history: + messages.extend(history[-6:]) + + user_message = f"""Contexto: +{context} + +Pergunta: {message}""" + + messages.append({"role": "user", "content": user_message}) + + # Call LLM + response = await self._call_llm(messages) + + # Store history + if use_history: + history.append({"role": "user", "content": message}) + history.append({"role": "assistant", "content": response}) + + return { + "answer": response, + "local_context_used": bool(local_context), + "web_context_used": bool(web_context), + "entities_found": local_context.count("•") if local_context else 0 + } + + +# Singleton +chat_service = ChatService() diff --git a/app/services/geocoding.py b/app/services/geocoding.py new file mode 100644 index 0000000000000000000000000000000000000000..06863f2be60350c5cd8251ca8cfa063809135cea --- /dev/null +++ b/app/services/geocoding.py @@ -0,0 +1,63 @@ +""" +Geocoding Service - Uses Nominatim (OpenStreetMap) for free geocoding +""" +import httpx +from typing import Optional, Tuple +import asyncio + + +NOMINATIM_URL = "https://nominatim.openstreetmap.org/search" +USER_AGENT = "NUMIDIUM/1.0 (Intelligence System)" + + +async def geocode(location_name: str) -> Optional[Tuple[float, float]]: + """ + Convert a location name to coordinates using Nominatim. + Returns (latitude, longitude) or None if not found. + + Note: Nominatim has rate limits (1 request/second), so be careful with batch operations. 
async def geocode_batch(location_names: list[str], delay: float = 1.0) -> dict[str, Tuple[float, float]]:
    """
    Resolve several location names to coordinates, one request at a time.

    A pause of ``delay`` seconds follows every lookup so the Nominatim
    usage policy (max 1 request/second) is respected. Names that cannot
    be resolved are omitted from the result.

    Returns:
        Dict mapping each resolved name to its (latitude, longitude) tuple.
    """
    resolved: dict[str, Tuple[float, float]] = {}

    for place in location_names:
        position = await geocode(place)
        if position is not None:
            resolved[place] = position
        # Throttle between requests to honor Nominatim's rate limit
        await asyncio.sleep(delay)

    return resolved
async def listar_municipios(uf: str) -> List[Municipio]:
    """Return every municipality of the given state (UF), sorted by name.

    Returns an empty list on any HTTP or network failure (best-effort).
    """
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            response = await client.get(
                f"{IBGE_BASE_URL}/localidades/estados/{uf}/municipios"
            )

            if response.status_code != 200:
                return []

            municipios: List[Municipio] = []
            for registro in response.json():
                # State info is nested: microrregiao -> mesorregiao -> UF
                uf_info = registro.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {})
                municipios.append(Municipio(
                    id=registro["id"],
                    nome=registro["nome"],
                    estado_sigla=uf.upper(),
                    estado_nome=uf_info.get("nome", ""),
                    regiao=uf_info.get("regiao", {}).get("nome", "")
                ))

            municipios.sort(key=lambda m: m.nome)
            return municipios

    except Exception as e:
        print(f"IBGE municipios error: {e}")
        return []
nome=item["nome"], + estado_sigla=uf_info.get("sigla", ""), + estado_nome=uf_info.get("nome", ""), + regiao=uf_info.get("regiao", {}).get("nome", "") + )) + + return results[:20] # Limit results + + except Exception as e: + print(f"IBGE search error: {e}") + return [] + + +async def obter_municipio_por_id(id_municipio: int) -> Optional[Municipio]: + """Get municipality by IBGE code""" + try: + async with httpx.AsyncClient(timeout=15.0) as client: + response = await client.get( + f"{IBGE_BASE_URL}/localidades/municipios/{id_municipio}" + ) + + if response.status_code != 200: + return None + + item = response.json() + uf_info = item.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {}) + + return Municipio( + id=item["id"], + nome=item["nome"], + estado_sigla=uf_info.get("sigla", ""), + estado_nome=uf_info.get("nome", ""), + regiao=uf_info.get("regiao", {}).get("nome", "") + ) + + except Exception as e: + print(f"IBGE municipio error: {e}") + return None + + +async def enriquecer_localizacao(cidade: str, uf: Optional[str] = None) -> Dict[str, Any]: + """ + Enrich a location name with IBGE data. + Useful for adding context to extracted locations. 
+ """ + resultado = { + "cidade_original": cidade, + "encontrado": False, + "ibge_codigo": None, + "cidade": None, + "estado": None, + "estado_sigla": None, + "regiao": None + } + + municipios = await buscar_municipio(cidade, uf) + + if municipios: + # Take best match (exact or first) + melhor = None + for m in municipios: + if m.nome.lower() == cidade.lower(): + melhor = m + break + + if not melhor: + melhor = municipios[0] + + resultado.update({ + "encontrado": True, + "ibge_codigo": melhor.id, + "cidade": melhor.nome, + "estado": melhor.estado_nome, + "estado_sigla": melhor.estado_sigla, + "regiao": melhor.regiao + }) + + return resultado diff --git a/app/services/ingestion/__init__.py b/app/services/ingestion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..53751fc389795a6893e21379a16b0680f55cda41 --- /dev/null +++ b/app/services/ingestion/__init__.py @@ -0,0 +1,3 @@ +# Ingestion services +from app.services.ingestion.wikipedia import wikipedia_scraper +from app.services.ingestion.news import news_service diff --git a/app/services/ingestion/__pycache__/__init__.cpython-311.pyc b/app/services/ingestion/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de09d686a52c85f16de0eac33cbd28ca9065604d Binary files /dev/null and b/app/services/ingestion/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/services/ingestion/__pycache__/news.cpython-311.pyc b/app/services/ingestion/__pycache__/news.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47a4ad23456ff8907ab2a47285b1b74cd099a8fe Binary files /dev/null and b/app/services/ingestion/__pycache__/news.cpython-311.pyc differ diff --git a/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc b/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..215244f9f9e1bdf8dc6071c4e0237f41318f352a Binary files /dev/null and 
b/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc differ diff --git a/app/services/ingestion/news.py b/app/services/ingestion/news.py new file mode 100644 index 0000000000000000000000000000000000000000..1aba8df40e8cfb6d2cc19900fea89cf6ce04cf14 --- /dev/null +++ b/app/services/ingestion/news.py @@ -0,0 +1,86 @@ +""" +News API Client Service +Usa RSS feeds públicos para não precisar de API key +""" +import feedparser +import requests +from typing import List, Dict +from datetime import datetime +import re + + +class NewsService: + """Serviço para buscar notícias de fontes públicas via RSS""" + + # RSS feeds públicos brasileiros e internacionais + RSS_FEEDS = { + "g1": "https://g1.globo.com/rss/g1/", + "folha": "https://feeds.folha.uol.com.br/folha/rss/rss091.xml", + "bbc_brasil": "https://www.bbc.com/portuguese/articles/rss.xml", + "reuters": "https://www.reutersagency.com/feed/", + "google_news_br": "https://news.google.com/rss?hl=pt-BR&gl=BR&ceid=BR:pt-419" + } + + def fetch_feed(self, feed_url: str) -> List[Dict]: + """Busca artigos de um feed RSS""" + try: + feed = feedparser.parse(feed_url) + articles = [] + + for entry in feed.entries[:20]: # Limitar a 20 artigos + published = None + if hasattr(entry, 'published_parsed') and entry.published_parsed: + published = datetime(*entry.published_parsed[:6]) + + articles.append({ + "title": entry.get("title", ""), + "description": self._clean_html(entry.get("summary", "")), + "url": entry.get("link", ""), + "published_at": published, + "source": feed.feed.get("title", "Unknown") + }) + + return articles + except Exception as e: + print(f"Error fetching feed {feed_url}: {e}") + return [] + + def fetch_all_feeds(self) -> List[Dict]: + """Busca artigos de todos os feeds configurados""" + all_articles = [] + for name, url in self.RSS_FEEDS.items(): + articles = self.fetch_feed(url) + for article in articles: + article["feed_name"] = name + all_articles.extend(articles) + return all_articles + + def 
search_news(self, query: str) -> List[Dict]: + """ + Busca notícias pelo Google News RSS + """ + # Google News RSS search + search_url = f"https://news.google.com/rss/search?q={query}&hl=pt-BR&gl=BR&ceid=BR:pt-419" + return self.fetch_feed(search_url) + + def _clean_html(self, text: str) -> str: + """Remove HTML tags do texto""" + clean = re.compile('<.*?>') + return re.sub(clean, '', text) + + def to_document(self, article: Dict) -> Dict: + """ + Converte um artigo de notícia para o formato Document + """ + return { + "title": article["title"], + "content": article.get("description", ""), + "doc_type": "news", + "source": article.get("source", "news"), + "source_url": article.get("url"), + "published_at": article.get("published_at") + } + + +# Singleton instance +news_service = NewsService() diff --git a/app/services/ingestion/wikipedia.py b/app/services/ingestion/wikipedia.py new file mode 100644 index 0000000000000000000000000000000000000000..2c64a6f77d4bcd406506966ad4b1c3a75972a8e3 --- /dev/null +++ b/app/services/ingestion/wikipedia.py @@ -0,0 +1,215 @@ +""" +Wikipedia Scraper Service +""" +import requests +from bs4 import BeautifulSoup +from typing import Optional, Dict, List +import re + + +class WikipediaScraper: + """Scraper para extrair dados da Wikipedia""" + + BASE_URL = "https://pt.wikipedia.org" + API_URL = "https://pt.wikipedia.org/w/api.php" + + # User-Agent obrigatório para API da Wikipedia + HEADERS = { + "User-Agent": "NumidiumBot/1.0 (https://github.com/numidium; contact@numidium.app) Python/3.11" + } + + def search(self, query: str, limit: int = 10) -> List[Dict]: + """ + Busca artigos na Wikipedia + """ + try: + params = { + "action": "query", + "list": "search", + "srsearch": query, + "srlimit": limit, + "format": "json" + } + + response = requests.get( + self.API_URL, + params=params, + headers=self.HEADERS, + timeout=10 + ) + response.raise_for_status() + data = response.json() + + results = [] + for item in data.get("query", 
{}).get("search", []): + results.append({ + "title": item["title"], + "snippet": BeautifulSoup(item["snippet"], "html.parser").get_text(), + "pageid": item["pageid"] + }) + + return results + except Exception as e: + print(f"Wikipedia search error: {e}") + return [] + + def get_article(self, title: str) -> Optional[Dict]: + """ + Busca informações completas de um artigo + """ + try: + params = { + "action": "query", + "titles": title, + "prop": "extracts|pageimages|coordinates|categories", + "exintro": True, + "explaintext": True, + "pithumbsize": 300, + "format": "json" + } + + response = requests.get( + self.API_URL, + params=params, + headers=self.HEADERS, + timeout=10 + ) + response.raise_for_status() + data = response.json() + + pages = data.get("query", {}).get("pages", {}) + for page_id, page in pages.items(): + if page_id == "-1": + return None + + result = { + "title": page.get("title"), + "extract": page.get("extract"), + "pageid": page.get("pageid"), + "url": f"{self.BASE_URL}/wiki/{page.get('title', '').replace(' ', '_')}", + "thumbnail": page.get("thumbnail", {}).get("source"), + "categories": [c["title"].replace("Categoria:", "") + for c in page.get("categories", [])] + } + + # Coordenadas se disponíveis + if "coordinates" in page: + coords = page["coordinates"][0] + result["latitude"] = coords.get("lat") + result["longitude"] = coords.get("lon") + + return result + + return None + except Exception as e: + print(f"Wikipedia article error: {e}") + return None + + def get_infobox(self, title: str) -> Dict: + """ + Tenta extrair dados estruturados do infobox de um artigo + """ + try: + url = f"{self.BASE_URL}/wiki/{title.replace(' ', '_')}" + response = requests.get(url, headers=self.HEADERS, timeout=10) + soup = BeautifulSoup(response.text, "html.parser") + + infobox = soup.find("table", class_="infobox") + if not infobox: + return {} + + data = {} + for row in infobox.find_all("tr"): + header = row.find("th") + cell = row.find("td") + if header and 
cell: + key = header.get_text(strip=True) + value = cell.get_text(strip=True) + # Clean up the value + value = re.sub(r'\[\d+\]', '', value) # Remove references + data[key] = value + + return data + except Exception as e: + print(f"Infobox error: {e}") + return {} + + def scrape_person(self, name: str) -> Optional[Dict]: + """ + Scrape dados de uma pessoa da Wikipedia + Retorna dados formatados para criar uma Entity + """ + article = self.get_article(name) + if not article: + return None + + infobox = self.get_infobox(name) + + return { + "type": "person", + "name": article["title"], + "description": article.get("extract"), + "source": "wikipedia", + "source_url": article["url"], + "properties": { + "thumbnail": article.get("thumbnail"), + "categories": article.get("categories", []), + **infobox + }, + "latitude": article.get("latitude"), + "longitude": article.get("longitude") + } + + def scrape_organization(self, name: str) -> Optional[Dict]: + """ + Scrape dados de uma organização da Wikipedia + """ + article = self.get_article(name) + if not article: + return None + + infobox = self.get_infobox(name) + + return { + "type": "organization", + "name": article["title"], + "description": article.get("extract"), + "source": "wikipedia", + "source_url": article["url"], + "properties": { + "thumbnail": article.get("thumbnail"), + "categories": article.get("categories", []), + **infobox + }, + "latitude": article.get("latitude"), + "longitude": article.get("longitude") + } + + def scrape_location(self, name: str) -> Optional[Dict]: + """ + Scrape dados de um local da Wikipedia + """ + article = self.get_article(name) + if not article: + return None + + infobox = self.get_infobox(name) + + return { + "type": "location", + "name": article["title"], + "description": article.get("extract"), + "source": "wikipedia", + "source_url": article["url"], + "properties": { + "thumbnail": article.get("thumbnail"), + "categories": article.get("categories", []), + **infobox + }, + 
"latitude": article.get("latitude"), + "longitude": article.get("longitude") + } + + +# Singleton instance +wikipedia_scraper = WikipediaScraper() diff --git a/app/services/investigation.py b/app/services/investigation.py new file mode 100644 index 0000000000000000000000000000000000000000..cfbfc764bbd26579e5e92285959e952e3d8afa7e --- /dev/null +++ b/app/services/investigation.py @@ -0,0 +1,324 @@ +""" +Investigation Service - Builds comprehensive dossiers +Combines CNPJ data, transparency/sanctions, Lancer web search, and NER +""" +import httpx +from typing import Optional, Dict, Any, List +from dataclasses import dataclass, field, asdict +import asyncio + +from app.services.brazil_apis import consultar_cnpj, CompanyData +from app.services.transparencia_api import verificar_sancoes +# from app.services.tse_api import buscar_politico # TSE API needs fixing +from app.services import lancer +from app.services.nlp import entity_extractor +from app.core.database import get_db +from app.models.entity import Entity, Relationship + + +LANCER_URL = "https://madras1-lancer.hf.space/api/v1" + + +@dataclass +class DossierSection: + """A section of the dossier""" + titulo: str + conteudo: Any + status: str = "ok" # ok, warning, danger, info + icone: str = "📋" + + +@dataclass +class Dossier: + """Complete investigation dossier""" + tipo: str # "organization" or "person" + alvo: str # Target name + cnpj_cpf: Optional[str] = None + + # Sections + dados_cadastrais: Optional[DossierSection] = None + socios: Optional[DossierSection] = None + sancoes: Optional[DossierSection] = None + dados_politicos: Optional[DossierSection] = None # TSE data + noticias: Optional[DossierSection] = None + entidades_relacionadas: Optional[DossierSection] = None + + # Metadata + red_flags: List[str] = field(default_factory=list) + score_risco: int = 0 # 0-100 + data_geracao: str = "" + fonte_dados: List[str] = field(default_factory=list) + + +async def investigar_empresa(nome_ou_cnpj: str) -> Dossier: + 
""" + Investigate a company and build a comprehensive dossier. + """ + import re + from datetime import datetime + + dossier = Dossier( + tipo="organization", + alvo=nome_ou_cnpj, + data_geracao=datetime.now().isoformat() + ) + + # Check if input is CNPJ + cnpj_clean = re.sub(r'[^0-9]', '', nome_ou_cnpj) + is_cnpj = len(cnpj_clean) == 14 + + company_data = None + + # 1. Get company data from CNPJ + if is_cnpj: + dossier.cnpj_cpf = cnpj_clean + company_data = await consultar_cnpj(cnpj_clean) + + if company_data: + dossier.alvo = company_data.razao_social or company_data.nome_fantasia or nome_ou_cnpj + dossier.fonte_dados.append(company_data.fonte) + + # Build cadastral section + dossier.dados_cadastrais = DossierSection( + titulo="Dados Cadastrais", + icone="🏢", + conteudo={ + "cnpj": company_data.cnpj, + "razao_social": company_data.razao_social, + "nome_fantasia": company_data.nome_fantasia, + "situacao": company_data.situacao, + "data_abertura": company_data.data_abertura, + "natureza_juridica": company_data.natureza_juridica, + "capital_social": company_data.capital_social, + "porte": company_data.porte, + "endereco": f"{company_data.logradouro}, {company_data.numero} - {company_data.bairro}, {company_data.cidade}/{company_data.uf}", + "cep": company_data.cep, + "telefone": company_data.telefone, + "email": company_data.email, + "atividade_principal": f"{company_data.cnae_principal} - {company_data.cnae_descricao}" + } + ) + + # Check situação for red flags + if company_data.situacao and "ATIVA" not in company_data.situacao.upper(): + dossier.red_flags.append(f"⚠️ Situação cadastral: {company_data.situacao}") + dossier.dados_cadastrais.status = "warning" + + # Build partners section + if company_data.socios: + dossier.socios = DossierSection( + titulo=f"Sócios ({len(company_data.socios)})", + icone="👥", + conteudo=company_data.socios + ) + + # 2. 
Check sanctions/transparency + if dossier.cnpj_cpf: + sancoes = await verificar_sancoes(dossier.cnpj_cpf) + dossier.fonte_dados.append("Portal da Transparência") + + if sancoes["tem_sancoes"]: + dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções") + dossier.score_risco += 40 + + dossier.sancoes = DossierSection( + titulo=f"Sanções ({sancoes['total_sancoes']})", + icone="⚠️", + status="danger", + conteudo=sancoes + ) + else: + dossier.sancoes = DossierSection( + titulo="Sanções", + icone="✅", + status="ok", + conteudo={"mensagem": "Nenhuma sanção encontrada nos cadastros públicos"} + ) + + # 3. Web search for news and context + search_query = dossier.alvo + if company_data and company_data.nome_fantasia: + search_query = company_data.nome_fantasia + + try: + web_result = await lancer.search(f"{search_query} notícias escândalos processos", max_results=8) + + if web_result.answer or web_result.results: + dossier.fonte_dados.append("Lancer Web Search") + + news_content = { + "resumo": web_result.answer or "Sem resumo disponível", + "fontes": [ + {"titulo": r.title, "url": r.url, "snippet": r.content[:200]} + for r in web_result.results[:5] + ] + } + + dossier.noticias = DossierSection( + titulo="Notícias e Mídia", + icone="📰", + conteudo=news_content + ) + + # Check for negative keywords in news + negative_keywords = ["escândalo", "fraude", "corrupção", "prisão", "investigado", "denúncia", "irregularidade"] + raw_text = (web_result.answer or "").lower() + for kw in negative_keywords: + if kw in raw_text: + dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada nas notícias") + dossier.noticias.status = "warning" + dossier.score_risco += 10 + break + except Exception as e: + print(f"Web search error: {e}") + + # 4. 
Extract related entities using NER + if dossier.noticias and dossier.noticias.conteudo.get("resumo"): + try: + text_to_analyze = dossier.noticias.conteudo.get("resumo", "")[:3000] + ner_result = await entity_extractor.extract(text_to_analyze) + + if ner_result.entities: + entities = [ + {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role} + for e in ner_result.entities[:10] + ] + + dossier.entidades_relacionadas = DossierSection( + titulo=f"Entidades Relacionadas ({len(entities)})", + icone="🔗", + conteudo=entities + ) + except Exception as e: + print(f"NER error: {e}") + + # Calculate final risk score + dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5) + + return dossier + + +async def investigar_pessoa(nome: str, cpf: Optional[str] = None) -> Dossier: + """ + Investigate a person and build a dossier. + Note: CPF data is heavily protected by LGPD, so mainly uses web search. + """ + from datetime import datetime + + dossier = Dossier( + tipo="person", + alvo=nome, + cnpj_cpf=cpf, + data_geracao=datetime.now().isoformat() + ) + + # 1. Check sanctions if CPF provided + if cpf: + sancoes = await verificar_sancoes(cpf) + dossier.fonte_dados.append("Portal da Transparência") + + if sancoes["tem_sancoes"]: + dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções") + dossier.score_risco += 50 + + dossier.sancoes = DossierSection( + titulo=f"Sanções ({sancoes['total_sancoes']})", + icone="⚠️", + status="danger", + conteudo=sancoes + ) + + # 2. Check TSE for political data (DISABLED - API needs fixing) + # try: + # tse_data = await buscar_politico(nome) + # if tse_data.get("encontrado"): + # dossier.fonte_dados.append("TSE (DivulgaCand)") + # candidaturas = tse_data.get("candidaturas", []) + # patrimonio = tse_data.get("total_patrimonio", 0) + # partidos = tse_data.get("partidos", []) + # dossier.dados_politicos = DossierSection(...) 
+ # except Exception as e: + # print(f"TSE search error: {e}") + + + # 3. Web search for information + try: + web_result = await lancer.search(f'"{nome}" biografia cargo empresa', max_results=10) + + if web_result.answer or web_result.results: + dossier.fonte_dados.append("Lancer Web Search") + + dossier.noticias = DossierSection( + titulo="Informações Públicas", + icone="🌐", + conteudo={ + "resumo": web_result.answer or "Informações limitadas", + "fontes": [ + {"titulo": r.title, "url": r.url, "snippet": r.content[:200]} + for r in web_result.results[:5] + ] + } + ) + + # Check for negative keywords + negative_keywords = ["preso", "condenado", "investigado", "acusado", "escândalo", "fraude"] + raw_text = (web_result.answer or "").lower() + for kw in negative_keywords: + if kw in raw_text: + dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada") + dossier.noticias.status = "warning" + dossier.score_risco += 15 + break + except Exception as e: + print(f"Web search error: {e}") + + # 3. 
Extract related entities + if dossier.noticias and dossier.noticias.conteudo.get("resumo"): + try: + ner_result = await entity_extractor.extract(dossier.noticias.conteudo["resumo"][:2000]) + + if ner_result.entities: + entities = [ + {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role} + for e in ner_result.entities[:10] + if e.name.lower() != nome.lower() # Exclude the target + ] + + if entities: + dossier.entidades_relacionadas = DossierSection( + titulo=f"Conexões ({len(entities)})", + icone="🔗", + conteudo=entities + ) + except Exception as e: + print(f"NER error: {e}") + + dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5) + + return dossier + + +def dossier_to_dict(dossier: Dossier) -> Dict[str, Any]: + """Convert dossier to dictionary for JSON response""" + result = { + "tipo": dossier.tipo, + "alvo": dossier.alvo, + "cnpj_cpf": dossier.cnpj_cpf, + "red_flags": dossier.red_flags, + "score_risco": dossier.score_risco, + "data_geracao": dossier.data_geracao, + "fonte_dados": dossier.fonte_dados, + "secoes": {} + } + + for field_name in ["dados_cadastrais", "socios", "sancoes", "dados_politicos", "noticias", "entidades_relacionadas"]: + section = getattr(dossier, field_name) + if section: + result["secoes"][field_name] = { + "titulo": section.titulo, + "icone": section.icone, + "status": section.status, + "conteudo": section.conteudo + } + + return result diff --git a/app/services/investigator_agent.py b/app/services/investigator_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..56b74ad4c994947ed35f3185df53fc586a4232cc --- /dev/null +++ b/app/services/investigator_agent.py @@ -0,0 +1,659 @@ +""" +Investigator Agent - Autonomous Investigation with Tool Calling +Uses Cerebras native tool calling for multi-source investigations +""" +import json +import re +import httpx +from typing import Optional, List, Dict, Any +from dataclasses import dataclass, field +from datetime import datetime 
+from sqlalchemy.orm import Session + +from app.config import settings +from app.services import lancer +from app.services.brazil_apis import consultar_cnpj +from app.models.entity import Entity, Relationship + + +def sanitize_text(text: str) -> str: + """ + Clean up text from model that may contain thinking artifacts. + Only removes thinking tags, does NOT remove valid characters. + """ + if not text: + return text + + # Remove thinking tags and content between them + text = re.sub(r'.*?', '', text, flags=re.DOTALL) + text = re.sub(r'<\|think\|>.*?<\|/think\|>', '', text, flags=re.DOTALL) + + # Remove other common model artifacts like <|...|> tags + text = re.sub(r'<\|.*?\|>', '', text) + + # Clean up excessive newlines only + text = re.sub(r'\n{3,}', '\n\n', text) + + return text.strip() + + +@dataclass +class Finding: + """A discovery made during investigation""" + title: str + content: str + source: str + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + + +@dataclass +class InvestigationResult: + """Complete investigation result""" + mission: str + findings: List[Finding] + entities_discovered: List[Dict[str, Any]] + connections_mapped: List[Dict[str, Any]] + report: str + iterations: int + tools_used: List[str] + status: str = "completed" + + +# Tool definitions for Cerebras API +TOOLS = [ + { + "type": "function", + "function": { + "name": "search_entity", + "description": "Buscar entidade no NUMIDIUM (grafo de conhecimento) por nome. 
Use para encontrar pessoas, empresas ou locais já conhecidos.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Nome ou termo para buscar" + }, + "entity_type": { + "type": "string", + "enum": ["person", "organization", "location", "any"], + "description": "Tipo de entidade (opcional)" + } + }, + "required": ["query"] + } + } + }, + { + "type": "function", + "function": { + "name": "get_connections", + "description": "Obter a rede de conexões de uma entidade específica. Retorna entidades relacionadas.", + "parameters": { + "type": "object", + "properties": { + "entity_id": { + "type": "string", + "description": "ID da entidade no NUMIDIUM" + } + }, + "required": ["entity_id"] + } + } + }, + { + "type": "function", + "function": { + "name": "lookup_cnpj", + "description": "Consultar dados de uma empresa brasileira pelo CNPJ. Retorna razão social, sócios, endereço, CNAEs, etc.", + "parameters": { + "type": "object", + "properties": { + "cnpj": { + "type": "string", + "description": "CNPJ da empresa (com ou sem formatação)" + } + }, + "required": ["cnpj"] + } + } + }, + { + "type": "function", + "function": { + "name": "web_search", + "description": "Pesquisar informações na web. Use para buscar notícias, artigos e informações públicas.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Termo de busca" + }, + "freshness": { + "type": "string", + "enum": ["day", "week", "month", "any"], + "description": "Frescor dos resultados", + "default": "any" + } + }, + "required": ["query"] + } + } + }, + { + "type": "function", + "function": { + "name": "deep_research", + "description": "Pesquisa profunda e multi-dimensional sobre um tema. 
Use para tópicos complexos.", + "parameters": { + "type": "object", + "properties": { + "topic": { + "type": "string", + "description": "Tópico para pesquisa profunda" + } + }, + "required": ["topic"] + } + } + }, + { + "type": "function", + "function": { + "name": "save_finding", + "description": "Salvar uma descoberta importante da investigação.", + "parameters": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Título curto da descoberta" + }, + "content": { + "type": "string", + "description": "Conteúdo detalhado" + }, + "source": { + "type": "string", + "description": "Fonte da informação" + } + }, + "required": ["title", "content", "source"] + } + } + }, + { + "type": "function", + "function": { + "name": "finish_investigation", + "description": "Finalizar a investigação e gerar o relatório final.", + "parameters": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "Resumo das descobertas principais" + } + }, + "required": ["summary"] + } + } + } +] + + +SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD. /no_think + +Sua missão é investigar temas usando múltiplas fontes de dados: +- NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos +- Consulta CNPJ: Dados oficiais de empresas brasileiras (BrasilAPI) +- Web Search: Pesquisa na internet via Lancer + +## Estratégia de Investigação: + +1. Comece buscando no NUMIDIUM se já temos informações sobre o alvo +2. Para empresas brasileiras, consulte o CNPJ para obter sócios e dados +3. Use web_search para buscar notícias e informações públicas +4. Para cada sócio/conexão descoberta, considere investigar mais a fundo +5. Use save_finding para registrar descobertas importantes +6. 
Quando tiver informações suficientes, use finish_investigation + +## Regras: +- Seja metódico e siga pistas +- Não invente informações - use apenas dados das ferramentas +- Priorize qualidade sobre quantidade +- Cite sempre as fontes +- NÃO use pensamento interno ou tags . Responda diretamente.""" + + +class InvestigatorAgent: + """Autonomous investigation agent with tool calling""" + + def __init__(self): + self.api_url = "https://api.cerebras.ai/v1/chat/completions" + self.api_key = settings.cerebras_api_key + self.model = "zai-glm-4.7" + + # Investigation state + self.findings: List[Finding] = [] + self.entities_discovered: List[Dict[str, Any]] = [] + self.connections_mapped: List[Dict[str, Any]] = [] + self.tools_used: List[str] = [] + self.messages: List[Dict[str, Any]] = [] + self.db: Optional[Session] = None + + def _reset_state(self): + """Reset investigation state""" + self.findings = [] + self.entities_discovered = [] + self.connections_mapped = [] + self.tools_used = [] + self.messages = [] + + async def _call_llm( + self, + messages: List[Dict[str, Any]], + tools: List[Dict] = None + ) -> Dict[str, Any]: + """Call Cerebras API with tool calling support""" + try: + payload = { + "model": self.model, + "messages": messages, + "temperature": 0.3, + "max_tokens": 2048, + } + + if tools: + payload["tools"] = tools + payload["tool_choice"] = "auto" + payload["parallel_tool_calls"] = True + + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + self.api_url, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + }, + json=payload + ) + + if response.status_code != 200: + raise Exception(f"API error: {response.status_code} - {response.text}") + + return response.json() + + except Exception as e: + raise Exception(f"LLM call failed: {str(e)}") + + async def _execute_tool(self, tool_name: str, arguments: Dict) -> str: + """Execute a tool and return the result""" + 
self.tools_used.append(tool_name) + + try: + if tool_name == "search_entity": + return await self._search_entity( + arguments.get("query", ""), + arguments.get("entity_type") + ) + + elif tool_name == "get_connections": + return await self._get_connections(arguments.get("entity_id")) + + elif tool_name == "lookup_cnpj": + return await self._lookup_cnpj(arguments.get("cnpj", "")) + + + elif tool_name == "web_search": + return await self._web_search( + arguments.get("query", ""), + arguments.get("freshness", "any") + ) + + elif tool_name == "deep_research": + return await self._deep_research(arguments.get("topic", "")) + + elif tool_name == "aether_search": + return await self._aether_search(arguments.get("query", "")) + + elif tool_name == "aether_entities": + return await self._aether_entities() + + elif tool_name == "save_finding": + finding = Finding( + title=arguments.get("title", ""), + content=arguments.get("content", ""), + source=arguments.get("source", "") + ) + self.findings.append(finding) + return f"Descoberta salva: {finding.title}" + + elif tool_name == "finish_investigation": + return f"INVESTIGATION_COMPLETE: {arguments.get('summary', '')}" + + else: + return f"Ferramenta desconhecida: {tool_name}" + + except Exception as e: + return f"Erro ao executar {tool_name}: {str(e)}" + + async def _search_entity(self, query: str, entity_type: Optional[str]) -> str: + """Search entities in database""" + if not self.db: + return "Erro: Banco de dados não disponível" + + q = self.db.query(Entity).filter(Entity.name.ilike(f"%{query}%")) + if entity_type and entity_type != "any": + q = q.filter(Entity.type == entity_type) + + entities = q.limit(10).all() + + if entities: + result = [] + for e in entities: + self.entities_discovered.append({ + "id": str(e.id), + "name": e.name, + "type": e.type + }) + result.append({ + "id": str(e.id), + "name": e.name, + "type": e.type, + "description": e.description[:200] if e.description else None + }) + return 
json.dumps(result, ensure_ascii=False, indent=2) + + return "Nenhuma entidade encontrada no NUMIDIUM." + + async def _get_connections(self, entity_id: str) -> str: + """Get entity connections""" + if not self.db: + return "Erro: Banco de dados não disponível" + + relationships = self.db.query(Relationship).filter( + (Relationship.source_id == entity_id) | (Relationship.target_id == entity_id) + ).limit(20).all() + + if relationships: + connections = [] + for rel in relationships: + source = self.db.query(Entity).filter(Entity.id == rel.source_id).first() + target = self.db.query(Entity).filter(Entity.id == rel.target_id).first() + if source and target: + connections.append({ + "source": source.name, + "target": target.name, + "type": rel.type + }) + return json.dumps(connections, ensure_ascii=False, indent=2) + + return "Nenhuma conexão encontrada." + + async def _lookup_cnpj(self, cnpj: str) -> str: + """Lookup CNPJ via BrasilAPI""" + cnpj_clean = cnpj.replace(".", "").replace("/", "").replace("-", "") + result = await consultar_cnpj(cnpj_clean) + + if result: + data = { + "razao_social": result.razao_social, + "nome_fantasia": result.nome_fantasia, + "situacao": result.situacao, + "data_abertura": result.data_abertura, + "capital_social": result.capital_social, + "endereco": f"{result.logradouro}, {result.numero} - {result.cidade}/{result.uf}", + "cnae": f"{result.cnae_principal} - {result.cnae_descricao}", + "socios": result.socios + } + return json.dumps(data, ensure_ascii=False, indent=2) + + return "CNPJ não encontrado." 
+ + async def _lookup_phone(self, phone: str) -> str: + """Lookup phone number via NumVerify API""" + # Clean phone number - keep only digits + phone_clean = "".join(c for c in phone if c.isdigit()) + + # NumVerify API key (free tier: 100 req/month) + numverify_key = getattr(settings, 'numverify_api_key', None) + + if not numverify_key: + # Fallback: just do a web search for the number + return await self._web_search(f'"{phone_clean}" telefone', "any") + + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "http://apilayer.net/api/validate", + params={ + "access_key": numverify_key, + "number": phone_clean, + "country_code": "", # Auto-detect + "format": 1 + } + ) + + if response.status_code == 200: + data = response.json() + + if data.get("valid"): + result = { + "numero": data.get("international_format"), + "valido": True, + "pais": data.get("country_name"), + "codigo_pais": data.get("country_code"), + "operadora": data.get("carrier"), + "tipo_linha": data.get("line_type"), # mobile, landline, etc + "localizacao": data.get("location") + } + return json.dumps(result, ensure_ascii=False, indent=2) + else: + return f"Número {phone_clean} não é válido ou não foi encontrado." + + return "Erro ao consultar número." + + except Exception as e: + # Fallback to web search + return await self._web_search(f'"{phone_clean}" telefone', "any") + + async def _web_search(self, query: str, freshness: str) -> str: + """Web search via Lancer""" + try: + result = await lancer.search(query, max_results=5, freshness=freshness) + if result.answer: + return f"Resumo: {result.answer}\n\nFontes: {len(result.results)} resultados" + return "Nenhum resultado encontrado." 
+ except Exception as e: + return f"Erro na busca web: {str(e)}" + + async def _deep_research(self, topic: str) -> str: + """Deep research via Lancer""" + try: + result = await lancer.deep_research(topic, max_dimensions=3) + if result.answer: + return result.answer + return "Pesquisa profunda não retornou resultados." + except Exception as e: + return f"Erro na pesquisa: {str(e)}" + + async def _aether_search(self, query: str) -> str: + """Semantic search via AetherMap""" + try: + # Check if we have a job_id cached + if not aethermap.current_job_id: + # Index entities from database first + if self.db: + entities = self.db.query(Entity).limit(500).all() + if entities: + texts = [] + for e in entities: + text = f"{e.name} ({e.type})" + if e.description: + text += f": {e.description[:500]}" + texts.append(text) + + if texts: + result = await aethermap.process_documents(texts, fast_mode=True) + # Continue with search + + if aethermap.current_job_id: + result = await aethermap.semantic_search(query, turbo_mode=True) + return f"RAG Response:\n{result.summary}" + else: + return "Nenhum documento indexado no AetherMap." + + except Exception as e: + return f"Erro no AetherMap search: {str(e)}" + + async def _aether_entities(self) -> str: + """Extract NER entities via AetherMap""" + try: + if not aethermap.current_job_id: + return "Nenhum documento indexado. Use aether_search primeiro." 
+ + result = await aethermap.extract_entities() + + # Format response + output = [] + + if result.hubs: + output.append("**Entidades Centrais (Hubs):**") + for hub in result.hubs[:5]: + output.append(f"- {hub.get('entity')} ({hub.get('type')}): {hub.get('degree')} conexões") + + if result.insights: + output.append(f"\n**Insights:**") + output.append(f"- Total de conexões: {result.insights.get('total_connections', 0)}") + output.append(f"- Grau médio: {result.insights.get('avg_degree', 0)}") + + if result.edges: + output.append(f"\n**Top 5 Relacionamentos:**") + for edge in result.edges[:5]: + output.append(f"- {edge.source_entity} <-> {edge.target_entity}: {edge.reason}") + + return "\n".join(output) if output else "Nenhuma entidade significativa encontrada." + + except Exception as e: + return f"Erro na extração de entidades: {str(e)}" + + async def investigate( + self, + mission: str, + db: Session, + max_iterations: int = 10 + ) -> InvestigationResult: + """Main investigation loop""" + self._reset_state() + self.db = db + + self.messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": f"Missão de investigação: {mission}\n\nComece a investigação."} + ] + + iteration = 0 + final_summary = "" + + while iteration < max_iterations: + iteration += 1 + + response = await self._call_llm(self.messages, TOOLS) + + choice = response["choices"][0] + message = choice["message"] + self.messages.append(message) + + tool_calls = message.get("tool_calls", []) + + if not tool_calls: + if message.get("content"): + final_summary = message["content"] + break + + for tool_call in tool_calls: + func = tool_call["function"] + tool_name = func["name"] + + try: + arguments = json.loads(func["arguments"]) + except: + arguments = {} + + result = await self._execute_tool(tool_name, arguments) + + if result.startswith("INVESTIGATION_COMPLETE:"): + final_summary = result.replace("INVESTIGATION_COMPLETE:", "").strip() + break + + self.messages.append({ + 
"role": "tool", + "tool_call_id": tool_call["id"], + "content": result + }) + + if final_summary: + break + + if not final_summary: + final_summary = await self._generate_report(mission) + + # Sanitize all text outputs to remove thinking artifacts + final_summary = sanitize_text(final_summary) + + # Sanitize findings content + sanitized_findings = [] + for f in self.findings: + sanitized_findings.append(Finding( + title=sanitize_text(f.title), + content=sanitize_text(f.content), + source=f.source, + timestamp=f.timestamp + )) + + return InvestigationResult( + mission=mission, + findings=sanitized_findings, + entities_discovered=self.entities_discovered, + connections_mapped=self.connections_mapped, + report=final_summary, + iterations=iteration, + tools_used=list(set(self.tools_used)), + status="completed" + ) + + async def _generate_report(self, mission: str) -> str: + """Generate final report""" + findings_text = "\n".join([ + f"- {f.title}: {f.content} (Fonte: {f.source})" + for f in self.findings + ]) or "Nenhuma descoberta registrada." + + entities_text = ", ".join([ + e.get("name", "Unknown") for e in self.entities_discovered[:10] + ]) or "Nenhuma entidade." 
+ + prompt = f"""Gere um relatório de investigação: + +Missão: {mission} + +Descobertas: +{findings_text} + +Entidades: {entities_text} + +Ferramentas usadas: {', '.join(set(self.tools_used))} + +Gere relatório estruturado com: Resumo Executivo, Descobertas, Entidades, Recomendações.""" + + response = await self._call_llm([ + {"role": "system", "content": "Gere relatórios concisos."}, + {"role": "user", "content": prompt} + ]) + + return sanitize_text(response["choices"][0]["message"]["content"]) + + +# Singleton +investigator_agent = InvestigatorAgent() diff --git a/app/services/lancer.py b/app/services/lancer.py new file mode 100644 index 0000000000000000000000000000000000000000..179868cdd00136f0a9376b6ea6fdff3df5b48abf --- /dev/null +++ b/app/services/lancer.py @@ -0,0 +1,198 @@ +""" +Lancer Deep Research Service +Integrates with Lancer Search API for AI-powered research +""" +import httpx +from typing import Optional, List, Dict, Any +from dataclasses import dataclass + + +LANCER_BASE_URL = "https://madras1-lancer.hf.space" + + +@dataclass +class SearchResult: + """Individual search result from Lancer""" + title: str + url: str + content: str + score: float + published_date: Optional[str] = None + + +@dataclass +class ResearchResponse: + """Response from Lancer research/search""" + query: str + answer: Optional[str] + results: List[SearchResult] + citations: List[Dict[str, Any]] + processing_time_ms: float + raw_text: str # Combined text for NER extraction + + +async def search( + query: str, + max_results: int = 10, + freshness: str = "any" +) -> ResearchResponse: + """ + Perform a search with AI synthesis using Lancer API. 
async def search(
    query: str,
    max_results: int = 10,
    freshness: str = "any"
) -> ResearchResponse:
    """
    Perform a search with AI synthesis using Lancer API.

    Raises:
        Exception: wrapped transport/API errors ("Lancer search failed: ..."),
            chained to the original cause for debuggability.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search",
                json={
                    "query": query,
                    "max_results": max_results,
                    "freshness": freshness,
                    "include_answer": True
                }
            )

            if response.status_code != 200:
                raise Exception(f"Lancer API error: {response.status_code}")

            data = response.json()

            results = [
                SearchResult(
                    title=r.get("title", ""),
                    url=r.get("url", ""),
                    content=r.get("content", ""),
                    score=r.get("score", 0.0),
                    published_date=r.get("published_date")
                )
                for r in data.get("results", [])
            ]

            # Combine all text for downstream NER.
            raw_text = data.get("answer", "") or ""
            for r in results:
                raw_text += f"\n{r.title}. {r.content}"

            return ResearchResponse(
                query=data.get("query", query),
                answer=data.get("answer"),
                results=results,
                citations=data.get("citations", []),
                processing_time_ms=data.get("processing_time_ms", 0),
                raw_text=raw_text
            )

    except Exception as e:
        # FIX: chain the original exception so the real cause (timeout,
        # DNS, JSON decode, ...) survives in the traceback.
        raise Exception(f"Lancer search failed: {str(e)}") from e


async def deep_research(
    query: str,
    max_dimensions: int = 5,
    max_sources_per_dim: int = 5
) -> ResearchResponse:
    """
    Perform deep multi-dimensional research using Lancer API.
    This provides richer, more comprehensive analysis.

    Raises:
        Exception: wrapped errors ("Lancer deep research failed: ..."),
            chained to the original cause.
    """
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/research/deep",
                json={
                    "query": query,
                    "max_dimensions": max_dimensions,
                    "max_sources_per_dim": max_sources_per_dim,
                    "max_total_searches": 20
                }
            )

            if response.status_code != 200:
                raise Exception(f"Lancer API error: {response.status_code}")

            data = response.json()

            # Deep research returns a different format - adapt it.
            results = []
            raw_text = ""

            # Extract from dimensions if present.
            if "dimensions" in data:
                for dim in data["dimensions"]:
                    dim_name = dim.get("dimension", "")
                    raw_text += f"\n## {dim_name}\n"
                    for r in dim.get("results", []):
                        results.append(SearchResult(
                            title=r.get("title", ""),
                            url=r.get("url", ""),
                            content=r.get("content", ""),
                            score=r.get("score", 0.0)
                        ))
                        raw_text += f"{r.get('title', '')}. {r.get('content', '')}\n"

            # Prepend the final report when the API provides one.
            final_report = data.get("final_report", data.get("report", ""))
            if final_report:
                raw_text = final_report + "\n" + raw_text

            return ResearchResponse(
                query=query,
                answer=final_report,
                results=results,
                citations=data.get("citations", []),
                processing_time_ms=data.get("processing_time_ms", 0),
                raw_text=raw_text
            )

    except Exception as e:
        # FIX: preserve the original traceback via exception chaining.
        raise Exception(f"Lancer deep research failed: {str(e)}") from e


async def heavy_search(
    query: str,
    max_results: int = 5
) -> ResearchResponse:
    """
    Heavy search with full content scraping from sources.
    Slower but provides more context.

    Raises:
        Exception: wrapped errors ("Lancer heavy search failed: ..."),
            chained to the original cause.
    """
    try:
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                f"{LANCER_BASE_URL}/api/v1/search/heavy",
                json={
                    "query": query,
                    "max_results": max_results,
                    "include_answer": True
                }
            )

            if response.status_code != 200:
                raise Exception(f"Lancer API error: {response.status_code}")

            data = response.json()

            results = [
                SearchResult(
                    title=r.get("title", ""),
                    url=r.get("url", ""),
                    content=r.get("content", ""),
                    score=r.get("score", 0.0)
                )
                for r in data.get("results", [])
            ]

            raw_text = data.get("answer", "") or ""
            for r in results:
                raw_text += f"\n{r.title}. {r.content}"

            return ResearchResponse(
                query=query,
                answer=data.get("answer"),
                results=results,
                citations=data.get("citations", []),
                processing_time_ms=data.get("processing_time_ms", 0),
                raw_text=raw_text
            )

    except Exception as e:
        # FIX: preserve the original traceback via exception chaining.
        raise Exception(f"Lancer heavy search failed: {str(e)}") from e
"""
Entity Extractor Service - LLM-based NER
Uses Cerebras API with Qwen 3 235B for intelligent entity and relationship extraction
"""
import json
import re
from typing import Dict, List, Optional, Any
from dataclasses import dataclass


@dataclass
class ExtractedEntity:
    """Represents an extracted entity"""
    name: str
    type: str  # person, organization, location, event
    role: Optional[str] = None
    aliases: Optional[List[str]] = None
    description: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    event_date: Optional[str] = None  # Date in ISO format (YYYY-MM-DD)


@dataclass
class ExtractedRelationship:
    """Represents a relationship between entities"""
    source: str
    target: str
    relationship_type: str
    context: Optional[str] = None
    event_date: Optional[str] = None  # Date in ISO format (YYYY-MM-DD)


@dataclass
class ExtractedEvent:
    """Represents an extracted event"""
    description: str
    event_type: Optional[str] = None
    date: Optional[str] = None
    location: Optional[str] = None
    participants: Optional[List[str]] = None


@dataclass
class ExtractionResult:
    """Complete extraction result"""
    entities: List[ExtractedEntity]
    relationships: List[ExtractedRelationship]
    events: List[ExtractedEvent]
    raw_response: Optional[str] = None


EXTRACTION_PROMPT = """Você é um especialista em extração de informações estruturadas de textos.

Analise o texto fornecido e extraia TODAS as entidades, relacionamentos e eventos mencionados.

## Regras:
1. Identifique entidades: pessoas, organizações, locais, eventos
2. Para PESSOAS: inclua nome completo (se mencionado ou conhecido), cargo/função
3. Para ORGANIZAÇÕES: inclua nome oficial e siglas
4. Para LOCAIS: seja específico (cidade, país, endereço)
5. Identifique RELACIONAMENTOS entre entidades (quem trabalha onde, quem conhece quem, etc.)
6. Identifique EVENTOS mencionados (reuniões, anúncios, eleições, etc.)
7. EXTRAIA DATAS sempre que mencionadas (formato YYYY-MM-DD ou YYYY se só o ano)

## Formato de resposta (JSON válido):
```json
{{
  "entities": [
    {{
      "name": "Nome Completo",
      "type": "person|organization|location|event",
      "role": "cargo ou função (opcional)",
      "aliases": ["apelidos", "siglas"],
      "description": "breve descrição se relevante",
      "event_date": "YYYY-MM-DD ou YYYY (data relevante como nascimento, fundação, etc)"
    }}
  ],
  "relationships": [
    {{
      "source": "Nome da Entidade 1",
      "target": "Nome da Entidade 2",
      "relationship_type": "tipo de relação (trabalha em, preside, fundou, reuniu-se com, etc.)",
      "context": "contexto da relação",
      "event_date": "YYYY-MM-DD ou YYYY (quando o relacionamento aconteceu/iniciou)"
    }}
  ],
  "events": [
    {{
      "description": "O que aconteceu",
      "event_type": "meeting|announcement|election|crime|etc",
      "date": "YYYY-MM-DD ou YYYY",
      "location": "local se mencionado",
      "participants": ["lista de participantes"]
    }}
  ]
}}
```

Retorne APENAS o JSON, sem texto adicional.

## Texto para análise:
{text}
"""


class EntityExtractor:
    """
    LLM-based Entity Extractor using Cerebras API
    """

    def __init__(self):
        # Lazy project import keeps this module importable (e.g. for unit
        # tests) even when app.config is not on the path; in the running
        # application behavior is unchanged.
        try:
            from app.config import settings
            self.api_key = settings.cerebras_api_key
        except ImportError:
            self.api_key = None
        self.base_url = "https://api.cerebras.ai/v1"
        self.model = "qwen-3-235b-a22b-instruct-2507"
        self.timeout = 60.0

    async def extract(self, text: str) -> ExtractionResult:
        """
        Extract entities, relationships, and events from text using LLM

        Args:
            text: The text to analyze

        Returns:
            ExtractionResult with all extracted information

        Raises:
            ValueError: when the API key is missing, or on API/transport errors.
        """
        # Lazy import: httpx is only needed when actually calling the API.
        import httpx

        if not self.api_key:
            raise ValueError("CEREBRAS_API_KEY not configured. Please set the environment variable.")

        if not text or len(text.strip()) < 10:
            return ExtractionResult(entities=[], relationships=[], events=[])

        # Prepare the prompt
        prompt = EXTRACTION_PROMPT.format(text=text)

        try:
            # Call Cerebras API
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    f"{self.base_url}/chat/completions",
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.model,
                        "messages": [
                            {
                                "role": "system",
                                "content": "Você é um assistente especialista em extração de entidades e relacionamentos. Sempre responda em JSON válido."
                            },
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ],
                        "temperature": 0.1,  # Low temperature for consistent extraction
                        "max_tokens": 4096
                    }
                )

                if response.status_code != 200:
                    error_text = response.text
                    print(f"Cerebras API error: {response.status_code} - {error_text}")
                    raise ValueError(f"Cerebras API error: {response.status_code}")

                data = response.json()

                # Parse the response
                raw_content = data["choices"][0]["message"]["content"]
                return self._parse_response(raw_content)

        except httpx.TimeoutException:
            print("Cerebras API timeout")
            raise ValueError("API timeout - please try again with shorter text")
        except httpx.RequestError as e:
            print(f"Cerebras API request error: {e}")
            raise ValueError(f"API connection error: {str(e)}")
        except KeyError as e:
            print(f"Unexpected API response format: {e}")
            raise ValueError("Unexpected API response format")

    def _parse_response(self, content: str) -> ExtractionResult:
        """Parse the LLM response into structured data.

        Accepts raw JSON, JSON wrapped in ```json fences, or JSON embedded
        in surrounding prose; returns an empty result (with raw_response
        preserved) when nothing parseable is found.
        """
        try:
            # Try to extract JSON from the response.
            # Sometimes the model wraps it in ```json ... ```
            json_match = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
            if json_match:
                json_str = json_match.group(1)
            else:
                # Try to find raw JSON
                json_match = re.search(r'\{.*\}', content, re.DOTALL)
                if json_match:
                    json_str = json_match.group(0)
                else:
                    json_str = content

            data = json.loads(json_str)

            # FIX: the model can return a non-object top level (e.g. a bare
            # list); previously that crashed with an uncaught AttributeError
            # on data.get(). Treat it as "nothing extracted" instead.
            if not isinstance(data, dict):
                return ExtractionResult(
                    entities=[],
                    relationships=[],
                    events=[],
                    raw_response=content
                )

            # Parse entities
            entities = []
            for e in data.get("entities", []):
                entities.append(ExtractedEntity(
                    name=e.get("name", ""),
                    type=e.get("type", "unknown"),
                    role=e.get("role"),
                    aliases=e.get("aliases", []),
                    description=e.get("description"),
                    event_date=e.get("event_date")
                ))

            # Parse relationships
            relationships = []
            for r in data.get("relationships", []):
                relationships.append(ExtractedRelationship(
                    source=r.get("source", ""),
                    target=r.get("target", ""),
                    relationship_type=r.get("relationship_type", "related_to"),
                    context=r.get("context"),
                    event_date=r.get("event_date")
                ))

            # Parse events
            events = []
            for ev in data.get("events", []):
                events.append(ExtractedEvent(
                    description=ev.get("description", ""),
                    event_type=ev.get("event_type"),
                    date=ev.get("date"),
                    location=ev.get("location"),
                    participants=ev.get("participants", [])
                ))

            return ExtractionResult(
                entities=entities,
                relationships=relationships,
                events=events,
                raw_response=content
            )

        except json.JSONDecodeError as e:
            print(f"Failed to parse LLM response: {e}")
            print(f"Raw content: {content}")
            return ExtractionResult(
                entities=[],
                relationships=[],
                events=[],
                raw_response=content
            )

    def extract_sync(self, text: str) -> ExtractionResult:
        """
        Synchronous version of extract for non-async contexts.
        Note: must not be called from within a running event loop.
        """
        import asyncio
        return asyncio.run(self.extract(text))


# Singleton instance
entity_extractor = EntityExtractor()
"""
Portal da Transparência APIs
Access to Brazilian government transparency data
"""
import httpx
from typing import Optional, Dict, Any, List
from dataclasses import dataclass


# Base URL for the Portal da Transparência REST API.
TRANSPARENCIA_URL = "https://api.portaldatransparencia.gov.br/api-de-dados"


@dataclass
class SanctionRecord:
    """One sanction/punishment record from CEIS, CNEP or CEPIM."""
    tipo: str  # CEIS, CNEP, CEPIM
    cpf_cnpj: str
    nome: str
    tipo_pessoa: str  # 'F' or 'J'

    # Sanction details
    tipo_sancao: str = ""
    data_inicio: str = ""
    data_fim: str = ""
    orgao_sancionador: str = ""
    uf_orgao: str = ""
    fundamentacao_legal: str = ""

    # Source
    fonte_url: str = ""


async def consultar_ceis(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """
    Query CEIS - Cadastro de Empresas Inidôneas e Suspensas
    Note: Requires authentication token from Portal da Transparência
    """
    # Without token, we can still try - some endpoints work without auth.
    return await _query_sanctions("ceis", cnpj_cpf, token)


async def consultar_cnep(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """
    Query CNEP - Cadastro Nacional de Empresas Punidas
    """
    return await _query_sanctions("cnep", cnpj_cpf, token)


async def consultar_cepim(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """
    Query CEPIM - Cadastro de Entidades Privadas sem Fins Lucrativos Impedidas
    """
    return await _query_sanctions("cepim", cnpj_cpf, token)


async def _query_sanctions(
    endpoint: str,
    cnpj_cpf: str,
    token: Optional[str] = None
) -> List[SanctionRecord]:
    """Shared worker behind the three public consultar_* wrappers."""

    def _unwrap(item: Dict[str, Any], key: str, subkey: str) -> str:
        # The API sometimes nests these fields as objects, sometimes flat.
        raw = item.get(key)
        if isinstance(raw, dict):
            return raw.get(subkey, "")
        return str(item.get(key, ""))

    try:
        headers = {"chave-api-dados": token} if token else {}

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{TRANSPARENCIA_URL}/{endpoint}",
                params={"cnpjCpf": cnpj_cpf},
                headers=headers
            )

            if response.status_code == 401:
                # Need authentication - return empty for now.
                print(f"Portal da Transparência requires authentication for {endpoint}")
                return []

            if response.status_code != 200:
                return []

            payload = response.json()
            if not isinstance(payload, list):
                payload = [payload] if payload else []

            return [
                SanctionRecord(
                    tipo=endpoint.upper(),
                    cpf_cnpj=item.get("cpfCnpj", ""),
                    nome=item.get("nomeRazaoSocial", item.get("nome", "")),
                    tipo_pessoa=item.get("tipoPessoa", ""),
                    tipo_sancao=_unwrap(item, "tipoSancao", "descricao"),
                    data_inicio=item.get("dataInicioSancao", ""),
                    data_fim=item.get("dataFimSancao", ""),
                    orgao_sancionador=_unwrap(item, "orgaoSancionador", "nome"),
                    uf_orgao=item.get("ufOrgaoSancionador", ""),
                    fundamentacao_legal=item.get("fundamentacaoLegal", ""),
                    fonte_url=f"https://portaldatransparencia.gov.br/{endpoint}"
                )
                for item in payload
            ]

    except Exception as e:
        print(f"Transparência API error ({endpoint}): {e}")
        return []


async def verificar_sancoes(cnpj_cpf: str, token: Optional[str] = None) -> Dict[str, Any]:
    """
    Check all sanction databases for a CNPJ/CPF
    Returns consolidated result
    """
    import asyncio

    # Hit the three databases concurrently.
    ceis, cnep, cepim = await asyncio.gather(
        consultar_ceis(cnpj_cpf, token),
        consultar_cnep(cnpj_cpf, token),
        consultar_cepim(cnpj_cpf, token),
    )

    todas = ceis + cnep + cepim

    return {
        "cnpj_cpf": cnpj_cpf,
        "tem_sancoes": len(todas) > 0,
        "total_sancoes": len(todas),
        "ceis": len(ceis),
        "cnep": len(cnep),
        "cepim": len(cepim),
        "registros": [
            {
                "tipo": s.tipo,
                "tipo_sancao": s.tipo_sancao,
                "orgao": s.orgao_sancionador,
                "inicio": s.data_inicio,
                "fim": s.data_fim,
                "fundamentacao": s.fundamentacao_legal
            }
            for s in todas
        ]
    }


# ---------------------------------------------------------------------------
# TSE (Tribunal Superior Eleitoral) API Service
# Access to Brazilian electoral data - candidates, assets, donations
# ---------------------------------------------------------------------------
from dataclasses import field

# DivulgaCand API (unofficial but functional)
TSE_DIVULGACAND_URL = "https://divulgacandcontas.tse.jus.br/divulga/rest/v1"


@dataclass
class Candidato:
    """Electoral candidate data"""
    id: int
    nome: str
    nome_urna: str
    cpf_parcial: str = ""  # TSE only shows partial
    numero: str = ""
    cargo: str = ""
    partido_sigla: str = ""
    partido_nome: str = ""
    coligacao: str = ""
    situacao: str = ""

    # Location
    uf: str = ""
    municipio: str = ""

    # Personal
    data_nascimento: str = ""
    genero: str = ""
    grau_instrucao: str = ""
    ocupacao: str = ""

    # Assets
    total_bens: float = 0.0
    bens: List[Dict[str, Any]] = field(default_factory=list)

    # Campaign
    total_receitas: float = 0.0
    total_despesas: float = 0.0


@dataclass
class Eleicao:
    """Election metadata"""
    id: int
    ano: int
    descricao: str
    turno: int = 1


async def listar_eleicoes() -> List[Eleicao]:
    """List available elections, newest first."""
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            response = await client.get(f"{TSE_DIVULGACAND_URL}/eleicao/ordinarias")

            if response.status_code != 200:
                return []

            eleicoes = [
                Eleicao(
                    id=item.get("id", 0),
                    ano=item.get("ano", 0),
                    descricao=item.get("descricaoEleicao", ""),
                    turno=item.get("turno", 1)
                )
                for item in response.json()
            ]

            return sorted(eleicoes, key=lambda e: e.ano, reverse=True)

    except Exception as e:
        print(f"TSE eleicoes error: {e}")
        return []
async def buscar_candidatos(
    nome: str,
    ano: int = 2024,
    uf: Optional[str] = None,
    cargo: Optional[str] = None
) -> List[Candidato]:
    """
    Search for candidates by name.

    Args:
        nome: Candidate name to search
        ano: Election year (default 2024)
        uf: State filter (optional)
        cargo: Position filter (optional)

    Returns:
        A (possibly empty) list of Candidato; errors are swallowed and
        logged, returning [].
    """
    try:
        # First get the election ID for the year.
        eleicoes = await listar_eleicoes()
        eleicao = next((e for e in eleicoes if e.ano == ano), None)

        if not eleicao:
            # Fall back to well-known election IDs when the listing fails.
            eleicao_id = {2024: 546, 2022: 544, 2020: 426, 2018: 295}.get(ano, 546)
        else:
            eleicao_id = eleicao.id

        base_url = f"{TSE_DIVULGACAND_URL}/candidatura/listar/{ano}/{eleicao_id}"

        params = {"nomeCompleto": nome}
        if uf:
            params["uf"] = uf.upper()
        if cargo:
            params["cargo"] = cargo

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(base_url, params=params)

            if response.status_code != 200:
                return []

            data = response.json()
            candidatos_data = data.get("candidatos", [])

            candidatos = []
            for item in candidatos_data:
                candidatos.append(Candidato(
                    id=item.get("id", 0),
                    nome=item.get("nomeCompleto", ""),
                    nome_urna=item.get("nomeUrna", ""),
                    # TSE only exposes a partial CPF; mask the rest.
                    cpf_parcial=item.get("cpf", "")[:3] + ".***.***-**" if item.get("cpf") else "",
                    numero=str(item.get("numero", "")),
                    # Several fields arrive either as nested objects or flat strings.
                    cargo=item.get("cargo", {}).get("nome", "") if isinstance(item.get("cargo"), dict) else str(item.get("cargo", "")),
                    partido_sigla=item.get("partido", {}).get("sigla", "") if isinstance(item.get("partido"), dict) else "",
                    partido_nome=item.get("partido", {}).get("nome", "") if isinstance(item.get("partido"), dict) else "",
                    uf=item.get("ufSigla", "") or item.get("uf", ""),
                    municipio=item.get("municipio", {}).get("nome", "") if isinstance(item.get("municipio"), dict) else "",
                    situacao=item.get("situacao", ""),
                    total_bens=float(item.get("totalDeBens", 0) or 0)
                ))

            return candidatos

    except Exception as e:
        print(f"TSE search error: {e}")
        return []


async def obter_candidato_detalhes(
    id_candidato: int,
    ano: int = 2024,
    eleicao_id: Optional[int] = None
) -> Optional[Candidato]:
    """Get detailed candidate information including assets.

    Returns None when the candidate cannot be fetched; asset (bens) lookup
    is best-effort and never fails the whole call.
    """
    try:
        if not eleicao_id:
            eleicao_id = {2024: 546, 2022: 544, 2020: 426, 2018: 295}.get(ano, 546)

        async with httpx.AsyncClient(timeout=30.0) as client:
            # Get candidate details
            response = await client.get(
                f"{TSE_DIVULGACAND_URL}/candidatura/buscar/{ano}/{eleicao_id}/candidato/{id_candidato}"
            )

            if response.status_code != 200:
                return None

            item = response.json()

            candidato = Candidato(
                id=item.get("id", 0),
                nome=item.get("nomeCompleto", ""),
                nome_urna=item.get("nomeUrna", ""),
                numero=str(item.get("numero", "")),
                cargo=item.get("cargo", {}).get("nome", "") if isinstance(item.get("cargo"), dict) else "",
                partido_sigla=item.get("partido", {}).get("sigla", "") if isinstance(item.get("partido"), dict) else "",
                partido_nome=item.get("partido", {}).get("nome", "") if isinstance(item.get("partido"), dict) else "",
                uf=item.get("ufSigla", ""),
                municipio=item.get("localCandidatura", ""),
                situacao=item.get("situacao", ""),
                data_nascimento=item.get("dataNascimento", ""),
                genero=item.get("genero", ""),
                grau_instrucao=item.get("grauInstrucao", ""),
                ocupacao=item.get("ocupacao", ""),
                total_bens=float(item.get("totalDeBens", 0) or 0)
            )

            # Try to get assets (bens) - best-effort.
            try:
                bens_response = await client.get(
                    f"{TSE_DIVULGACAND_URL}/candidatura/buscar/{ano}/{eleicao_id}/candidato/{id_candidato}/bens"
                )
                if bens_response.status_code == 200:
                    bens_data = bens_response.json()
                    candidato.bens = [
                        {
                            "tipo": b.get("tipoBem", ""),
                            "descricao": b.get("descricao", ""),
                            "valor": float(b.get("valor", 0) or 0)
                        }
                        for b in bens_data
                    ]
            except Exception:
                # FIX: was a bare `except:` that also swallowed
                # asyncio.CancelledError, breaking task cancellation.
                pass

            return candidato

    except Exception as e:
        print(f"TSE details error: {e}")
        return None


async def buscar_politico(nome: str) -> Dict[str, Any]:
    """
    Search for a politician across multiple elections.
    Returns consolidated information.
    """
    resultado = {
        "nome": nome,
        "encontrado": False,
        "candidaturas": [],
        "ultimo_cargo": None,
        "total_patrimonio": 0.0,
        "partidos": set(),
        "ufs": set()
    }

    # Search in recent elections - continue through ALL years.
    for ano in [2024, 2022, 2020, 2018]:
        try:
            candidatos = await buscar_candidatos(nome, ano=ano)
            print(f"TSE: Buscando '{nome}' em {ano} - encontrados: {len(candidatos)}")

            for c in candidatos:
                # Match if nome is in the candidate's full name.
                if nome.lower() in c.nome.lower() or nome.lower() in c.nome_urna.lower():
                    resultado["encontrado"] = True
                    resultado["candidaturas"].append({
                        "ano": ano,
                        "cargo": c.cargo,
                        "partido": c.partido_sigla,
                        "uf": c.uf,
                        "situacao": c.situacao,
                        "patrimonio": c.total_bens
                    })

                    if c.partido_sigla:
                        resultado["partidos"].add(c.partido_sigla)
                    if c.uf:
                        resultado["ufs"].add(c.uf)

                    if c.total_bens > resultado["total_patrimonio"]:
                        resultado["total_patrimonio"] = c.total_bens

                    if not resultado["ultimo_cargo"]:
                        resultado["ultimo_cargo"] = f"{c.cargo} ({ano})"
        except Exception as e:
            print(f"TSE search {ano} error: {e}")
            continue

    # Convert sets to lists for JSON; FIX: sorted for deterministic output
    # (plain list(set) ordering varied between runs).
    resultado["partidos"] = sorted(resultado["partidos"])
    resultado["ufs"] = sorted(resultado["ufs"])

    print(f"TSE resultado para '{nome}': encontrado={resultado['encontrado']}, candidaturas={len(resultado['candidaturas'])}")

    return resultado
files /dev/null and b/data/numidium.db differ diff --git a/requirements.txt b/requirements.txt index d3d8138add29dc954de2005bd86bdec53dd629e8..8d6b074afd2336d205fa0443bc46feac92470d10 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,12 @@ -# --- SERVIDOR E API --- -fastapi -uvicorn[standard] -python-multipart -openai -prometheus-fastapi-instrumentator -prometheus-client -tavily-python - -# --- MACHINE LEARNING E NLP --- -sentence-transformers -numpy -pandas -scikit-learn -scipy -umap-learn -hdbscan -faiss-cpu -nltk -spacy -langdetect -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl -https://github.com/explosion/spacy-models/releases/download/pt_core_news_sm-3.7.0/pt_core_news_sm-3.7.0-py3-none-any.whl - -# --- TORCH CPU (>=2.6 required for CVE-2025-32434) --- -torch>=2.6.0 -torchvision -torchaudio \ No newline at end of file +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +sqlalchemy==2.0.23 +pydantic==2.5.2 +pydantic-settings==2.1.0 +requests==2.31.0 +beautifulsoup4==4.12.2 +httpx==0.25.2 +python-multipart==0.0.6 +aiohttp==3.9.1 +feedparser==6.0.10 +# httpx already included - used for Cerebras API calls