diff --git a/app/__init__.py b/app/__init__.py
deleted file mode 100644
index 5ca62e91c6b6d2fd4d3a0d2f3169941e71d37af3..0000000000000000000000000000000000000000
--- a/app/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Numidium Backend App
diff --git a/app/__pycache__/__init__.cpython-311.pyc b/app/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index 5a44e729bba8a6e6cdf407034b3b1ec551cfb6fe..0000000000000000000000000000000000000000
Binary files a/app/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/__pycache__/config.cpython-311.pyc b/app/__pycache__/config.cpython-311.pyc
deleted file mode 100644
index f6ba5b98d9de60400fecda19a96033ffd700d3a1..0000000000000000000000000000000000000000
Binary files a/app/__pycache__/config.cpython-311.pyc and /dev/null differ
diff --git a/app/api/__init__.py b/app/api/__init__.py
deleted file mode 100644
index ce0a2733c6eceaf10144429177e8f20db9604545..0000000000000000000000000000000000000000
--- a/app/api/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# API module
diff --git a/app/api/__pycache__/__init__.cpython-311.pyc b/app/api/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index e59a223a6007cd27a3443d5ab5a26d31df7fb4ff..0000000000000000000000000000000000000000
Binary files a/app/api/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/api/__pycache__/deps.cpython-311.pyc b/app/api/__pycache__/deps.cpython-311.pyc
deleted file mode 100644
index 07e15cf0e980065fc3e41e4e0eea81575dc514d5..0000000000000000000000000000000000000000
Binary files a/app/api/__pycache__/deps.cpython-311.pyc and /dev/null differ
diff --git a/app/api/deps.py b/app/api/deps.py
deleted file mode 100644
index bcea9d8c46a65a9857513605150ce15591631945..0000000000000000000000000000000000000000
--- a/app/api/deps.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-API dependencies.
-"""
-from typing import Generator, Optional
-
-from fastapi import Cookie, Header
-from sqlalchemy.orm import Session
-
-from app.core.database import get_db_for_session, get_default_session
-
-
-def get_session_id(
- x_session_id: Optional[str] = Header(None),
- numidium_session: Optional[str] = Cookie(None)
-) -> Optional[str]:
- """Return the session id from header or cookie."""
- return x_session_id or numidium_session
-
-
-def get_scoped_db(
- x_session_id: Optional[str] = Header(None),
- numidium_session: Optional[str] = Cookie(None)
-) -> Generator[Session, None, None]:
- """
- Provide a session-scoped DB if available, otherwise the default DB.
- """
- session_id = x_session_id or numidium_session
- if session_id:
- db = get_db_for_session(session_id)
- else:
- db = get_default_session()
- try:
- yield db
- finally:
- db.close()
diff --git a/app/api/routes/__init__.py b/app/api/routes/__init__.py
deleted file mode 100644
index e37c97a33d27ba2e879921f79996d8fdc3edbb73..0000000000000000000000000000000000000000
--- a/app/api/routes/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# API Routes module
-from app.api.routes import entities, relationships, events, search, ingest
diff --git a/app/api/routes/__pycache__/__init__.cpython-311.pyc b/app/api/routes/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index e48c20bb1f744a1d1037323ce205527266cb5c7c..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/entities.cpython-311.pyc b/app/api/routes/__pycache__/entities.cpython-311.pyc
deleted file mode 100644
index ee99ab907a18b99a588dfe960d31a7e21c7e53d6..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/entities.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/events.cpython-311.pyc b/app/api/routes/__pycache__/events.cpython-311.pyc
deleted file mode 100644
index 52f29ec16d10fc54bd6be7d6e32591d65d3acfcc..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/events.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/ingest.cpython-311.pyc b/app/api/routes/__pycache__/ingest.cpython-311.pyc
deleted file mode 100644
index e524bafc4ce081ccccb32d94f2426c10b1e79b9a..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/ingest.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/investigate.cpython-311.pyc b/app/api/routes/__pycache__/investigate.cpython-311.pyc
deleted file mode 100644
index 61c0e309052c422eb7d506d8623cfaed4ff4e01e..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/investigate.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/relationships.cpython-311.pyc b/app/api/routes/__pycache__/relationships.cpython-311.pyc
deleted file mode 100644
index 73c88868d8b1ad76745a529fe05928d06408c415..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/relationships.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/__pycache__/search.cpython-311.pyc b/app/api/routes/__pycache__/search.cpython-311.pyc
deleted file mode 100644
index 83951b1b069fe2d10b140852fbc85e7294cac015..0000000000000000000000000000000000000000
Binary files a/app/api/routes/__pycache__/search.cpython-311.pyc and /dev/null differ
diff --git a/app/api/routes/aethermap.py b/app/api/routes/aethermap.py
deleted file mode 100644
index bc0535153069d293dcdbe97be9565e0a17728e3e..0000000000000000000000000000000000000000
--- a/app/api/routes/aethermap.py
+++ /dev/null
@@ -1,307 +0,0 @@
-"""
-AetherMap Routes - Document Mapping & Semantic Search
-Integrates with AetherMap API for document clustering, NER, and semantic search.
-"""
-from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from sqlalchemy.orm import Session
-import io
-
-from app.api.deps import get_scoped_db
-from app.services.aethermap_client import aethermap, ProcessResult, SearchResult, EntityGraphResult
-
-
-router = APIRouter()
-
-
-# ============================================================================
-# Request/Response Models
-# ============================================================================
-
-class IndexDocumentsRequest(BaseModel):
- """Request to index documents from text list"""
- documents: List[str] = Field(..., description="Lista de textos para indexar")
- fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)")
-
-
-class IndexEntitiesRequest(BaseModel):
- """Request to index entities from NUMIDIUM database"""
- entity_types: Optional[List[str]] = Field(None, description="Filtrar por tipos de entidade")
- limit: int = Field(500, description="Limite de entidades")
-
-
-class SemanticSearchRequest(BaseModel):
- """Request for semantic search"""
- query: str = Field(..., description="Termo de busca")
- turbo_mode: bool = Field(True, description="Modo turbo (mais rápido)")
-
-
-class IndexResponse(BaseModel):
- """Response from indexing"""
- job_id: str
- num_documents: int
- num_clusters: int
- num_noise: int
- metrics: Dict[str, Any] = {}
- cluster_analysis: Dict[str, Any] = {}
-
-
-class SearchResponse(BaseModel):
- """Response from search"""
- summary: str
- results: List[Dict[str, Any]] = []
-
-
-class EntityGraphResponse(BaseModel):
- """Response from NER extraction"""
- hubs: List[Dict[str, Any]] = []
- insights: Dict[str, Any] = {}
- node_count: int = 0
- edge_count: int = 0
-
-
-class StatusResponse(BaseModel):
- """AetherMap status"""
- connected: bool
- job_id: Optional[str] = None
- documents_indexed: int = 0
-
-
-# ============================================================================
-# Endpoints
-# ============================================================================
-
-@router.get("/status", response_model=StatusResponse)
-async def get_status():
- """
- Get AetherMap connection status.
- """
- return StatusResponse(
- connected=True,
- job_id=aethermap.current_job_id,
- documents_indexed=0 # TODO: track this
- )
-
-
-@router.post("/index", response_model=IndexResponse)
-async def index_documents(request: IndexDocumentsRequest):
- """
- Index a list of documents for semantic search.
-
- The documents will be:
- - Embedded using sentence transformers
- - Clustered using HDBSCAN
- - Indexed in FAISS + BM25 for hybrid search
- """
- try:
- if not request.documents:
- raise HTTPException(status_code=400, detail="Nenhum documento fornecido")
-
- result = await aethermap.process_documents(
- texts=request.documents,
- fast_mode=request.fast_mode
- )
-
- return IndexResponse(
- job_id=result.job_id,
- num_documents=result.num_documents,
- num_clusters=result.num_clusters,
- num_noise=result.num_noise,
- metrics=result.metrics,
- cluster_analysis=result.cluster_analysis
- )
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/index-entities", response_model=IndexResponse)
-async def index_entities(
- request: IndexEntitiesRequest,
- db: Session = Depends(get_scoped_db)
-):
- """
- Index entities from NUMIDIUM database.
-
- Collects entity names and descriptions, sends to AetherMap for processing.
- """
- from app.models.entity import Entity
-
- try:
- query = db.query(Entity)
-
- if request.entity_types:
- query = query.filter(Entity.type.in_(request.entity_types))
-
- entities = query.limit(request.limit).all()
-
- if not entities:
- raise HTTPException(status_code=404, detail="Nenhuma entidade encontrada")
-
- # Build text representations
- documents = []
- for e in entities:
- text = f"{e.name} ({e.type})"
- if e.description:
- text += f": {e.description[:1000]}"
- documents.append(text)
-
- result = await aethermap.process_documents(
- texts=documents,
- fast_mode=request.fast_mode if hasattr(request, 'fast_mode') else True
- )
-
- return IndexResponse(
- job_id=result.job_id,
- num_documents=result.num_documents,
- num_clusters=result.num_clusters,
- num_noise=result.num_noise,
- metrics=result.metrics,
- cluster_analysis=result.cluster_analysis
- )
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/upload", response_model=IndexResponse)
-async def upload_documents(
- file: UploadFile = File(...),
- fast_mode: bool = Form(True)
-):
- """
- Upload a file (TXT or CSV) for indexing.
-
- - TXT: One document per line
- - CSV: Will use first text column found
- """
- try:
- content = await file.read()
- text = content.decode('utf-8', errors='ignore')
-
- # Split by lines for TXT
- documents = [line.strip() for line in text.splitlines() if line.strip()]
-
- if not documents:
- raise HTTPException(status_code=400, detail="Arquivo vazio ou sem texto válido")
-
- result = await aethermap.process_documents(
- texts=documents,
- fast_mode=fast_mode
- )
-
- return IndexResponse(
- job_id=result.job_id,
- num_documents=result.num_documents,
- num_clusters=result.num_clusters,
- num_noise=result.num_noise,
- metrics=result.metrics,
- cluster_analysis=result.cluster_analysis
- )
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/search", response_model=SearchResponse)
-async def semantic_search(request: SemanticSearchRequest):
- """
- Semantic search in indexed documents.
-
- Uses hybrid RAG (FAISS + BM25 + reranking + LLM).
- Returns a summary answering the query with citations.
- """
- try:
- if not aethermap.current_job_id:
- raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
-
- result = await aethermap.semantic_search(
- query=request.query,
- turbo_mode=request.turbo_mode
- )
-
- return SearchResponse(
- summary=result.summary,
- results=result.results
- )
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/entities", response_model=EntityGraphResponse)
-async def extract_entities():
- """
- Extract named entities (NER) from indexed documents.
-
- Returns:
- - Hub entities (most connected)
- - Relationship insights
- - Graph metrics
- """
- try:
- if not aethermap.current_job_id:
- raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
-
- result = await aethermap.extract_entities()
-
- return EntityGraphResponse(
- hubs=result.hubs,
- insights=result.insights,
- node_count=len(result.nodes),
- edge_count=len(result.edges)
- )
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/analyze")
-async def analyze_graph():
- """
- Analyze entity graph using LLM.
-
- Returns semantic insights about relationships and patterns.
- """
- try:
- if not aethermap.current_job_id:
- raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
-
- result = await aethermap.analyze_graph()
-
- return {
- "analysis": result.analysis,
- "key_entities": result.key_entities,
- "relationships": result.relationships
- }
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/describe-clusters")
-async def describe_clusters():
- """
- Get LLM descriptions for each cluster found.
- """
- try:
- if not aethermap.current_job_id:
- raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
-
- result = await aethermap.describe_clusters()
-
- return result
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
diff --git a/app/api/routes/analyze.py b/app/api/routes/analyze.py
deleted file mode 100644
index 37b93947c0e0c9f2a5a626301007c1cf30b212d6..0000000000000000000000000000000000000000
--- a/app/api/routes/analyze.py
+++ /dev/null
@@ -1,309 +0,0 @@
-"""
-Analyze API Routes - LLM-based text analysis
-"""
-from fastapi import APIRouter, Depends, HTTPException
-from pydantic import BaseModel, Field
-from typing import Optional, List
-from sqlalchemy.orm import Session
-import traceback
-
-from app.api.deps import get_scoped_db
-from app.services.nlp import entity_extractor
-from app.services.geocoding import geocode
-from app.models.entity import Entity, Relationship, Event
-from app.config import settings
-
-
-router = APIRouter(prefix="/analyze", tags=["Analysis"])
-
-
-class AnalyzeRequest(BaseModel):
- """Request model for text analysis"""
- text: str = Field(..., min_length=10, description="Text to analyze")
- auto_create: bool = Field(default=False, description="Auto-create extracted entities in database")
-
-
-class ExtractedEntityResponse(BaseModel):
- """Response model for an extracted entity"""
- name: str
- type: str
- role: Optional[str] = None
- aliases: Optional[List[str]] = None
- description: Optional[str] = None
- created: bool = False # Whether it was created in DB
- entity_id: Optional[str] = None # DB ID if created
-
-
-class ExtractedRelationshipResponse(BaseModel):
- """Response model for an extracted relationship"""
- source: str
- target: str
- relationship_type: str
- context: Optional[str] = None
- created: bool = False
-
-
-class ExtractedEventResponse(BaseModel):
- """Response model for an extracted event"""
- description: str
- event_type: Optional[str] = None
- date: Optional[str] = None
- location: Optional[str] = None
- participants: Optional[List[str]] = None
- created: bool = False
- event_id: Optional[str] = None
-
-
-class AnalyzeResponse(BaseModel):
- """Response model for analysis"""
- entities: List[ExtractedEntityResponse]
- relationships: List[ExtractedRelationshipResponse]
- events: List[ExtractedEventResponse]
- stats: dict
-
-
-@router.post("", response_model=AnalyzeResponse)
-async def analyze_text(request: AnalyzeRequest, db: Session = Depends(get_scoped_db)):
- """
- Analyze text using LLM to extract entities, relationships, and events.
-
- Uses Cerebras API with Qwen 3 235B for intelligent extraction.
-
- Args:
- text: Text to analyze (min 10 characters)
- auto_create: If true, automatically creates entities in the database
-
- Returns:
- Extracted entities, relationships, events, and statistics
- """
- try:
- # Extract using LLM
- result = await entity_extractor.extract(request.text)
-
- # Prepare response
- entities_response = []
- relationships_response = []
- events_response = []
-
- created_entities = 0
- created_relationships = 0
- created_events = 0
-
- # Helper function to parse date strings
- def parse_date(date_str):
- if not date_str:
- return None
- from datetime import datetime
- try:
- # Try YYYY-MM-DD format
- return datetime.strptime(date_str[:10], "%Y-%m-%d")
- except:
- try:
- # Try YYYY format
- return datetime.strptime(date_str[:4], "%Y")
- except:
- return None
-
- # Process entities
- for entity in result.entities:
- entity_data = ExtractedEntityResponse(
- name=entity.name,
- type=entity.type,
- role=entity.role,
- aliases=entity.aliases,
- description=entity.description,
- created=False
- )
-
- if request.auto_create and entity.name:
- # Check if entity already exists
- existing = db.query(Entity).filter(
- Entity.name.ilike(f"%{entity.name}%")
- ).first()
-
- if not existing:
- # Get coordinates for location entities
- lat, lng = None, None
- if entity.type == "location":
- coords = await geocode(entity.name)
- if coords:
- lat, lng = coords
-
- # Parse event_date if available
- event_date = parse_date(getattr(entity, 'event_date', None))
-
- # Create new entity
- new_entity = Entity(
- name=entity.name,
- type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person",
- description=entity.description or entity.role or "",
- source="llm_extraction",
- latitude=lat,
- longitude=lng,
- event_date=event_date,
- properties={"role": entity.role, "aliases": entity.aliases}
- )
- db.add(new_entity)
- db.commit()
- db.refresh(new_entity)
-
- entity_data.created = True
- entity_data.entity_id = new_entity.id
- created_entities += 1
- else:
- entity_data.entity_id = existing.id
-
- entities_response.append(entity_data)
-
- # Process relationships
- for rel in result.relationships:
- rel_data = ExtractedRelationshipResponse(
- source=rel.source,
- target=rel.target,
- relationship_type=rel.relationship_type,
- context=rel.context,
- created=False
- )
-
- if request.auto_create:
- # Find source and target entities
- source_entity = db.query(Entity).filter(
- Entity.name.ilike(f"%{rel.source}%")
- ).first()
- target_entity = db.query(Entity).filter(
- Entity.name.ilike(f"%{rel.target}%")
- ).first()
-
- if source_entity and target_entity:
- # Check if relationship exists
- existing_rel = db.query(Relationship).filter(
- Relationship.source_id == source_entity.id,
- Relationship.target_id == target_entity.id,
- Relationship.type == rel.relationship_type
- ).first()
-
- if not existing_rel:
- # Parse event_date if available
- rel_event_date = parse_date(getattr(rel, 'event_date', None))
-
- new_rel = Relationship(
- source_id=source_entity.id,
- target_id=target_entity.id,
- type=rel.relationship_type,
- event_date=rel_event_date,
- properties={"context": rel.context}
- )
- db.add(new_rel)
- db.commit()
- rel_data.created = True
- created_relationships += 1
-
- relationships_response.append(rel_data)
-
- # Process events
- for event in result.events:
- event_data = ExtractedEventResponse(
- description=event.description,
- event_type=event.event_type,
- date=event.date,
- location=event.location,
- participants=event.participants,
- created=False
- )
-
- if request.auto_create and event.description:
- # Create event
- new_event = Event(
- title=event.description[:100] if len(event.description) > 100 else event.description,
- description=event.description,
- type=event.event_type or "general",
- source="llm_extraction"
- )
- db.add(new_event)
- db.commit()
- db.refresh(new_event)
-
- event_data.created = True
- event_data.event_id = new_event.id
- created_events += 1
-
- events_response.append(event_data)
-
- return AnalyzeResponse(
- entities=entities_response,
- relationships=relationships_response,
- events=events_response,
- stats={
- "total_entities": len(entities_response),
- "total_relationships": len(relationships_response),
- "total_events": len(events_response),
- "created_entities": created_entities,
- "created_relationships": created_relationships,
- "created_events": created_events
- }
- )
-
- except Exception as e:
- # Log the full error with traceback
- print(f"=== ANALYZE ERROR ===")
- print(f"Error type: {type(e).__name__}")
- print(f"Error message: {str(e)}")
- print(f"Traceback:")
- traceback.print_exc()
- print(f"=== END ERROR ===")
- raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
-
-
-@router.get("/debug")
-async def debug_config():
- """
- Debug endpoint to check if API is configured correctly.
- """
- api_key = settings.cerebras_api_key
- return {
- "cerebras_api_key_configured": bool(api_key),
- "cerebras_api_key_length": len(api_key) if api_key else 0,
- "cerebras_api_key_preview": f"{api_key[:8]}...{api_key[-4:]}" if api_key and len(api_key) > 12 else "NOT SET"
- }
-
-
-@router.post("/quick")
-async def quick_analyze(request: AnalyzeRequest):
- """
- Quick analysis without database operations.
- Returns only extracted data without creating anything.
- """
- try:
- result = await entity_extractor.extract(request.text)
-
- return {
- "entities": [
- {
- "name": e.name,
- "type": e.type,
- "role": e.role,
- "aliases": e.aliases
- }
- for e in result.entities
- ],
- "relationships": [
- {
- "source": r.source,
- "target": r.target,
- "type": r.relationship_type,
- "context": r.context
- }
- for r in result.relationships
- ],
- "events": [
- {
- "description": ev.description,
- "type": ev.event_type,
- "date": ev.date,
- "participants": ev.participants
- }
- for ev in result.events
- ]
- }
- except Exception as e:
- raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
diff --git a/app/api/routes/chat.py b/app/api/routes/chat.py
deleted file mode 100644
index f75b133bf9956e67eb7b1b86312192d7fa093c46..0000000000000000000000000000000000000000
--- a/app/api/routes/chat.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Chat API Routes - Intelligent chat with RAG
-"""
-from fastapi import APIRouter, Depends, HTTPException
-from pydantic import BaseModel, Field
-from typing import Optional
-from sqlalchemy.orm import Session
-
-from app.api.deps import get_scoped_db, get_session_id
-from app.services.chat import chat_service
-
-
-router = APIRouter(prefix="/chat", tags=["Chat"])
-
-
-class ChatRequest(BaseModel):
- """Chat request model"""
- message: str = Field(..., min_length=1, description="User message")
- use_web: bool = Field(default=True, description="Include web search")
- use_history: bool = Field(default=True, description="Use conversation history")
-
-
-class ChatResponse(BaseModel):
- """Chat response model"""
- answer: str
- local_context_used: bool
- web_context_used: bool
- entities_found: int
-
-
-@router.post("", response_model=ChatResponse)
-async def chat(
- request: ChatRequest,
- db: Session = Depends(get_scoped_db),
- session_id: Optional[str] = Depends(get_session_id)
-):
- """
- Send a message and get an intelligent response.
-
- Uses:
- - Local NUMIDIUM knowledge (entities/relationships)
- - Lancer web search (if enabled)
- - Cerebras LLM for synthesis
- """
- try:
- result = await chat_service.chat(
- message=request.message,
- db=db,
- use_web=request.use_web,
- use_history=request.use_history,
- session_id=session_id
- )
- return ChatResponse(**result)
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/clear")
-async def clear_history(session_id: Optional[str] = Depends(get_session_id)):
- """Clear conversation history"""
- chat_service.clear_history(session_id=session_id)
- return {"message": "Historico limpo"}
diff --git a/app/api/routes/dados_publicos.py b/app/api/routes/dados_publicos.py
deleted file mode 100644
index 842e82d2d17f48687b92bb8012105eab495a8051..0000000000000000000000000000000000000000
--- a/app/api/routes/dados_publicos.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""
-Public Data API Routes - IBGE and TSE data access
-"""
-from fastapi import APIRouter, HTTPException, Query
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-
-from app.services.ibge_api import (
- listar_estados,
- listar_municipios,
- buscar_municipio,
- enriquecer_localizacao
-)
-from app.services.tse_api import (
- listar_eleicoes,
- buscar_candidatos,
- obter_candidato_detalhes,
- buscar_politico
-)
-
-
-router = APIRouter(prefix="/dados", tags=["Public Data"])
-
-
-# ========== IBGE Endpoints ==========
-
-class EstadoResponse(BaseModel):
- id: int
- sigla: str
- nome: str
- regiao: str
-
-
-class MunicipioResponse(BaseModel):
- id: int
- nome: str
- estado_sigla: str
- estado_nome: str
- regiao: str
-
-
-@router.get("/ibge/estados", response_model=List[EstadoResponse])
-async def get_estados():
- """List all Brazilian states"""
- estados = await listar_estados()
- return [EstadoResponse(**e.__dict__) for e in estados]
-
-
-@router.get("/ibge/municipios/{uf}", response_model=List[MunicipioResponse])
-async def get_municipios(uf: str):
- """List municipalities in a state"""
- municipios = await listar_municipios(uf)
- return [MunicipioResponse(**m.__dict__) for m in municipios]
-
-
-@router.get("/ibge/buscar")
-async def buscar_cidade(
- nome: str = Query(..., min_length=2),
- uf: Optional[str] = None
-):
- """Search for a municipality by name"""
- municipios = await buscar_municipio(nome, uf)
- return [MunicipioResponse(**m.__dict__) for m in municipios]
-
-
-@router.get("/ibge/enriquecer")
-async def enriquecer_cidade(
- cidade: str = Query(..., min_length=2),
- uf: Optional[str] = None
-):
- """Enrich a location name with IBGE data"""
- return await enriquecer_localizacao(cidade, uf)
-
-
-# ========== TSE Endpoints ==========
-
-class EleicaoResponse(BaseModel):
- id: int
- ano: int
- descricao: str
- turno: int
-
-
-class CandidatoResponse(BaseModel):
- id: int
- nome: str
- nome_urna: str
- numero: str
- cargo: str
- partido_sigla: str
- uf: str
- municipio: str
- situacao: str
- total_bens: float
-
-
-class CandidatoDetalhadoResponse(BaseModel):
- id: int
- nome: str
- nome_urna: str
- numero: str
- cargo: str
- partido_sigla: str
- partido_nome: str
- uf: str
- municipio: str
- situacao: str
- data_nascimento: str
- genero: str
- grau_instrucao: str
- ocupacao: str
- total_bens: float
- bens: List[Dict[str, Any]]
-
-
-@router.get("/tse/eleicoes", response_model=List[EleicaoResponse])
-async def get_eleicoes():
- """List available elections"""
- eleicoes = await listar_eleicoes()
- return [EleicaoResponse(**e.__dict__) for e in eleicoes]
-
-
-@router.get("/tse/candidatos")
-async def get_candidatos(
- nome: str = Query(..., min_length=3),
- ano: int = Query(default=2024),
- uf: Optional[str] = None,
- cargo: Optional[str] = None
-):
- """Search for candidates by name"""
- candidatos = await buscar_candidatos(nome, ano=ano, uf=uf, cargo=cargo)
- return [CandidatoResponse(**c.__dict__) for c in candidatos]
-
-
-@router.get("/tse/candidato/{id_candidato}")
-async def get_candidato_detalhes(
- id_candidato: int,
- ano: int = Query(default=2024)
-):
- """Get detailed candidate information including assets"""
- candidato = await obter_candidato_detalhes(id_candidato, ano=ano)
-
- if not candidato:
- raise HTTPException(status_code=404, detail="Candidato não encontrado")
-
- return CandidatoDetalhadoResponse(**candidato.__dict__)
-
-
-@router.get("/tse/politico")
-async def pesquisar_politico(nome: str = Query(..., min_length=3)):
- """
- Search for a politician across multiple elections.
- Returns consolidated career information.
- """
- return await buscar_politico(nome)
diff --git a/app/api/routes/entities.py b/app/api/routes/entities.py
deleted file mode 100644
index 2727179e0e20a58a8a5893f1821a15c24df3013f..0000000000000000000000000000000000000000
--- a/app/api/routes/entities.py
+++ /dev/null
@@ -1,353 +0,0 @@
-"""
-Entity CRUD Routes
-"""
-from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy.orm import Session
-from sqlalchemy import or_
-from typing import List, Optional
-
-from app.api.deps import get_scoped_db
-from app.models import Entity, Relationship
-from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge
-
-router = APIRouter(prefix="/entities", tags=["Entities"])
-
-
-@router.get("", response_model=List[EntityResponse])
-def list_entities(
- type: Optional[str] = None,
- search: Optional[str] = None,
- project_id: Optional[str] = None,
- limit: int = Query(default=50, le=200),
- offset: int = 0,
- db: Session = Depends(get_scoped_db)
-):
- """Lista todas as entidades com filtros opcionais"""
- query = db.query(Entity)
-
- if project_id:
- query = query.filter(Entity.project_id == project_id)
-
- if type:
- query = query.filter(Entity.type == type)
-
- if search:
- query = query.filter(
- or_(
- Entity.name.ilike(f"%{search}%"),
- Entity.description.ilike(f"%{search}%")
- )
- )
-
- query = query.order_by(Entity.created_at.desc())
- return query.offset(offset).limit(limit).all()
-
-
-@router.get("/types")
-def get_entity_types(db: Session = Depends(get_scoped_db)):
- """Retorna todos os tipos de entidade únicos"""
- types = db.query(Entity.type).distinct().all()
- return [t[0] for t in types]
-
-
-@router.get("/suggest-merge")
-async def suggest_merge_candidates(
- limit: int = Query(default=10, le=50),
- db: Session = Depends(get_scoped_db)
-):
- """
- Use LLM to find potential duplicate entities that could be merged.
- Returns pairs of entities that might be the same.
- """
- import httpx
- import json
- import re
- from app.config import settings
-
- # Get all entities
- entities = db.query(Entity).order_by(Entity.name).limit(200).all()
-
- if len(entities) < 2:
- return {"candidates": [], "message": "Not enough entities to compare"}
-
- # Build entity list for LLM
- entity_list = []
- for e in entities:
- aliases = (e.properties or {}).get("aliases", [])
- entity_list.append({
- "id": e.id,
- "name": e.name,
- "type": e.type,
- "aliases": aliases[:5] if aliases else []
- })
-
- # Ask LLM to find duplicates
- prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes).
-
-Entidades:
-{entity_list[:100]}
-
-Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade:
-```json
-{{
- "duplicates": [
- {{
- "id1": "uuid1",
- "id2": "uuid2",
- "confidence": 0.95,
- "reason": "Mesmo nome com variação"
- }}
- ]
-}}
-```
-
-Se não houver duplicatas, retorne: {{"duplicates": []}}
-"""
-
- try:
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.post(
- "https://api.cerebras.ai/v1/chat/completions",
- headers={
- "Authorization": f"Bearer {settings.cerebras_api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": "zai-glm-4.7",
- "messages": [
- {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. Responda apenas em JSON válido."},
- {"role": "user", "content": prompt}
- ],
- "temperature": 0.1,
- "max_tokens": 1024
- }
- )
-
- if response.status_code != 200:
- return {"candidates": [], "error": "LLM API error"}
-
- data = response.json()
- content = data["choices"][0]["message"]["content"]
-
- # Parse JSON from response
- json_match = re.search(r'\{.*\}', content, re.DOTALL)
- if json_match:
- result = json.loads(json_match.group(0))
-
- # Enrich with entity names
- candidates = []
- for dup in result.get("duplicates", [])[:limit]:
- e1 = next((e for e in entities if e.id == dup.get("id1")), None)
- e2 = next((e for e in entities if e.id == dup.get("id2")), None)
- if e1 and e2:
- candidates.append({
- "entity1": {"id": e1.id, "name": e1.name, "type": e1.type},
- "entity2": {"id": e2.id, "name": e2.name, "type": e2.type},
- "confidence": dup.get("confidence", 0.5),
- "reason": dup.get("reason", "Possível duplicata")
- })
-
- return {"candidates": candidates}
-
- return {"candidates": [], "message": "No duplicates found"}
-
- except Exception as e:
- return {"candidates": [], "error": str(e)}
-
-
-@router.get("/{entity_id}", response_model=EntityResponse)
-def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
- """Busca uma entidade por ID"""
- entity = db.query(Entity).filter(Entity.id == entity_id).first()
- if not entity:
- raise HTTPException(status_code=404, detail="Entity not found")
- return entity
-
-
-@router.post("", response_model=EntityResponse, status_code=201)
-def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
- """Cria uma nova entidade"""
- db_entity = Entity(**entity.model_dump())
- db.add(db_entity)
- db.commit()
- db.refresh(db_entity)
- return db_entity
-
-
-@router.put("/{entity_id}", response_model=EntityResponse)
-def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_scoped_db)):
- """Atualiza uma entidade existente"""
- db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
- if not db_entity:
- raise HTTPException(status_code=404, detail="Entity not found")
-
- update_data = entity.model_dump(exclude_unset=True)
- for field, value in update_data.items():
- setattr(db_entity, field, value)
-
- db.commit()
- db.refresh(db_entity)
- return db_entity
-
-
-@router.delete("/{entity_id}")
-def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
- """Deleta uma entidade"""
- db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
- if not db_entity:
- raise HTTPException(status_code=404, detail="Entity not found")
-
- # Delete related relationships
- db.query(Relationship).filter(
- or_(
- Relationship.source_id == entity_id,
- Relationship.target_id == entity_id
- )
- ).delete()
-
- db.delete(db_entity)
- db.commit()
- return {"message": "Entity deleted"}
-
-
-@router.get("/{entity_id}/connections", response_model=GraphData)
-def get_entity_connections(
- entity_id: str,
- depth: int = Query(default=1, le=3),
- db: Session = Depends(get_scoped_db)
-):
- """
- Retorna o grafo de conexões de uma entidade
- Usado para visualização de rede no frontend
- """
- entity = db.query(Entity).filter(Entity.id == entity_id).first()
- if not entity:
- raise HTTPException(status_code=404, detail="Entity not found")
-
- nodes = {}
- edges = []
- visited = set()
-
- def explore(eid: str, current_depth: int):
- if current_depth > depth or eid in visited:
- return
- visited.add(eid)
-
- e = db.query(Entity).filter(Entity.id == eid).first()
- if not e:
- return
-
- nodes[e.id] = GraphNode(
- id=e.id,
- type=e.type,
- name=e.name,
- properties=e.properties or {}
- )
-
- # Outgoing relationships
- for rel in db.query(Relationship).filter(Relationship.source_id == eid).all():
- edges.append(GraphEdge(
- source=rel.source_id,
- target=rel.target_id,
- type=rel.type,
- confidence=rel.confidence
- ))
- explore(rel.target_id, current_depth + 1)
-
- # Incoming relationships
- for rel in db.query(Relationship).filter(Relationship.target_id == eid).all():
- edges.append(GraphEdge(
- source=rel.source_id,
- target=rel.target_id,
- type=rel.type,
- confidence=rel.confidence
- ))
- explore(rel.source_id, current_depth + 1)
-
- explore(entity_id, 0)
-
- return GraphData(
- nodes=list(nodes.values()),
- edges=edges
- )
-
-
-@router.post("/merge")
-def merge_entities(
- primary_id: str,
- secondary_id: str,
- db: Session = Depends(get_scoped_db)
-):
- """
- Merge two entities into one.
- The primary entity is kept, the secondary is deleted.
- All relationships from secondary are transferred to primary.
- """
- if primary_id == secondary_id:
- raise HTTPException(status_code=400, detail="Cannot merge entity with itself")
-
- primary = db.query(Entity).filter(Entity.id == primary_id).first()
- secondary = db.query(Entity).filter(Entity.id == secondary_id).first()
-
- if not primary:
- raise HTTPException(status_code=404, detail="Primary entity not found")
- if not secondary:
- raise HTTPException(status_code=404, detail="Secondary entity not found")
-
- # Merge properties
- primary_props = primary.properties or {}
- secondary_props = secondary.properties or {}
-
- # Add aliases from secondary
- aliases = primary_props.get("aliases", []) or []
- if secondary.name not in aliases:
- aliases.append(secondary.name)
- secondary_aliases = secondary_props.get("aliases", []) or []
- for alias in secondary_aliases:
- if alias not in aliases:
- aliases.append(alias)
- primary_props["aliases"] = aliases
-
- # Add merge history
- merge_history = primary_props.get("merged_from", []) or []
- merge_history.append({
- "id": secondary.id,
- "name": secondary.name,
- "source": secondary.source
- })
- primary_props["merged_from"] = merge_history
-
- # Combine descriptions if primary has none
- if not primary.description and secondary.description:
- primary.description = secondary.description
-
- primary.properties = primary_props
-
- # Transfer relationships from secondary to primary
- # Update source_id
- db.query(Relationship).filter(
- Relationship.source_id == secondary_id
- ).update({"source_id": primary_id})
-
- # Update target_id
- db.query(Relationship).filter(
- Relationship.target_id == secondary_id
- ).update({"target_id": primary_id})
-
- # Delete duplicate relationships (same source, target, type)
- # This is a simple approach - in production you'd want more sophisticated deduplication
-
- # Delete the secondary entity
- db.delete(secondary)
- db.commit()
- db.refresh(primary)
-
- return {
- "message": f"Merged '{secondary.name}' into '{primary.name}'",
- "primary": {
- "id": primary.id,
- "name": primary.name,
- "aliases": aliases
- }
- }
-
diff --git a/app/api/routes/events.py b/app/api/routes/events.py
deleted file mode 100644
index 19a16292e599f2a33bffe593cf788d69be9f28dd..0000000000000000000000000000000000000000
--- a/app/api/routes/events.py
+++ /dev/null
@@ -1,113 +0,0 @@
-"""
-Events CRUD Routes
-"""
-from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy.orm import Session
-from sqlalchemy import or_
-from typing import List, Optional
-from datetime import datetime
-
-from app.api.deps import get_scoped_db
-from app.models import Event
-from app.schemas import EventCreate, EventResponse
-
-router = APIRouter(prefix="/events", tags=["Events"])
-
-
-@router.get("/", response_model=List[EventResponse])
-def list_events(
- type: Optional[str] = None,
- search: Optional[str] = None,
- start_date: Optional[datetime] = None,
- end_date: Optional[datetime] = None,
- limit: int = Query(default=50, le=200),
- offset: int = 0,
- db: Session = Depends(get_scoped_db)
-):
- """Lista eventos com filtros opcionais"""
- query = db.query(Event)
-
- if type:
- query = query.filter(Event.type == type)
-
- if search:
- query = query.filter(
- or_(
- Event.title.ilike(f"%{search}%"),
- Event.description.ilike(f"%{search}%")
- )
- )
-
- if start_date:
- query = query.filter(Event.event_date >= start_date)
- if end_date:
- query = query.filter(Event.event_date <= end_date)
-
- query = query.order_by(Event.event_date.desc().nullslast())
- return query.offset(offset).limit(limit).all()
-
-
-@router.get("/types")
-def get_event_types(db: Session = Depends(get_scoped_db)):
- """Retorna todos os tipos de evento unicos"""
- types = db.query(Event.type).distinct().all()
- return [t[0] for t in types]
-
-
-@router.get("/timeline")
-def get_timeline(
- entity_id: Optional[str] = None,
- limit: int = Query(default=50, le=200),
- db: Session = Depends(get_scoped_db)
-):
- """
- Retorna eventos em formato timeline.
- """
- query = db.query(Event).filter(Event.event_date.isnot(None))
-
- if entity_id:
- query = query.filter(Event.entity_ids.contains([entity_id]))
-
- events = query.order_by(Event.event_date.asc()).limit(limit).all()
-
- return [
- {
- "id": e.id,
- "title": e.title,
- "date": e.event_date.isoformat() if e.event_date else None,
- "type": e.type,
- "location": e.location_name
- }
- for e in events
- ]
-
-
-@router.get("/{event_id}", response_model=EventResponse)
-def get_event(event_id: str, db: Session = Depends(get_scoped_db)):
- """Busca um evento por ID"""
- event = db.query(Event).filter(Event.id == event_id).first()
- if not event:
- raise HTTPException(status_code=404, detail="Event not found")
- return event
-
-
-@router.post("/", response_model=EventResponse, status_code=201)
-def create_event(event: EventCreate, db: Session = Depends(get_scoped_db)):
- """Cria um novo evento"""
- db_event = Event(**event.model_dump())
- db.add(db_event)
- db.commit()
- db.refresh(db_event)
- return db_event
-
-
-@router.delete("/{event_id}")
-def delete_event(event_id: str, db: Session = Depends(get_scoped_db)):
- """Deleta um evento"""
- db_event = db.query(Event).filter(Event.id == event_id).first()
- if not db_event:
- raise HTTPException(status_code=404, detail="Event not found")
-
- db.delete(db_event)
- db.commit()
- return {"message": "Event deleted"}
diff --git a/app/api/routes/graph.py b/app/api/routes/graph.py
deleted file mode 100644
index 66a0886d6fb53b0884ef1e803f2acefd29f92873..0000000000000000000000000000000000000000
--- a/app/api/routes/graph.py
+++ /dev/null
@@ -1,173 +0,0 @@
-"""
-Graph API Routes - Network visualization endpoints
-"""
-from fastapi import APIRouter, Depends, HTTPException, Query
-from typing import Optional, List
-from sqlalchemy.orm import Session
-from sqlalchemy import or_
-
-from app.api.deps import get_scoped_db
-from app.models.entity import Entity, Relationship
-
-
-router = APIRouter(prefix="/graph", tags=["Graph"])
-
-
-@router.get("")
-async def get_graph(
- entity_type: Optional[str] = Query(None, description="Filter by entity type"),
- limit: int = Query(100, le=500, description="Maximum number of entities"),
- db: Session = Depends(get_scoped_db)
-):
- """
- Get graph data for visualization.
- Returns nodes (entities) and edges (relationships).
- """
- try:
- # Get entities
- query = db.query(Entity)
- if entity_type:
- query = query.filter(Entity.type == entity_type)
-
- entities = query.limit(limit).all()
- entity_ids = [e.id for e in entities]
-
- # Get relationships between these entities
- relationships = db.query(Relationship).filter(
- or_(
- Relationship.source_id.in_(entity_ids),
- Relationship.target_id.in_(entity_ids)
- )
- ).all()
-
- # Format for Cytoscape.js
- nodes = []
- for e in entities:
- nodes.append({
- "data": {
- "id": e.id,
- "label": e.name[:30] + "..." if len(e.name) > 30 else e.name,
- "fullName": e.name,
- "type": e.type,
- "description": e.description[:100] if e.description else "",
- "source": e.source or "unknown"
- }
- })
-
- edges = []
- for r in relationships:
- if r.source_id in entity_ids and r.target_id in entity_ids:
- edges.append({
- "data": {
- "id": r.id,
- "source": r.source_id,
- "target": r.target_id,
- "label": r.type,
- "type": r.type
- }
- })
-
- return {
- "nodes": nodes,
- "edges": edges,
- "stats": {
- "total_nodes": len(nodes),
- "total_edges": len(edges)
- }
- }
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=f"Failed to get graph: {str(e)}")
-
-
-@router.get("/entity/{entity_id}")
-async def get_entity_graph(
- entity_id: str,
- depth: int = Query(1, ge=1, le=3, description="How many levels of connections to include"),
- db: Session = Depends(get_scoped_db)
-):
- """
- Get graph centered on a specific entity.
- """
- try:
- # Get the central entity
- central = db.query(Entity).filter(Entity.id == entity_id).first()
- if not central:
- raise HTTPException(status_code=404, detail="Entity not found")
-
- # Collect entity IDs at each depth level
- collected_ids = {entity_id}
- current_level = {entity_id}
-
- for _ in range(depth):
- rels = db.query(Relationship).filter(
- or_(
- Relationship.source_id.in_(current_level),
- Relationship.target_id.in_(current_level)
- )
- ).all()
-
- next_level = set()
- for r in rels:
- next_level.add(r.source_id)
- next_level.add(r.target_id)
-
- current_level = next_level - collected_ids
- collected_ids.update(next_level)
-
- # Get all entities
- entities = db.query(Entity).filter(Entity.id.in_(collected_ids)).all()
-
- # Get all relationships between collected entities
- relationships = db.query(Relationship).filter(
- Relationship.source_id.in_(collected_ids),
- Relationship.target_id.in_(collected_ids)
- ).all()
-
- # Format for Cytoscape
- nodes = []
- for e in entities:
- nodes.append({
- "data": {
- "id": e.id,
- "label": e.name[:30] + "..." if len(e.name) > 30 else e.name,
- "fullName": e.name,
- "type": e.type,
- "description": e.description[:100] if e.description else "",
- "source": e.source or "unknown",
- "isCentral": e.id == entity_id
- }
- })
-
- edges = []
- for r in relationships:
- edges.append({
- "data": {
- "id": r.id,
- "source": r.source_id,
- "target": r.target_id,
- "label": r.type,
- "type": r.type
- }
- })
-
- return {
- "central": {
- "id": central.id,
- "name": central.name,
- "type": central.type
- },
- "nodes": nodes,
- "edges": edges,
- "stats": {
- "total_nodes": len(nodes),
- "total_edges": len(edges),
- "depth": depth
- }
- }
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=f"Failed to get entity graph: {str(e)}")
-
diff --git a/app/api/routes/ingest.py b/app/api/routes/ingest.py
deleted file mode 100644
index d2216481b8ad615180d4b4bfad5d7f24e453774a..0000000000000000000000000000000000000000
--- a/app/api/routes/ingest.py
+++ /dev/null
@@ -1,341 +0,0 @@
-"""
-Data Ingestion Routes
-Endpoints para importar dados de fontes externas
-"""
-from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
-from sqlalchemy.orm import Session
-from typing import Optional, List
-from datetime import datetime
-import asyncio
-
-from app.api.deps import get_scoped_db
-from app.models import Entity, Document, Relationship
-from app.schemas import EntityResponse, DocumentResponse
-from app.services.ingestion import wikipedia_scraper, news_service
-from app.services.nlp import entity_extractor
-from app.services.geocoding import geocode
-
-router = APIRouter(prefix="/ingest", tags=["Data Ingestion"])
-
-
-def parse_event_date(date_str):
- """Parse date string to datetime object"""
- if not date_str:
- return None
- try:
- # Try YYYY-MM-DD format
- return datetime.strptime(date_str[:10], "%Y-%m-%d")
- except:
- try:
- # Try YYYY format
- return datetime.strptime(date_str[:4], "%Y")
- except:
- return None
-
-
-# ========== Wikipedia ==========
-
-@router.get("/wikipedia/search")
-def search_wikipedia(q: str, limit: int = 10):
- """Busca artigos na Wikipedia"""
- results = wikipedia_scraper.search(q, limit)
- return results
-
-
-@router.post("/wikipedia/entity", response_model=EntityResponse)
-async def import_from_wikipedia(
- title: str,
- entity_type: str = "person",
- project_id: Optional[str] = None,
- auto_extract: bool = True,
- db: Session = Depends(get_scoped_db)
-):
- """
- Importa uma entidade da Wikipedia
- entity_type: person, organization, location
- project_id: ID do projeto para associar a entidade
- auto_extract: Se True, usa LLM para extrair entidades relacionadas
- """
- # Check if entity already exists
- existing = db.query(Entity).filter(
- Entity.name == title,
- Entity.source == "wikipedia"
- ).first()
-
- if existing:
- return existing
-
- # Scrape based on type
- if entity_type == "person":
- data = wikipedia_scraper.scrape_person(title)
- elif entity_type == "organization":
- data = wikipedia_scraper.scrape_organization(title)
- elif entity_type == "location":
- data = wikipedia_scraper.scrape_location(title)
- else:
- data = wikipedia_scraper.scrape_person(title) # default
-
- if not data:
- raise HTTPException(status_code=404, detail="Article not found on Wikipedia")
-
- # Create main entity with project_id
- entity = Entity(**data)
- entity.project_id = project_id
- db.add(entity)
- db.commit()
- db.refresh(entity)
-
- # Auto-extract entities and relationships using LLM
- if auto_extract and data.get("description"):
- try:
- # Limit text to avoid token limits
- text_to_analyze = data["description"][:3000]
- result = await entity_extractor.extract(text_to_analyze)
-
- # Create extracted entities
- created_entities = {}
- for ext_entity in result.entities:
- # Skip if same as main entity
- if ext_entity.name.lower() == title.lower():
- created_entities[ext_entity.name] = entity
- continue
-
- # Check if entity exists (by similar name)
- existing_ent = db.query(Entity).filter(
- Entity.name.ilike(f"%{ext_entity.name}%")
- ).first()
-
- if existing_ent:
- created_entities[ext_entity.name] = existing_ent
- else:
- # Get coordinates for location entities
- lat, lng = None, None
- if ext_entity.type == "location":
- coords = await geocode(ext_entity.name)
- if coords:
- lat, lng = coords
-
- # Parse event_date
- event_date = parse_event_date(getattr(ext_entity, 'event_date', None))
-
- new_ent = Entity(
- name=ext_entity.name,
- type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
- description=ext_entity.description or ext_entity.role,
- source="wikipedia_extraction",
- latitude=lat,
- longitude=lng,
- event_date=event_date,
- project_id=project_id,
- properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "extracted_from": title}
- )
- db.add(new_ent)
- db.commit()
- db.refresh(new_ent)
- created_entities[ext_entity.name] = new_ent
-
- # Create relationships
- for rel in result.relationships:
- source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
- target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()
-
- if source_ent and target_ent and source_ent.id != target_ent.id:
- # Check if relationship exists
- existing_rel = db.query(Relationship).filter(
- Relationship.source_id == source_ent.id,
- Relationship.target_id == target_ent.id,
- Relationship.type == rel.relationship_type
- ).first()
-
- if not existing_rel:
- # Parse relationship event_date
- rel_event_date = parse_event_date(getattr(rel, 'event_date', None))
-
- new_rel = Relationship(
- source_id=source_ent.id,
- target_id=target_ent.id,
- type=rel.relationship_type,
- event_date=rel_event_date,
- properties={"context": rel.context, "extracted_from": title}
- )
- db.add(new_rel)
-
- db.commit()
-
- except Exception as e:
- print(f"NER extraction error: {e}")
- # Continue without extraction if it fails
-
- return entity
-
-
-# ========== News ==========
-
-@router.get("/news/feeds")
-def list_available_feeds():
- """Lista os feeds de notícias disponíveis"""
- return list(news_service.RSS_FEEDS.keys())
-
-
-@router.get("/news/fetch")
-def fetch_news(feed: Optional[str] = None):
- """
- Busca notícias dos feeds RSS
- Se feed não for especificado, busca de todos
- """
- if feed:
- if feed not in news_service.RSS_FEEDS:
- raise HTTPException(status_code=404, detail="Feed not found")
- url = news_service.RSS_FEEDS[feed]
- articles = news_service.fetch_feed(url)
- else:
- articles = news_service.fetch_all_feeds()
-
- return articles
-
-
-@router.get("/news/search")
-def search_news(q: str):
- """Busca notícias por palavra-chave via Google News"""
- return news_service.search_news(q)
-
-
-@router.post("/news/import")
-async def import_news(
- query: Optional[str] = None,
- feed: Optional[str] = None,
- auto_extract: bool = True,
- db: Session = Depends(get_scoped_db)
-):
- """
- Importa notícias como documentos no sistema
- auto_extract: Se True, usa LLM para extrair entidades de cada notícia
- """
- if query:
- articles = news_service.search_news(query)
- elif feed:
- if feed not in news_service.RSS_FEEDS:
- raise HTTPException(status_code=404, detail="Feed not found")
- articles = news_service.fetch_feed(news_service.RSS_FEEDS[feed])
- else:
- articles = news_service.fetch_all_feeds()
-
- imported = 0
- extracted_entities = 0
-
- for article in articles:
- # Check if document already exists (by URL)
- if article.get("url"):
- existing = db.query(Document).filter(
- Document.source_url == article["url"]
- ).first()
- if existing:
- continue
-
- doc_data = news_service.to_document(article)
- doc = Document(**doc_data)
- db.add(doc)
- db.commit()
- imported += 1
-
- # Extract entities from article content
- if auto_extract:
- try:
- text_to_analyze = f"{article.get('title', '')} {article.get('description', '')}".strip()
- if len(text_to_analyze) >= 20:
- result = await entity_extractor.extract(text_to_analyze[:2000])
-
- created_entities = {}
- for ext_entity in result.entities:
- # Check if entity exists
- existing_ent = db.query(Entity).filter(
- Entity.name.ilike(f"%{ext_entity.name}%")
- ).first()
-
- if existing_ent:
- created_entities[ext_entity.name] = existing_ent
- else:
- # Get coordinates for location entities
- lat, lng = None, None
- if ext_entity.type == "location":
- coords = await geocode(ext_entity.name)
- if coords:
- lat, lng = coords
-
- new_ent = Entity(
- name=ext_entity.name,
- type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
- description=ext_entity.description or ext_entity.role,
- source="news_extraction",
- latitude=lat,
- longitude=lng,
- properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "from_article": article.get('title', '')}
- )
- db.add(new_ent)
- db.commit()
- db.refresh(new_ent)
- created_entities[ext_entity.name] = new_ent
- extracted_entities += 1
-
- # Create relationships
- for rel in result.relationships:
- source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
- target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()
-
- if source_ent and target_ent and source_ent.id != target_ent.id:
- existing_rel = db.query(Relationship).filter(
- Relationship.source_id == source_ent.id,
- Relationship.target_id == target_ent.id,
- Relationship.type == rel.relationship_type
- ).first()
-
- if not existing_rel:
- new_rel = Relationship(
- source_id=source_ent.id,
- target_id=target_ent.id,
- type=rel.relationship_type,
- properties={"context": rel.context}
- )
- db.add(new_rel)
-
- db.commit()
-
- except Exception as e:
- print(f"NER extraction error for article: {e}")
- # Continue without extraction
-
- return {
- "message": f"Imported {imported} articles",
- "total_found": len(articles),
- "extracted_entities": extracted_entities
- }
-
-
-# ========== Manual Import ==========
-
-@router.post("/bulk/entities")
-def bulk_import_entities(
- entities: List[dict],
- db: Session = Depends(get_scoped_db)
-):
- """
- Importa múltiplas entidades de uma vez
- Útil para importar de CSV/JSON
- """
- imported = 0
- for entity_data in entities:
- entity = Entity(
- type=entity_data.get("type", "unknown"),
- name=entity_data.get("name", "Unnamed"),
- description=entity_data.get("description"),
- properties=entity_data.get("properties", {}),
- latitude=entity_data.get("latitude"),
- longitude=entity_data.get("longitude"),
- source=entity_data.get("source", "manual")
- )
- db.add(entity)
- imported += 1
-
- db.commit()
-
- return {"message": f"Imported {imported} entities"}
diff --git a/app/api/routes/investigate.py b/app/api/routes/investigate.py
deleted file mode 100644
index 646857df8ac0eed0f99ac443367d25c7a6af1512..0000000000000000000000000000000000000000
--- a/app/api/routes/investigate.py
+++ /dev/null
@@ -1,207 +0,0 @@
-"""
-Investigation API Routes - Build dossiers on companies and people
-"""
-from fastapi import APIRouter, HTTPException, Depends
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from sqlalchemy.orm import Session
-
-from app.services.investigation import (
- investigar_empresa,
- investigar_pessoa,
- dossier_to_dict
-)
-from app.services.brazil_apis import consultar_cnpj
-from app.services.investigator_agent import investigator_agent
-from app.api.deps import get_scoped_db
-
-
-router = APIRouter(prefix="/investigate", tags=["Investigation"])
-
-
-class InvestigateCompanyRequest(BaseModel):
- """Request to investigate a company"""
- cnpj: str = Field(..., min_length=11, description="CNPJ da empresa")
-
-
-class InvestigatePersonRequest(BaseModel):
- """Request to investigate a person"""
- nome: str = Field(..., min_length=2, description="Nome da pessoa")
- cpf: Optional[str] = Field(None, description="CPF (opcional)")
-
-
-class DossierResponse(BaseModel):
- """Dossier response"""
- tipo: str
- alvo: str
- cnpj_cpf: Optional[str]
- red_flags: List[str]
- score_risco: int
- data_geracao: str
- fonte_dados: List[str]
- secoes: Dict[str, Any]
-
-
-class CNPJResponse(BaseModel):
- """Quick CNPJ lookup response"""
- cnpj: str
- razao_social: str
- nome_fantasia: str
- situacao: str
- data_abertura: str
- capital_social: float
- endereco: str
- telefone: str
- email: str
- atividade: str
- socios: List[Dict[str, Any]]
-
-
-@router.post("/company", response_model=DossierResponse)
-async def investigate_company(request: InvestigateCompanyRequest):
- """
- Build a comprehensive dossier on a company.
-
- Collects:
- - Cadastral data from CNPJ
- - Partners/owners
- - Sanctions (CEIS, CNEP, CEPIM)
- - News and media mentions
- - Related entities
-
- Returns risk score and red flags.
- """
- try:
- dossier = await investigar_empresa(request.cnpj)
- return DossierResponse(**dossier_to_dict(dossier))
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.post("/person", response_model=DossierResponse)
-async def investigate_person(request: InvestigatePersonRequest):
- """
- Build a dossier on a person.
-
- Note: Due to LGPD, personal data is limited.
- Mainly uses web search for public information.
- """
- try:
- dossier = await investigar_pessoa(request.nome, request.cpf)
- return DossierResponse(**dossier_to_dict(dossier))
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@router.get("/cnpj/{cnpj}", response_model=CNPJResponse)
-async def lookup_cnpj(cnpj: str):
- """
- Quick CNPJ lookup - returns basic company data.
- """
- try:
- data = await consultar_cnpj(cnpj)
-
- if not data:
- raise HTTPException(status_code=404, detail="CNPJ não encontrado")
-
- return CNPJResponse(
- cnpj=data.cnpj,
- razao_social=data.razao_social,
- nome_fantasia=data.nome_fantasia,
- situacao=data.situacao,
- data_abertura=data.data_abertura,
- capital_social=data.capital_social,
- endereco=f"{data.logradouro}, {data.numero} - {data.bairro}, {data.cidade}/{data.uf}",
- telefone=data.telefone,
- email=data.email,
- atividade=f"{data.cnae_principal} - {data.cnae_descricao}",
- socios=data.socios
- )
-
- except HTTPException:
- raise
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-# ===========================================
-# Autonomous Investigation Agent
-# ===========================================
-
-class AgentInvestigateRequest(BaseModel):
- """Request for autonomous investigation"""
- mission: str = Field(..., min_length=5, description="Missão de investigação em linguagem natural")
- max_iterations: int = Field(10, ge=1, le=20, description="Máximo de iterações do agente")
-
-
-class FindingResponse(BaseModel):
- """A finding from investigation"""
- title: str
- content: str
- source: str
- timestamp: str
-
-
-class AgentInvestigateResponse(BaseModel):
- """Response from autonomous investigation"""
- mission: str
- status: str
- report: str
- findings: List[FindingResponse]
- entities_discovered: int
- connections_mapped: int
- iterations: int
- tools_used: List[str]
-
-
-@router.post("/agent", response_model=AgentInvestigateResponse)
-async def investigate_with_agent(
- request: AgentInvestigateRequest,
- db: Session = Depends(get_scoped_db)
-):
- """
- Autonomous investigation with AI agent.
-
- The agent will:
- 1. Search NUMIDIUM for existing entities
- 2. Query CNPJ data for Brazilian companies
- 3. Search the web for news and public info
- 4. Follow leads and connections
- 5. Generate a comprehensive report
-
- Example missions:
- - "Investigue a rede de empresas de João Silva"
- - "Descubra os sócios da empresa CNPJ 11.222.333/0001-44"
- - "Pesquise sobre a empresa XYZ e suas conexões"
- """
- try:
- result = await investigator_agent.investigate(
- mission=request.mission,
- db=db,
- max_iterations=request.max_iterations
- )
-
- return AgentInvestigateResponse(
- mission=result.mission,
- status=result.status,
- report=result.report,
- findings=[
- FindingResponse(
- title=f.title,
- content=f.content,
- source=f.source,
- timestamp=f.timestamp
- )
- for f in result.findings
- ],
- entities_discovered=len(result.entities_discovered),
- connections_mapped=len(result.connections_mapped),
- iterations=result.iterations,
- tools_used=result.tools_used
- )
-
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
diff --git a/app/api/routes/projects.py b/app/api/routes/projects.py
deleted file mode 100644
index d283d8b2d566c49e7a32cba8acc10b39307b4299..0000000000000000000000000000000000000000
--- a/app/api/routes/projects.py
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-Projects API Routes - Workspace management
-"""
-from fastapi import APIRouter, Depends, HTTPException
-from pydantic import BaseModel
-from typing import Optional, List
-from datetime import datetime
-from sqlalchemy.orm import Session
-
-from app.api.deps import get_scoped_db
-from app.models import Project, Entity, Relationship
-
-
-router = APIRouter(prefix="/projects", tags=["Projects"])
-
-
-class ProjectCreate(BaseModel):
- name: str
- description: Optional[str] = None
- color: str = "#00d4ff"
- icon: str = "folder"
-
-
-class ProjectResponse(BaseModel):
- id: str
- name: str
- description: Optional[str]
- color: str
- icon: str
- entity_count: int = 0
- created_at: datetime
-
- class Config:
- from_attributes = True
-
-
-@router.get("", response_model=List[ProjectResponse])
-def list_projects(db: Session = Depends(get_scoped_db)):
- """List all projects"""
- projects = db.query(Project).order_by(Project.created_at.desc()).all()
-
- result = []
- for p in projects:
- entity_count = db.query(Entity).filter(Entity.project_id == p.id).count()
- result.append(ProjectResponse(
- id=p.id,
- name=p.name,
- description=p.description,
- color=p.color,
- icon=p.icon,
- entity_count=entity_count,
- created_at=p.created_at
- ))
-
- return result
-
-
-@router.post("", response_model=ProjectResponse)
-def create_project(project: ProjectCreate, db: Session = Depends(get_scoped_db)):
- """Create a new project"""
- new_project = Project(
- name=project.name,
- description=project.description,
- color=project.color,
- icon=project.icon
- )
- db.add(new_project)
- db.commit()
- db.refresh(new_project)
-
- return ProjectResponse(
- id=new_project.id,
- name=new_project.name,
- description=new_project.description,
- color=new_project.color,
- icon=new_project.icon,
- entity_count=0,
- created_at=new_project.created_at
- )
-
-
-@router.get("/{project_id}", response_model=ProjectResponse)
-def get_project(project_id: str, db: Session = Depends(get_scoped_db)):
- """Get project by ID"""
- project = db.query(Project).filter(Project.id == project_id).first()
-
- if not project:
- raise HTTPException(status_code=404, detail="Project not found")
-
- entity_count = db.query(Entity).filter(Entity.project_id == project_id).count()
-
- return ProjectResponse(
- id=project.id,
- name=project.name,
- description=project.description,
- color=project.color,
- icon=project.icon,
- entity_count=entity_count,
- created_at=project.created_at
- )
-
-
-@router.delete("/{project_id}")
-def delete_project(project_id: str, db: Session = Depends(get_scoped_db)):
- """Delete project and optionally its entities"""
- project = db.query(Project).filter(Project.id == project_id).first()
-
- if not project:
- raise HTTPException(status_code=404, detail="Project not found")
-
- # Set entities and relationships to no project (null)
- db.query(Entity).filter(Entity.project_id == project_id).update({"project_id": None})
- db.query(Relationship).filter(Relationship.project_id == project_id).update({"project_id": None})
-
- db.delete(project)
- db.commit()
-
- return {"message": f"Project '{project.name}' deleted"}
-
-
-@router.put("/{project_id}")
-def update_project(project_id: str, project: ProjectCreate, db: Session = Depends(get_scoped_db)):
- """Update project"""
- existing = db.query(Project).filter(Project.id == project_id).first()
-
- if not existing:
- raise HTTPException(status_code=404, detail="Project not found")
-
- existing.name = project.name
- existing.description = project.description
- existing.color = project.color
- existing.icon = project.icon
- db.commit()
-
- return {"message": "Project updated"}
diff --git a/app/api/routes/relationships.py b/app/api/routes/relationships.py
deleted file mode 100644
index e5887de9ce8df297614adf63c4db8d365fc33114..0000000000000000000000000000000000000000
--- a/app/api/routes/relationships.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""
-Relationship CRUD Routes
-"""
-from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy.orm import Session
-from typing import List, Optional
-
-from app.api.deps import get_scoped_db
-from app.models import Relationship, Entity
-from app.schemas import RelationshipCreate, RelationshipResponse
-
-router = APIRouter(prefix="/relationships", tags=["Relationships"])
-
-
-@router.get("/", response_model=List[RelationshipResponse])
-def list_relationships(
- type: Optional[str] = None,
- source_id: Optional[str] = None,
- target_id: Optional[str] = None,
- limit: int = Query(default=50, le=200),
- db: Session = Depends(get_scoped_db)
-):
- """Lista relacionamentos com filtros opcionais"""
- query = db.query(Relationship)
-
- if type:
- query = query.filter(Relationship.type == type)
- if source_id:
- query = query.filter(Relationship.source_id == source_id)
- if target_id:
- query = query.filter(Relationship.target_id == target_id)
-
- return query.limit(limit).all()
-
-
-@router.get("/types")
-def get_relationship_types(db: Session = Depends(get_scoped_db)):
- """Retorna todos os tipos de relacionamento unicos"""
- types = db.query(Relationship.type).distinct().all()
- return [t[0] for t in types]
-
-
-@router.post("/", response_model=RelationshipResponse, status_code=201)
-def create_relationship(
- rel: RelationshipCreate,
- db: Session = Depends(get_scoped_db)
-):
- """Cria um novo relacionamento entre entidades"""
- source = db.query(Entity).filter(Entity.id == rel.source_id).first()
- target = db.query(Entity).filter(Entity.id == rel.target_id).first()
-
- if not source:
- raise HTTPException(status_code=404, detail="Source entity not found")
- if not target:
- raise HTTPException(status_code=404, detail="Target entity not found")
-
- db_rel = Relationship(**rel.model_dump())
- db.add(db_rel)
- db.commit()
- db.refresh(db_rel)
- return db_rel
-
-
-@router.delete("/{relationship_id}")
-def delete_relationship(
- relationship_id: str,
- db: Session = Depends(get_scoped_db)
-):
- """Deleta um relacionamento"""
- db_rel = db.query(Relationship).filter(Relationship.id == relationship_id).first()
- if not db_rel:
- raise HTTPException(status_code=404, detail="Relationship not found")
-
- db.delete(db_rel)
- db.commit()
- return {"message": "Relationship deleted"}
diff --git a/app/api/routes/research.py b/app/api/routes/research.py
deleted file mode 100644
index 41eb6efdb31bbc7cb0da78df28ce780a75fc0f9b..0000000000000000000000000000000000000000
--- a/app/api/routes/research.py
+++ /dev/null
@@ -1,158 +0,0 @@
-"""
-Research API Routes - Deep research with automatic entity extraction
-"""
-from fastapi import APIRouter, Depends, HTTPException
-from pydantic import BaseModel, Field
-from typing import Optional, List
-import traceback
-from sqlalchemy.orm import Session
-
-from app.api.deps import get_scoped_db
-from app.services import lancer
-from app.services.nlp import entity_extractor
-from app.services.geocoding import geocode
-from app.models.entity import Entity, Relationship
-
-
-router = APIRouter(prefix="/research", tags=["Research"])
-
-
-class ResearchRequest(BaseModel):
- """Request model for research"""
- query: str = Field(..., min_length=3, description="Research query")
- mode: str = Field(default="search", description="Research mode: search, deep, heavy")
- max_results: int = Field(default=10, le=20)
- auto_extract: bool = Field(default=True, description="Auto-extract entities using NER")
-
-
-class ResearchResponse(BaseModel):
- """Response model for research"""
- query: str
- answer: Optional[str]
- sources: List[dict]
- citations: List[dict]
- extracted_entities: int
- extracted_relationships: int
- processing_time_ms: float
-
-
-@router.post("", response_model=ResearchResponse)
-async def research(request: ResearchRequest, db: Session = Depends(get_scoped_db)):
- """
- Perform AI-powered research using Lancer API and optionally extract entities.
-
- Modes:
- - search: Fast search with AI synthesis
- - deep: Multi-dimensional deep research (slower, more comprehensive)
- - heavy: Search with full content scraping
- """
- try:
- # Call Lancer API based on mode
- if request.mode == "deep":
- result = await lancer.deep_research(request.query)
- elif request.mode == "heavy":
- result = await lancer.heavy_search(request.query, request.max_results)
- else:
- result = await lancer.search(request.query, request.max_results)
-
- extracted_entities = 0
- extracted_relationships = 0
-
- # Extract entities if enabled
- if request.auto_extract and result.raw_text:
- try:
- # Limit text to avoid token limits
- text_to_analyze = result.raw_text[:5000]
- ner_result = await entity_extractor.extract(text_to_analyze)
-
- created_entities = {}
-
- # Create entities
- for entity in ner_result.entities:
- # Check if exists
- existing = db.query(Entity).filter(
- Entity.name.ilike(f"%{entity.name}%")
- ).first()
-
- if existing:
- created_entities[entity.name] = existing
- else:
- # Geocode if location
- lat, lng = None, None
- if entity.type == "location":
- coords = await geocode(entity.name)
- if coords:
- lat, lng = coords
-
- new_entity = Entity(
- name=entity.name,
- type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person",
- description=entity.description or entity.role or "",
- source="lancer_research",
- latitude=lat,
- longitude=lng,
- properties={
- "role": entity.role,
- "aliases": entity.aliases,
- "research_query": request.query
- }
- )
- db.add(new_entity)
- db.commit()
- db.refresh(new_entity)
- created_entities[entity.name] = new_entity
- extracted_entities += 1
-
- # Create relationships
- for rel in ner_result.relationships:
- source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
- target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()
-
- if source_ent and target_ent and source_ent.id != target_ent.id:
- existing_rel = db.query(Relationship).filter(
- Relationship.source_id == source_ent.id,
- Relationship.target_id == target_ent.id,
- Relationship.type == rel.relationship_type
- ).first()
-
- if not existing_rel:
- new_rel = Relationship(
- source_id=source_ent.id,
- target_id=target_ent.id,
- type=rel.relationship_type,
- properties={"context": rel.context, "research_query": request.query}
- )
- db.add(new_rel)
- extracted_relationships += 1
-
- db.commit()
-
- except Exception as e:
- print(f"NER extraction error: {e}")
- traceback.print_exc()
-
- # Prepare sources for response
- sources = [
- {
- "title": r.title,
- "url": r.url,
- "content": r.content[:300] if r.content else "",
- "score": r.score
- }
- for r in result.results[:10]
- ]
-
- return ResearchResponse(
- query=result.query,
- answer=result.answer,
- sources=sources,
- citations=result.citations,
- extracted_entities=extracted_entities,
- extracted_relationships=extracted_relationships,
- processing_time_ms=result.processing_time_ms
- )
-
- except Exception as e:
- print(f"Research error: {e}")
- traceback.print_exc()
- raise HTTPException(status_code=500, detail=str(e))
diff --git a/app/api/routes/search.py b/app/api/routes/search.py
deleted file mode 100644
index 27ad925fb6abc0eb121ff2660bc06fd55fd322f0..0000000000000000000000000000000000000000
--- a/app/api/routes/search.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""
-Search and Analytics Routes
-"""
-from fastapi import APIRouter, Depends, Query
-from sqlalchemy.orm import Session
-from sqlalchemy import or_, func
-from typing import Optional
-
-from app.api.deps import get_scoped_db
-from app.models import Entity, Relationship, Event, Document
-from app.schemas import SearchResult, SystemStats
-
-router = APIRouter(prefix="/search", tags=["Search"])
-
-
-@router.get("", response_model=SearchResult)
-def global_search(
- q: str = Query(..., min_length=2, description="Search query"),
- types: Optional[str] = Query(None, description="Entity types (comma-separated)"),
- limit: int = Query(default=20, le=100),
- db: Session = Depends(get_scoped_db)
-):
- """
- Busca global em todas as entidades, eventos e documentos.
- """
- search_term = f"%{q}%"
- type_filter = types.split(",") if types else None
-
- entity_query = db.query(Entity).filter(
- or_(
- Entity.name.ilike(search_term),
- Entity.description.ilike(search_term)
- )
- )
- if type_filter:
- entity_query = entity_query.filter(Entity.type.in_(type_filter))
- entities = entity_query.limit(limit).all()
-
- events = db.query(Event).filter(
- or_(
- Event.title.ilike(search_term),
- Event.description.ilike(search_term)
- )
- ).limit(limit).all()
-
- documents = db.query(Document).filter(
- or_(
- Document.title.ilike(search_term),
- Document.content.ilike(search_term)
- )
- ).limit(limit).all()
-
- return SearchResult(
- entities=entities,
- events=events,
- documents=documents
- )
-
-
-@router.get("/stats", response_model=SystemStats)
-def get_system_stats(db: Session = Depends(get_scoped_db)):
- """
- Retorna estatisticas gerais do sistema.
- """
- total_entities = db.query(Entity).count()
- total_relationships = db.query(Relationship).count()
- total_events = db.query(Event).count()
- total_documents = db.query(Document).count()
-
- type_counts = db.query(
- Entity.type,
- func.count(Entity.id)
- ).group_by(Entity.type).all()
-
- entities_by_type = {t: c for t, c in type_counts}
-
- recent = db.query(Entity).order_by(Entity.created_at.desc()).limit(10).all()
- recent_activity = [
- {
- "id": e.id,
- "type": e.type,
- "name": e.name,
- "created_at": e.created_at.isoformat()
- }
- for e in recent
- ]
-
- return SystemStats(
- total_entities=total_entities,
- total_relationships=total_relationships,
- total_events=total_events,
- total_documents=total_documents,
- entities_by_type=entities_by_type,
- recent_activity=recent_activity
- )
-
-
-@router.get("/geo")
-def get_geo_data(
- entity_type: Optional[str] = None,
- db: Session = Depends(get_scoped_db)
-):
- """
- Retorna entidades com geolocalizacao.
- """
- query = db.query(Entity).filter(
- Entity.latitude.isnot(None),
- Entity.longitude.isnot(None)
- )
-
- if entity_type:
- query = query.filter(Entity.type == entity_type)
-
- entities = query.all()
-
- return [
- {
- "id": e.id,
- "type": e.type,
- "name": e.name,
- "lat": e.latitude,
- "lng": e.longitude,
- "properties": e.properties
- }
- for e in entities
- ]
diff --git a/app/api/routes/session.py b/app/api/routes/session.py
deleted file mode 100644
index c81ea29a5363a585f72aa5f0df7e2798292c189e..0000000000000000000000000000000000000000
--- a/app/api/routes/session.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""
-Session management routes
-"""
-from fastapi import APIRouter, Header, Cookie, Response, Request
-from typing import Optional
-import uuid
-
-from app.core.database import create_new_session_id
-from app.config import settings
-
-router = APIRouter(prefix="/session", tags=["Session"])
-
-
-@router.post("/create")
-def create_session(response: Response, request: Request):
- """Create a new session and return session_id"""
- session_id = create_new_session_id()
- secure = settings.cookie_secure
- samesite = settings.cookie_samesite
- proto = request.headers.get("x-forwarded-proto", request.url.scheme)
- if proto != "https" and secure:
- secure = False
- samesite = "lax"
- response.set_cookie(
- key="numidium_session",
- value=session_id,
- max_age=60*60*24*365, # 1 year
- httponly=True,
- samesite=samesite,
- secure=secure
- )
- return {"session_id": session_id}
-
-
-@router.get("/current")
-def get_current_session(
- numidium_session: Optional[str] = Cookie(None),
- x_session_id: Optional[str] = Header(None)
-):
- """Get current session ID"""
- session_id = x_session_id or numidium_session
- if not session_id:
- return {"session_id": None, "message": "No session. Call POST /session/create"}
- return {"session_id": session_id}
diff --git a/app/api/routes/timeline.py b/app/api/routes/timeline.py
deleted file mode 100644
index fa45453faf038d34277ffb6a5f1481a2748a8b0e..0000000000000000000000000000000000000000
--- a/app/api/routes/timeline.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""
-Timeline API Routes - Temporal view of entities and relationships
-"""
-from fastapi import APIRouter, Depends, Query
-from pydantic import BaseModel
-from typing import Optional, List, Dict, Any
-from datetime import datetime, timedelta
-from collections import defaultdict
-from sqlalchemy.orm import Session
-
-from app.api.deps import get_scoped_db
-from app.models.entity import Entity, Relationship
-
-
-router = APIRouter(prefix="/timeline", tags=["Timeline"])
-
-
-class TimelineEvent(BaseModel):
- id: str
- type: str # "entity" or "relationship"
- entity_type: Optional[str] = None
- name: str
- description: Optional[str] = None
- date: str
- icon: str
-
-
-class TimelineGroup(BaseModel):
- date: str
- label: str
- events: List[TimelineEvent]
-
-
-class TimelineResponse(BaseModel):
- groups: List[TimelineGroup]
- total_events: int
-
-
-@router.get("", response_model=TimelineResponse)
-async def get_timeline(
- days: int = Query(default=30, ge=1, le=365),
- entity_type: Optional[str] = None,
- limit: int = Query(default=100, ge=1, le=500),
- db: Session = Depends(get_scoped_db)
-):
- """
- Get timeline of recent entities and relationships.
- Groups events by date.
- """
- # Calculate date range
- end_date = datetime.now()
- start_date = end_date - timedelta(days=days)
-
- events = []
-
- # Get entities
- query = db.query(Entity).filter(
- Entity.created_at >= start_date
- )
-
- if entity_type:
- query = query.filter(Entity.type == entity_type)
-
- entities = query.order_by(Entity.created_at.desc()).limit(limit).all()
-
- icon_map = {
- "person": "👤",
- "organization": "🏢",
- "location": "📍",
- "event": "📅",
- "concept": "💡",
- "product": "📦"
- }
-
- for e in entities:
- # Prefer event_date over created_at
- date = e.event_date if e.event_date else e.created_at
- events.append(TimelineEvent(
- id=e.id,
- type="entity",
- entity_type=e.type,
- name=e.name,
- description=e.description[:100] if e.description else None,
- date=date.isoformat() if date else datetime.now().isoformat(),
- icon=icon_map.get(e.type, "📄")
- ))
-
- # Get relationships
- relationships = db.query(Relationship).filter(
- Relationship.created_at >= start_date
- ).order_by(Relationship.created_at.desc()).limit(limit // 2).all()
-
- for r in relationships:
- source = db.query(Entity).filter(Entity.id == r.source_id).first()
- target = db.query(Entity).filter(Entity.id == r.target_id).first()
-
- if source and target:
- # Prefer event_date over created_at
- date = r.event_date if r.event_date else r.created_at
- events.append(TimelineEvent(
- id=r.id,
- type="relationship",
- name=f"{source.name} → {target.name}",
- description=r.type,
- date=date.isoformat() if date else datetime.now().isoformat(),
- icon="🔗"
- ))
-
- # Sort by date
- events.sort(key=lambda x: x.date, reverse=True)
-
- # Group by date
- groups_dict = defaultdict(list)
- for event in events:
- date_key = event.date[:10] # YYYY-MM-DD
- groups_dict[date_key].append(event)
-
- # Format groups
- groups = []
- for date_key in sorted(groups_dict.keys(), reverse=True):
- try:
- dt = datetime.fromisoformat(date_key)
- label = dt.strftime("%d %b %Y")
- except:
- label = date_key
-
- groups.append(TimelineGroup(
- date=date_key,
- label=label,
- events=groups_dict[date_key]
- ))
-
- return TimelineResponse(
- groups=groups,
- total_events=len(events)
- )
-
-
-@router.get("/stats")
-async def get_timeline_stats(db: Session = Depends(get_scoped_db)):
- """Get statistics for timeline visualization"""
-
- # Count entities by type
- entity_counts = {}
- for entity_type in ["person", "organization", "location", "event", "concept"]:
- count = db.query(Entity).filter(Entity.type == entity_type).count()
- entity_counts[entity_type] = count
-
- # Count relationships
- relationship_count = db.query(Relationship).count()
-
- # Recent activity (last 7 days)
- week_ago = datetime.now() - timedelta(days=7)
- recent_entities = db.query(Entity).filter(Entity.created_at >= week_ago).count()
- recent_relationships = db.query(Relationship).filter(Relationship.created_at >= week_ago).count()
-
- return {
- "entity_counts": entity_counts,
- "relationship_count": relationship_count,
- "recent_activity": {
- "entities": recent_entities,
- "relationships": recent_relationships,
- "total": recent_entities + recent_relationships
- }
- }
diff --git a/app/config.py b/app/config.py
deleted file mode 100644
index 23f3497193305642c7ae08e7c907009e44c489f0..0000000000000000000000000000000000000000
--- a/app/config.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-Numidium Backend Configuration
-"""
-from pydantic_settings import BaseSettings
-from functools import lru_cache
-import os
-
-
-class Settings(BaseSettings):
- """Application settings"""
-
- # App Info
- app_name: str = "Numidium"
- app_version: str = "0.1.0"
- debug: bool = False
-
- # Database
- database_url: str = "sqlite:///./data/numidium.db"
-
- # APIs (opcional - pode configurar depois)
- newsapi_key: str = ""
-
- # Cerebras API for LLM-based entity extraction
- cerebras_api_key: str = ""
-
- # AetherMap API for semantic search and NER
- aethermap_url: str = "https://madras1-aethermap.hf.space"
-
- # CORS
- cors_origins: list[str] = ["*"]
-
- # Session cookie
- cookie_secure: bool = True
- cookie_samesite: str = "none"
-
- class Config:
- env_file = ".env"
- env_file_encoding = "utf-8"
-
-
-@lru_cache()
-def get_settings() -> Settings:
- """Get cached settings"""
- return Settings()
-
-
-settings = get_settings()
diff --git a/app/core/__init__.py b/app/core/__init__.py
deleted file mode 100644
index 0e8825ce5959f9f016f6f5ed46c2a54fdd15d9e8..0000000000000000000000000000000000000000
--- a/app/core/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Core module
-from app.core.database import get_db, init_db, Base
diff --git a/app/core/__pycache__/__init__.cpython-311.pyc b/app/core/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index c5dc2c47dff4d25a449c31d5b491838968bd8699..0000000000000000000000000000000000000000
Binary files a/app/core/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/core/__pycache__/database.cpython-311.pyc b/app/core/__pycache__/database.cpython-311.pyc
deleted file mode 100644
index 5d66b606dc407b3d70e7e6b1d62893eb13ff9d42..0000000000000000000000000000000000000000
Binary files a/app/core/__pycache__/database.cpython-311.pyc and /dev/null differ
diff --git a/app/core/database.py b/app/core/database.py
deleted file mode 100644
index 6fbd7f1d970d02b46df83e466a09287bfc0090be..0000000000000000000000000000000000000000
--- a/app/core/database.py
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-Database configuration and session management
-Per-session databases - each user session gets its own SQLite file
-"""
-from sqlalchemy import create_engine, text
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker, Session
-from typing import Optional
-import os
-import uuid
-
-# Ensure data directory exists
-os.makedirs("data/sessions", exist_ok=True)
-
-# Base class for models
-Base = declarative_base()
-
-# Cache for session engines
-_session_engines = {}
-_session_makers = {}
-
-
-def get_session_engine(session_id: str):
- """Get or create engine for a specific session"""
- if session_id not in _session_engines:
- db_path = f"data/sessions/{session_id}.db"
- engine = create_engine(
- f"sqlite:///./{db_path}",
- connect_args={"check_same_thread": False}
- )
- _session_engines[session_id] = engine
- _session_makers[session_id] = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
- # Initialize tables for this session
- Base.metadata.create_all(bind=engine)
- _run_migrations(engine)
-
- return _session_engines[session_id]
-
-
-def get_session_db(session_id: str):
- """Get database session for a specific user session"""
- get_session_engine(session_id) # Ensure engine exists
- SessionLocal = _session_makers[session_id]
- db = SessionLocal()
- try:
- yield db
- finally:
- db.close()
-
-
-def get_db_for_session(session_id: str) -> Session:
- """Direct session getter (non-generator) for routes"""
- get_session_engine(session_id)
- SessionLocal = _session_makers[session_id]
- return SessionLocal()
-
-
-# Legacy - default database for backwards compatibility
-from app.config import settings
-engine = create_engine(
- settings.database_url,
- connect_args={"check_same_thread": False}
-)
-SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-
-def get_default_session() -> Session:
- """Create a new session for the default database."""
- return SessionLocal()
-
-
-def get_db():
- """Legacy: Default database session"""
- db = get_default_session()
- try:
- yield db
- finally:
- db.close()
-
-
-def _run_migrations(eng):
- """Run migrations on an engine"""
- with eng.connect() as conn:
- try:
- conn.execute(text("ALTER TABLE entities ADD COLUMN event_date DATETIME"))
- conn.commit()
- except Exception:
- pass
- try:
- conn.execute(text("ALTER TABLE relationships ADD COLUMN event_date DATETIME"))
- conn.commit()
- except Exception:
- pass
- try:
- conn.execute(text("ALTER TABLE entities ADD COLUMN project_id VARCHAR(36)"))
- conn.commit()
- except Exception:
- pass
- try:
- conn.execute(text("ALTER TABLE relationships ADD COLUMN project_id VARCHAR(36)"))
- conn.commit()
- except Exception:
- pass
-
-
-def init_db():
- """Initialize default database tables"""
- Base.metadata.create_all(bind=engine)
- _run_migrations(engine)
-
-
-def create_new_session_id() -> str:
- """Generate a new session ID"""
- return str(uuid.uuid4())
diff --git a/app/main.py b/app/main.py
deleted file mode 100644
index 7abfa0ca7fb7e31fc2a58e35b5528eb7b135bada..0000000000000000000000000000000000000000
--- a/app/main.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""
-Numidium Backend - Main Application
-Plataforma de Inteligência e Análise de Dados
-"""
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from contextlib import asynccontextmanager
-
-from app.config import settings
-from app.core.database import init_db
-from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate, dados_publicos, timeline, session, aethermap
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
- """Startup and shutdown events"""
- # Startup: Initialize database
- init_db()
- print("🚀 Numidium Backend started!")
- print(f"📊 Database: {settings.database_url}")
- yield
- # Shutdown
- print("👋 Numidium Backend shutting down...")
-
-
-# Create FastAPI app
-app = FastAPI(
- title="Numidium API",
- description="""
- ## 🔮 Sistema de Inteligência e Análise de Dados
-
- Backend do VANTAGE - Uma plataforma para:
- - 📥 Ingestão de dados de múltiplas fontes (Wikipedia, News, Manual)
- - 🔗 Mapeamento de conexões entre entidades
- - 🗺️ Visualização geográfica
- - 📊 Análise de grafos e relacionamentos
- - 🔍 Busca global
- """,
- version=settings.app_version,
- lifespan=lifespan
-)
-
-# CORS middleware
-app.add_middleware(
- CORSMiddleware,
- allow_origins=settings.cors_origins,
- allow_credentials=True,
- allow_methods=["*"],
- allow_headers=["*"],
-)
-
-# Include routers
-app.include_router(entities.router, prefix="/api/v1")
-app.include_router(relationships.router, prefix="/api/v1")
-app.include_router(events.router, prefix="/api/v1")
-app.include_router(search.router, prefix="/api/v1")
-app.include_router(ingest.router, prefix="/api/v1")
-app.include_router(analyze.router, prefix="/api/v1")
-app.include_router(graph.router, prefix="/api/v1")
-app.include_router(research.router, prefix="/api/v1")
-app.include_router(chat.router, prefix="/api/v1")
-app.include_router(investigate.router, prefix="/api/v1")
-app.include_router(dados_publicos.router, prefix="/api/v1")
-app.include_router(timeline.router, prefix="/api/v1")
-app.include_router(session.router, prefix="/api/v1")
-app.include_router(aethermap.router, prefix="/api/v1/aethermap", tags=["aethermap"])
-
-
-@app.get("/")
-def root():
- """Root endpoint - API info"""
- return {
- "name": "Numidium",
- "version": settings.app_version,
- "status": "online",
- "docs": "/docs",
- "description": "Sistema de Inteligência e Análise de Dados"
- }
-
-
-@app.get("/health")
-def health_check():
- """Health check endpoint for HF Spaces"""
- return {"status": "healthy"}
-
-
-@app.get("/api/v1")
-def api_info():
- """API v1 info"""
- return {
- "version": "1.0.0",
- "endpoints": {
- "entities": "/api/v1/entities",
- "relationships": "/api/v1/relationships",
- "events": "/api/v1/events",
- "search": "/api/v1/search",
- "ingest": "/api/v1/ingest"
- }
- }
diff --git a/app/models/__init__.py b/app/models/__init__.py
deleted file mode 100644
index dda9ada652332c2b420769a6ace731249e11cfd8..0000000000000000000000000000000000000000
--- a/app/models/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Models module
-from app.models.entity import Entity, Relationship, Event, Document
-from app.models.project import Project
diff --git a/app/models/__pycache__/__init__.cpython-311.pyc b/app/models/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index 5bf1933151b2e8a290dc79c4647a626d3d0500ff..0000000000000000000000000000000000000000
Binary files a/app/models/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/models/__pycache__/entity.cpython-311.pyc b/app/models/__pycache__/entity.cpython-311.pyc
deleted file mode 100644
index 08def07731fef26be3662b7e40f9afca7961637f..0000000000000000000000000000000000000000
Binary files a/app/models/__pycache__/entity.cpython-311.pyc and /dev/null differ
diff --git a/app/models/__pycache__/project.cpython-311.pyc b/app/models/__pycache__/project.cpython-311.pyc
deleted file mode 100644
index 5e11c4a8127b91fa0a97a4fc6607860af7c04d37..0000000000000000000000000000000000000000
Binary files a/app/models/__pycache__/project.cpython-311.pyc and /dev/null differ
diff --git a/app/models/entity.py b/app/models/entity.py
deleted file mode 100644
index 07f9afbd7c789db76ca4d482de4655cd99eb3bda..0000000000000000000000000000000000000000
--- a/app/models/entity.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""
-SQLAlchemy Models for Numidium
-"""
-from sqlalchemy import Column, String, Text, DateTime, Float, JSON, ForeignKey, Table
-from sqlalchemy.orm import relationship
-from datetime import datetime
-import uuid
-
-from app.core.database import Base
-
-
-def generate_uuid():
- return str(uuid.uuid4())
-
-
-class Entity(Base):
- """
- Entidade - qualquer coisa rastreável no sistema
- Pode ser: pessoa, organização, local, veículo, evento, documento, etc.
- """
- __tablename__ = "entities"
-
- id = Column(String(36), primary_key=True, default=generate_uuid)
- project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True)
- type = Column(String(50), nullable=False, index=True) # person, organization, location, etc
- name = Column(String(255), nullable=False, index=True)
- description = Column(Text, nullable=True)
- properties = Column(JSON, default=dict) # Dados flexíveis
-
- # Geolocalização (opcional)
- latitude = Column(Float, nullable=True)
- longitude = Column(Float, nullable=True)
-
- # Data histórica do evento/entidade (quando aconteceu, não quando foi adicionado)
- event_date = Column(DateTime, nullable=True)
-
- # Fonte do dado
- source = Column(String(100), nullable=True) # wikipedia, newsapi, manual, etc
- source_url = Column(Text, nullable=True)
-
- # Timestamps
- created_at = Column(DateTime, default=datetime.utcnow)
- updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
-
- # Relacionamentos
- outgoing_relationships = relationship(
- "Relationship",
- foreign_keys="Relationship.source_id",
- back_populates="source_entity"
- )
- incoming_relationships = relationship(
- "Relationship",
- foreign_keys="Relationship.target_id",
- back_populates="target_entity"
- )
-
-
-class Relationship(Base):
- """
- Relacionamento entre duas entidades
- Exemplos: works_for, knows, owns, located_at, participated_in
- """
- __tablename__ = "relationships"
-
- id = Column(String(36), primary_key=True, default=generate_uuid)
- project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True)
- source_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
- target_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
- type = Column(String(50), nullable=False, index=True) # works_for, knows, owns, etc
- properties = Column(JSON, default=dict)
- confidence = Column(Float, default=1.0) # 0-1, quão certo estamos dessa conexão
-
- # Data histórica do relacionamento (quando aconteceu)
- event_date = Column(DateTime, nullable=True)
-
- # Fonte
- source = Column(String(100), nullable=True)
-
- # Timestamps
- created_at = Column(DateTime, default=datetime.utcnow)
-
- # Relacionamentos
- source_entity = relationship("Entity", foreign_keys=[source_id], back_populates="outgoing_relationships")
- target_entity = relationship("Entity", foreign_keys=[target_id], back_populates="incoming_relationships")
-
-
-class Event(Base):
- """
- Evento - algo que aconteceu envolvendo entidades
- """
- __tablename__ = "events"
-
- id = Column(String(36), primary_key=True, default=generate_uuid)
- type = Column(String(50), nullable=False, index=True)
- title = Column(String(255), nullable=False)
- description = Column(Text, nullable=True)
-
- # Quando aconteceu
- event_date = Column(DateTime, nullable=True)
-
- # Onde aconteceu
- location_name = Column(String(255), nullable=True)
- latitude = Column(Float, nullable=True)
- longitude = Column(Float, nullable=True)
-
- # Entidades envolvidas (armazenado como JSON array de IDs)
- entity_ids = Column(JSON, default=list)
-
- # Fonte
- source = Column(String(100), nullable=True)
- source_url = Column(Text, nullable=True)
-
- # Metadados
- properties = Column(JSON, default=dict)
-
- # Timestamps
- created_at = Column(DateTime, default=datetime.utcnow)
-
-
-class Document(Base):
- """
- Documento - texto/arquivo para análise
- """
- __tablename__ = "documents"
-
- id = Column(String(36), primary_key=True, default=generate_uuid)
- title = Column(String(255), nullable=False)
- content = Column(Text, nullable=True)
- summary = Column(Text, nullable=True) # Resumo gerado por IA
-
- # Tipo de documento
- doc_type = Column(String(50), default="text") # text, news, report, etc
-
- # Entidades mencionadas (extraídas por NLP)
- mentioned_entities = Column(JSON, default=list)
-
- # Fonte
- source = Column(String(100), nullable=True)
- source_url = Column(Text, nullable=True)
-
- # Timestamps
- published_at = Column(DateTime, nullable=True)
- created_at = Column(DateTime, default=datetime.utcnow)
diff --git a/app/models/project.py b/app/models/project.py
deleted file mode 100644
index 72f601e1975770622c146cc3b1b9fb6fbd912a3c..0000000000000000000000000000000000000000
--- a/app/models/project.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-Project Model - Workspaces for organizing investigations
-"""
-from sqlalchemy import Column, String, Text, DateTime
-from datetime import datetime
-import uuid
-
-from app.core.database import Base
-
-
-def generate_uuid():
- return str(uuid.uuid4())
-
-
-class Project(Base):
- """
- Projeto/Workspace - agrupa entidades e relacionamentos por investigação
- """
- __tablename__ = "projects"
-
- id = Column(String(36), primary_key=True, default=generate_uuid)
- name = Column(String(255), nullable=False)
- description = Column(Text, nullable=True)
- color = Column(String(7), default="#00d4ff") # Hex color for UI
- icon = Column(String(50), default="folder") # Icon name
-
- # Timestamps
- created_at = Column(DateTime, default=datetime.utcnow)
- updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py
deleted file mode 100644
index 5afaa5cdacc5762ea19abe607c7dab6309e351a8..0000000000000000000000000000000000000000
--- a/app/schemas/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# Schemas module
-from app.schemas.schemas import (
- EntityCreate, EntityUpdate, EntityResponse,
- RelationshipCreate, RelationshipResponse,
- EventCreate, EventResponse,
- DocumentCreate, DocumentResponse,
- GraphData, GraphNode, GraphEdge,
- SearchQuery, SearchResult,
- SystemStats
-)
diff --git a/app/schemas/__pycache__/__init__.cpython-311.pyc b/app/schemas/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index 5a0991c3a362725e79629796654b9dc0ed9c9668..0000000000000000000000000000000000000000
Binary files a/app/schemas/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/schemas/__pycache__/schemas.cpython-311.pyc b/app/schemas/__pycache__/schemas.cpython-311.pyc
deleted file mode 100644
index 8358505555f3036c07fa84d01cd9cd01b7b97b9f..0000000000000000000000000000000000000000
Binary files a/app/schemas/__pycache__/schemas.cpython-311.pyc and /dev/null differ
diff --git a/app/schemas/schemas.py b/app/schemas/schemas.py
deleted file mode 100644
index afbff0c301ddb1fbe8cae0e4848fcafc48082ff0..0000000000000000000000000000000000000000
--- a/app/schemas/schemas.py
+++ /dev/null
@@ -1,163 +0,0 @@
-"""
-Pydantic Schemas for API validation
-"""
-from pydantic import BaseModel, Field
-from typing import Optional, List, Any
-from datetime import datetime
-
-
-# ========== Entity Schemas ==========
-
-class EntityBase(BaseModel):
- type: str = Field(..., description="Tipo da entidade: person, organization, location, etc")
- name: str = Field(..., description="Nome da entidade")
- description: Optional[str] = None
- properties: dict = Field(default_factory=dict)
- latitude: Optional[float] = None
- longitude: Optional[float] = None
- source: Optional[str] = None
- source_url: Optional[str] = None
-
-
-class EntityCreate(EntityBase):
- pass
-
-
-class EntityUpdate(BaseModel):
- type: Optional[str] = None
- name: Optional[str] = None
- description: Optional[str] = None
- properties: Optional[dict] = None
- latitude: Optional[float] = None
- longitude: Optional[float] = None
-
-
-class EntityResponse(EntityBase):
- id: str
- created_at: datetime
- updated_at: datetime
-
- class Config:
- from_attributes = True
-
-
-# ========== Relationship Schemas ==========
-
-class RelationshipBase(BaseModel):
- source_id: str
- target_id: str
- type: str = Field(..., description="Tipo: works_for, knows, owns, located_at, etc")
- properties: dict = Field(default_factory=dict)
- confidence: float = Field(default=1.0, ge=0, le=1)
- source: Optional[str] = None
-
-
-class RelationshipCreate(RelationshipBase):
- pass
-
-
-class RelationshipResponse(RelationshipBase):
- id: str
- created_at: datetime
-
- class Config:
- from_attributes = True
-
-
-# ========== Event Schemas ==========
-
-class EventBase(BaseModel):
- type: str
- title: str
- description: Optional[str] = None
- event_date: Optional[datetime] = None
- location_name: Optional[str] = None
- latitude: Optional[float] = None
- longitude: Optional[float] = None
- entity_ids: List[str] = Field(default_factory=list)
- source: Optional[str] = None
- source_url: Optional[str] = None
- properties: dict = Field(default_factory=dict)
-
-
-class EventCreate(EventBase):
- pass
-
-
-class EventResponse(EventBase):
- id: str
- created_at: datetime
-
- class Config:
- from_attributes = True
-
-
-# ========== Document Schemas ==========
-
-class DocumentBase(BaseModel):
- title: str
- content: Optional[str] = None
- doc_type: str = "text"
- source: Optional[str] = None
- source_url: Optional[str] = None
- published_at: Optional[datetime] = None
-
-
-class DocumentCreate(DocumentBase):
- pass
-
-
-class DocumentResponse(DocumentBase):
- id: str
- summary: Optional[str] = None
- mentioned_entities: List[str] = []
- created_at: datetime
-
- class Config:
- from_attributes = True
-
-
-# ========== Graph Schemas ==========
-
-class GraphNode(BaseModel):
- id: str
- type: str
- name: str
- properties: dict = {}
-
-
-class GraphEdge(BaseModel):
- source: str
- target: str
- type: str
- confidence: float = 1.0
-
-
-class GraphData(BaseModel):
- nodes: List[GraphNode]
- edges: List[GraphEdge]
-
-
-# ========== Search Schemas ==========
-
-class SearchQuery(BaseModel):
- query: str
- entity_types: Optional[List[str]] = None
- limit: int = Field(default=20, le=100)
-
-
-class SearchResult(BaseModel):
- entities: List[EntityResponse]
- events: List[EventResponse]
- documents: List[DocumentResponse]
-
-
-# ========== Stats Schemas ==========
-
-class SystemStats(BaseModel):
- total_entities: int
- total_relationships: int
- total_events: int
- total_documents: int
- entities_by_type: dict
- recent_activity: List[dict]
diff --git a/app/services/__init__.py b/app/services/__init__.py
deleted file mode 100644
index c7f87b77ce421c83e59588e4c341ebab500c3c41..0000000000000000000000000000000000000000
--- a/app/services/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Services module
diff --git a/app/services/__pycache__/__init__.cpython-311.pyc b/app/services/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index bdd1095f0f04ab7b53c9f32b8bbce7e4a48236e3..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/brazil_apis.cpython-311.pyc b/app/services/__pycache__/brazil_apis.cpython-311.pyc
deleted file mode 100644
index 9751973d578e79e328d2912fe354747fb95d79a9..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/brazil_apis.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/geocoding.cpython-311.pyc b/app/services/__pycache__/geocoding.cpython-311.pyc
deleted file mode 100644
index 664fbab62acd7d6c1db2527f8ab8b4b7a11662e9..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/geocoding.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/investigation.cpython-311.pyc b/app/services/__pycache__/investigation.cpython-311.pyc
deleted file mode 100644
index 219ab0bfb74e241ee442a062277765b2d3f84c26..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/investigation.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/investigator_agent.cpython-311.pyc b/app/services/__pycache__/investigator_agent.cpython-311.pyc
deleted file mode 100644
index 17cd96446bf23aa53179b163aba52d3f2b0ece2e..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/investigator_agent.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/lancer.cpython-311.pyc b/app/services/__pycache__/lancer.cpython-311.pyc
deleted file mode 100644
index 0415c81ac5d96a648b50a300ce010fe32a9a53bb..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/lancer.cpython-311.pyc and /dev/null differ
diff --git a/app/services/__pycache__/transparencia_api.cpython-311.pyc b/app/services/__pycache__/transparencia_api.cpython-311.pyc
deleted file mode 100644
index 8758135b5b049f71eceec09add4595fb7b11dd12..0000000000000000000000000000000000000000
Binary files a/app/services/__pycache__/transparencia_api.cpython-311.pyc and /dev/null differ
diff --git a/app/services/aethermap_client.py b/app/services/aethermap_client.py
deleted file mode 100644
index 2e9a6490f843a94391b376e528db87554c8e31cf..0000000000000000000000000000000000000000
--- a/app/services/aethermap_client.py
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
-AetherMap Client
-Client para integração com AetherMap API - busca semântica, NER e análise de grafos.
-"""
-import httpx
-import json
-import io
-from typing import List, Dict, Any, Optional
-from dataclasses import dataclass, field
-from datetime import datetime
-import logging
-
-from app.config import settings
-
-logger = logging.getLogger(__name__)
-
-
-# URL base do AetherMap (HuggingFace Space)
-AETHERMAP_URL = getattr(settings, 'aethermap_url', 'https://madras1-aethermap.hf.space')
-
-
-@dataclass
-class ProcessResult:
- """Resultado do processamento de documentos"""
- job_id: str
- num_documents: int
- num_clusters: int
- num_noise: int
- metrics: Dict[str, Any] = field(default_factory=dict)
- cluster_analysis: Dict[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class SearchResult:
- """Resultado de busca semântica"""
- summary: str # Resposta RAG gerada pelo LLM
- results: List[Dict[str, Any]] = field(default_factory=list)
-
-
-@dataclass
-class EntityNode:
- """Nó de entidade no grafo"""
- entity: str
- entity_type: str
- docs: int
- degree: int = 0
- centrality: float = 0.0
- role: str = "peripheral" # hub, connector, peripheral
-
-
-@dataclass
-class EntityEdge:
- """Aresta do grafo de entidades"""
- source_entity: str
- target_entity: str
- weight: int
- reason: str
-
-
-@dataclass
-class EntityGraphResult:
- """Resultado da extração de entidades"""
- nodes: List[EntityNode] = field(default_factory=list)
- edges: List[EntityEdge] = field(default_factory=list)
- hubs: List[Dict[str, Any]] = field(default_factory=list)
- insights: Dict[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class GraphAnalysis:
- """Análise do grafo via LLM"""
- analysis: str
- key_entities: List[str] = field(default_factory=list)
- relationships: List[str] = field(default_factory=list)
-
-
-class AetherMapClient:
- """
- Client para AetherMap API.
-
- Funcionalidades:
- - Processamento de documentos (embeddings + clusters)
- - Busca semântica RAG (FAISS + BM25 + reranking + LLM)
- - Extração de entidades NER
- - Análise de grafo via LLM
- """
-
- def __init__(self, base_url: str = None, timeout: float = 600.0):
- self.base_url = (base_url or AETHERMAP_URL).rstrip('/')
- self.timeout = timeout
- self._current_job_id: Optional[str] = None
-
- @property
- def current_job_id(self) -> Optional[str]:
- """Retorna o job_id atual"""
- return self._current_job_id
-
- async def process_documents(
- self,
- texts: List[str],
- fast_mode: bool = True,
- min_cluster_size: int = 0,
- min_samples: int = 0
- ) -> ProcessResult:
- """
- Processa uma lista de textos gerando embeddings e clusters.
-
- Args:
- texts: Lista de textos/documentos
- fast_mode: Se True, usa PCA (rápido). Se False, usa UMAP (preciso)
- min_cluster_size: Tamanho mínimo do cluster (0=auto)
- min_samples: Mínimo de amostras (0=auto)
-
- Returns:
- ProcessResult com job_id e métricas
- """
- # Criar arquivo TXT em memória
- content = "\n".join(texts)
- file_bytes = content.encode('utf-8')
-
- try:
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- files = {
- 'file': ('documents.txt', io.BytesIO(file_bytes), 'text/plain')
- }
- data = {
- 'n_samples': str(len(texts)),
- 'fast_mode': 'true' if fast_mode else 'false',
- 'min_cluster_size': str(min_cluster_size),
- 'min_samples': str(min_samples)
- }
-
- logger.info(f"AetherMap: Processando {len(texts)} documentos para {self.base_url}/process/")
-
- response = await client.post(
- f"{self.base_url}/process/",
- files=files,
- data=data
- )
-
- logger.info(f"AetherMap: Response status {response.status_code}")
-
- if response.status_code != 200:
- error_text = response.text[:500] if response.text else "No response body"
- logger.error(f"AetherMap error: {response.status_code} - {error_text}")
- raise Exception(f"AetherMap error: {response.status_code} - {error_text}")
-
- result = response.json()
-
- self._current_job_id = result.get('job_id')
- metadata = result.get('metadata', {})
-
- logger.info(f"AetherMap: Job criado {self._current_job_id}")
-
- return ProcessResult(
- job_id=self._current_job_id or "unknown",
- num_documents=metadata.get('num_documents_processed', len(texts)),
- num_clusters=metadata.get('num_clusters_found', 0),
- num_noise=metadata.get('num_noise_points', 0),
- metrics=result.get('metrics', {}),
- cluster_analysis=result.get('cluster_analysis', {})
- )
- except httpx.TimeoutException:
- logger.error(f"AetherMap: Timeout ao conectar com {self.base_url}")
- raise Exception(f"Timeout: AetherMap Space pode estar dormindo. Tente novamente em alguns segundos.")
- except httpx.ConnectError as e:
- logger.error(f"AetherMap: Erro de conexão: {e}")
- raise Exception(f"Erro de conexão com AetherMap: {e}")
- except Exception as e:
- logger.error(f"AetherMap: Erro inesperado: {e}")
- raise
-
- async def semantic_search(
- self,
- query: str,
- job_id: str = None,
- turbo_mode: bool = False
- ) -> SearchResult:
- """
- Busca semântica RAG híbrida nos documentos processados.
-
- Args:
- query: Termo de busca
- job_id: ID do job (se não fornecido, usa o último)
- turbo_mode: Se True, busca mais rápida (menos precisa)
-
- Returns:
- SearchResult com resumo e resultados
- """
- job_id = job_id or self._current_job_id
- if not job_id:
- raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")
-
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- data = {
- 'query': query,
- 'job_id': job_id,
- 'turbo_mode': 'true' if turbo_mode else 'false'
- }
-
- logger.info(f"AetherMap: Buscando '{query}'...")
-
- response = await client.post(
- f"{self.base_url}/search/",
- data=data
- )
-
- if response.status_code != 200:
- raise Exception(f"AetherMap search error: {response.status_code} - {response.text}")
-
- result = response.json()
-
- return SearchResult(
- summary=result.get('summary', ''),
- results=result.get('results', [])
- )
-
- async def extract_entities(self, job_id: str = None) -> EntityGraphResult:
- """
- Extrai entidades nomeadas (NER) e cria grafo de conexões.
-
- Args:
- job_id: ID do job (se não fornecido, usa o último)
-
- Returns:
- EntityGraphResult com nós, arestas e insights
- """
- job_id = job_id or self._current_job_id
- if not job_id:
- raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")
-
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- data = {'job_id': job_id}
-
- logger.info(f"AetherMap: Extraindo entidades...")
-
- response = await client.post(
- f"{self.base_url}/entity_graph/",
- data=data
- )
-
- if response.status_code != 200:
- raise Exception(f"AetherMap entity_graph error: {response.status_code} - {response.text}")
-
- result = response.json()
-
- # Converter para dataclasses
- nodes = [
- EntityNode(
- entity=n.get('entity', ''),
- entity_type=n.get('type', ''),
- docs=n.get('docs', 0),
- degree=n.get('degree', 0),
- centrality=n.get('centrality', 0.0),
- role=n.get('role', 'peripheral')
- )
- for n in result.get('nodes', [])
- ]
-
- edges = [
- EntityEdge(
- source_entity=e.get('source_entity', ''),
- target_entity=e.get('target_entity', ''),
- weight=e.get('weight', 0),
- reason=e.get('reason', '')
- )
- for e in result.get('edges', [])
- ]
-
- return EntityGraphResult(
- nodes=nodes,
- edges=edges,
- hubs=result.get('hubs', []),
- insights=result.get('insights', {})
- )
-
- async def analyze_graph(self, job_id: str = None) -> GraphAnalysis:
- """
- Usa LLM para analisar o Knowledge Graph e extrair insights.
-
- Args:
- job_id: ID do job (se não fornecido, usa o último)
-
- Returns:
- GraphAnalysis com análise textual
- """
- job_id = job_id or self._current_job_id
- if not job_id:
- raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")
-
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- data = {'job_id': job_id}
-
- logger.info(f"AetherMap: Analisando grafo com LLM...")
-
- response = await client.post(
- f"{self.base_url}/analyze_graph/",
- data=data
- )
-
- if response.status_code != 200:
- raise Exception(f"AetherMap analyze_graph error: {response.status_code} - {response.text}")
-
- result = response.json()
-
- return GraphAnalysis(
- analysis=result.get('analysis', ''),
- key_entities=result.get('key_entities', []),
- relationships=result.get('relationships', [])
- )
-
- async def describe_clusters(self, job_id: str = None) -> Dict[str, Any]:
- """
- Usa LLM para descrever cada cluster encontrado.
-
- Args:
- job_id: ID do job (se não fornecido, usa o último)
-
- Returns:
- Dict com insights por cluster
- """
- job_id = job_id or self._current_job_id
- if not job_id:
- raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")
-
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- data = {'job_id': job_id}
-
- logger.info(f"AetherMap: Descrevendo clusters...")
-
- response = await client.post(
- f"{self.base_url}/describe_clusters/",
- data=data
- )
-
- if response.status_code != 200:
- raise Exception(f"AetherMap describe_clusters error: {response.status_code} - {response.text}")
-
- return response.json()
-
-
-# Instância global do client
-aethermap = AetherMapClient()
diff --git a/app/services/analysis/__init__.py b/app/services/analysis/__init__.py
deleted file mode 100644
index 18e8fe19c13e9ec59fb147e63518a8ddbeef5f25..0000000000000000000000000000000000000000
--- a/app/services/analysis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Analysis services
diff --git a/app/services/brazil_apis.py b/app/services/brazil_apis.py
deleted file mode 100644
index 3cf938529a35708355664dd05f60288519c0d7df..0000000000000000000000000000000000000000
--- a/app/services/brazil_apis.py
+++ /dev/null
@@ -1,218 +0,0 @@
-"""
-Brazilian Data APIs Service
-Consolidates access to public Brazilian data APIs for investigation
-"""
-import httpx
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass, field
-import re
-
-
-# API URLs
-CNPJA_URL = "https://api.cnpja.com.br/office"
-OPENCNPJ_URL = "https://api.opencnpj.org/v1/cnpj"
-BRASILAPI_CNPJ = "https://brasilapi.com.br/api/cnpj/v1"
-BRASILAPI_CEP = "https://brasilapi.com.br/api/cep/v2"
-
-
-@dataclass
-class CompanyData:
- """Data structure for company information"""
- cnpj: str
- razao_social: str = ""
- nome_fantasia: str = ""
- situacao: str = ""
- data_abertura: str = ""
- natureza_juridica: str = ""
- capital_social: float = 0.0
- porte: str = ""
-
- # Address
- logradouro: str = ""
- numero: str = ""
- complemento: str = ""
- bairro: str = ""
- cidade: str = ""
- uf: str = ""
- cep: str = ""
-
- # Contact
- telefone: str = ""
- email: str = ""
-
- # Activity
- cnae_principal: str = ""
- cnae_descricao: str = ""
- cnaes_secundarios: List[str] = field(default_factory=list)
-
- # Partners/Owners
- socios: List[Dict[str, Any]] = field(default_factory=list)
-
- # Source
- fonte: str = ""
-
-
-def clean_cnpj(cnpj: str) -> str:
- """Remove formatting from CNPJ"""
- return re.sub(r'[^0-9]', '', cnpj)
-
-
-async def consultar_cnpj(cnpj: str) -> Optional[CompanyData]:
- """
- Query CNPJ data from available APIs.
- Tries BrasilAPI first (more reliable), then falls back to others.
- """
- cnpj_clean = clean_cnpj(cnpj)
-
- if len(cnpj_clean) != 14:
- return None
-
- # Try BrasilAPI first
- result = await _query_brasilapi(cnpj_clean)
- if result:
- return result
-
- # Fallback to OpenCNPJ
- result = await _query_opencnpj(cnpj_clean)
- if result:
- return result
-
- return None
-
-
-async def _query_brasilapi(cnpj: str) -> Optional[CompanyData]:
- """Query BrasilAPI for CNPJ data"""
- try:
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.get(f"{BRASILAPI_CNPJ}/{cnpj}")
-
- if response.status_code != 200:
- return None
-
- data = response.json()
-
- # Parse partners
- socios = []
- for socio in data.get("qsa", []):
- socios.append({
- "nome": socio.get("nome_socio", ""),
- "qualificacao": socio.get("qualificacao_socio", ""),
- "cpf_cnpj": socio.get("cnpj_cpf_do_socio", ""),
- "data_entrada": socio.get("data_entrada_sociedade", "")
- })
-
- # Parse CNAEs
- cnaes_sec = []
- for cnae in data.get("cnaes_secundarios", []):
- if isinstance(cnae, dict):
- cnaes_sec.append(f"{cnae.get('codigo', '')} - {cnae.get('descricao', '')}")
- else:
- cnaes_sec.append(str(cnae))
-
- return CompanyData(
- cnpj=cnpj,
- razao_social=data.get("razao_social", ""),
- nome_fantasia=data.get("nome_fantasia", ""),
- situacao=data.get("descricao_situacao_cadastral", ""),
- data_abertura=data.get("data_inicio_atividade", ""),
- natureza_juridica=data.get("natureza_juridica", ""),
- capital_social=float(data.get("capital_social", 0)),
- porte=data.get("porte", ""),
- logradouro=data.get("logradouro", ""),
- numero=data.get("numero", ""),
- complemento=data.get("complemento", ""),
- bairro=data.get("bairro", ""),
- cidade=data.get("municipio", ""),
- uf=data.get("uf", ""),
- cep=data.get("cep", ""),
- telefone=data.get("ddd_telefone_1", ""),
- email=data.get("email", ""),
- cnae_principal=str(data.get("cnae_fiscal", "")),
- cnae_descricao=data.get("cnae_fiscal_descricao", ""),
- cnaes_secundarios=cnaes_sec,
- socios=socios,
- fonte="BrasilAPI"
- )
-
- except Exception as e:
- print(f"BrasilAPI error: {e}")
- return None
-
-
-async def _query_opencnpj(cnpj: str) -> Optional[CompanyData]:
- """Query OpenCNPJ API"""
- try:
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.get(f"{OPENCNPJ_URL}/{cnpj}")
-
- if response.status_code != 200:
- return None
-
- data = response.json()
-
- # Parse partners
- socios = []
- for socio in data.get("socios", []):
- socios.append({
- "nome": socio.get("nome", ""),
- "qualificacao": socio.get("qualificacao", ""),
- "cpf_cnpj": "",
- "data_entrada": socio.get("data_entrada", "")
- })
-
- return CompanyData(
- cnpj=cnpj,
- razao_social=data.get("razao_social", ""),
- nome_fantasia=data.get("nome_fantasia", ""),
- situacao=data.get("situacao_cadastral", ""),
- data_abertura=data.get("data_inicio_atividade", ""),
- natureza_juridica=data.get("natureza_juridica", ""),
- capital_social=float(data.get("capital_social", 0) or 0),
- porte=data.get("porte", ""),
- logradouro=data.get("logradouro", ""),
- numero=data.get("numero", ""),
- complemento=data.get("complemento", ""),
- bairro=data.get("bairro", ""),
- cidade=data.get("municipio", ""),
- uf=data.get("uf", ""),
- cep=data.get("cep", ""),
- telefone=data.get("telefone", ""),
- email=data.get("email", ""),
- cnae_principal=data.get("cnae_principal", {}).get("codigo", ""),
- cnae_descricao=data.get("cnae_principal", {}).get("descricao", ""),
- cnaes_secundarios=[],
- socios=socios,
- fonte="OpenCNPJ"
- )
-
- except Exception as e:
- print(f"OpenCNPJ error: {e}")
- return None
-
-
-async def consultar_cep(cep: str) -> Optional[Dict[str, Any]]:
- """Query address by CEP"""
- cep_clean = re.sub(r'[^0-9]', '', cep)
-
- try:
- async with httpx.AsyncClient(timeout=15.0) as client:
- response = await client.get(f"{BRASILAPI_CEP}/{cep_clean}")
-
- if response.status_code != 200:
- return None
-
- return response.json()
-
- except Exception as e:
- print(f"CEP query error: {e}")
- return None
-
-
-async def buscar_empresas_por_nome(nome: str, uf: Optional[str] = None) -> List[Dict[str, Any]]:
- """
- Search companies by name using web search (via Lancer).
- This is a workaround since direct name search APIs are paid.
- """
- # This would need Lancer integration for web search
- # For now, return empty - will be filled by investigation service
- return []
diff --git a/app/services/chat.py b/app/services/chat.py
deleted file mode 100644
index 89595f334653e11a19d2103c28ccfaeb97110844..0000000000000000000000000000000000000000
--- a/app/services/chat.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""
-Chat Service - Intelligent chat with RAG capabilities
-Uses local database + Lancer for comprehensive responses
-"""
-import httpx
-from typing import Optional, List, Dict, Any
-from sqlalchemy.orm import Session
-
-from app.config import settings
-from app.models.entity import Entity, Relationship
-
-
-LANCER_URL = "https://madras1-lancer.hf.space/api/v1"
-
-SYSTEM_PROMPT = """Você é um assistente de inteligência do NUMIDIUM.
-Você tem acesso a um grafo de conhecimento com entidades e relacionamentos,
-e pode pesquisar na web para informações atualizadas.
-
-Responda em português brasileiro de forma clara e direta.
-Se não tiver certeza, diga que não sabe em vez de inventar."""
-
-
-class ChatService:
- """Chat service with RAG using local database and Lancer"""
-
- def __init__(self):
- self.api_url = "https://api.cerebras.ai/v1/chat/completions"
- self.conversation_history: Dict[str, List[Dict[str, str]]] = {}
-
- def _get_history(self, session_id: Optional[str]) -> List[Dict[str, str]]:
- key = session_id or "default"
- if key not in self.conversation_history:
- self.conversation_history[key] = []
- return self.conversation_history[key]
-
- def clear_history(self, session_id: Optional[str] = None):
- """Clear conversation history"""
- key = session_id or "default"
- self.conversation_history.pop(key, None)
-
- def _get_local_context(self, query: str, db: Session, limit: int = 5) -> str:
- """Get relevant entities from local database"""
- # Search entities by name
- entities = db.query(Entity).filter(
- Entity.name.ilike(f"%{query}%")
- ).limit(limit).all()
-
- # Also search by description
- if len(entities) < limit:
- desc_entities = db.query(Entity).filter(
- Entity.description.ilike(f"%{query}%")
- ).limit(limit - len(entities)).all()
- entities.extend(desc_entities)
-
- if not entities:
- # Try splitting query into words
- words = query.split()
- for word in words:
- if len(word) > 3:
- word_entities = db.query(Entity).filter(
- Entity.name.ilike(f"%{word}%")
- ).limit(2).all()
- entities.extend(word_entities)
-
- if not entities:
- return ""
-
- context_parts = []
- seen_ids = set()
-
- for entity in entities:
- if entity.id in seen_ids:
- continue
- seen_ids.add(entity.id)
-
- ctx = f"• {entity.name} ({entity.type})"
- if entity.description:
- ctx += f": {entity.description[:200]}"
-
- # Get relationships
- relationships = db.query(Relationship).filter(
- (Relationship.source_id == entity.id) |
- (Relationship.target_id == entity.id)
- ).limit(5).all()
-
- if relationships:
- related = []
- for rel in relationships:
- if rel.source_id == entity.id:
- target = db.query(Entity).filter(Entity.id == rel.target_id).first()
- if target:
- related.append(f"{rel.type} → {target.name}")
- else:
- source = db.query(Entity).filter(Entity.id == rel.source_id).first()
- if source:
- related.append(f"{source.name} → {rel.type}")
-
- if related:
- ctx += f" | Relações: {', '.join(related[:3])}"
-
- context_parts.append(ctx)
-
- return "\n".join(context_parts)
-
- async def _get_web_context(self, query: str) -> str:
- """Get context from Lancer web search"""
- try:
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.post(
- f"{LANCER_URL}/search",
- json={
- "query": query,
- "max_results": 5,
- "include_answer": True
- }
- )
-
- if response.status_code == 200:
- data = response.json()
- if data.get("answer"):
- return f"Informações da web:\n{data['answer'][:1000]}"
-
- return ""
- except Exception as e:
- print(f"Lancer error: {e}")
- return ""
-
- async def _call_llm(self, messages: List[Dict[str, str]]) -> str:
- """Call Cerebras LLM"""
- try:
- async with httpx.AsyncClient(timeout=60.0) as client:
- response = await client.post(
- self.api_url,
- headers={
- "Authorization": f"Bearer {settings.cerebras_api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": "qwen-3-32b",
- "messages": messages,
- "temperature": 0.7,
- "max_tokens": 2048
- }
- )
-
- if response.status_code == 200:
- data = response.json()
- return data["choices"][0]["message"]["content"]
- else:
- return f"Erro na API: {response.status_code}"
-
- except Exception as e:
- return f"Erro: {str(e)}"
-
- async def chat(
- self,
- message: str,
- db: Session,
- use_web: bool = True,
- use_history: bool = True,
- session_id: Optional[str] = None
- ) -> Dict[str, Any]:
- """Process chat message with RAG"""
- history = self._get_history(session_id)
-
- # Get local context
- local_context = self._get_local_context(message, db)
-
- # Get web context if enabled
- web_context = ""
- if use_web:
- web_context = await self._get_web_context(message)
-
- # Build context
- context_parts = []
- if local_context:
- context_parts.append(f"📊 Conhecimento local:\n{local_context}")
- if web_context:
- context_parts.append(f"🌐 {web_context}")
-
- context = "\n\n".join(context_parts) if context_parts else "Nenhum contexto disponível."
-
- # Build messages
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-
- if use_history and history:
- messages.extend(history[-6:])
-
- user_message = f"""Contexto:
-{context}
-
-Pergunta: {message}"""
-
- messages.append({"role": "user", "content": user_message})
-
- # Call LLM
- response = await self._call_llm(messages)
-
- # Store history
- if use_history:
- history.append({"role": "user", "content": message})
- history.append({"role": "assistant", "content": response})
-
- return {
- "answer": response,
- "local_context_used": bool(local_context),
- "web_context_used": bool(web_context),
- "entities_found": local_context.count("•") if local_context else 0
- }
-
-
-# Singleton
-chat_service = ChatService()
diff --git a/app/services/geocoding.py b/app/services/geocoding.py
deleted file mode 100644
index 06863f2be60350c5cd8251ca8cfa063809135cea..0000000000000000000000000000000000000000
--- a/app/services/geocoding.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Geocoding Service - Uses Nominatim (OpenStreetMap) for free geocoding
-"""
-import httpx
-from typing import Optional, Tuple
-import asyncio
-
-
-NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
-USER_AGENT = "NUMIDIUM/1.0 (Intelligence System)"
-
-
-async def geocode(location_name: str) -> Optional[Tuple[float, float]]:
- """
- Convert a location name to coordinates using Nominatim.
- Returns (latitude, longitude) or None if not found.
-
- Note: Nominatim has rate limits (1 request/second), so be careful with batch operations.
- """
- try:
- async with httpx.AsyncClient(timeout=10.0) as client:
- response = await client.get(
- NOMINATIM_URL,
- params={
- "q": location_name,
- "format": "json",
- "limit": 1,
- "addressdetails": 0
- },
- headers={
- "User-Agent": USER_AGENT
- }
- )
-
- if response.status_code == 200:
- data = response.json()
- if data and len(data) > 0:
- lat = float(data[0]["lat"])
- lon = float(data[0]["lon"])
- return (lat, lon)
-
- return None
-
- except Exception as e:
- print(f"Geocoding error for '{location_name}': {e}")
- return None
-
-
-async def geocode_batch(location_names: list[str], delay: float = 1.0) -> dict[str, Tuple[float, float]]:
- """
- Geocode multiple locations with proper rate limiting.
- Returns a dict mapping location names to (lat, lon) tuples.
- """
- results = {}
-
- for name in location_names:
- coords = await geocode(name)
- if coords:
- results[name] = coords
- # Respect Nominatim rate limits
- await asyncio.sleep(delay)
-
- return results
diff --git a/app/services/ibge_api.py b/app/services/ibge_api.py
deleted file mode 100644
index 26d5000ed2798dfe0f7a1ce55603f305dad74783..0000000000000000000000000000000000000000
--- a/app/services/ibge_api.py
+++ /dev/null
@@ -1,192 +0,0 @@
-"""
-IBGE API Service
-Access to Brazilian geographic and demographic data
-"""
-import httpx
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass
-
-
-IBGE_BASE_URL = "https://servicodados.ibge.gov.br/api/v1"
-
-
-@dataclass
-class Estado:
- """Brazilian state data"""
- id: int
- sigla: str
- nome: str
- regiao: str
-
-
-@dataclass
-class Municipio:
- """Brazilian municipality data"""
- id: int
- nome: str
- estado_sigla: str
- estado_nome: str
- regiao: str
- # Optional enriched data
- populacao: Optional[int] = None
- area_km2: Optional[float] = None
-
-
-async def listar_estados() -> List[Estado]:
- """List all Brazilian states"""
- try:
- async with httpx.AsyncClient(timeout=15.0) as client:
- response = await client.get(f"{IBGE_BASE_URL}/localidades/estados")
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- estados = []
-
- for item in data:
- estados.append(Estado(
- id=item["id"],
- sigla=item["sigla"],
- nome=item["nome"],
- regiao=item.get("regiao", {}).get("nome", "")
- ))
-
- return sorted(estados, key=lambda x: x.nome)
-
- except Exception as e:
- print(f"IBGE estados error: {e}")
- return []
-
-
-async def listar_municipios(uf: str) -> List[Municipio]:
- """List all municipalities in a state"""
- try:
- async with httpx.AsyncClient(timeout=15.0) as client:
- response = await client.get(
- f"{IBGE_BASE_URL}/localidades/estados/{uf}/municipios"
- )
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- municipios = []
-
- for item in data:
- municipios.append(Municipio(
- id=item["id"],
- nome=item["nome"],
- estado_sigla=uf.upper(),
- estado_nome=item.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {}).get("nome", ""),
- regiao=item.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {}).get("regiao", {}).get("nome", "")
- ))
-
- return sorted(municipios, key=lambda x: x.nome)
-
- except Exception as e:
- print(f"IBGE municipios error: {e}")
- return []
-
-
-async def buscar_municipio(nome: str, uf: Optional[str] = None) -> List[Municipio]:
- """Search for municipalities by name"""
- try:
- # If UF provided, search only that state
- if uf:
- municipios = await listar_municipios(uf)
- return [m for m in municipios if nome.lower() in m.nome.lower()]
-
- # Otherwise search all states (slower)
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.get(f"{IBGE_BASE_URL}/localidades/municipios")
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- results = []
-
- for item in data:
- if nome.lower() in item["nome"].lower():
- uf_info = item.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {})
- results.append(Municipio(
- id=item["id"],
- nome=item["nome"],
- estado_sigla=uf_info.get("sigla", ""),
- estado_nome=uf_info.get("nome", ""),
- regiao=uf_info.get("regiao", {}).get("nome", "")
- ))
-
- return results[:20] # Limit results
-
- except Exception as e:
- print(f"IBGE search error: {e}")
- return []
-
-
-async def obter_municipio_por_id(id_municipio: int) -> Optional[Municipio]:
- """Get municipality by IBGE code"""
- try:
- async with httpx.AsyncClient(timeout=15.0) as client:
- response = await client.get(
- f"{IBGE_BASE_URL}/localidades/municipios/{id_municipio}"
- )
-
- if response.status_code != 200:
- return None
-
- item = response.json()
- uf_info = item.get("microrregiao", {}).get("mesorregiao", {}).get("UF", {})
-
- return Municipio(
- id=item["id"],
- nome=item["nome"],
- estado_sigla=uf_info.get("sigla", ""),
- estado_nome=uf_info.get("nome", ""),
- regiao=uf_info.get("regiao", {}).get("nome", "")
- )
-
- except Exception as e:
- print(f"IBGE municipio error: {e}")
- return None
-
-
-async def enriquecer_localizacao(cidade: str, uf: Optional[str] = None) -> Dict[str, Any]:
- """
- Enrich a location name with IBGE data.
- Useful for adding context to extracted locations.
- """
- resultado = {
- "cidade_original": cidade,
- "encontrado": False,
- "ibge_codigo": None,
- "cidade": None,
- "estado": None,
- "estado_sigla": None,
- "regiao": None
- }
-
- municipios = await buscar_municipio(cidade, uf)
-
- if municipios:
- # Take best match (exact or first)
- melhor = None
- for m in municipios:
- if m.nome.lower() == cidade.lower():
- melhor = m
- break
-
- if not melhor:
- melhor = municipios[0]
-
- resultado.update({
- "encontrado": True,
- "ibge_codigo": melhor.id,
- "cidade": melhor.nome,
- "estado": melhor.estado_nome,
- "estado_sigla": melhor.estado_sigla,
- "regiao": melhor.regiao
- })
-
- return resultado
diff --git a/app/services/ingestion/__init__.py b/app/services/ingestion/__init__.py
deleted file mode 100644
index 53751fc389795a6893e21379a16b0680f55cda41..0000000000000000000000000000000000000000
--- a/app/services/ingestion/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Ingestion services
-from app.services.ingestion.wikipedia import wikipedia_scraper
-from app.services.ingestion.news import news_service
diff --git a/app/services/ingestion/__pycache__/__init__.cpython-311.pyc b/app/services/ingestion/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index de09d686a52c85f16de0eac33cbd28ca9065604d..0000000000000000000000000000000000000000
Binary files a/app/services/ingestion/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/services/ingestion/__pycache__/news.cpython-311.pyc b/app/services/ingestion/__pycache__/news.cpython-311.pyc
deleted file mode 100644
index 47a4ad23456ff8907ab2a47285b1b74cd099a8fe..0000000000000000000000000000000000000000
Binary files a/app/services/ingestion/__pycache__/news.cpython-311.pyc and /dev/null differ
diff --git a/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc b/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc
deleted file mode 100644
index 215244f9f9e1bdf8dc6071c4e0237f41318f352a..0000000000000000000000000000000000000000
Binary files a/app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc and /dev/null differ
diff --git a/app/services/ingestion/news.py b/app/services/ingestion/news.py
deleted file mode 100644
index 1aba8df40e8cfb6d2cc19900fea89cf6ce04cf14..0000000000000000000000000000000000000000
--- a/app/services/ingestion/news.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-News API Client Service
-Usa RSS feeds públicos para não precisar de API key
-"""
-import feedparser
-import requests
-from typing import List, Dict
-from datetime import datetime
-import re
-
-
-class NewsService:
- """Serviço para buscar notícias de fontes públicas via RSS"""
-
- # RSS feeds públicos brasileiros e internacionais
- RSS_FEEDS = {
- "g1": "https://g1.globo.com/rss/g1/",
- "folha": "https://feeds.folha.uol.com.br/folha/rss/rss091.xml",
- "bbc_brasil": "https://www.bbc.com/portuguese/articles/rss.xml",
- "reuters": "https://www.reutersagency.com/feed/",
- "google_news_br": "https://news.google.com/rss?hl=pt-BR&gl=BR&ceid=BR:pt-419"
- }
-
- def fetch_feed(self, feed_url: str) -> List[Dict]:
- """Busca artigos de um feed RSS"""
- try:
- feed = feedparser.parse(feed_url)
- articles = []
-
- for entry in feed.entries[:20]: # Limitar a 20 artigos
- published = None
- if hasattr(entry, 'published_parsed') and entry.published_parsed:
- published = datetime(*entry.published_parsed[:6])
-
- articles.append({
- "title": entry.get("title", ""),
- "description": self._clean_html(entry.get("summary", "")),
- "url": entry.get("link", ""),
- "published_at": published,
- "source": feed.feed.get("title", "Unknown")
- })
-
- return articles
- except Exception as e:
- print(f"Error fetching feed {feed_url}: {e}")
- return []
-
- def fetch_all_feeds(self) -> List[Dict]:
- """Busca artigos de todos os feeds configurados"""
- all_articles = []
- for name, url in self.RSS_FEEDS.items():
- articles = self.fetch_feed(url)
- for article in articles:
- article["feed_name"] = name
- all_articles.extend(articles)
- return all_articles
-
- def search_news(self, query: str) -> List[Dict]:
- """
- Busca notícias pelo Google News RSS
- """
- # Google News RSS search
- search_url = f"https://news.google.com/rss/search?q={query}&hl=pt-BR&gl=BR&ceid=BR:pt-419"
- return self.fetch_feed(search_url)
-
- def _clean_html(self, text: str) -> str:
- """Remove HTML tags do texto"""
- clean = re.compile('<.*?>')
- return re.sub(clean, '', text)
-
- def to_document(self, article: Dict) -> Dict:
- """
- Converte um artigo de notícia para o formato Document
- """
- return {
- "title": article["title"],
- "content": article.get("description", ""),
- "doc_type": "news",
- "source": article.get("source", "news"),
- "source_url": article.get("url"),
- "published_at": article.get("published_at")
- }
-
-
-# Singleton instance
-news_service = NewsService()
diff --git a/app/services/ingestion/wikipedia.py b/app/services/ingestion/wikipedia.py
deleted file mode 100644
index 2c64a6f77d4bcd406506966ad4b1c3a75972a8e3..0000000000000000000000000000000000000000
--- a/app/services/ingestion/wikipedia.py
+++ /dev/null
@@ -1,215 +0,0 @@
-"""
-Wikipedia Scraper Service
-"""
-import requests
-from bs4 import BeautifulSoup
-from typing import Optional, Dict, List
-import re
-
-
-class WikipediaScraper:
- """Scraper para extrair dados da Wikipedia"""
-
- BASE_URL = "https://pt.wikipedia.org"
- API_URL = "https://pt.wikipedia.org/w/api.php"
-
- # User-Agent obrigatório para API da Wikipedia
- HEADERS = {
- "User-Agent": "NumidiumBot/1.0 (https://github.com/numidium; contact@numidium.app) Python/3.11"
- }
-
- def search(self, query: str, limit: int = 10) -> List[Dict]:
- """
- Busca artigos na Wikipedia
- """
- try:
- params = {
- "action": "query",
- "list": "search",
- "srsearch": query,
- "srlimit": limit,
- "format": "json"
- }
-
- response = requests.get(
- self.API_URL,
- params=params,
- headers=self.HEADERS,
- timeout=10
- )
- response.raise_for_status()
- data = response.json()
-
- results = []
- for item in data.get("query", {}).get("search", []):
- results.append({
- "title": item["title"],
- "snippet": BeautifulSoup(item["snippet"], "html.parser").get_text(),
- "pageid": item["pageid"]
- })
-
- return results
- except Exception as e:
- print(f"Wikipedia search error: {e}")
- return []
-
- def get_article(self, title: str) -> Optional[Dict]:
- """
- Busca informações completas de um artigo
- """
- try:
- params = {
- "action": "query",
- "titles": title,
- "prop": "extracts|pageimages|coordinates|categories",
- "exintro": True,
- "explaintext": True,
- "pithumbsize": 300,
- "format": "json"
- }
-
- response = requests.get(
- self.API_URL,
- params=params,
- headers=self.HEADERS,
- timeout=10
- )
- response.raise_for_status()
- data = response.json()
-
- pages = data.get("query", {}).get("pages", {})
- for page_id, page in pages.items():
- if page_id == "-1":
- return None
-
- result = {
- "title": page.get("title"),
- "extract": page.get("extract"),
- "pageid": page.get("pageid"),
- "url": f"{self.BASE_URL}/wiki/{page.get('title', '').replace(' ', '_')}",
- "thumbnail": page.get("thumbnail", {}).get("source"),
- "categories": [c["title"].replace("Categoria:", "")
- for c in page.get("categories", [])]
- }
-
- # Coordenadas se disponíveis
- if "coordinates" in page:
- coords = page["coordinates"][0]
- result["latitude"] = coords.get("lat")
- result["longitude"] = coords.get("lon")
-
- return result
-
- return None
- except Exception as e:
- print(f"Wikipedia article error: {e}")
- return None
-
- def get_infobox(self, title: str) -> Dict:
- """
- Tenta extrair dados estruturados do infobox de um artigo
- """
- try:
- url = f"{self.BASE_URL}/wiki/{title.replace(' ', '_')}"
- response = requests.get(url, headers=self.HEADERS, timeout=10)
- soup = BeautifulSoup(response.text, "html.parser")
-
- infobox = soup.find("table", class_="infobox")
- if not infobox:
- return {}
-
- data = {}
- for row in infobox.find_all("tr"):
- header = row.find("th")
- cell = row.find("td")
- if header and cell:
- key = header.get_text(strip=True)
- value = cell.get_text(strip=True)
- # Clean up the value
- value = re.sub(r'\[\d+\]', '', value) # Remove references
- data[key] = value
-
- return data
- except Exception as e:
- print(f"Infobox error: {e}")
- return {}
-
- def scrape_person(self, name: str) -> Optional[Dict]:
- """
- Scrape dados de uma pessoa da Wikipedia
- Retorna dados formatados para criar uma Entity
- """
- article = self.get_article(name)
- if not article:
- return None
-
- infobox = self.get_infobox(name)
-
- return {
- "type": "person",
- "name": article["title"],
- "description": article.get("extract"),
- "source": "wikipedia",
- "source_url": article["url"],
- "properties": {
- "thumbnail": article.get("thumbnail"),
- "categories": article.get("categories", []),
- **infobox
- },
- "latitude": article.get("latitude"),
- "longitude": article.get("longitude")
- }
-
- def scrape_organization(self, name: str) -> Optional[Dict]:
- """
- Scrape dados de uma organização da Wikipedia
- """
- article = self.get_article(name)
- if not article:
- return None
-
- infobox = self.get_infobox(name)
-
- return {
- "type": "organization",
- "name": article["title"],
- "description": article.get("extract"),
- "source": "wikipedia",
- "source_url": article["url"],
- "properties": {
- "thumbnail": article.get("thumbnail"),
- "categories": article.get("categories", []),
- **infobox
- },
- "latitude": article.get("latitude"),
- "longitude": article.get("longitude")
- }
-
- def scrape_location(self, name: str) -> Optional[Dict]:
- """
- Scrape dados de um local da Wikipedia
- """
- article = self.get_article(name)
- if not article:
- return None
-
- infobox = self.get_infobox(name)
-
- return {
- "type": "location",
- "name": article["title"],
- "description": article.get("extract"),
- "source": "wikipedia",
- "source_url": article["url"],
- "properties": {
- "thumbnail": article.get("thumbnail"),
- "categories": article.get("categories", []),
- **infobox
- },
- "latitude": article.get("latitude"),
- "longitude": article.get("longitude")
- }
-
-
-# Singleton instance
-wikipedia_scraper = WikipediaScraper()
diff --git a/app/services/investigation.py b/app/services/investigation.py
deleted file mode 100644
index cfbfc764bbd26579e5e92285959e952e3d8afa7e..0000000000000000000000000000000000000000
--- a/app/services/investigation.py
+++ /dev/null
@@ -1,324 +0,0 @@
-"""
-Investigation Service - Builds comprehensive dossiers
-Combines CNPJ data, transparency/sanctions, Lancer web search, and NER
-"""
-import httpx
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass, field, asdict
-import asyncio
-
-from app.services.brazil_apis import consultar_cnpj, CompanyData
-from app.services.transparencia_api import verificar_sancoes
-# from app.services.tse_api import buscar_politico # TSE API needs fixing
-from app.services import lancer
-from app.services.nlp import entity_extractor
-from app.core.database import get_db
-from app.models.entity import Entity, Relationship
-
-
-LANCER_URL = "https://madras1-lancer.hf.space/api/v1"
-
-
-@dataclass
-class DossierSection:
- """A section of the dossier"""
- titulo: str
- conteudo: Any
- status: str = "ok" # ok, warning, danger, info
- icone: str = "📋"
-
-
-@dataclass
-class Dossier:
- """Complete investigation dossier"""
- tipo: str # "organization" or "person"
- alvo: str # Target name
- cnpj_cpf: Optional[str] = None
-
- # Sections
- dados_cadastrais: Optional[DossierSection] = None
- socios: Optional[DossierSection] = None
- sancoes: Optional[DossierSection] = None
- dados_politicos: Optional[DossierSection] = None # TSE data
- noticias: Optional[DossierSection] = None
- entidades_relacionadas: Optional[DossierSection] = None
-
- # Metadata
- red_flags: List[str] = field(default_factory=list)
- score_risco: int = 0 # 0-100
- data_geracao: str = ""
- fonte_dados: List[str] = field(default_factory=list)
-
-
-async def investigar_empresa(nome_ou_cnpj: str) -> Dossier:
- """
- Investigate a company and build a comprehensive dossier.
- """
- import re
- from datetime import datetime
-
- dossier = Dossier(
- tipo="organization",
- alvo=nome_ou_cnpj,
- data_geracao=datetime.now().isoformat()
- )
-
- # Check if input is CNPJ
- cnpj_clean = re.sub(r'[^0-9]', '', nome_ou_cnpj)
- is_cnpj = len(cnpj_clean) == 14
-
- company_data = None
-
- # 1. Get company data from CNPJ
- if is_cnpj:
- dossier.cnpj_cpf = cnpj_clean
- company_data = await consultar_cnpj(cnpj_clean)
-
- if company_data:
- dossier.alvo = company_data.razao_social or company_data.nome_fantasia or nome_ou_cnpj
- dossier.fonte_dados.append(company_data.fonte)
-
- # Build cadastral section
- dossier.dados_cadastrais = DossierSection(
- titulo="Dados Cadastrais",
- icone="🏢",
- conteudo={
- "cnpj": company_data.cnpj,
- "razao_social": company_data.razao_social,
- "nome_fantasia": company_data.nome_fantasia,
- "situacao": company_data.situacao,
- "data_abertura": company_data.data_abertura,
- "natureza_juridica": company_data.natureza_juridica,
- "capital_social": company_data.capital_social,
- "porte": company_data.porte,
- "endereco": f"{company_data.logradouro}, {company_data.numero} - {company_data.bairro}, {company_data.cidade}/{company_data.uf}",
- "cep": company_data.cep,
- "telefone": company_data.telefone,
- "email": company_data.email,
- "atividade_principal": f"{company_data.cnae_principal} - {company_data.cnae_descricao}"
- }
- )
-
- # Check situação for red flags
- if company_data.situacao and "ATIVA" not in company_data.situacao.upper():
- dossier.red_flags.append(f"⚠️ Situação cadastral: {company_data.situacao}")
- dossier.dados_cadastrais.status = "warning"
-
- # Build partners section
- if company_data.socios:
- dossier.socios = DossierSection(
- titulo=f"Sócios ({len(company_data.socios)})",
- icone="👥",
- conteudo=company_data.socios
- )
-
- # 2. Check sanctions/transparency
- if dossier.cnpj_cpf:
- sancoes = await verificar_sancoes(dossier.cnpj_cpf)
- dossier.fonte_dados.append("Portal da Transparência")
-
- if sancoes["tem_sancoes"]:
- dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções")
- dossier.score_risco += 40
-
- dossier.sancoes = DossierSection(
- titulo=f"Sanções ({sancoes['total_sancoes']})",
- icone="⚠️",
- status="danger",
- conteudo=sancoes
- )
- else:
- dossier.sancoes = DossierSection(
- titulo="Sanções",
- icone="✅",
- status="ok",
- conteudo={"mensagem": "Nenhuma sanção encontrada nos cadastros públicos"}
- )
-
- # 3. Web search for news and context
- search_query = dossier.alvo
- if company_data and company_data.nome_fantasia:
- search_query = company_data.nome_fantasia
-
- try:
- web_result = await lancer.search(f"{search_query} notícias escândalos processos", max_results=8)
-
- if web_result.answer or web_result.results:
- dossier.fonte_dados.append("Lancer Web Search")
-
- news_content = {
- "resumo": web_result.answer or "Sem resumo disponível",
- "fontes": [
- {"titulo": r.title, "url": r.url, "snippet": r.content[:200]}
- for r in web_result.results[:5]
- ]
- }
-
- dossier.noticias = DossierSection(
- titulo="Notícias e Mídia",
- icone="📰",
- conteudo=news_content
- )
-
- # Check for negative keywords in news
- negative_keywords = ["escândalo", "fraude", "corrupção", "prisão", "investigado", "denúncia", "irregularidade"]
- raw_text = (web_result.answer or "").lower()
- for kw in negative_keywords:
- if kw in raw_text:
- dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada nas notícias")
- dossier.noticias.status = "warning"
- dossier.score_risco += 10
- break
- except Exception as e:
- print(f"Web search error: {e}")
-
- # 4. Extract related entities using NER
- if dossier.noticias and dossier.noticias.conteudo.get("resumo"):
- try:
- text_to_analyze = dossier.noticias.conteudo.get("resumo", "")[:3000]
- ner_result = await entity_extractor.extract(text_to_analyze)
-
- if ner_result.entities:
- entities = [
- {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role}
- for e in ner_result.entities[:10]
- ]
-
- dossier.entidades_relacionadas = DossierSection(
- titulo=f"Entidades Relacionadas ({len(entities)})",
- icone="🔗",
- conteudo=entities
- )
- except Exception as e:
- print(f"NER error: {e}")
-
- # Calculate final risk score
- dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5)
-
- return dossier
-
-
-async def investigar_pessoa(nome: str, cpf: Optional[str] = None) -> Dossier:
- """
- Investigate a person and build a dossier.
- Note: CPF data is heavily protected by LGPD, so mainly uses web search.
- """
- from datetime import datetime
-
- dossier = Dossier(
- tipo="person",
- alvo=nome,
- cnpj_cpf=cpf,
- data_geracao=datetime.now().isoformat()
- )
-
- # 1. Check sanctions if CPF provided
- if cpf:
- sancoes = await verificar_sancoes(cpf)
- dossier.fonte_dados.append("Portal da Transparência")
-
- if sancoes["tem_sancoes"]:
- dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções")
- dossier.score_risco += 50
-
- dossier.sancoes = DossierSection(
- titulo=f"Sanções ({sancoes['total_sancoes']})",
- icone="⚠️",
- status="danger",
- conteudo=sancoes
- )
-
- # 2. Check TSE for political data (DISABLED - API needs fixing)
- # try:
- # tse_data = await buscar_politico(nome)
- # if tse_data.get("encontrado"):
- # dossier.fonte_dados.append("TSE (DivulgaCand)")
- # candidaturas = tse_data.get("candidaturas", [])
- # patrimonio = tse_data.get("total_patrimonio", 0)
- # partidos = tse_data.get("partidos", [])
- # dossier.dados_politicos = DossierSection(...)
- # except Exception as e:
- # print(f"TSE search error: {e}")
-
-
- # 3. Web search for information
- try:
- web_result = await lancer.search(f'"{nome}" biografia cargo empresa', max_results=10)
-
- if web_result.answer or web_result.results:
- dossier.fonte_dados.append("Lancer Web Search")
-
- dossier.noticias = DossierSection(
- titulo="Informações Públicas",
- icone="🌐",
- conteudo={
- "resumo": web_result.answer or "Informações limitadas",
- "fontes": [
- {"titulo": r.title, "url": r.url, "snippet": r.content[:200]}
- for r in web_result.results[:5]
- ]
- }
- )
-
- # Check for negative keywords
- negative_keywords = ["preso", "condenado", "investigado", "acusado", "escândalo", "fraude"]
- raw_text = (web_result.answer or "").lower()
- for kw in negative_keywords:
- if kw in raw_text:
- dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada")
- dossier.noticias.status = "warning"
- dossier.score_risco += 15
- break
- except Exception as e:
- print(f"Web search error: {e}")
-
- # 3. Extract related entities
- if dossier.noticias and dossier.noticias.conteudo.get("resumo"):
- try:
- ner_result = await entity_extractor.extract(dossier.noticias.conteudo["resumo"][:2000])
-
- if ner_result.entities:
- entities = [
- {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role}
- for e in ner_result.entities[:10]
- if e.name.lower() != nome.lower() # Exclude the target
- ]
-
- if entities:
- dossier.entidades_relacionadas = DossierSection(
- titulo=f"Conexões ({len(entities)})",
- icone="🔗",
- conteudo=entities
- )
- except Exception as e:
- print(f"NER error: {e}")
-
- dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5)
-
- return dossier
-
-
-def dossier_to_dict(dossier: Dossier) -> Dict[str, Any]:
- """Convert dossier to dictionary for JSON response"""
- result = {
- "tipo": dossier.tipo,
- "alvo": dossier.alvo,
- "cnpj_cpf": dossier.cnpj_cpf,
- "red_flags": dossier.red_flags,
- "score_risco": dossier.score_risco,
- "data_geracao": dossier.data_geracao,
- "fonte_dados": dossier.fonte_dados,
- "secoes": {}
- }
-
- for field_name in ["dados_cadastrais", "socios", "sancoes", "dados_politicos", "noticias", "entidades_relacionadas"]:
- section = getattr(dossier, field_name)
- if section:
- result["secoes"][field_name] = {
- "titulo": section.titulo,
- "icone": section.icone,
- "status": section.status,
- "conteudo": section.conteudo
- }
-
- return result
diff --git a/app/services/investigator_agent.py b/app/services/investigator_agent.py
deleted file mode 100644
index 56b74ad4c994947ed35f3185df53fc586a4232cc..0000000000000000000000000000000000000000
--- a/app/services/investigator_agent.py
+++ /dev/null
@@ -1,659 +0,0 @@
-"""
-Investigator Agent - Autonomous Investigation with Tool Calling
-Uses Cerebras native tool calling for multi-source investigations
-"""
-import json
-import re
-import httpx
-from typing import Optional, List, Dict, Any
-from dataclasses import dataclass, field
-from datetime import datetime
-from sqlalchemy.orm import Session
-
-from app.config import settings
-from app.services import lancer
-from app.services.brazil_apis import consultar_cnpj
-from app.models.entity import Entity, Relationship
-
-
-def sanitize_text(text: str) -> str:
- """
- Clean up text from model that may contain thinking artifacts.
- Only removes thinking tags, does NOT remove valid characters.
- """
- if not text:
- return text
-
- # Remove thinking tags and content between them
- text = re.sub(r'.*?', '', text, flags=re.DOTALL)
- text = re.sub(r'<\|think\|>.*?<\|/think\|>', '', text, flags=re.DOTALL)
-
- # Remove other common model artifacts like <|...|> tags
- text = re.sub(r'<\|.*?\|>', '', text)
-
- # Clean up excessive newlines only
- text = re.sub(r'\n{3,}', '\n\n', text)
-
- return text.strip()
-
-
-@dataclass
-class Finding:
- """A discovery made during investigation"""
- title: str
- content: str
- source: str
- timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
-
-
-@dataclass
-class InvestigationResult:
- """Complete investigation result"""
- mission: str
- findings: List[Finding]
- entities_discovered: List[Dict[str, Any]]
- connections_mapped: List[Dict[str, Any]]
- report: str
- iterations: int
- tools_used: List[str]
- status: str = "completed"
-
-
-# Tool definitions for Cerebras API
-TOOLS = [
- {
- "type": "function",
- "function": {
- "name": "search_entity",
- "description": "Buscar entidade no NUMIDIUM (grafo de conhecimento) por nome. Use para encontrar pessoas, empresas ou locais já conhecidos.",
- "parameters": {
- "type": "object",
- "properties": {
- "query": {
- "type": "string",
- "description": "Nome ou termo para buscar"
- },
- "entity_type": {
- "type": "string",
- "enum": ["person", "organization", "location", "any"],
- "description": "Tipo de entidade (opcional)"
- }
- },
- "required": ["query"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "get_connections",
- "description": "Obter a rede de conexões de uma entidade específica. Retorna entidades relacionadas.",
- "parameters": {
- "type": "object",
- "properties": {
- "entity_id": {
- "type": "string",
- "description": "ID da entidade no NUMIDIUM"
- }
- },
- "required": ["entity_id"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "lookup_cnpj",
- "description": "Consultar dados de uma empresa brasileira pelo CNPJ. Retorna razão social, sócios, endereço, CNAEs, etc.",
- "parameters": {
- "type": "object",
- "properties": {
- "cnpj": {
- "type": "string",
- "description": "CNPJ da empresa (com ou sem formatação)"
- }
- },
- "required": ["cnpj"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "web_search",
- "description": "Pesquisar informações na web. Use para buscar notícias, artigos e informações públicas.",
- "parameters": {
- "type": "object",
- "properties": {
- "query": {
- "type": "string",
- "description": "Termo de busca"
- },
- "freshness": {
- "type": "string",
- "enum": ["day", "week", "month", "any"],
- "description": "Frescor dos resultados",
- "default": "any"
- }
- },
- "required": ["query"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "deep_research",
- "description": "Pesquisa profunda e multi-dimensional sobre um tema. Use para tópicos complexos.",
- "parameters": {
- "type": "object",
- "properties": {
- "topic": {
- "type": "string",
- "description": "Tópico para pesquisa profunda"
- }
- },
- "required": ["topic"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "save_finding",
- "description": "Salvar uma descoberta importante da investigação.",
- "parameters": {
- "type": "object",
- "properties": {
- "title": {
- "type": "string",
- "description": "Título curto da descoberta"
- },
- "content": {
- "type": "string",
- "description": "Conteúdo detalhado"
- },
- "source": {
- "type": "string",
- "description": "Fonte da informação"
- }
- },
- "required": ["title", "content", "source"]
- }
- }
- },
- {
- "type": "function",
- "function": {
- "name": "finish_investigation",
- "description": "Finalizar a investigação e gerar o relatório final.",
- "parameters": {
- "type": "object",
- "properties": {
- "summary": {
- "type": "string",
- "description": "Resumo das descobertas principais"
- }
- },
- "required": ["summary"]
- }
- }
- }
-]
-
-
-SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD. /no_think
-
-Sua missão é investigar temas usando múltiplas fontes de dados:
-- NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos
-- Consulta CNPJ: Dados oficiais de empresas brasileiras (BrasilAPI)
-- Web Search: Pesquisa na internet via Lancer
-
-## Estratégia de Investigação:
-
-1. Comece buscando no NUMIDIUM se já temos informações sobre o alvo
-2. Para empresas brasileiras, consulte o CNPJ para obter sócios e dados
-3. Use web_search para buscar notícias e informações públicas
-4. Para cada sócio/conexão descoberta, considere investigar mais a fundo
-5. Use save_finding para registrar descobertas importantes
-6. Quando tiver informações suficientes, use finish_investigation
-
-## Regras:
-- Seja metódico e siga pistas
-- Não invente informações - use apenas dados das ferramentas
-- Priorize qualidade sobre quantidade
-- Cite sempre as fontes
-- NÃO use pensamento interno ou tags . Responda diretamente."""
-
-
-class InvestigatorAgent:
- """Autonomous investigation agent with tool calling"""
-
- def __init__(self):
- self.api_url = "https://api.cerebras.ai/v1/chat/completions"
- self.api_key = settings.cerebras_api_key
- self.model = "zai-glm-4.7"
-
- # Investigation state
- self.findings: List[Finding] = []
- self.entities_discovered: List[Dict[str, Any]] = []
- self.connections_mapped: List[Dict[str, Any]] = []
- self.tools_used: List[str] = []
- self.messages: List[Dict[str, Any]] = []
- self.db: Optional[Session] = None
-
- def _reset_state(self):
- """Reset investigation state"""
- self.findings = []
- self.entities_discovered = []
- self.connections_mapped = []
- self.tools_used = []
- self.messages = []
-
- async def _call_llm(
- self,
- messages: List[Dict[str, Any]],
- tools: List[Dict] = None
- ) -> Dict[str, Any]:
- """Call Cerebras API with tool calling support"""
- try:
- payload = {
- "model": self.model,
- "messages": messages,
- "temperature": 0.3,
- "max_tokens": 2048,
- }
-
- if tools:
- payload["tools"] = tools
- payload["tool_choice"] = "auto"
- payload["parallel_tool_calls"] = True
-
- async with httpx.AsyncClient(timeout=60.0) as client:
- response = await client.post(
- self.api_url,
- headers={
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json"
- },
- json=payload
- )
-
- if response.status_code != 200:
- raise Exception(f"API error: {response.status_code} - {response.text}")
-
- return response.json()
-
- except Exception as e:
- raise Exception(f"LLM call failed: {str(e)}")
-
- async def _execute_tool(self, tool_name: str, arguments: Dict) -> str:
- """Execute a tool and return the result"""
- self.tools_used.append(tool_name)
-
- try:
- if tool_name == "search_entity":
- return await self._search_entity(
- arguments.get("query", ""),
- arguments.get("entity_type")
- )
-
- elif tool_name == "get_connections":
- return await self._get_connections(arguments.get("entity_id"))
-
- elif tool_name == "lookup_cnpj":
- return await self._lookup_cnpj(arguments.get("cnpj", ""))
-
-
- elif tool_name == "web_search":
- return await self._web_search(
- arguments.get("query", ""),
- arguments.get("freshness", "any")
- )
-
- elif tool_name == "deep_research":
- return await self._deep_research(arguments.get("topic", ""))
-
- elif tool_name == "aether_search":
- return await self._aether_search(arguments.get("query", ""))
-
- elif tool_name == "aether_entities":
- return await self._aether_entities()
-
- elif tool_name == "save_finding":
- finding = Finding(
- title=arguments.get("title", ""),
- content=arguments.get("content", ""),
- source=arguments.get("source", "")
- )
- self.findings.append(finding)
- return f"Descoberta salva: {finding.title}"
-
- elif tool_name == "finish_investigation":
- return f"INVESTIGATION_COMPLETE: {arguments.get('summary', '')}"
-
- else:
- return f"Ferramenta desconhecida: {tool_name}"
-
- except Exception as e:
- return f"Erro ao executar {tool_name}: {str(e)}"
-
- async def _search_entity(self, query: str, entity_type: Optional[str]) -> str:
- """Search entities in database"""
- if not self.db:
- return "Erro: Banco de dados não disponível"
-
- q = self.db.query(Entity).filter(Entity.name.ilike(f"%{query}%"))
- if entity_type and entity_type != "any":
- q = q.filter(Entity.type == entity_type)
-
- entities = q.limit(10).all()
-
- if entities:
- result = []
- for e in entities:
- self.entities_discovered.append({
- "id": str(e.id),
- "name": e.name,
- "type": e.type
- })
- result.append({
- "id": str(e.id),
- "name": e.name,
- "type": e.type,
- "description": e.description[:200] if e.description else None
- })
- return json.dumps(result, ensure_ascii=False, indent=2)
-
- return "Nenhuma entidade encontrada no NUMIDIUM."
-
- async def _get_connections(self, entity_id: str) -> str:
- """Get entity connections"""
- if not self.db:
- return "Erro: Banco de dados não disponível"
-
- relationships = self.db.query(Relationship).filter(
- (Relationship.source_id == entity_id) | (Relationship.target_id == entity_id)
- ).limit(20).all()
-
- if relationships:
- connections = []
- for rel in relationships:
- source = self.db.query(Entity).filter(Entity.id == rel.source_id).first()
- target = self.db.query(Entity).filter(Entity.id == rel.target_id).first()
- if source and target:
- connections.append({
- "source": source.name,
- "target": target.name,
- "type": rel.type
- })
- return json.dumps(connections, ensure_ascii=False, indent=2)
-
- return "Nenhuma conexão encontrada."
-
- async def _lookup_cnpj(self, cnpj: str) -> str:
- """Lookup CNPJ via BrasilAPI"""
- cnpj_clean = cnpj.replace(".", "").replace("/", "").replace("-", "")
- result = await consultar_cnpj(cnpj_clean)
-
- if result:
- data = {
- "razao_social": result.razao_social,
- "nome_fantasia": result.nome_fantasia,
- "situacao": result.situacao,
- "data_abertura": result.data_abertura,
- "capital_social": result.capital_social,
- "endereco": f"{result.logradouro}, {result.numero} - {result.cidade}/{result.uf}",
- "cnae": f"{result.cnae_principal} - {result.cnae_descricao}",
- "socios": result.socios
- }
- return json.dumps(data, ensure_ascii=False, indent=2)
-
- return "CNPJ não encontrado."
-
- async def _lookup_phone(self, phone: str) -> str:
- """Lookup phone number via NumVerify API"""
- # Clean phone number - keep only digits
- phone_clean = "".join(c for c in phone if c.isdigit())
-
- # NumVerify API key (free tier: 100 req/month)
- numverify_key = getattr(settings, 'numverify_api_key', None)
-
- if not numverify_key:
- # Fallback: just do a web search for the number
- return await self._web_search(f'"{phone_clean}" telefone', "any")
-
- try:
- async with httpx.AsyncClient(timeout=10.0) as client:
- response = await client.get(
- "http://apilayer.net/api/validate",
- params={
- "access_key": numverify_key,
- "number": phone_clean,
- "country_code": "", # Auto-detect
- "format": 1
- }
- )
-
- if response.status_code == 200:
- data = response.json()
-
- if data.get("valid"):
- result = {
- "numero": data.get("international_format"),
- "valido": True,
- "pais": data.get("country_name"),
- "codigo_pais": data.get("country_code"),
- "operadora": data.get("carrier"),
- "tipo_linha": data.get("line_type"), # mobile, landline, etc
- "localizacao": data.get("location")
- }
- return json.dumps(result, ensure_ascii=False, indent=2)
- else:
- return f"Número {phone_clean} não é válido ou não foi encontrado."
-
- return "Erro ao consultar número."
-
- except Exception as e:
- # Fallback to web search
- return await self._web_search(f'"{phone_clean}" telefone', "any")
-
- async def _web_search(self, query: str, freshness: str) -> str:
- """Web search via Lancer"""
- try:
- result = await lancer.search(query, max_results=5, freshness=freshness)
- if result.answer:
- return f"Resumo: {result.answer}\n\nFontes: {len(result.results)} resultados"
- return "Nenhum resultado encontrado."
- except Exception as e:
- return f"Erro na busca web: {str(e)}"
-
- async def _deep_research(self, topic: str) -> str:
- """Deep research via Lancer"""
- try:
- result = await lancer.deep_research(topic, max_dimensions=3)
- if result.answer:
- return result.answer
- return "Pesquisa profunda não retornou resultados."
- except Exception as e:
- return f"Erro na pesquisa: {str(e)}"
-
- async def _aether_search(self, query: str) -> str:
- """Semantic search via AetherMap"""
- try:
- # Check if we have a job_id cached
- if not aethermap.current_job_id:
- # Index entities from database first
- if self.db:
- entities = self.db.query(Entity).limit(500).all()
- if entities:
- texts = []
- for e in entities:
- text = f"{e.name} ({e.type})"
- if e.description:
- text += f": {e.description[:500]}"
- texts.append(text)
-
- if texts:
- result = await aethermap.process_documents(texts, fast_mode=True)
- # Continue with search
-
- if aethermap.current_job_id:
- result = await aethermap.semantic_search(query, turbo_mode=True)
- return f"RAG Response:\n{result.summary}"
- else:
- return "Nenhum documento indexado no AetherMap."
-
- except Exception as e:
- return f"Erro no AetherMap search: {str(e)}"
-
- async def _aether_entities(self) -> str:
- """Extract NER entities via AetherMap"""
- try:
- if not aethermap.current_job_id:
- return "Nenhum documento indexado. Use aether_search primeiro."
-
- result = await aethermap.extract_entities()
-
- # Format response
- output = []
-
- if result.hubs:
- output.append("**Entidades Centrais (Hubs):**")
- for hub in result.hubs[:5]:
- output.append(f"- {hub.get('entity')} ({hub.get('type')}): {hub.get('degree')} conexões")
-
- if result.insights:
- output.append(f"\n**Insights:**")
- output.append(f"- Total de conexões: {result.insights.get('total_connections', 0)}")
- output.append(f"- Grau médio: {result.insights.get('avg_degree', 0)}")
-
- if result.edges:
- output.append(f"\n**Top 5 Relacionamentos:**")
- for edge in result.edges[:5]:
- output.append(f"- {edge.source_entity} <-> {edge.target_entity}: {edge.reason}")
-
- return "\n".join(output) if output else "Nenhuma entidade significativa encontrada."
-
- except Exception as e:
- return f"Erro na extração de entidades: {str(e)}"
-
- async def investigate(
- self,
- mission: str,
- db: Session,
- max_iterations: int = 10
- ) -> InvestigationResult:
- """Main investigation loop"""
- self._reset_state()
- self.db = db
-
- self.messages = [
- {"role": "system", "content": SYSTEM_PROMPT},
- {"role": "user", "content": f"Missão de investigação: {mission}\n\nComece a investigação."}
- ]
-
- iteration = 0
- final_summary = ""
-
- while iteration < max_iterations:
- iteration += 1
-
- response = await self._call_llm(self.messages, TOOLS)
-
- choice = response["choices"][0]
- message = choice["message"]
- self.messages.append(message)
-
- tool_calls = message.get("tool_calls", [])
-
- if not tool_calls:
- if message.get("content"):
- final_summary = message["content"]
- break
-
- for tool_call in tool_calls:
- func = tool_call["function"]
- tool_name = func["name"]
-
- try:
- arguments = json.loads(func["arguments"])
- except:
- arguments = {}
-
- result = await self._execute_tool(tool_name, arguments)
-
- if result.startswith("INVESTIGATION_COMPLETE:"):
- final_summary = result.replace("INVESTIGATION_COMPLETE:", "").strip()
- break
-
- self.messages.append({
- "role": "tool",
- "tool_call_id": tool_call["id"],
- "content": result
- })
-
- if final_summary:
- break
-
- if not final_summary:
- final_summary = await self._generate_report(mission)
-
- # Sanitize all text outputs to remove thinking artifacts
- final_summary = sanitize_text(final_summary)
-
- # Sanitize findings content
- sanitized_findings = []
- for f in self.findings:
- sanitized_findings.append(Finding(
- title=sanitize_text(f.title),
- content=sanitize_text(f.content),
- source=f.source,
- timestamp=f.timestamp
- ))
-
- return InvestigationResult(
- mission=mission,
- findings=sanitized_findings,
- entities_discovered=self.entities_discovered,
- connections_mapped=self.connections_mapped,
- report=final_summary,
- iterations=iteration,
- tools_used=list(set(self.tools_used)),
- status="completed"
- )
-
- async def _generate_report(self, mission: str) -> str:
- """Generate final report"""
- findings_text = "\n".join([
- f"- {f.title}: {f.content} (Fonte: {f.source})"
- for f in self.findings
- ]) or "Nenhuma descoberta registrada."
-
- entities_text = ", ".join([
- e.get("name", "Unknown") for e in self.entities_discovered[:10]
- ]) or "Nenhuma entidade."
-
- prompt = f"""Gere um relatório de investigação:
-
-Missão: {mission}
-
-Descobertas:
-{findings_text}
-
-Entidades: {entities_text}
-
-Ferramentas usadas: {', '.join(set(self.tools_used))}
-
-Gere relatório estruturado com: Resumo Executivo, Descobertas, Entidades, Recomendações."""
-
- response = await self._call_llm([
- {"role": "system", "content": "Gere relatórios concisos."},
- {"role": "user", "content": prompt}
- ])
-
- return sanitize_text(response["choices"][0]["message"]["content"])
-
-
-# Singleton
-investigator_agent = InvestigatorAgent()
diff --git a/app/services/lancer.py b/app/services/lancer.py
deleted file mode 100644
index 179868cdd00136f0a9376b6ea6fdff3df5b48abf..0000000000000000000000000000000000000000
--- a/app/services/lancer.py
+++ /dev/null
@@ -1,198 +0,0 @@
-"""
-Lancer Deep Research Service
-Integrates with Lancer Search API for AI-powered research
-"""
-import httpx
-from typing import Optional, List, Dict, Any
-from dataclasses import dataclass
-
-
-LANCER_BASE_URL = "https://madras1-lancer.hf.space"
-
-
-@dataclass
-class SearchResult:
- """Individual search result from Lancer"""
- title: str
- url: str
- content: str
- score: float
- published_date: Optional[str] = None
-
-
-@dataclass
-class ResearchResponse:
- """Response from Lancer research/search"""
- query: str
- answer: Optional[str]
- results: List[SearchResult]
- citations: List[Dict[str, Any]]
- processing_time_ms: float
- raw_text: str # Combined text for NER extraction
-
-
-async def search(
- query: str,
- max_results: int = 10,
- freshness: str = "any"
-) -> ResearchResponse:
- """
- Perform a search with AI synthesis using Lancer API.
- """
- try:
- async with httpx.AsyncClient(timeout=60.0) as client:
- response = await client.post(
- f"{LANCER_BASE_URL}/api/v1/search",
- json={
- "query": query,
- "max_results": max_results,
- "freshness": freshness,
- "include_answer": True
- }
- )
-
- if response.status_code != 200:
- raise Exception(f"Lancer API error: {response.status_code}")
-
- data = response.json()
-
- results = [
- SearchResult(
- title=r.get("title", ""),
- url=r.get("url", ""),
- content=r.get("content", ""),
- score=r.get("score", 0.0),
- published_date=r.get("published_date")
- )
- for r in data.get("results", [])
- ]
-
- # Combine all text for NER
- raw_text = data.get("answer", "") or ""
- for r in results:
- raw_text += f"\n{r.title}. {r.content}"
-
- return ResearchResponse(
- query=data.get("query", query),
- answer=data.get("answer"),
- results=results,
- citations=data.get("citations", []),
- processing_time_ms=data.get("processing_time_ms", 0),
- raw_text=raw_text
- )
-
- except Exception as e:
- raise Exception(f"Lancer search failed: {str(e)}")
-
-
-async def deep_research(
- query: str,
- max_dimensions: int = 5,
- max_sources_per_dim: int = 5
-) -> ResearchResponse:
- """
- Perform deep multi-dimensional research using Lancer API.
- This provides richer, more comprehensive analysis.
- """
- try:
- async with httpx.AsyncClient(timeout=120.0) as client:
- response = await client.post(
- f"{LANCER_BASE_URL}/api/v1/research/deep",
- json={
- "query": query,
- "max_dimensions": max_dimensions,
- "max_sources_per_dim": max_sources_per_dim,
- "max_total_searches": 20
- }
- )
-
- if response.status_code != 200:
- raise Exception(f"Lancer API error: {response.status_code}")
-
- data = response.json()
-
- # Deep research returns a different format - adapt it
- results = []
- raw_text = ""
-
- # Extract from dimensions if present
- if "dimensions" in data:
- for dim in data["dimensions"]:
- dim_name = dim.get("dimension", "")
- raw_text += f"\n## {dim_name}\n"
- for r in dim.get("results", []):
- results.append(SearchResult(
- title=r.get("title", ""),
- url=r.get("url", ""),
- content=r.get("content", ""),
- score=r.get("score", 0.0)
- ))
- raw_text += f"{r.get('title', '')}. {r.get('content', '')}\n"
-
- # Add final report
- final_report = data.get("final_report", data.get("report", ""))
- if final_report:
- raw_text = final_report + "\n" + raw_text
-
- return ResearchResponse(
- query=query,
- answer=final_report,
- results=results,
- citations=data.get("citations", []),
- processing_time_ms=data.get("processing_time_ms", 0),
- raw_text=raw_text
- )
-
- except Exception as e:
- raise Exception(f"Lancer deep research failed: {str(e)}")
-
-
-async def heavy_search(
- query: str,
- max_results: int = 5
-) -> ResearchResponse:
- """
- Heavy search with full content scraping from sources.
- Slower but provides more context.
- """
- try:
- async with httpx.AsyncClient(timeout=90.0) as client:
- response = await client.post(
- f"{LANCER_BASE_URL}/api/v1/search/heavy",
- json={
- "query": query,
- "max_results": max_results,
- "include_answer": True
- }
- )
-
- if response.status_code != 200:
- raise Exception(f"Lancer API error: {response.status_code}")
-
- data = response.json()
-
- results = [
- SearchResult(
- title=r.get("title", ""),
- url=r.get("url", ""),
- content=r.get("content", ""),
- score=r.get("score", 0.0)
- )
- for r in data.get("results", [])
- ]
-
- raw_text = data.get("answer", "") or ""
- for r in results:
- raw_text += f"\n{r.title}. {r.content}"
-
- return ResearchResponse(
- query=query,
- answer=data.get("answer"),
- results=results,
- citations=data.get("citations", []),
- processing_time_ms=data.get("processing_time_ms", 0),
- raw_text=raw_text
- )
-
- except Exception as e:
- raise Exception(f"Lancer heavy search failed: {str(e)}")
diff --git a/app/services/nlp/__init__.py b/app/services/nlp/__init__.py
deleted file mode 100644
index e9265c7e61b3b29a87dcd75c4455abd114be3e18..0000000000000000000000000000000000000000
--- a/app/services/nlp/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# NLP Services
-from .entity_extractor import entity_extractor
diff --git a/app/services/nlp/__pycache__/__init__.cpython-311.pyc b/app/services/nlp/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index 8671a044592ad7e7b9a10ee976be1a78f1f7958d..0000000000000000000000000000000000000000
Binary files a/app/services/nlp/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/app/services/nlp/__pycache__/entity_extractor.cpython-311.pyc b/app/services/nlp/__pycache__/entity_extractor.cpython-311.pyc
deleted file mode 100644
index a7aac7bb33e176996105a8d539ec88db2b3ceaf5..0000000000000000000000000000000000000000
Binary files a/app/services/nlp/__pycache__/entity_extractor.cpython-311.pyc and /dev/null differ
diff --git a/app/services/nlp/entity_extractor.py b/app/services/nlp/entity_extractor.py
deleted file mode 100644
index 8855cc0c67661840ce99ce63f7aafe9da23e60b2..0000000000000000000000000000000000000000
--- a/app/services/nlp/entity_extractor.py
+++ /dev/null
@@ -1,265 +0,0 @@
-"""
-Entity Extractor Service - LLM-based NER
-Uses Cerebras API with Qwen 3 235B for intelligent entity and relationship extraction
-"""
-import json
-import re
-from typing import Dict, List, Optional, Any
-from dataclasses import dataclass
-import httpx
-
-from app.config import settings
-
-
-@dataclass
-class ExtractedEntity:
- """Represents an extracted entity"""
- name: str
- type: str # person, organization, location, event
- role: Optional[str] = None
- aliases: Optional[List[str]] = None
- description: Optional[str] = None
- latitude: Optional[float] = None
- longitude: Optional[float] = None
- event_date: Optional[str] = None # Date in ISO format (YYYY-MM-DD)
-
-
-@dataclass
-class ExtractedRelationship:
- """Represents a relationship between entities"""
- source: str
- target: str
- relationship_type: str
- context: Optional[str] = None
- event_date: Optional[str] = None # Date in ISO format (YYYY-MM-DD)
-
-
-@dataclass
-class ExtractedEvent:
- """Represents an extracted event"""
- description: str
- event_type: Optional[str] = None
- date: Optional[str] = None
- location: Optional[str] = None
- participants: Optional[List[str]] = None
-
-
-@dataclass
-class ExtractionResult:
- """Complete extraction result"""
- entities: List[ExtractedEntity]
- relationships: List[ExtractedRelationship]
- events: List[ExtractedEvent]
- raw_response: Optional[str] = None
-
-
-EXTRACTION_PROMPT = """Você é um especialista em extração de informações estruturadas de textos.
-
-Analise o texto fornecido e extraia TODAS as entidades, relacionamentos e eventos mencionados.
-
-## Regras:
-1. Identifique entidades: pessoas, organizações, locais, eventos
-2. Para PESSOAS: inclua nome completo (se mencionado ou conhecido), cargo/função
-3. Para ORGANIZAÇÕES: inclua nome oficial e siglas
-4. Para LOCAIS: seja específico (cidade, país, endereço)
-5. Identifique RELACIONAMENTOS entre entidades (quem trabalha onde, quem conhece quem, etc.)
-6. Identifique EVENTOS mencionados (reuniões, anúncios, eleições, etc.)
-7. EXTRAIA DATAS sempre que mencionadas (formato YYYY-MM-DD ou YYYY se só o ano)
-
-## Formato de resposta (JSON válido):
-```json
-{{
- "entities": [
- {{
- "name": "Nome Completo",
- "type": "person|organization|location|event",
- "role": "cargo ou função (opcional)",
- "aliases": ["apelidos", "siglas"],
- "description": "breve descrição se relevante",
- "event_date": "YYYY-MM-DD ou YYYY (data relevante como nascimento, fundação, etc)"
- }}
- ],
- "relationships": [
- {{
- "source": "Nome da Entidade 1",
- "target": "Nome da Entidade 2",
- "relationship_type": "tipo de relação (trabalha em, preside, fundou, reuniu-se com, etc.)",
- "context": "contexto da relação",
- "event_date": "YYYY-MM-DD ou YYYY (quando o relacionamento aconteceu/iniciou)"
- }}
- ],
- "events": [
- {{
- "description": "O que aconteceu",
- "event_type": "meeting|announcement|election|crime|etc",
- "date": "YYYY-MM-DD ou YYYY",
- "location": "local se mencionado",
- "participants": ["lista de participantes"]
- }}
- ]
-}}
-```
-
-Retorne APENAS o JSON, sem texto adicional.
-
-## Texto para análise:
-{text}
-"""
-
-
-class EntityExtractor:
- """
- LLM-based Entity Extractor using Cerebras API
- """
-
- def __init__(self):
- self.api_key = settings.cerebras_api_key
- self.base_url = "https://api.cerebras.ai/v1"
- self.model = "qwen-3-235b-a22b-instruct-2507"
- self.timeout = 60.0
-
- async def extract(self, text: str) -> ExtractionResult:
- """
- Extract entities, relationships, and events from text using LLM
-
- Args:
- text: The text to analyze
-
- Returns:
- ExtractionResult with all extracted information
- """
- if not self.api_key:
- raise ValueError("CEREBRAS_API_KEY not configured. Please set the environment variable.")
-
- if not text or len(text.strip()) < 10:
- return ExtractionResult(entities=[], relationships=[], events=[])
-
- # Prepare the prompt
- prompt = EXTRACTION_PROMPT.format(text=text)
-
- try:
- # Call Cerebras API
- async with httpx.AsyncClient(timeout=self.timeout) as client:
- response = await client.post(
- f"{self.base_url}/chat/completions",
- headers={
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": self.model,
- "messages": [
- {
- "role": "system",
- "content": "Você é um assistente especialista em extração de entidades e relacionamentos. Sempre responda em JSON válido."
- },
- {
- "role": "user",
- "content": prompt
- }
- ],
- "temperature": 0.1, # Low temperature for consistent extraction
- "max_tokens": 4096
- }
- )
-
- if response.status_code != 200:
- error_text = response.text
- print(f"Cerebras API error: {response.status_code} - {error_text}")
- raise ValueError(f"Cerebras API error: {response.status_code}")
-
- data = response.json()
-
- # Parse the response
- raw_content = data["choices"][0]["message"]["content"]
- return self._parse_response(raw_content)
-
- except httpx.TimeoutException:
- print("Cerebras API timeout")
- raise ValueError("API timeout - please try again with shorter text")
- except httpx.RequestError as e:
- print(f"Cerebras API request error: {e}")
- raise ValueError(f"API connection error: {str(e)}")
- except KeyError as e:
- print(f"Unexpected API response format: {e}")
- raise ValueError("Unexpected API response format")
-
- def _parse_response(self, content: str) -> ExtractionResult:
- """Parse the LLM response into structured data"""
- try:
- # Try to extract JSON from the response
- # Sometimes the model wraps it in ```json ... ```
- json_match = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
- if json_match:
- json_str = json_match.group(1)
- else:
- # Try to find raw JSON
- json_match = re.search(r'\{.*\}', content, re.DOTALL)
- if json_match:
- json_str = json_match.group(0)
- else:
- json_str = content
-
- data = json.loads(json_str)
-
- # Parse entities
- entities = []
- for e in data.get("entities", []):
- entities.append(ExtractedEntity(
- name=e.get("name", ""),
- type=e.get("type", "unknown"),
- role=e.get("role"),
- aliases=e.get("aliases", []),
- description=e.get("description"),
- event_date=e.get("event_date")
- ))
-
- # Parse relationships
- relationships = []
- for r in data.get("relationships", []):
- relationships.append(ExtractedRelationship(
- source=r.get("source", ""),
- target=r.get("target", ""),
- relationship_type=r.get("relationship_type", "related_to"),
- context=r.get("context"),
- event_date=r.get("event_date")
- ))
-
- # Parse events
- events = []
- for ev in data.get("events", []):
- events.append(ExtractedEvent(
- description=ev.get("description", ""),
- event_type=ev.get("event_type"),
- date=ev.get("date"),
- location=ev.get("location"),
- participants=ev.get("participants", [])
- ))
-
- return ExtractionResult(
- entities=entities,
- relationships=relationships,
- events=events,
- raw_response=content
- )
-
- except json.JSONDecodeError as e:
- print(f"Failed to parse LLM response: {e}")
- print(f"Raw content: {content}")
- return ExtractionResult(
- entities=[],
- relationships=[],
- events=[],
- raw_response=content
- )
-
- def extract_sync(self, text: str) -> ExtractionResult:
- """
- Synchronous version of extract for non-async contexts
- """
- import asyncio
- return asyncio.run(self.extract(text))
-
-
-# Singleton instance
-entity_extractor = EntityExtractor()
diff --git a/app/services/transparencia_api.py b/app/services/transparencia_api.py
deleted file mode 100644
index 13face26b77d772b00023efd68c0a6af8dc03d9e..0000000000000000000000000000000000000000
--- a/app/services/transparencia_api.py
+++ /dev/null
@@ -1,146 +0,0 @@
-"""
-Portal da Transparência APIs
-Access to Brazilian government transparency data
-"""
-import httpx
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass
-
-
-# Portal da Transparência base URL
-TRANSPARENCIA_URL = "https://api.portaldatransparencia.gov.br/api-de-dados"
-
-
-@dataclass
-class SanctionRecord:
- """Data structure for sanction/punishment records"""
- tipo: str # CEIS, CNEP, CEPIM
- cpf_cnpj: str
- nome: str
- tipo_pessoa: str # 'F' or 'J'
-
- # Sanction details
- tipo_sancao: str = ""
- data_inicio: str = ""
- data_fim: str = ""
- orgao_sancionador: str = ""
- uf_orgao: str = ""
- fundamentacao_legal: str = ""
-
- # Source
- fonte_url: str = ""
-
-
-async def consultar_ceis(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
- """
- Query CEIS - Cadastro de Empresas Inidôneas e Suspensas
- Note: Requires authentication token from Portal da Transparência
- """
- # Without token, we can still try - some endpoints work without auth
- return await _query_sanctions("ceis", cnpj_cpf, token)
-
-
-async def consultar_cnep(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
- """
- Query CNEP - Cadastro Nacional de Empresas Punidas
- """
- return await _query_sanctions("cnep", cnpj_cpf, token)
-
-
-async def consultar_cepim(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
- """
- Query CEPIM - Cadastro de Entidades Privadas sem Fins Lucrativos Impedidas
- """
- return await _query_sanctions("cepim", cnpj_cpf, token)
-
-
-async def _query_sanctions(
- endpoint: str,
- cnpj_cpf: str,
- token: Optional[str] = None
-) -> List[SanctionRecord]:
- """Internal function to query sanction APIs"""
- try:
- headers = {}
- if token:
- headers["chave-api-dados"] = token
-
- params = {"cnpjCpf": cnpj_cpf}
-
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.get(
- f"{TRANSPARENCIA_URL}/{endpoint}",
- params=params,
- headers=headers
- )
-
- if response.status_code == 401:
- # Need authentication - return empty for now
- print(f"Portal da Transparência requires authentication for {endpoint}")
- return []
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- if not isinstance(data, list):
- data = [data] if data else []
-
- records = []
- for item in data:
- records.append(SanctionRecord(
- tipo=endpoint.upper(),
- cpf_cnpj=item.get("cpfCnpj", ""),
- nome=item.get("nomeRazaoSocial", item.get("nome", "")),
- tipo_pessoa=item.get("tipoPessoa", ""),
- tipo_sancao=item.get("tipoSancao", {}).get("descricao", "") if isinstance(item.get("tipoSancao"), dict) else str(item.get("tipoSancao", "")),
- data_inicio=item.get("dataInicioSancao", ""),
- data_fim=item.get("dataFimSancao", ""),
- orgao_sancionador=item.get("orgaoSancionador", {}).get("nome", "") if isinstance(item.get("orgaoSancionador"), dict) else str(item.get("orgaoSancionador", "")),
- uf_orgao=item.get("ufOrgaoSancionador", ""),
- fundamentacao_legal=item.get("fundamentacaoLegal", ""),
- fonte_url=f"https://portaldatransparencia.gov.br/{endpoint}"
- ))
-
- return records
-
- except Exception as e:
- print(f"Transparência API error ({endpoint}): {e}")
- return []
-
-
-async def verificar_sancoes(cnpj_cpf: str, token: Optional[str] = None) -> Dict[str, Any]:
- """
- Check all sanction databases for a CNPJ/CPF
- Returns consolidated result
- """
- import asyncio
-
- # Query all databases in parallel
- ceis_task = consultar_ceis(cnpj_cpf, token)
- cnep_task = consultar_cnep(cnpj_cpf, token)
- cepim_task = consultar_cepim(cnpj_cpf, token)
-
- ceis, cnep, cepim = await asyncio.gather(ceis_task, cnep_task, cepim_task)
-
- all_sanctions = ceis + cnep + cepim
-
- return {
- "cnpj_cpf": cnpj_cpf,
- "tem_sancoes": len(all_sanctions) > 0,
- "total_sancoes": len(all_sanctions),
- "ceis": len(ceis),
- "cnep": len(cnep),
- "cepim": len(cepim),
- "registros": [
- {
- "tipo": s.tipo,
- "tipo_sancao": s.tipo_sancao,
- "orgao": s.orgao_sancionador,
- "inicio": s.data_inicio,
- "fim": s.data_fim,
- "fundamentacao": s.fundamentacao_legal
- }
- for s in all_sanctions
- ]
- }
diff --git a/app/services/tse_api.py b/app/services/tse_api.py
deleted file mode 100644
index e851625961d9a2c673f2eab9d91c44145d05e9cc..0000000000000000000000000000000000000000
--- a/app/services/tse_api.py
+++ /dev/null
@@ -1,270 +0,0 @@
-"""
-TSE (Tribunal Superior Eleitoral) API Service
-Access to Brazilian electoral data - candidates, assets, donations
-"""
-import httpx
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass, field
-
-
-# DivulgaCand API (unofficial but functional)
-TSE_DIVULGACAND_URL = "https://divulgacandcontas.tse.jus.br/divulga/rest/v1"
-
-
-@dataclass
-class Candidato:
- """Electoral candidate data"""
- id: int
- nome: str
- nome_urna: str
- cpf_parcial: str = "" # TSE only shows partial
- numero: str = ""
- cargo: str = ""
- partido_sigla: str = ""
- partido_nome: str = ""
- coligacao: str = ""
- situacao: str = ""
-
- # Location
- uf: str = ""
- municipio: str = ""
-
- # Personal
- data_nascimento: str = ""
- genero: str = ""
- grau_instrucao: str = ""
- ocupacao: str = ""
-
- # Assets
- total_bens: float = 0.0
- bens: List[Dict[str, Any]] = field(default_factory=list)
-
- # Campaign
- total_receitas: float = 0.0
- total_despesas: float = 0.0
-
-
-@dataclass
-class Eleicao:
- """Election metadata"""
- id: int
- ano: int
- descricao: str
- turno: int = 1
-
-
-async def listar_eleicoes() -> List[Eleicao]:
- """List available elections"""
- try:
- async with httpx.AsyncClient(timeout=15.0) as client:
- response = await client.get(f"{TSE_DIVULGACAND_URL}/eleicao/ordinarias")
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- eleicoes = []
-
- for item in data:
- eleicoes.append(Eleicao(
- id=item.get("id", 0),
- ano=item.get("ano", 0),
- descricao=item.get("descricaoEleicao", ""),
- turno=item.get("turno", 1)
- ))
-
- return sorted(eleicoes, key=lambda x: x.ano, reverse=True)
-
- except Exception as e:
- print(f"TSE eleicoes error: {e}")
- return []
-
-
-async def buscar_candidatos(
- nome: str,
- ano: int = 2024,
- uf: Optional[str] = None,
- cargo: Optional[str] = None
-) -> List[Candidato]:
- """
- Search for candidates by name.
-
- Args:
- nome: Candidate name to search
- ano: Election year (default 2024)
- uf: State filter (optional)
- cargo: Position filter (optional)
- """
- try:
- # First get the election ID for the year
- eleicoes = await listar_eleicoes()
- eleicao = next((e for e in eleicoes if e.ano == ano), None)
-
- if not eleicao:
- # Try common election IDs
- eleicao_id = {2024: 546, 2022: 544, 2020: 426, 2018: 295}.get(ano, 546)
- else:
- eleicao_id = eleicao.id
-
- # Build search URL
- base_url = f"{TSE_DIVULGACAND_URL}/candidatura/listar/{ano}/{eleicao_id}"
-
- params = {"nomeCompleto": nome}
- if uf:
- params["uf"] = uf.upper()
- if cargo:
- params["cargo"] = cargo
-
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.get(base_url, params=params)
-
- if response.status_code != 200:
- return []
-
- data = response.json()
- candidatos_data = data.get("candidatos", [])
-
- candidatos = []
- for item in candidatos_data:
- candidatos.append(Candidato(
- id=item.get("id", 0),
- nome=item.get("nomeCompleto", ""),
- nome_urna=item.get("nomeUrna", ""),
- cpf_parcial=item.get("cpf", "")[:3] + ".***.***-**" if item.get("cpf") else "",
- numero=str(item.get("numero", "")),
- cargo=item.get("cargo", {}).get("nome", "") if isinstance(item.get("cargo"), dict) else str(item.get("cargo", "")),
- partido_sigla=item.get("partido", {}).get("sigla", "") if isinstance(item.get("partido"), dict) else "",
- partido_nome=item.get("partido", {}).get("nome", "") if isinstance(item.get("partido"), dict) else "",
- uf=item.get("ufSigla", "") or item.get("uf", ""),
- municipio=item.get("municipio", {}).get("nome", "") if isinstance(item.get("municipio"), dict) else "",
- situacao=item.get("situacao", ""),
- total_bens=float(item.get("totalDeBens", 0) or 0)
- ))
-
- return candidatos
-
- except Exception as e:
- print(f"TSE search error: {e}")
- return []
-
-
-async def obter_candidato_detalhes(
- id_candidato: int,
- ano: int = 2024,
- eleicao_id: Optional[int] = None
-) -> Optional[Candidato]:
- """Get detailed candidate information including assets"""
- try:
- if not eleicao_id:
- eleicao_id = {2024: 546, 2022: 544, 2020: 426, 2018: 295}.get(ano, 546)
-
- async with httpx.AsyncClient(timeout=30.0) as client:
- # Get candidate details
- response = await client.get(
- f"{TSE_DIVULGACAND_URL}/candidatura/buscar/{ano}/{eleicao_id}/candidato/{id_candidato}"
- )
-
- if response.status_code != 200:
- return None
-
- item = response.json()
-
- candidato = Candidato(
- id=item.get("id", 0),
- nome=item.get("nomeCompleto", ""),
- nome_urna=item.get("nomeUrna", ""),
- numero=str(item.get("numero", "")),
- cargo=item.get("cargo", {}).get("nome", "") if isinstance(item.get("cargo"), dict) else "",
- partido_sigla=item.get("partido", {}).get("sigla", "") if isinstance(item.get("partido"), dict) else "",
- partido_nome=item.get("partido", {}).get("nome", "") if isinstance(item.get("partido"), dict) else "",
- uf=item.get("ufSigla", ""),
- municipio=item.get("localCandidatura", ""),
- situacao=item.get("situacao", ""),
- data_nascimento=item.get("dataNascimento", ""),
- genero=item.get("genero", ""),
- grau_instrucao=item.get("grauInstrucao", ""),
- ocupacao=item.get("ocupacao", ""),
- total_bens=float(item.get("totalDeBens", 0) or 0)
- )
-
- # Try to get assets (bens)
- try:
- bens_response = await client.get(
- f"{TSE_DIVULGACAND_URL}/candidatura/buscar/{ano}/{eleicao_id}/candidato/{id_candidato}/bens"
- )
- if bens_response.status_code == 200:
- bens_data = bens_response.json()
- candidato.bens = [
- {
- "tipo": b.get("tipoBem", ""),
- "descricao": b.get("descricao", ""),
- "valor": float(b.get("valor", 0) or 0)
- }
- for b in bens_data
- ]
- except:
- pass
-
- return candidato
-
- except Exception as e:
- print(f"TSE details error: {e}")
- return None
-
-
-async def buscar_politico(nome: str) -> Dict[str, Any]:
- """
- Search for a politician across multiple elections.
- Returns consolidated information.
- """
- resultado = {
- "nome": nome,
- "encontrado": False,
- "candidaturas": [],
- "ultimo_cargo": None,
- "total_patrimonio": 0.0,
- "partidos": set(),
- "ufs": set()
- }
-
- # Search in recent elections - continue through ALL years
- for ano in [2024, 2022, 2020, 2018]:
- try:
- candidatos = await buscar_candidatos(nome, ano=ano)
- print(f"TSE: Buscando '{nome}' em {ano} - encontrados: {len(candidatos)}")
-
- for c in candidatos:
- # Match if nome is in the candidate's full name
- if nome.lower() in c.nome.lower() or nome.lower() in c.nome_urna.lower():
- resultado["encontrado"] = True
- resultado["candidaturas"].append({
- "ano": ano,
- "cargo": c.cargo,
- "partido": c.partido_sigla,
- "uf": c.uf,
- "situacao": c.situacao,
- "patrimonio": c.total_bens
- })
-
- if c.partido_sigla:
- resultado["partidos"].add(c.partido_sigla)
- if c.uf:
- resultado["ufs"].add(c.uf)
-
- if c.total_bens > resultado["total_patrimonio"]:
- resultado["total_patrimonio"] = c.total_bens
-
- if not resultado["ultimo_cargo"]:
- resultado["ultimo_cargo"] = f"{c.cargo} ({ano})"
- except Exception as e:
- print(f"TSE search {ano} error: {e}")
- continue
-
- # Convert sets to lists for JSON
- resultado["partidos"] = list(resultado["partidos"])
- resultado["ufs"] = list(resultado["ufs"])
-
- print(f"TSE resultado para '{nome}': encontrado={resultado['encontrado']}, candidaturas={len(resultado['candidaturas'])}")
-
- return resultado
-