Madras1 committed on
Commit
c793087
·
verified ·
1 Parent(s): cd3abcc

Upload 79 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. Dockerfile +13 -22
  2. README.md +22 -6
  3. app/__init__.py +1 -0
  4. app/__pycache__/__init__.cpython-311.pyc +0 -0
  5. app/__pycache__/config.cpython-311.pyc +0 -0
  6. app/api/__init__.py +1 -0
  7. app/api/__pycache__/__init__.cpython-311.pyc +0 -0
  8. app/api/__pycache__/deps.cpython-311.pyc +0 -0
  9. app/api/deps.py +35 -0
  10. app/api/routes/__init__.py +2 -0
  11. app/api/routes/__pycache__/__init__.cpython-311.pyc +0 -0
  12. app/api/routes/__pycache__/entities.cpython-311.pyc +0 -0
  13. app/api/routes/__pycache__/events.cpython-311.pyc +0 -0
  14. app/api/routes/__pycache__/ingest.cpython-311.pyc +0 -0
  15. app/api/routes/__pycache__/investigate.cpython-311.pyc +0 -0
  16. app/api/routes/__pycache__/relationships.cpython-311.pyc +0 -0
  17. app/api/routes/__pycache__/search.cpython-311.pyc +0 -0
  18. app/api/routes/aethermap.py +307 -0
  19. app/api/routes/analyze.py +309 -0
  20. app/api/routes/chat.py +63 -0
  21. app/api/routes/dados_publicos.py +155 -0
  22. app/api/routes/entities.py +353 -0
  23. app/api/routes/events.py +113 -0
  24. app/api/routes/graph.py +173 -0
  25. app/api/routes/ingest.py +341 -0
  26. app/api/routes/investigate.py +207 -0
  27. app/api/routes/projects.py +135 -0
  28. app/api/routes/relationships.py +76 -0
  29. app/api/routes/research.py +158 -0
  30. app/api/routes/search.py +126 -0
  31. app/api/routes/session.py +44 -0
  32. app/api/routes/timeline.py +165 -0
  33. app/config.py +47 -0
  34. app/core/__init__.py +2 -0
  35. app/core/__pycache__/__init__.cpython-311.pyc +0 -0
  36. app/core/__pycache__/database.cpython-311.pyc +0 -0
  37. app/core/database.py +115 -0
  38. app/main.py +99 -0
  39. app/models/__init__.py +3 -0
  40. app/models/__pycache__/__init__.cpython-311.pyc +0 -0
  41. app/models/__pycache__/entity.cpython-311.pyc +0 -0
  42. app/models/__pycache__/project.cpython-311.pyc +0 -0
  43. app/models/entity.py +143 -0
  44. app/models/project.py +29 -0
  45. app/schemas/__init__.py +10 -0
  46. app/schemas/__pycache__/__init__.cpython-311.pyc +0 -0
  47. app/schemas/__pycache__/schemas.cpython-311.pyc +0 -0
  48. app/schemas/schemas.py +163 -0
  49. app/services/__init__.py +1 -0
  50. app/services/__pycache__/__init__.cpython-311.pyc +0 -0
Dockerfile CHANGED
@@ -1,33 +1,24 @@
1
- # ==============================================================================
2
- # Dockerfile — AetherMap API (versão profissional)
3
- # ==============================================================================
4
 
5
- # Imagem Python robusta (não slim → evita erros de build)
6
- FROM python:3.10
7
-
8
- # Define diretório da aplicação
9
  WORKDIR /app
10
 
11
- # --- INSTALAR TORCH CPU ANTES (CRÍTICO!) ---
12
- # Isso garante que a versão certa (CPU) seja instalada
13
- RUN pip install --no-cache-dir \
14
- torch \
15
- torchvision \
16
- torchaudio \
17
- --index-url https://download.pytorch.org/whl/cpu
18
-
19
 
20
- # Copiar requirements
21
  COPY requirements.txt .
22
-
23
- # Instalar dependências restantes
24
  RUN pip install --no-cache-dir -r requirements.txt
25
 
26
- # Copiar código da aplicação
27
  COPY . .
28
 
29
- # Expor porta usada pelo Hugging Face Spaces
 
 
 
30
  EXPOSE 7860
31
 
32
- # Comando padrão para executar FastAPI
33
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.11-slim
 
 
2
 
 
 
 
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies
6
+ RUN apt-get update && apt-get install -y \
7
+ gcc \
8
+ && rm -rf /var/lib/apt/lists/*
 
 
 
 
9
 
10
+ # Copy requirements first for better caching
11
  COPY requirements.txt .
 
 
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
+ # Copy application code
15
  COPY . .
16
 
17
+ # Create data directory for SQLite
18
+ RUN mkdir -p /app/data
19
+
20
+ # Expose port (HF Spaces uses 7860)
21
  EXPOSE 7860
22
 
23
+ # Run the application
24
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,27 @@
1
  ---
2
- title: AetherMap
3
- emoji: 🦀
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
8
- license: apache-2.0
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Numidium
3
+ emoji: 🔮
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
+ # Numidium API
11
+
12
+ Backend do sistema de inteligência Numidium/VANTAGE.
13
+
14
+ ## Endpoints
15
+
16
+ - `/docs` - Documentação Swagger
17
+ - `/api/v1/entities` - CRUD de entidades
18
+ - `/api/v1/relationships` - Conexões
19
+ - `/api/v1/events` - Eventos
20
+ - `/api/v1/search` - Busca global
21
+ - `/api/v1/ingest` - Ingestão de dados (Wikipedia, News)
22
+
23
+ ## Stack
24
+
25
+ - FastAPI
26
+ - SQLite
27
+ - BeautifulSoup (scraping)
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Numidium Backend App
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (156 Bytes). View file
 
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.76 kB). View file
 
app/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # API module
app/api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (162 Bytes). View file
 
app/api/__pycache__/deps.cpython-311.pyc ADDED
Binary file (1.64 kB). View file
 
app/api/deps.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API dependencies.
3
+ """
4
+ from typing import Generator, Optional
5
+
6
+ from fastapi import Cookie, Header
7
+ from sqlalchemy.orm import Session
8
+
9
+ from app.core.database import get_db_for_session, get_default_session
10
+
11
+
12
+ def get_session_id(
13
+ x_session_id: Optional[str] = Header(None),
14
+ numidium_session: Optional[str] = Cookie(None)
15
+ ) -> Optional[str]:
16
+ """Return the session id from header or cookie."""
17
+ return x_session_id or numidium_session
18
+
19
+
20
+ def get_scoped_db(
21
+ x_session_id: Optional[str] = Header(None),
22
+ numidium_session: Optional[str] = Cookie(None)
23
+ ) -> Generator[Session, None, None]:
24
+ """
25
+ Provide a session-scoped DB if available, otherwise the default DB.
26
+ """
27
+ session_id = x_session_id or numidium_session
28
+ if session_id:
29
+ db = get_db_for_session(session_id)
30
+ else:
31
+ db = get_default_session()
32
+ try:
33
+ yield db
34
+ finally:
35
+ db.close()
app/api/routes/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # API Routes module
2
+ from app.api.routes import entities, relationships, events, search, ingest
app/api/routes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (329 Bytes). View file
 
app/api/routes/__pycache__/entities.cpython-311.pyc ADDED
Binary file (18.5 kB). View file
 
app/api/routes/__pycache__/events.cpython-311.pyc ADDED
Binary file (7.14 kB). View file
 
app/api/routes/__pycache__/ingest.cpython-311.pyc ADDED
Binary file (16 kB). View file
 
app/api/routes/__pycache__/investigate.cpython-311.pyc ADDED
Binary file (10.1 kB). View file
 
app/api/routes/__pycache__/relationships.cpython-311.pyc ADDED
Binary file (5.04 kB). View file
 
app/api/routes/__pycache__/search.cpython-311.pyc ADDED
Binary file (7 kB). View file
 
app/api/routes/aethermap.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AetherMap Routes - Document Mapping & Semantic Search
3
+ Integrates with AetherMap API for document clustering, NER, and semantic search.
4
+ """
5
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends
6
+ from pydantic import BaseModel, Field
7
+ from typing import Optional, List, Dict, Any
8
+ from sqlalchemy.orm import Session
9
+ import io
10
+
11
+ from app.api.deps import get_scoped_db
12
+ from app.services.aethermap_client import aethermap, ProcessResult, SearchResult, EntityGraphResult
13
+
14
+
15
+ router = APIRouter()
16
+
17
+
18
+ # ============================================================================
19
+ # Request/Response Models
20
+ # ============================================================================
21
+
22
+ class IndexDocumentsRequest(BaseModel):
23
+ """Request to index documents from text list"""
24
+ documents: List[str] = Field(..., description="Lista de textos para indexar")
25
+ fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)")
26
+
27
+
28
+ class IndexEntitiesRequest(BaseModel):
29
+ """Request to index entities from NUMIDIUM database"""
30
+ entity_types: Optional[List[str]] = Field(None, description="Filtrar por tipos de entidade")
31
+ limit: int = Field(500, description="Limite de entidades")
32
+
33
+
34
+ class SemanticSearchRequest(BaseModel):
35
+ """Request for semantic search"""
36
+ query: str = Field(..., description="Termo de busca")
37
+ turbo_mode: bool = Field(True, description="Modo turbo (mais rápido)")
38
+
39
+
40
+ class IndexResponse(BaseModel):
41
+ """Response from indexing"""
42
+ job_id: str
43
+ num_documents: int
44
+ num_clusters: int
45
+ num_noise: int
46
+ metrics: Dict[str, Any] = {}
47
+ cluster_analysis: Dict[str, Any] = {}
48
+
49
+
50
+ class SearchResponse(BaseModel):
51
+ """Response from search"""
52
+ summary: str
53
+ results: List[Dict[str, Any]] = []
54
+
55
+
56
+ class EntityGraphResponse(BaseModel):
57
+ """Response from NER extraction"""
58
+ hubs: List[Dict[str, Any]] = []
59
+ insights: Dict[str, Any] = {}
60
+ node_count: int = 0
61
+ edge_count: int = 0
62
+
63
+
64
+ class StatusResponse(BaseModel):
65
+ """AetherMap status"""
66
+ connected: bool
67
+ job_id: Optional[str] = None
68
+ documents_indexed: int = 0
69
+
70
+
71
+ # ============================================================================
72
+ # Endpoints
73
+ # ============================================================================
74
+
75
+ @router.get("/status", response_model=StatusResponse)
76
+ async def get_status():
77
+ """
78
+ Get AetherMap connection status.
79
+ """
80
+ return StatusResponse(
81
+ connected=True,
82
+ job_id=aethermap.current_job_id,
83
+ documents_indexed=0 # TODO: track this
84
+ )
85
+
86
+
87
+ @router.post("/index", response_model=IndexResponse)
88
+ async def index_documents(request: IndexDocumentsRequest):
89
+ """
90
+ Index a list of documents for semantic search.
91
+
92
+ The documents will be:
93
+ - Embedded using sentence transformers
94
+ - Clustered using HDBSCAN
95
+ - Indexed in FAISS + BM25 for hybrid search
96
+ """
97
+ try:
98
+ if not request.documents:
99
+ raise HTTPException(status_code=400, detail="Nenhum documento fornecido")
100
+
101
+ result = await aethermap.process_documents(
102
+ texts=request.documents,
103
+ fast_mode=request.fast_mode
104
+ )
105
+
106
+ return IndexResponse(
107
+ job_id=result.job_id,
108
+ num_documents=result.num_documents,
109
+ num_clusters=result.num_clusters,
110
+ num_noise=result.num_noise,
111
+ metrics=result.metrics,
112
+ cluster_analysis=result.cluster_analysis
113
+ )
114
+
115
+ except Exception as e:
116
+ raise HTTPException(status_code=500, detail=str(e))
117
+
118
+
119
+ @router.post("/index-entities", response_model=IndexResponse)
120
+ async def index_entities(
121
+ request: IndexEntitiesRequest,
122
+ db: Session = Depends(get_scoped_db)
123
+ ):
124
+ """
125
+ Index entities from NUMIDIUM database.
126
+
127
+ Collects entity names and descriptions, sends to AetherMap for processing.
128
+ """
129
+ from app.models.entity import Entity
130
+
131
+ try:
132
+ query = db.query(Entity)
133
+
134
+ if request.entity_types:
135
+ query = query.filter(Entity.type.in_(request.entity_types))
136
+
137
+ entities = query.limit(request.limit).all()
138
+
139
+ if not entities:
140
+ raise HTTPException(status_code=404, detail="Nenhuma entidade encontrada")
141
+
142
+ # Build text representations
143
+ documents = []
144
+ for e in entities:
145
+ text = f"{e.name} ({e.type})"
146
+ if e.description:
147
+ text += f": {e.description[:1000]}"
148
+ documents.append(text)
149
+
150
+ result = await aethermap.process_documents(
151
+ texts=documents,
152
+ fast_mode=request.fast_mode if hasattr(request, 'fast_mode') else True
153
+ )
154
+
155
+ return IndexResponse(
156
+ job_id=result.job_id,
157
+ num_documents=result.num_documents,
158
+ num_clusters=result.num_clusters,
159
+ num_noise=result.num_noise,
160
+ metrics=result.metrics,
161
+ cluster_analysis=result.cluster_analysis
162
+ )
163
+
164
+ except HTTPException:
165
+ raise
166
+ except Exception as e:
167
+ raise HTTPException(status_code=500, detail=str(e))
168
+
169
+
170
+ @router.post("/upload", response_model=IndexResponse)
171
+ async def upload_documents(
172
+ file: UploadFile = File(...),
173
+ fast_mode: bool = Form(True)
174
+ ):
175
+ """
176
+ Upload a file (TXT or CSV) for indexing.
177
+
178
+ - TXT: One document per line
179
+ - CSV: Will use first text column found
180
+ """
181
+ try:
182
+ content = await file.read()
183
+ text = content.decode('utf-8', errors='ignore')
184
+
185
+ # Split by lines for TXT
186
+ documents = [line.strip() for line in text.splitlines() if line.strip()]
187
+
188
+ if not documents:
189
+ raise HTTPException(status_code=400, detail="Arquivo vazio ou sem texto válido")
190
+
191
+ result = await aethermap.process_documents(
192
+ texts=documents,
193
+ fast_mode=fast_mode
194
+ )
195
+
196
+ return IndexResponse(
197
+ job_id=result.job_id,
198
+ num_documents=result.num_documents,
199
+ num_clusters=result.num_clusters,
200
+ num_noise=result.num_noise,
201
+ metrics=result.metrics,
202
+ cluster_analysis=result.cluster_analysis
203
+ )
204
+
205
+ except HTTPException:
206
+ raise
207
+ except Exception as e:
208
+ raise HTTPException(status_code=500, detail=str(e))
209
+
210
+
211
+ @router.post("/search", response_model=SearchResponse)
212
+ async def semantic_search(request: SemanticSearchRequest):
213
+ """
214
+ Semantic search in indexed documents.
215
+
216
+ Uses hybrid RAG (FAISS + BM25 + reranking + LLM).
217
+ Returns a summary answering the query with citations.
218
+ """
219
+ try:
220
+ if not aethermap.current_job_id:
221
+ raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
222
+
223
+ result = await aethermap.semantic_search(
224
+ query=request.query,
225
+ turbo_mode=request.turbo_mode
226
+ )
227
+
228
+ return SearchResponse(
229
+ summary=result.summary,
230
+ results=result.results
231
+ )
232
+
233
+ except HTTPException:
234
+ raise
235
+ except Exception as e:
236
+ raise HTTPException(status_code=500, detail=str(e))
237
+
238
+
239
+ @router.post("/entities", response_model=EntityGraphResponse)
240
+ async def extract_entities():
241
+ """
242
+ Extract named entities (NER) from indexed documents.
243
+
244
+ Returns:
245
+ - Hub entities (most connected)
246
+ - Relationship insights
247
+ - Graph metrics
248
+ """
249
+ try:
250
+ if not aethermap.current_job_id:
251
+ raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
252
+
253
+ result = await aethermap.extract_entities()
254
+
255
+ return EntityGraphResponse(
256
+ hubs=result.hubs,
257
+ insights=result.insights,
258
+ node_count=len(result.nodes),
259
+ edge_count=len(result.edges)
260
+ )
261
+
262
+ except HTTPException:
263
+ raise
264
+ except Exception as e:
265
+ raise HTTPException(status_code=500, detail=str(e))
266
+
267
+
268
+ @router.post("/analyze")
269
+ async def analyze_graph():
270
+ """
271
+ Analyze entity graph using LLM.
272
+
273
+ Returns semantic insights about relationships and patterns.
274
+ """
275
+ try:
276
+ if not aethermap.current_job_id:
277
+ raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
278
+
279
+ result = await aethermap.analyze_graph()
280
+
281
+ return {
282
+ "analysis": result.analysis,
283
+ "key_entities": result.key_entities,
284
+ "relationships": result.relationships
285
+ }
286
+
287
+ except HTTPException:
288
+ raise
289
+ except Exception as e:
290
+ raise HTTPException(status_code=500, detail=str(e))
291
+
292
+
293
+ @router.post("/describe-clusters")
294
+ async def describe_clusters():
295
+ """
296
+ Get LLM descriptions for each cluster found.
297
+ """
298
+ try:
299
+ if not aethermap.current_job_id:
300
+ raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")
301
+
302
+ result = await aethermap.describe_clusters()
303
+
304
+ return result
305
+
306
+ except Exception as e:
307
+ raise HTTPException(status_code=500, detail=str(e))
app/api/routes/analyze.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Analyze API Routes - LLM-based text analysis
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List
7
+ from sqlalchemy.orm import Session
8
+ import traceback
9
+
10
+ from app.api.deps import get_scoped_db
11
+ from app.services.nlp import entity_extractor
12
+ from app.services.geocoding import geocode
13
+ from app.models.entity import Entity, Relationship, Event
14
+ from app.config import settings
15
+
16
+
17
+ router = APIRouter(prefix="/analyze", tags=["Analysis"])
18
+
19
+
20
+ class AnalyzeRequest(BaseModel):
21
+ """Request model for text analysis"""
22
+ text: str = Field(..., min_length=10, description="Text to analyze")
23
+ auto_create: bool = Field(default=False, description="Auto-create extracted entities in database")
24
+
25
+
26
+ class ExtractedEntityResponse(BaseModel):
27
+ """Response model for an extracted entity"""
28
+ name: str
29
+ type: str
30
+ role: Optional[str] = None
31
+ aliases: Optional[List[str]] = None
32
+ description: Optional[str] = None
33
+ created: bool = False # Whether it was created in DB
34
+ entity_id: Optional[str] = None # DB ID if created
35
+
36
+
37
+ class ExtractedRelationshipResponse(BaseModel):
38
+ """Response model for an extracted relationship"""
39
+ source: str
40
+ target: str
41
+ relationship_type: str
42
+ context: Optional[str] = None
43
+ created: bool = False
44
+
45
+
46
+ class ExtractedEventResponse(BaseModel):
47
+ """Response model for an extracted event"""
48
+ description: str
49
+ event_type: Optional[str] = None
50
+ date: Optional[str] = None
51
+ location: Optional[str] = None
52
+ participants: Optional[List[str]] = None
53
+ created: bool = False
54
+ event_id: Optional[str] = None
55
+
56
+
57
+ class AnalyzeResponse(BaseModel):
58
+ """Response model for analysis"""
59
+ entities: List[ExtractedEntityResponse]
60
+ relationships: List[ExtractedRelationshipResponse]
61
+ events: List[ExtractedEventResponse]
62
+ stats: dict
63
+
64
+
65
+ @router.post("", response_model=AnalyzeResponse)
66
+ async def analyze_text(request: AnalyzeRequest, db: Session = Depends(get_scoped_db)):
67
+ """
68
+ Analyze text using LLM to extract entities, relationships, and events.
69
+
70
+ Uses Cerebras API with Qwen 3 235B for intelligent extraction.
71
+
72
+ Args:
73
+ text: Text to analyze (min 10 characters)
74
+ auto_create: If true, automatically creates entities in the database
75
+
76
+ Returns:
77
+ Extracted entities, relationships, events, and statistics
78
+ """
79
+ try:
80
+ # Extract using LLM
81
+ result = await entity_extractor.extract(request.text)
82
+
83
+ # Prepare response
84
+ entities_response = []
85
+ relationships_response = []
86
+ events_response = []
87
+
88
+ created_entities = 0
89
+ created_relationships = 0
90
+ created_events = 0
91
+
92
+ # Helper function to parse date strings
93
+ def parse_date(date_str):
94
+ if not date_str:
95
+ return None
96
+ from datetime import datetime
97
+ try:
98
+ # Try YYYY-MM-DD format
99
+ return datetime.strptime(date_str[:10], "%Y-%m-%d")
100
+ except:
101
+ try:
102
+ # Try YYYY format
103
+ return datetime.strptime(date_str[:4], "%Y")
104
+ except:
105
+ return None
106
+
107
+ # Process entities
108
+ for entity in result.entities:
109
+ entity_data = ExtractedEntityResponse(
110
+ name=entity.name,
111
+ type=entity.type,
112
+ role=entity.role,
113
+ aliases=entity.aliases,
114
+ description=entity.description,
115
+ created=False
116
+ )
117
+
118
+ if request.auto_create and entity.name:
119
+ # Check if entity already exists
120
+ existing = db.query(Entity).filter(
121
+ Entity.name.ilike(f"%{entity.name}%")
122
+ ).first()
123
+
124
+ if not existing:
125
+ # Get coordinates for location entities
126
+ lat, lng = None, None
127
+ if entity.type == "location":
128
+ coords = await geocode(entity.name)
129
+ if coords:
130
+ lat, lng = coords
131
+
132
+ # Parse event_date if available
133
+ event_date = parse_date(getattr(entity, 'event_date', None))
134
+
135
+ # Create new entity
136
+ new_entity = Entity(
137
+ name=entity.name,
138
+ type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person",
139
+ description=entity.description or entity.role or "",
140
+ source="llm_extraction",
141
+ latitude=lat,
142
+ longitude=lng,
143
+ event_date=event_date,
144
+ properties={"role": entity.role, "aliases": entity.aliases}
145
+ )
146
+ db.add(new_entity)
147
+ db.commit()
148
+ db.refresh(new_entity)
149
+
150
+ entity_data.created = True
151
+ entity_data.entity_id = new_entity.id
152
+ created_entities += 1
153
+ else:
154
+ entity_data.entity_id = existing.id
155
+
156
+ entities_response.append(entity_data)
157
+
158
+ # Process relationships
159
+ for rel in result.relationships:
160
+ rel_data = ExtractedRelationshipResponse(
161
+ source=rel.source,
162
+ target=rel.target,
163
+ relationship_type=rel.relationship_type,
164
+ context=rel.context,
165
+ created=False
166
+ )
167
+
168
+ if request.auto_create:
169
+ # Find source and target entities
170
+ source_entity = db.query(Entity).filter(
171
+ Entity.name.ilike(f"%{rel.source}%")
172
+ ).first()
173
+ target_entity = db.query(Entity).filter(
174
+ Entity.name.ilike(f"%{rel.target}%")
175
+ ).first()
176
+
177
+ if source_entity and target_entity:
178
+ # Check if relationship exists
179
+ existing_rel = db.query(Relationship).filter(
180
+ Relationship.source_id == source_entity.id,
181
+ Relationship.target_id == target_entity.id,
182
+ Relationship.type == rel.relationship_type
183
+ ).first()
184
+
185
+ if not existing_rel:
186
+ # Parse event_date if available
187
+ rel_event_date = parse_date(getattr(rel, 'event_date', None))
188
+
189
+ new_rel = Relationship(
190
+ source_id=source_entity.id,
191
+ target_id=target_entity.id,
192
+ type=rel.relationship_type,
193
+ event_date=rel_event_date,
194
+ properties={"context": rel.context}
195
+ )
196
+ db.add(new_rel)
197
+ db.commit()
198
+ rel_data.created = True
199
+ created_relationships += 1
200
+
201
+ relationships_response.append(rel_data)
202
+
203
+ # Process events
204
+ for event in result.events:
205
+ event_data = ExtractedEventResponse(
206
+ description=event.description,
207
+ event_type=event.event_type,
208
+ date=event.date,
209
+ location=event.location,
210
+ participants=event.participants,
211
+ created=False
212
+ )
213
+
214
+ if request.auto_create and event.description:
215
+ # Create event
216
+ new_event = Event(
217
+ title=event.description[:100] if len(event.description) > 100 else event.description,
218
+ description=event.description,
219
+ type=event.event_type or "general",
220
+ source="llm_extraction"
221
+ )
222
+ db.add(new_event)
223
+ db.commit()
224
+ db.refresh(new_event)
225
+
226
+ event_data.created = True
227
+ event_data.event_id = new_event.id
228
+ created_events += 1
229
+
230
+ events_response.append(event_data)
231
+
232
+ return AnalyzeResponse(
233
+ entities=entities_response,
234
+ relationships=relationships_response,
235
+ events=events_response,
236
+ stats={
237
+ "total_entities": len(entities_response),
238
+ "total_relationships": len(relationships_response),
239
+ "total_events": len(events_response),
240
+ "created_entities": created_entities,
241
+ "created_relationships": created_relationships,
242
+ "created_events": created_events
243
+ }
244
+ )
245
+
246
+ except Exception as e:
247
+ # Log the full error with traceback
248
+ print(f"=== ANALYZE ERROR ===")
249
+ print(f"Error type: {type(e).__name__}")
250
+ print(f"Error message: {str(e)}")
251
+ print(f"Traceback:")
252
+ traceback.print_exc()
253
+ print(f"=== END ERROR ===")
254
+ raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
255
+
256
+
257
+ @router.get("/debug")
258
+ async def debug_config():
259
+ """
260
+ Debug endpoint to check if API is configured correctly.
261
+ """
262
+ api_key = settings.cerebras_api_key
263
+ return {
264
+ "cerebras_api_key_configured": bool(api_key),
265
+ "cerebras_api_key_length": len(api_key) if api_key else 0,
266
+ "cerebras_api_key_preview": f"{api_key[:8]}...{api_key[-4:]}" if api_key and len(api_key) > 12 else "NOT SET"
267
+ }
268
+
269
+
270
+ @router.post("/quick")
271
+ async def quick_analyze(request: AnalyzeRequest):
272
+ """
273
+ Quick analysis without database operations.
274
+ Returns only extracted data without creating anything.
275
+ """
276
+ try:
277
+ result = await entity_extractor.extract(request.text)
278
+
279
+ return {
280
+ "entities": [
281
+ {
282
+ "name": e.name,
283
+ "type": e.type,
284
+ "role": e.role,
285
+ "aliases": e.aliases
286
+ }
287
+ for e in result.entities
288
+ ],
289
+ "relationships": [
290
+ {
291
+ "source": r.source,
292
+ "target": r.target,
293
+ "type": r.relationship_type,
294
+ "context": r.context
295
+ }
296
+ for r in result.relationships
297
+ ],
298
+ "events": [
299
+ {
300
+ "description": ev.description,
301
+ "type": ev.event_type,
302
+ "date": ev.date,
303
+ "participants": ev.participants
304
+ }
305
+ for ev in result.events
306
+ ]
307
+ }
308
+ except Exception as e:
309
+ raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
app/api/routes/chat.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chat API Routes - Intelligent chat with RAG
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional
7
+ from sqlalchemy.orm import Session
8
+
9
+ from app.api.deps import get_scoped_db, get_session_id
10
+ from app.services.chat import chat_service
11
+
12
+
13
+ router = APIRouter(prefix="/chat", tags=["Chat"])
14
+
15
+
16
+ class ChatRequest(BaseModel):
17
+ """Chat request model"""
18
+ message: str = Field(..., min_length=1, description="User message")
19
+ use_web: bool = Field(default=True, description="Include web search")
20
+ use_history: bool = Field(default=True, description="Use conversation history")
21
+
22
+
23
+ class ChatResponse(BaseModel):
24
+ """Chat response model"""
25
+ answer: str
26
+ local_context_used: bool
27
+ web_context_used: bool
28
+ entities_found: int
29
+
30
+
31
+ @router.post("", response_model=ChatResponse)
32
+ async def chat(
33
+ request: ChatRequest,
34
+ db: Session = Depends(get_scoped_db),
35
+ session_id: Optional[str] = Depends(get_session_id)
36
+ ):
37
+ """
38
+ Send a message and get an intelligent response.
39
+
40
+ Uses:
41
+ - Local NUMIDIUM knowledge (entities/relationships)
42
+ - Lancer web search (if enabled)
43
+ - Cerebras LLM for synthesis
44
+ """
45
+ try:
46
+ result = await chat_service.chat(
47
+ message=request.message,
48
+ db=db,
49
+ use_web=request.use_web,
50
+ use_history=request.use_history,
51
+ session_id=session_id
52
+ )
53
+ return ChatResponse(**result)
54
+
55
+ except Exception as e:
56
+ raise HTTPException(status_code=500, detail=str(e))
57
+
58
+
59
+ @router.post("/clear")
60
+ async def clear_history(session_id: Optional[str] = Depends(get_session_id)):
61
+ """Clear conversation history"""
62
+ chat_service.clear_history(session_id=session_id)
63
+ return {"message": "Historico limpo"}
app/api/routes/dados_publicos.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Public Data API Routes - IBGE and TSE data access
3
+ """
4
+ from fastapi import APIRouter, HTTPException, Query
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List, Dict, Any
7
+
8
+ from app.services.ibge_api import (
9
+ listar_estados,
10
+ listar_municipios,
11
+ buscar_municipio,
12
+ enriquecer_localizacao
13
+ )
14
+ from app.services.tse_api import (
15
+ listar_eleicoes,
16
+ buscar_candidatos,
17
+ obter_candidato_detalhes,
18
+ buscar_politico
19
+ )
20
+
21
+
22
+ router = APIRouter(prefix="/dados", tags=["Public Data"])
23
+
24
+
25
+ # ========== IBGE Endpoints ==========
26
+
27
+ class EstadoResponse(BaseModel):
28
+ id: int
29
+ sigla: str
30
+ nome: str
31
+ regiao: str
32
+
33
+
34
+ class MunicipioResponse(BaseModel):
35
+ id: int
36
+ nome: str
37
+ estado_sigla: str
38
+ estado_nome: str
39
+ regiao: str
40
+
41
+
42
+ @router.get("/ibge/estados", response_model=List[EstadoResponse])
43
+ async def get_estados():
44
+ """List all Brazilian states"""
45
+ estados = await listar_estados()
46
+ return [EstadoResponse(**e.__dict__) for e in estados]
47
+
48
+
49
+ @router.get("/ibge/municipios/{uf}", response_model=List[MunicipioResponse])
50
+ async def get_municipios(uf: str):
51
+ """List municipalities in a state"""
52
+ municipios = await listar_municipios(uf)
53
+ return [MunicipioResponse(**m.__dict__) for m in municipios]
54
+
55
+
56
+ @router.get("/ibge/buscar")
57
+ async def buscar_cidade(
58
+ nome: str = Query(..., min_length=2),
59
+ uf: Optional[str] = None
60
+ ):
61
+ """Search for a municipality by name"""
62
+ municipios = await buscar_municipio(nome, uf)
63
+ return [MunicipioResponse(**m.__dict__) for m in municipios]
64
+
65
+
66
+ @router.get("/ibge/enriquecer")
67
+ async def enriquecer_cidade(
68
+ cidade: str = Query(..., min_length=2),
69
+ uf: Optional[str] = None
70
+ ):
71
+ """Enrich a location name with IBGE data"""
72
+ return await enriquecer_localizacao(cidade, uf)
73
+
74
+
75
+ # ========== TSE Endpoints ==========
76
+
77
+ class EleicaoResponse(BaseModel):
78
+ id: int
79
+ ano: int
80
+ descricao: str
81
+ turno: int
82
+
83
+
84
+ class CandidatoResponse(BaseModel):
85
+ id: int
86
+ nome: str
87
+ nome_urna: str
88
+ numero: str
89
+ cargo: str
90
+ partido_sigla: str
91
+ uf: str
92
+ municipio: str
93
+ situacao: str
94
+ total_bens: float
95
+
96
+
97
+ class CandidatoDetalhadoResponse(BaseModel):
98
+ id: int
99
+ nome: str
100
+ nome_urna: str
101
+ numero: str
102
+ cargo: str
103
+ partido_sigla: str
104
+ partido_nome: str
105
+ uf: str
106
+ municipio: str
107
+ situacao: str
108
+ data_nascimento: str
109
+ genero: str
110
+ grau_instrucao: str
111
+ ocupacao: str
112
+ total_bens: float
113
+ bens: List[Dict[str, Any]]
114
+
115
+
116
+ @router.get("/tse/eleicoes", response_model=List[EleicaoResponse])
117
+ async def get_eleicoes():
118
+ """List available elections"""
119
+ eleicoes = await listar_eleicoes()
120
+ return [EleicaoResponse(**e.__dict__) for e in eleicoes]
121
+
122
+
123
+ @router.get("/tse/candidatos")
124
+ async def get_candidatos(
125
+ nome: str = Query(..., min_length=3),
126
+ ano: int = Query(default=2024),
127
+ uf: Optional[str] = None,
128
+ cargo: Optional[str] = None
129
+ ):
130
+ """Search for candidates by name"""
131
+ candidatos = await buscar_candidatos(nome, ano=ano, uf=uf, cargo=cargo)
132
+ return [CandidatoResponse(**c.__dict__) for c in candidatos]
133
+
134
+
135
+ @router.get("/tse/candidato/{id_candidato}")
136
+ async def get_candidato_detalhes(
137
+ id_candidato: int,
138
+ ano: int = Query(default=2024)
139
+ ):
140
+ """Get detailed candidate information including assets"""
141
+ candidato = await obter_candidato_detalhes(id_candidato, ano=ano)
142
+
143
+ if not candidato:
144
+ raise HTTPException(status_code=404, detail="Candidato não encontrado")
145
+
146
+ return CandidatoDetalhadoResponse(**candidato.__dict__)
147
+
148
+
149
+ @router.get("/tse/politico")
150
+ async def pesquisar_politico(nome: str = Query(..., min_length=3)):
151
+ """
152
+ Search for a politician across multiple elections.
153
+ Returns consolidated career information.
154
+ """
155
+ return await buscar_politico(nome)
app/api/routes/entities.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entity CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_
7
+ from typing import List, Optional
8
+
9
+ from app.api.deps import get_scoped_db
10
+ from app.models import Entity, Relationship
11
+ from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge
12
+
13
+ router = APIRouter(prefix="/entities", tags=["Entities"])
14
+
15
+
16
@router.get("", response_model=List[EntityResponse])
def list_entities(
    type: Optional[str] = None,
    search: Optional[str] = None,
    project_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    offset: int = 0,
    db: Session = Depends(get_scoped_db)
):
    """List entities, newest first, optionally filtered by project, type and free text."""
    stmt = db.query(Entity)

    if project_id:
        stmt = stmt.filter(Entity.project_id == project_id)
    if type:
        stmt = stmt.filter(Entity.type == type)
    if search:
        # Case-insensitive substring match against name or description.
        pattern = f"%{search}%"
        stmt = stmt.filter(
            or_(Entity.name.ilike(pattern), Entity.description.ilike(pattern))
        )

    return (
        stmt.order_by(Entity.created_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
44
+
45
+
46
@router.get("/types")
def get_entity_types(db: Session = Depends(get_scoped_db)):
    """Return every distinct entity type currently stored."""
    return [row[0] for row in db.query(Entity.type).distinct().all()]
51
+
52
+
53
@router.get("/suggest-merge")
async def suggest_merge_candidates(
    limit: int = Query(default=10, le=50),
    db: Session = Depends(get_scoped_db)
):
    """
    Use LLM to find potential duplicate entities that could be merged.
    Returns pairs of entities that might be the same.

    Best-effort endpoint: any LLM/API failure is reported in the payload
    (``error`` key) instead of raising, so the UI can degrade gracefully.
    """
    # Local imports keep the LLM client optional for the rest of this module.
    import httpx
    import json
    import re
    from app.config import settings

    # Get all entities (capped at 200 to bound prompt size).
    entities = db.query(Entity).order_by(Entity.name).limit(200).all()

    if len(entities) < 2:
        return {"candidates": [], "message": "Not enough entities to compare"}

    # Build entity list for LLM: compact dicts with at most 5 aliases each.
    entity_list = []
    for e in entities:
        aliases = (e.properties or {}).get("aliases", [])
        entity_list.append({
            "id": e.id,
            "name": e.name,
            "type": e.type,
            "aliases": aliases[:5] if aliases else []
        })

    # Ask LLM to find duplicates. The prompt is in Portuguese and demands a
    # strict JSON answer; only the first 100 entities are embedded.
    prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes).

Entidades:
{entity_list[:100]}

Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade:
```json
{{
  "duplicates": [
    {{
      "id1": "uuid1",
      "id2": "uuid2",
      "confidence": 0.95,
      "reason": "Mesmo nome com variação"
    }}
  ]
}}
```

Se não houver duplicatas, retorne: {{"duplicates": []}}
"""

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.cerebras.ai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.cerebras_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "zai-glm-4.7",
                    "messages": [
                        {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. Responda apenas em JSON válido."},
                        {"role": "user", "content": prompt}
                    ],
                    # Low temperature for deterministic, conservative matching.
                    "temperature": 0.1,
                    "max_tokens": 1024
                }
            )

            if response.status_code != 200:
                return {"candidates": [], "error": "LLM API error"}

            data = response.json()
            content = data["choices"][0]["message"]["content"]

            # Parse JSON from response: grab the outermost {...} span in case
            # the model wrapped the JSON in markdown fences or prose.
            json_match = re.search(r'\{.*\}', content, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group(0))

                # Enrich with entity names; silently drop pairs whose IDs the
                # model hallucinated (not present in the fetched entity set).
                candidates = []
                for dup in result.get("duplicates", [])[:limit]:
                    e1 = next((e for e in entities if e.id == dup.get("id1")), None)
                    e2 = next((e for e in entities if e.id == dup.get("id2")), None)
                    if e1 and e2:
                        candidates.append({
                            "entity1": {"id": e1.id, "name": e1.name, "type": e1.type},
                            "entity2": {"id": e2.id, "name": e2.name, "type": e2.type},
                            "confidence": dup.get("confidence", 0.5),
                            "reason": dup.get("reason", "Possível duplicata")
                        })

                return {"candidates": candidates}

            return {"candidates": [], "message": "No duplicates found"}

    except Exception as e:
        # Network errors, JSON decode errors, etc. — report rather than raise.
        return {"candidates": [], "error": str(e)}
156
+
157
+
158
@router.get("/{entity_id}", response_model=EntityResponse)
def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch a single entity by its ID or fail with 404."""
    found = db.query(Entity).filter(Entity.id == entity_id).first()
    if found is None:
        raise HTTPException(status_code=404, detail="Entity not found")
    return found
165
+
166
+
167
@router.post("", response_model=EntityResponse, status_code=201)
def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
    """Persist a new entity from the validated request payload."""
    record = Entity(**entity.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)  # pull back DB-generated fields (id, timestamps)
    return record
175
+
176
+
177
@router.put("/{entity_id}", response_model=EntityResponse)
def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_scoped_db)):
    """Apply a partial update to an existing entity (only fields explicitly set)."""
    record = db.query(Entity).filter(Entity.id == entity_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Entity not found")

    for field, value in entity.model_dump(exclude_unset=True).items():
        setattr(record, field, value)

    db.commit()
    db.refresh(record)
    return record
191
+
192
+
193
@router.delete("/{entity_id}")
def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
    """Delete an entity together with every relationship touching it."""
    target = db.query(Entity).filter(Entity.id == entity_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Entity not found")

    # Remove edges first so no relationship is left dangling.
    db.query(Relationship).filter(
        or_(
            Relationship.source_id == entity_id,
            Relationship.target_id == entity_id,
        )
    ).delete()

    db.delete(target)
    db.commit()
    return {"message": "Entity deleted"}
211
+
212
+
213
@router.get("/{entity_id}/connections", response_model=GraphData)
def get_entity_connections(
    entity_id: str,
    depth: int = Query(default=1, le=3),
    db: Session = Depends(get_scoped_db)
):
    """
    Retorna o grafo de conexões de uma entidade
    Usado para visualização de rede no frontend

    Fixes over the previous version:
    - each relationship is emitted as a single edge (it used to be appended
      once from each endpoint's traversal, duplicating every edge between
      two visited nodes);
    - edges whose far endpoint was cut off by the depth limit are dropped,
      so every returned edge references two returned nodes.
    """
    entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    nodes = {}
    visited = set()
    seen_edge_keys = set()      # (source_id, target_id, type) de-dup keys
    collected_rels = []         # relationships gathered during traversal

    def explore(eid: str, current_depth: int):
        """Depth-first walk collecting nodes and their incident relationships."""
        if current_depth > depth or eid in visited:
            return
        visited.add(eid)

        e = db.query(Entity).filter(Entity.id == eid).first()
        if not e:
            return

        nodes[e.id] = GraphNode(
            id=e.id,
            type=e.type,
            name=e.name,
            properties=e.properties or {}
        )

        # Incident relationships in either direction; record each once.
        rels = db.query(Relationship).filter(
            or_(
                Relationship.source_id == eid,
                Relationship.target_id == eid
            )
        ).all()
        for rel in rels:
            key = (rel.source_id, rel.target_id, rel.type)
            if key not in seen_edge_keys:
                seen_edge_keys.add(key)
                collected_rels.append(rel)
            # Recurse towards the opposite endpoint.
            neighbor = rel.target_id if rel.source_id == eid else rel.source_id
            explore(neighbor, current_depth + 1)

    explore(entity_id, 0)

    # Keep only edges with both endpoints materialized as nodes.
    edges = [
        GraphEdge(
            source=rel.source_id,
            target=rel.target_id,
            type=rel.type,
            confidence=rel.confidence
        )
        for rel in collected_rels
        if rel.source_id in nodes and rel.target_id in nodes
    ]

    return GraphData(
        nodes=list(nodes.values()),
        edges=edges
    )
273
+
274
+
275
@router.post("/merge")
def merge_entities(
    primary_id: str,
    secondary_id: str,
    db: Session = Depends(get_scoped_db)
):
    """
    Merge two entities into one.
    The primary entity is kept, the secondary is deleted.
    All relationships from secondary are transferred to primary.

    Fixes over the previous version:
    - the secondary's name/source are captured BEFORE delete+commit; the old
      code read ``secondary.name`` after the commit, when the ORM instance is
      expired and the row is gone, which raises on attribute access;
    - the primary's ``properties`` dict is copied before mutation so the
      reassignment is seen as a new value by SQLAlchemy's JSON change
      detection (mutating the same dict in place is not tracked).
    """
    if primary_id == secondary_id:
        raise HTTPException(status_code=400, detail="Cannot merge entity with itself")

    primary = db.query(Entity).filter(Entity.id == primary_id).first()
    secondary = db.query(Entity).filter(Entity.id == secondary_id).first()

    if not primary:
        raise HTTPException(status_code=404, detail="Primary entity not found")
    if not secondary:
        raise HTTPException(status_code=404, detail="Secondary entity not found")

    # Snapshot everything we need from the secondary before it is deleted.
    secondary_name = secondary.name
    secondary_source = secondary.source
    secondary_description = secondary.description
    secondary_props = secondary.properties or {}

    # Work on a copy so the JSON column sees a new object on reassignment.
    primary_props = dict(primary.properties or {})

    # Fold the secondary's name and aliases into the primary's alias list.
    aliases = list(primary_props.get("aliases", []) or [])
    if secondary_name not in aliases:
        aliases.append(secondary_name)
    for alias in (secondary_props.get("aliases", []) or []):
        if alias not in aliases:
            aliases.append(alias)
    primary_props["aliases"] = aliases

    # Record merge provenance.
    merge_history = list(primary_props.get("merged_from", []) or [])
    merge_history.append({
        "id": secondary_id,
        "name": secondary_name,
        "source": secondary_source
    })
    primary_props["merged_from"] = merge_history

    # Adopt the secondary's description only if the primary lacks one.
    if not primary.description and secondary_description:
        primary.description = secondary_description

    primary.properties = primary_props

    # Transfer relationships from secondary to primary (both directions).
    db.query(Relationship).filter(
        Relationship.source_id == secondary_id
    ).update({"source_id": primary_id})
    db.query(Relationship).filter(
        Relationship.target_id == secondary_id
    ).update({"target_id": primary_id})

    # NOTE: duplicate relationships (same source, target, type) are not
    # deduplicated here — in production you'd want more sophisticated
    # deduplication.

    db.delete(secondary)
    db.commit()
    db.refresh(primary)

    return {
        "message": f"Merged '{secondary_name}' into '{primary.name}'",
        "primary": {
            "id": primary.id,
            "name": primary.name,
            "aliases": aliases
        }
    }
353
+
app/api/routes/events.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Events CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_
7
+ from typing import List, Optional
8
+ from datetime import datetime
9
+
10
+ from app.api.deps import get_scoped_db
11
+ from app.models import Event
12
+ from app.schemas import EventCreate, EventResponse
13
+
14
+ router = APIRouter(prefix="/events", tags=["Events"])
15
+
16
+
17
@router.get("/", response_model=List[EventResponse])
def list_events(
    type: Optional[str] = None,
    search: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    limit: int = Query(default=50, le=200),
    offset: int = 0,
    db: Session = Depends(get_scoped_db)
):
    """List events, newest first, filtered by type, free text and date window."""
    stmt = db.query(Event)

    if type:
        stmt = stmt.filter(Event.type == type)
    if search:
        needle = f"%{search}%"
        stmt = stmt.filter(
            or_(Event.title.ilike(needle), Event.description.ilike(needle))
        )
    if start_date:
        stmt = stmt.filter(Event.event_date >= start_date)
    if end_date:
        stmt = stmt.filter(Event.event_date <= end_date)

    return (
        stmt.order_by(Event.event_date.desc().nullslast())
        .offset(offset)
        .limit(limit)
        .all()
    )
48
+
49
+
50
@router.get("/types")
def get_event_types(db: Session = Depends(get_scoped_db)):
    """Return every distinct event type currently stored."""
    return [row[0] for row in db.query(Event.type).distinct().all()]
55
+
56
+
57
@router.get("/timeline")
def get_timeline(
    entity_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    db: Session = Depends(get_scoped_db)
):
    """Return dated events in chronological order, optionally scoped to one entity."""
    stmt = db.query(Event).filter(Event.event_date.isnot(None))
    if entity_id:
        stmt = stmt.filter(Event.entity_ids.contains([entity_id]))

    timeline = []
    for ev in stmt.order_by(Event.event_date.asc()).limit(limit).all():
        timeline.append({
            "id": ev.id,
            "title": ev.title,
            "date": ev.event_date.isoformat() if ev.event_date else None,
            "type": ev.type,
            "location": ev.location_name
        })
    return timeline
83
+
84
+
85
@router.get("/{event_id}", response_model=EventResponse)
def get_event(event_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch a single event by ID or fail with 404."""
    found = db.query(Event).filter(Event.id == event_id).first()
    if found is None:
        raise HTTPException(status_code=404, detail="Event not found")
    return found
92
+
93
+
94
@router.post("/", response_model=EventResponse, status_code=201)
def create_event(event: EventCreate, db: Session = Depends(get_scoped_db)):
    """Persist a new event from the validated request payload."""
    record = Event(**event.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)  # pull back DB-generated fields (id, timestamps)
    return record
102
+
103
+
104
@router.delete("/{event_id}")
def delete_event(event_id: str, db: Session = Depends(get_scoped_db)):
    """Delete an event by ID."""
    target = db.query(Event).filter(Event.id == event_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Event not found")
    db.delete(target)
    db.commit()
    return {"message": "Event deleted"}
app/api/routes/graph.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Graph API Routes - Network visualization endpoints
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from typing import Optional, List
6
+ from sqlalchemy.orm import Session
7
+ from sqlalchemy import or_
8
+
9
+ from app.api.deps import get_scoped_db
10
+ from app.models.entity import Entity, Relationship
11
+
12
+
13
+ router = APIRouter(prefix="/graph", tags=["Graph"])
14
+
15
+
16
@router.get("")
async def get_graph(
    entity_type: Optional[str] = Query(None, description="Filter by entity type"),
    limit: int = Query(100, le=500, description="Maximum number of entities"),
    db: Session = Depends(get_scoped_db)
):
    """
    Get graph data for visualization.
    Returns nodes (entities) and edges (relationships) in Cytoscape.js format.

    Only relationships with BOTH endpoints inside the selected entity set are
    drawable, so they are filtered with AND directly in SQL (the previous
    version fetched the OR superset and discarded rows in Python).
    """
    try:
        # Select entities, optionally filtered by type.
        query = db.query(Entity)
        if entity_type:
            query = query.filter(Entity.type == entity_type)

        entities = query.limit(limit).all()
        entity_ids = [e.id for e in entities]

        # Relationships whose two endpoints are both in the selected set.
        relationships = db.query(Relationship).filter(
            Relationship.source_id.in_(entity_ids),
            Relationship.target_id.in_(entity_ids)
        ).all()

        # Format for Cytoscape.js: long names are truncated for the label but
        # preserved in fullName.
        nodes = []
        for e in entities:
            nodes.append({
                "data": {
                    "id": e.id,
                    "label": e.name[:30] + "..." if len(e.name) > 30 else e.name,
                    "fullName": e.name,
                    "type": e.type,
                    "description": e.description[:100] if e.description else "",
                    "source": e.source or "unknown"
                }
            })

        edges = []
        for r in relationships:
            edges.append({
                "data": {
                    "id": r.id,
                    "source": r.source_id,
                    "target": r.target_id,
                    "label": r.type,
                    "type": r.type
                }
            })

        return {
            "nodes": nodes,
            "edges": edges,
            "stats": {
                "total_nodes": len(nodes),
                "total_edges": len(edges)
            }
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get graph: {str(e)}")
81
+
82
+
83
@router.get("/entity/{entity_id}")
async def get_entity_graph(
    entity_id: str,
    depth: int = Query(1, ge=1, le=3, description="How many levels of connections to include"),
    db: Session = Depends(get_scoped_db)
):
    """Return the neighborhood graph centered on one entity, Cytoscape-formatted."""
    try:
        central = db.query(Entity).filter(Entity.id == entity_id).first()
        if not central:
            raise HTTPException(status_code=404, detail="Entity not found")

        # Breadth-first ID collection: expand the frontier `depth` times.
        seen_ids = {entity_id}
        frontier = {entity_id}
        for _ in range(depth):
            hop = db.query(Relationship).filter(
                or_(
                    Relationship.source_id.in_(frontier),
                    Relationship.target_id.in_(frontier)
                )
            ).all()

            neighbors = set()
            for rel in hop:
                neighbors.add(rel.source_id)
                neighbors.add(rel.target_id)

            frontier = neighbors - seen_ids
            seen_ids |= neighbors

        # Materialize the collected nodes and every edge among them.
        found_entities = db.query(Entity).filter(Entity.id.in_(seen_ids)).all()
        found_rels = db.query(Relationship).filter(
            Relationship.source_id.in_(seen_ids),
            Relationship.target_id.in_(seen_ids)
        ).all()

        # Cytoscape format; the central node is flagged for styling.
        nodes = [
            {
                "data": {
                    "id": ent.id,
                    "label": ent.name[:30] + "..." if len(ent.name) > 30 else ent.name,
                    "fullName": ent.name,
                    "type": ent.type,
                    "description": ent.description[:100] if ent.description else "",
                    "source": ent.source or "unknown",
                    "isCentral": ent.id == entity_id
                }
            }
            for ent in found_entities
        ]
        edges = [
            {
                "data": {
                    "id": rel.id,
                    "source": rel.source_id,
                    "target": rel.target_id,
                    "label": rel.type,
                    "type": rel.type
                }
            }
            for rel in found_rels
        ]

        return {
            "central": {
                "id": central.id,
                "name": central.name,
                "type": central.type
            },
            "nodes": nodes,
            "edges": edges,
            "stats": {
                "total_nodes": len(nodes),
                "total_edges": len(edges),
                "depth": depth
            }
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get entity graph: {str(e)}")
173
+
app/api/routes/ingest.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Ingestion Routes
3
+ Endpoints para importar dados de fontes externas
4
+ """
5
+ from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
6
+ from sqlalchemy.orm import Session
7
+ from typing import Optional, List
8
+ from datetime import datetime
9
+ import asyncio
10
+
11
+ from app.api.deps import get_scoped_db
12
+ from app.models import Entity, Document, Relationship
13
+ from app.schemas import EntityResponse, DocumentResponse
14
+ from app.services.ingestion import wikipedia_scraper, news_service
15
+ from app.services.nlp import entity_extractor
16
+ from app.services.geocoding import geocode
17
+
18
+ router = APIRouter(prefix="/ingest", tags=["Data Ingestion"])
19
+
20
+
21
def parse_event_date(date_str: Optional[str]) -> Optional[datetime]:
    """Parse a date string into a ``datetime``.

    Accepts an ISO-style ``YYYY-MM-DD`` prefix (any trailing characters such
    as a time component are ignored) or a bare ``YYYY`` year. Returns ``None``
    for empty or unparseable input instead of raising.

    The previous version used bare ``except:`` clauses, which also swallowed
    ``KeyboardInterrupt``/``SystemExit``; the handlers are now narrowed to the
    exceptions slicing/``strptime`` can actually raise here.
    """
    if not date_str:
        return None
    try:
        # Full date: use only the first 10 chars so ISO timestamps also parse.
        return datetime.strptime(date_str[:10], "%Y-%m-%d")
    except (ValueError, TypeError):
        pass
    try:
        # Fall back to a bare year (e.g. "1999" -> 1999-01-01).
        return datetime.strptime(date_str[:4], "%Y")
    except (ValueError, TypeError):
        return None
34
+
35
+
36
+ # ========== Wikipedia ==========
37
+
38
@router.get("/wikipedia/search")
def search_wikipedia(q: str, limit: int = 10):
    """Search Wikipedia articles matching the query string."""
    return wikipedia_scraper.search(q, limit)
43
+
44
+
45
@router.post("/wikipedia/entity", response_model=EntityResponse)
async def import_from_wikipedia(
    title: str,
    entity_type: str = "person",
    project_id: Optional[str] = None,
    auto_extract: bool = True,
    db: Session = Depends(get_scoped_db)
):
    """
    Import an entity from Wikipedia.

    entity_type: "person", "organization" or "location" (anything else falls
        back to the person scraper).
    project_id: project to associate the new entity with.
    auto_extract: when True, run LLM entity extraction over the article text
        and create related entities/relationships (best-effort: extraction
        failures are logged and the main entity is still returned).
    """
    # Check if entity already exists — makes re-imports of the same article
    # idempotent (matched by exact title + wikipedia source).
    existing = db.query(Entity).filter(
        Entity.name == title,
        Entity.source == "wikipedia"
    ).first()

    if existing:
        return existing

    # Scrape based on type
    if entity_type == "person":
        data = wikipedia_scraper.scrape_person(title)
    elif entity_type == "organization":
        data = wikipedia_scraper.scrape_organization(title)
    elif entity_type == "location":
        data = wikipedia_scraper.scrape_location(title)
    else:
        data = wikipedia_scraper.scrape_person(title)  # default

    if not data:
        raise HTTPException(status_code=404, detail="Article not found on Wikipedia")

    # Create main entity with project_id. NOTE(review): assumes the scraper
    # dict's keys all map to Entity columns — confirm against the scraper.
    entity = Entity(**data)
    entity.project_id = project_id
    db.add(entity)
    db.commit()
    db.refresh(entity)

    # Auto-extract entities and relationships using LLM
    if auto_extract and data.get("description"):
        try:
            # Limit text to avoid token limits
            text_to_analyze = data["description"][:3000]
            result = await entity_extractor.extract(text_to_analyze)

            # Create extracted entities; map extracted name -> ORM instance
            # so relationship creation below can resolve endpoints.
            created_entities = {}
            for ext_entity in result.entities:
                # Skip if same as main entity
                if ext_entity.name.lower() == title.lower():
                    created_entities[ext_entity.name] = entity
                    continue

                # Check if entity exists (by similar name — substring ilike,
                # so short names may match unrelated existing entities).
                existing_ent = db.query(Entity).filter(
                    Entity.name.ilike(f"%{ext_entity.name}%")
                ).first()

                if existing_ent:
                    created_entities[ext_entity.name] = existing_ent
                else:
                    # Get coordinates for location entities
                    lat, lng = None, None
                    if ext_entity.type == "location":
                        coords = await geocode(ext_entity.name)
                        if coords:
                            lat, lng = coords

                    # Parse event_date (attribute may be absent on the
                    # extractor's result type, hence getattr).
                    event_date = parse_event_date(getattr(ext_entity, 'event_date', None))

                    # Unknown extracted types are coerced to "person".
                    new_ent = Entity(
                        name=ext_entity.name,
                        type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
                        description=ext_entity.description or ext_entity.role,
                        source="wikipedia_extraction",
                        latitude=lat,
                        longitude=lng,
                        event_date=event_date,
                        project_id=project_id,
                        properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "extracted_from": title}
                    )
                    db.add(new_ent)
                    db.commit()
                    db.refresh(new_ent)
                    created_entities[ext_entity.name] = new_ent

            # Create relationships between the extracted/matched entities.
            for rel in result.relationships:
                source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
                target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()

                if source_ent and target_ent and source_ent.id != target_ent.id:
                    # Check if relationship exists (same source/target/type).
                    existing_rel = db.query(Relationship).filter(
                        Relationship.source_id == source_ent.id,
                        Relationship.target_id == target_ent.id,
                        Relationship.type == rel.relationship_type
                    ).first()

                    if not existing_rel:
                        # Parse relationship event_date
                        rel_event_date = parse_event_date(getattr(rel, 'event_date', None))

                        new_rel = Relationship(
                            source_id=source_ent.id,
                            target_id=target_ent.id,
                            type=rel.relationship_type,
                            event_date=rel_event_date,
                            properties={"context": rel.context, "extracted_from": title}
                        )
                        db.add(new_rel)

            db.commit()

        except Exception as e:
            print(f"NER extraction error: {e}")
            # Continue without extraction if it fails

    return entity
170
+
171
+
172
+ # ========== News ==========
173
+
174
@router.get("/news/feeds")
def list_available_feeds():
    """Return the names of the configured RSS news feeds."""
    return [feed_name for feed_name in news_service.RSS_FEEDS]
178
+
179
+
180
@router.get("/news/fetch")
def fetch_news(feed: Optional[str] = None):
    """Fetch articles: from one named RSS feed if given, otherwise from all feeds."""
    if not feed:
        return news_service.fetch_all_feeds()
    if feed not in news_service.RSS_FEEDS:
        raise HTTPException(status_code=404, detail="Feed not found")
    return news_service.fetch_feed(news_service.RSS_FEEDS[feed])
195
+
196
+
197
@router.get("/news/search")
def search_news(q: str):
    """Search news by keyword via Google News."""
    results = news_service.search_news(q)
    return results
201
+
202
+
203
@router.post("/news/import")
async def import_news(
    query: Optional[str] = None,
    feed: Optional[str] = None,
    auto_extract: bool = True,
    db: Session = Depends(get_scoped_db)
):
    """
    Import news articles as documents in the system.

    query: keyword search via Google News; takes precedence over `feed`.
    feed: named RSS feed to pull from; when neither is given, all feeds are
        fetched.
    auto_extract: when True, run LLM entity extraction on each new article
        (best-effort; extraction failures are logged per article and skipped).
    """
    # Source selection: explicit query > named feed > all feeds.
    if query:
        articles = news_service.search_news(query)
    elif feed:
        if feed not in news_service.RSS_FEEDS:
            raise HTTPException(status_code=404, detail="Feed not found")
        articles = news_service.fetch_feed(news_service.RSS_FEEDS[feed])
    else:
        articles = news_service.fetch_all_feeds()

    imported = 0
    extracted_entities = 0

    for article in articles:
        # Check if document already exists (by URL) — skip duplicates.
        if article.get("url"):
            existing = db.query(Document).filter(
                Document.source_url == article["url"]
            ).first()
            if existing:
                continue

        doc_data = news_service.to_document(article)
        doc = Document(**doc_data)
        db.add(doc)
        db.commit()
        imported += 1

        # Extract entities from article content (title + description only;
        # the full body is not fetched here).
        if auto_extract:
            try:
                text_to_analyze = f"{article.get('title', '')} {article.get('description', '')}".strip()
                # Skip near-empty articles; cap length to bound LLM tokens.
                if len(text_to_analyze) >= 20:
                    result = await entity_extractor.extract(text_to_analyze[:2000])

                    # Map extracted name -> ORM instance for relationship wiring.
                    created_entities = {}
                    for ext_entity in result.entities:
                        # Check if entity exists (substring ilike match).
                        existing_ent = db.query(Entity).filter(
                            Entity.name.ilike(f"%{ext_entity.name}%")
                        ).first()

                        if existing_ent:
                            created_entities[ext_entity.name] = existing_ent
                        else:
                            # Get coordinates for location entities
                            lat, lng = None, None
                            if ext_entity.type == "location":
                                coords = await geocode(ext_entity.name)
                                if coords:
                                    lat, lng = coords

                            # Unknown extracted types are coerced to "person".
                            new_ent = Entity(
                                name=ext_entity.name,
                                type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
                                description=ext_entity.description or ext_entity.role,
                                source="news_extraction",
                                latitude=lat,
                                longitude=lng,
                                properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "from_article": article.get('title', '')}
                            )
                            db.add(new_ent)
                            db.commit()
                            db.refresh(new_ent)
                            created_entities[ext_entity.name] = new_ent
                            extracted_entities += 1

                    # Create relationships between extracted/matched entities,
                    # skipping self-links and already-present duplicates.
                    for rel in result.relationships:
                        source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
                        target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()

                        if source_ent and target_ent and source_ent.id != target_ent.id:
                            existing_rel = db.query(Relationship).filter(
                                Relationship.source_id == source_ent.id,
                                Relationship.target_id == target_ent.id,
                                Relationship.type == rel.relationship_type
                            ).first()

                            if not existing_rel:
                                new_rel = Relationship(
                                    source_id=source_ent.id,
                                    target_id=target_ent.id,
                                    type=rel.relationship_type,
                                    properties={"context": rel.context}
                                )
                                db.add(new_rel)

                    db.commit()

            except Exception as e:
                print(f"NER extraction error for article: {e}")
                # Continue without extraction

    return {
        "message": f"Imported {imported} articles",
        "total_found": len(articles),
        "extracted_entities": extracted_entities
    }
312
+
313
+
314
+ # ========== Manual Import ==========
315
+
316
@router.post("/bulk/entities")
def bulk_import_entities(
    entities: List[dict],
    db: Session = Depends(get_scoped_db)
):
    """Bulk-create entities from raw dicts (e.g. rows parsed from CSV/JSON)."""
    count = 0
    for raw in entities:
        db.add(Entity(
            type=raw.get("type", "unknown"),
            name=raw.get("name", "Unnamed"),
            description=raw.get("description"),
            properties=raw.get("properties", {}),
            latitude=raw.get("latitude"),
            longitude=raw.get("longitude"),
            source=raw.get("source", "manual")
        ))
        count += 1

    # Single commit for the whole batch.
    db.commit()

    return {"message": f"Imported {count} entities"}
app/api/routes/investigate.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Investigation API Routes - Build dossiers on companies and people
3
+ """
4
+ from fastapi import APIRouter, HTTPException, Depends
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List, Dict, Any
7
+ from sqlalchemy.orm import Session
8
+
9
+ from app.services.investigation import (
10
+ investigar_empresa,
11
+ investigar_pessoa,
12
+ dossier_to_dict
13
+ )
14
+ from app.services.brazil_apis import consultar_cnpj
15
+ from app.services.investigator_agent import investigator_agent
16
+ from app.api.deps import get_scoped_db
17
+
18
+
19
+ router = APIRouter(prefix="/investigate", tags=["Investigation"])
20
+
21
+
22
class InvestigateCompanyRequest(BaseModel):
    """Request to investigate a company"""
    # Company CNPJ; min_length=11 admits bare digits as well as formatted input.
    cnpj: str = Field(..., min_length=11, description="CNPJ da empresa")
25
+
26
+
27
class InvestigatePersonRequest(BaseModel):
    """Request to investigate a person"""
    # Person's name (required, at least 2 characters).
    nome: str = Field(..., min_length=2, description="Nome da pessoa")
    # CPF is optional — searches can run on name alone.
    cpf: Optional[str] = Field(None, description="CPF (opcional)")
31
+
32
+
33
class DossierResponse(BaseModel):
    """Dossier response (shape mirrors dossier_to_dict output)"""
    tipo: str                    # dossier kind (company vs person)
    alvo: str                    # investigation target (name)
    cnpj_cpf: Optional[str]      # document number, when known
    red_flags: List[str]         # warning signs found during collection
    score_risco: int             # computed risk score
    data_geracao: str            # generation timestamp
    fonte_dados: List[str]       # data sources consulted
    secoes: Dict[str, Any]       # free-form dossier sections
43
+
44
+
45
class CNPJResponse(BaseModel):
    """Quick CNPJ lookup response"""
    cnpj: str
    razao_social: str            # legal company name
    nome_fantasia: str           # trade name
    situacao: str                # registration status
    data_abertura: str           # opening date
    capital_social: float        # declared share capital
    endereco: str                # formatted single-line address
    telefone: str
    email: str
    atividade: str               # main CNAE code + description
    socios: List[Dict[str, Any]]  # partners/owners as returned upstream
58
+
59
+
60
@router.post("/company", response_model=DossierResponse)
async def investigate_company(request: InvestigateCompanyRequest):
    """
    Build a comprehensive dossier on a company.

    Collects cadastral CNPJ data, partners/owners, sanctions
    (CEIS, CNEP, CEPIM), news/media mentions and related entities,
    then returns a risk score and red flags.
    """
    try:
        dossier = await investigar_empresa(request.cnpj)
        return DossierResponse(**dossier_to_dict(dossier))
    except Exception as exc:
        # Surface any collection failure as a 500 with the original message.
        raise HTTPException(status_code=500, detail=str(exc))
80
+
81
+
82
@router.post("/person", response_model=DossierResponse)
async def investigate_person(request: InvestigatePersonRequest):
    """
    Build a dossier on a person.

    Note: due to LGPD (Brazilian data-protection law), personal data is
    limited; this mainly relies on web search for public information.
    """
    try:
        dossier = await investigar_pessoa(request.nome, request.cpf)
        return DossierResponse(**dossier_to_dict(dossier))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
96
+
97
+
98
@router.get("/cnpj/{cnpj}", response_model=CNPJResponse)
async def lookup_cnpj(cnpj: str):
    """Quick CNPJ lookup — returns basic company registration data."""
    try:
        data = await consultar_cnpj(cnpj)

        if not data:
            raise HTTPException(status_code=404, detail="CNPJ não encontrado")

        # Flatten the address and main activity into display strings.
        address = f"{data.logradouro}, {data.numero} - {data.bairro}, {data.cidade}/{data.uf}"
        activity = f"{data.cnae_principal} - {data.cnae_descricao}"

        return CNPJResponse(
            cnpj=data.cnpj,
            razao_social=data.razao_social,
            nome_fantasia=data.nome_fantasia,
            situacao=data.situacao,
            data_abertura=data.data_abertura,
            capital_social=data.capital_social,
            endereco=address,
            telefone=data.telefone,
            email=data.email,
            atividade=activity,
            socios=data.socios,
        )

    except HTTPException:
        # Let our own 404 pass through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
127
+
128
+
129
+ # ===========================================
130
+ # Autonomous Investigation Agent
131
+ # ===========================================
132
+
133
class AgentInvestigateRequest(BaseModel):
    """Request for autonomous investigation"""
    # Natural-language investigation mission for the agent.
    mission: str = Field(..., min_length=5, description="Missão de investigação em linguagem natural")
    # Hard cap on agent loop iterations (1..20, default 10).
    max_iterations: int = Field(10, ge=1, le=20, description="Máximo de iterações do agente")
137
+
138
+
139
class FindingResponse(BaseModel):
    """A finding from investigation"""
    title: str
    content: str
    source: str      # where the finding came from (tool/URL)
    timestamp: str   # when it was recorded (string as produced by the agent)
145
+
146
+
147
class AgentInvestigateResponse(BaseModel):
    """Response from autonomous investigation"""
    mission: str
    status: str                      # final agent status
    report: str                      # generated narrative report
    findings: List[FindingResponse]
    entities_discovered: int         # count only; entities themselves stay in the DB
    connections_mapped: int
    iterations: int                  # iterations actually used
    tools_used: List[str]
157
+
158
+
159
@router.post("/agent", response_model=AgentInvestigateResponse)
async def investigate_with_agent(
    request: AgentInvestigateRequest,
    db: Session = Depends(get_scoped_db)
):
    """
    Autonomous investigation with an AI agent.

    The agent searches NUMIDIUM for existing entities, queries CNPJ data
    for Brazilian companies, searches the web for news/public info,
    follows leads and connections, and generates a comprehensive report.

    Example missions:
    - "Investigue a rede de empresas de João Silva"
    - "Descubra os sócios da empresa CNPJ 11.222.333/0001-44"
    - "Pesquise sobre a empresa XYZ e suas conexões"
    """
    try:
        result = await investigator_agent.investigate(
            mission=request.mission,
            db=db,
            max_iterations=request.max_iterations
        )

        # Convert agent findings into the public response schema.
        findings = [
            FindingResponse(
                title=item.title,
                content=item.content,
                source=item.source,
                timestamp=item.timestamp,
            )
            for item in result.findings
        ]

        return AgentInvestigateResponse(
            mission=result.mission,
            status=result.status,
            report=result.report,
            findings=findings,
            entities_discovered=len(result.entities_discovered),
            connections_mapped=len(result.connections_mapped),
            iterations=result.iterations,
            tools_used=result.tools_used,
        )

    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
207
+
app/api/routes/projects.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Projects API Routes - Workspace management
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException
5
+ from pydantic import BaseModel
6
+ from typing import Optional, List
7
+ from datetime import datetime
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.api.deps import get_scoped_db
11
+ from app.models import Project, Entity, Relationship
12
+
13
+
14
+ router = APIRouter(prefix="/projects", tags=["Projects"])
15
+
16
+
17
class ProjectCreate(BaseModel):
    # Payload for creating/updating a project workspace.
    name: str
    description: Optional[str] = None
    color: str = "#00d4ff"   # default UI accent color
    icon: str = "folder"     # default UI icon name
22
+
23
+
24
class ProjectResponse(BaseModel):
    # Public representation of a project plus a derived entity count.
    id: str
    name: str
    description: Optional[str]
    color: str
    icon: str
    entity_count: int = 0    # computed per-request, not stored on the model
    created_at: datetime

    class Config:
        # Allow construction directly from ORM objects.
        from_attributes = True
35
+
36
+
37
@router.get("", response_model=List[ProjectResponse])
def list_projects(db: Session = Depends(get_scoped_db)):
    """List all projects, newest first, each with its entity count."""
    rows = db.query(Project).order_by(Project.created_at.desc()).all()

    # NOTE(review): one count query per project (N+1). Fine for small
    # project lists; switch to a grouped count if this grows.
    return [
        ProjectResponse(
            id=proj.id,
            name=proj.name,
            description=proj.description,
            color=proj.color,
            icon=proj.icon,
            entity_count=db.query(Entity).filter(Entity.project_id == proj.id).count(),
            created_at=proj.created_at,
        )
        for proj in rows
    ]
56
+
57
+
58
@router.post("", response_model=ProjectResponse)
def create_project(project: ProjectCreate, db: Session = Depends(get_scoped_db)):
    """Create a new project workspace."""
    record = Project(
        name=project.name,
        description=project.description,
        color=project.color,
        icon=project.icon,
    )
    db.add(record)
    db.commit()
    db.refresh(record)  # load DB-generated fields (id, created_at)

    return ProjectResponse(
        id=record.id,
        name=record.name,
        description=record.description,
        color=record.color,
        icon=record.icon,
        entity_count=0,  # a brand-new project has no entities yet
        created_at=record.created_at,
    )
80
+
81
+
82
@router.get("/{project_id}", response_model=ProjectResponse)
def get_project(project_id: str, db: Session = Depends(get_scoped_db)):
    """Fetch one project by id, including its entity count."""
    proj = db.query(Project).filter(Project.id == project_id).first()
    if proj is None:
        raise HTTPException(status_code=404, detail="Project not found")

    n_entities = db.query(Entity).filter(Entity.project_id == project_id).count()

    return ProjectResponse(
        id=proj.id,
        name=proj.name,
        description=proj.description,
        color=proj.color,
        icon=proj.icon,
        entity_count=n_entities,
        created_at=proj.created_at,
    )
101
+
102
+
103
@router.delete("/{project_id}")
def delete_project(project_id: str, db: Session = Depends(get_scoped_db)):
    """
    Delete a project, detaching (not deleting) its entities and relationships.

    Entities and relationships that referenced the project get
    project_id = NULL, so no investigative data is lost.
    """
    project = db.query(Project).filter(Project.id == project_id).first()

    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Capture the name before delete/commit: with default session settings
    # the ORM expires instances on commit, and refreshing a deleted row
    # fails — reading project.name afterwards is unreliable.
    project_name = project.name

    # Detach rather than cascade-delete the project's contents.
    db.query(Entity).filter(Entity.project_id == project_id).update({"project_id": None})
    db.query(Relationship).filter(Relationship.project_id == project_id).update({"project_id": None})

    db.delete(project)
    db.commit()

    return {"message": f"Project '{project_name}' deleted"}
119
+
120
+
121
@router.put("/{project_id}")
def update_project(project_id: str, project: ProjectCreate, db: Session = Depends(get_scoped_db)):
    """Overwrite a project's name, description, color and icon."""
    record = db.query(Project).filter(Project.id == project_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Project not found")

    record.name = project.name
    record.description = project.description
    record.color = project.color
    record.icon = project.icon
    db.commit()

    return {"message": "Project updated"}
app/api/routes/relationships.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Relationship CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from typing import List, Optional
7
+
8
+ from app.api.deps import get_scoped_db
9
+ from app.models import Relationship, Entity
10
+ from app.schemas import RelationshipCreate, RelationshipResponse
11
+
12
+ router = APIRouter(prefix="/relationships", tags=["Relationships"])
13
+
14
+
15
@router.get("/", response_model=List[RelationshipResponse])
def list_relationships(
    type: Optional[str] = None,
    source_id: Optional[str] = None,
    target_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    db: Session = Depends(get_scoped_db)
):
    """List relationships, optionally filtered by type, source and/or target."""
    conditions = []
    if type:
        conditions.append(Relationship.type == type)
    if source_id:
        conditions.append(Relationship.source_id == source_id)
    if target_id:
        conditions.append(Relationship.target_id == target_id)

    query = db.query(Relationship)
    if conditions:
        query = query.filter(*conditions)

    return query.limit(limit).all()
34
+
35
+
36
@router.get("/types")
def get_relationship_types(db: Session = Depends(get_scoped_db)):
    """Return every distinct relationship type present in the database."""
    rows = db.query(Relationship.type).distinct().all()
    # Each row is a one-element tuple; unpack to a flat list of strings.
    return [value for (value,) in rows]
41
+
42
+
43
@router.post("/", response_model=RelationshipResponse, status_code=201)
def create_relationship(
    rel: RelationshipCreate,
    db: Session = Depends(get_scoped_db)
):
    """Create a new relationship between two existing entities."""
    # Both endpoints must exist before linking them.
    if db.query(Entity).filter(Entity.id == rel.source_id).first() is None:
        raise HTTPException(status_code=404, detail="Source entity not found")
    if db.query(Entity).filter(Entity.id == rel.target_id).first() is None:
        raise HTTPException(status_code=404, detail="Target entity not found")

    record = Relationship(**rel.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)  # pick up DB-generated fields before returning
    return record
62
+
63
+
64
@router.delete("/{relationship_id}")
def delete_relationship(
    relationship_id: str,
    db: Session = Depends(get_scoped_db)
):
    """Delete a relationship by id."""
    record = db.query(Relationship).filter(Relationship.id == relationship_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Relationship not found")

    db.delete(record)
    db.commit()
    return {"message": "Relationship deleted"}
app/api/routes/research.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Research API Routes - Deep research with automatic entity extraction
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List
7
+ import traceback
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.api.deps import get_scoped_db
11
+ from app.services import lancer
12
+ from app.services.nlp import entity_extractor
13
+ from app.services.geocoding import geocode
14
+ from app.models.entity import Entity, Relationship
15
+
16
+
17
+ router = APIRouter(prefix="/research", tags=["Research"])
18
+
19
+
20
class ResearchRequest(BaseModel):
    """Request model for research"""
    query: str = Field(..., min_length=3, description="Research query")
    # One of: search (fast), deep (comprehensive), heavy (with scraping);
    # anything else falls back to plain search in the handler.
    mode: str = Field(default="search", description="Research mode: search, deep, heavy")
    max_results: int = Field(default=10, le=20)
    # When True, NER runs on the research text and persists entities/relationships.
    auto_extract: bool = Field(default=True, description="Auto-extract entities using NER")
26
+
27
+
28
class ResearchResponse(BaseModel):
    """Response model for research"""
    query: str
    answer: Optional[str]            # AI-synthesized answer, when available
    sources: List[dict]              # trimmed search results (title/url/content/score)
    citations: List[dict]
    extracted_entities: int          # newly created entities (existing matches not counted)
    extracted_relationships: int     # newly created relationships
    processing_time_ms: float
37
+
38
+
39
@router.post("", response_model=ResearchResponse)
async def research(request: ResearchRequest, db: Session = Depends(get_scoped_db)):
    """
    Perform AI-powered research using Lancer API and optionally extract entities.

    Modes:
    - search: Fast search with AI synthesis
    - deep: Multi-dimensional deep research (slower, more comprehensive)
    - heavy: Search with full content scraping

    When auto_extract is on, NER runs over the research text; new entities
    and relationships are persisted to the session database. NER failures
    are logged and swallowed so the research result is still returned.
    """
    try:
        # Call Lancer API based on mode (unknown modes fall back to plain search)
        if request.mode == "deep":
            result = await lancer.deep_research(request.query)
        elif request.mode == "heavy":
            result = await lancer.heavy_search(request.query, request.max_results)
        else:
            result = await lancer.search(request.query, request.max_results)

        extracted_entities = 0
        extracted_relationships = 0

        # Extract entities if enabled
        if request.auto_extract and result.raw_text:
            try:
                # Limit text to avoid token limits
                text_to_analyze = result.raw_text[:5000]
                ner_result = await entity_extractor.extract(text_to_analyze)

                # Maps extracted entity name -> ORM Entity (existing or new),
                # so relationship creation below can resolve endpoints.
                created_entities = {}

                # Create entities
                for entity in ner_result.entities:
                    # Check if exists
                    # NOTE(review): ilike substring match can merge distinct
                    # entities with similar names — confirm this is intended.
                    existing = db.query(Entity).filter(
                        Entity.name.ilike(f"%{entity.name}%")
                    ).first()

                    if existing:
                        created_entities[entity.name] = existing
                    else:
                        # Geocode if location
                        lat, lng = None, None
                        if entity.type == "location":
                            coords = await geocode(entity.name)
                            if coords:
                                lat, lng = coords

                        # Unknown NER types are coerced to "person".
                        new_entity = Entity(
                            name=entity.name,
                            type=entity.type if entity.type in ["person", "organization", "location", "event"] else "person",
                            description=entity.description or entity.role or "",
                            source="lancer_research",
                            latitude=lat,
                            longitude=lng,
                            properties={
                                "role": entity.role,
                                "aliases": entity.aliases,
                                "research_query": request.query
                            }
                        )
                        db.add(new_entity)
                        # Commit per entity so the generated id is available
                        # for relationship creation below.
                        db.commit()
                        db.refresh(new_entity)
                        created_entities[entity.name] = new_entity
                        extracted_entities += 1

                # Create relationships
                for rel in ner_result.relationships:
                    source_ent = created_entities.get(rel.source) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.source}%")).first()
                    target_ent = created_entities.get(rel.target) or db.query(Entity).filter(Entity.name.ilike(f"%{rel.target}%")).first()

                    # Skip self-loops and unresolved endpoints.
                    if source_ent and target_ent and source_ent.id != target_ent.id:
                        # De-duplicate on (source, target, type).
                        existing_rel = db.query(Relationship).filter(
                            Relationship.source_id == source_ent.id,
                            Relationship.target_id == target_ent.id,
                            Relationship.type == rel.relationship_type
                        ).first()

                        if not existing_rel:
                            new_rel = Relationship(
                                source_id=source_ent.id,
                                target_id=target_ent.id,
                                type=rel.relationship_type,
                                properties={"context": rel.context, "research_query": request.query}
                            )
                            db.add(new_rel)
                            extracted_relationships += 1

                db.commit()

            except Exception as e:
                # Best-effort extraction: log and continue with the research result.
                print(f"NER extraction error: {e}")
                traceback.print_exc()

        # Prepare sources for response (top 10, content trimmed to 300 chars)
        sources = [
            {
                "title": r.title,
                "url": r.url,
                "content": r.content[:300] if r.content else "",
                "score": r.score
            }
            for r in result.results[:10]
        ]

        return ResearchResponse(
            query=result.query,
            answer=result.answer,
            sources=sources,
            citations=result.citations,
            extracted_entities=extracted_entities,
            extracted_relationships=extracted_relationships,
            processing_time_ms=result.processing_time_ms
        )

    except Exception as e:
        print(f"Research error: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
app/api/routes/search.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Search and Analytics Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_, func
7
+ from typing import Optional
8
+
9
+ from app.api.deps import get_scoped_db
10
+ from app.models import Entity, Relationship, Event, Document
11
+ from app.schemas import SearchResult, SystemStats
12
+
13
+ router = APIRouter(prefix="/search", tags=["Search"])
14
+
15
+
16
@router.get("", response_model=SearchResult)
def global_search(
    q: str = Query(..., min_length=2, description="Search query"),
    types: Optional[str] = Query(None, description="Entity types (comma-separated)"),
    limit: int = Query(default=20, le=100),
    db: Session = Depends(get_scoped_db)
):
    """
    Global case-insensitive substring search across entities, events
    and documents. Each category is capped at `limit` results.
    """
    pattern = f"%{q}%"

    entity_query = db.query(Entity).filter(
        or_(Entity.name.ilike(pattern), Entity.description.ilike(pattern))
    )
    if types:
        # Optional restriction to a comma-separated list of entity types.
        entity_query = entity_query.filter(Entity.type.in_(types.split(",")))

    event_query = db.query(Event).filter(
        or_(Event.title.ilike(pattern), Event.description.ilike(pattern))
    )

    document_query = db.query(Document).filter(
        or_(Document.title.ilike(pattern), Document.content.ilike(pattern))
    )

    return SearchResult(
        entities=entity_query.limit(limit).all(),
        events=event_query.limit(limit).all(),
        documents=document_query.limit(limit).all(),
    )
+ )
58
+
59
+
60
@router.get("/stats", response_model=SystemStats)
def get_system_stats(db: Session = Depends(get_scoped_db)):
    """Return overall system statistics: totals, type breakdown, recent activity."""
    # Entity count per type via a single grouped query.
    entities_by_type = dict(
        db.query(Entity.type, func.count(Entity.id)).group_by(Entity.type).all()
    )

    # Ten most recently created entities, serialized for the dashboard.
    latest = db.query(Entity).order_by(Entity.created_at.desc()).limit(10).all()
    recent_activity = [
        {
            "id": ent.id,
            "type": ent.type,
            "name": ent.name,
            "created_at": ent.created_at.isoformat(),
        }
        for ent in latest
    ]

    return SystemStats(
        total_entities=db.query(Entity).count(),
        total_relationships=db.query(Relationship).count(),
        total_events=db.query(Event).count(),
        total_documents=db.query(Document).count(),
        entities_by_type=entities_by_type,
        recent_activity=recent_activity,
    )
+ )
96
+
97
+
98
@router.get("/geo")
def get_geo_data(
    entity_type: Optional[str] = None,
    db: Session = Depends(get_scoped_db)
):
    """Return all geolocated entities (both coordinates set) as map markers."""
    query = db.query(Entity).filter(
        Entity.latitude.isnot(None),
        Entity.longitude.isnot(None)
    )
    if entity_type:
        query = query.filter(Entity.type == entity_type)

    markers = []
    for ent in query.all():
        markers.append({
            "id": ent.id,
            "type": ent.type,
            "name": ent.name,
            "lat": ent.latitude,
            "lng": ent.longitude,
            "properties": ent.properties,
        })
    return markers
app/api/routes/session.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Session management routes
3
+ """
4
+ from fastapi import APIRouter, Header, Cookie, Response, Request
5
+ from typing import Optional
6
+ import uuid
7
+
8
+ from app.core.database import create_new_session_id
9
+ from app.config import settings
10
+
11
+ router = APIRouter(prefix="/session", tags=["Session"])
12
+
13
+
14
@router.post("/create")
def create_session(response: Response, request: Request):
    """Create a new session, set the session cookie, and return the id."""
    session_id = create_new_session_id()

    secure = settings.cookie_secure
    samesite = settings.cookie_samesite
    # Behind a proxy the original scheme arrives in X-Forwarded-Proto.
    # A Secure/SameSite=None cookie would be dropped over plain HTTP,
    # so downgrade to an insecure Lax cookie in that case.
    proto = request.headers.get("x-forwarded-proto", request.url.scheme)
    if secure and proto != "https":
        secure = False
        samesite = "lax"

    response.set_cookie(
        key="numidium_session",
        value=session_id,
        max_age=60 * 60 * 24 * 365,  # one year
        httponly=True,
        samesite=samesite,
        secure=secure,
    )
    return {"session_id": session_id}
33
+
34
+
35
@router.get("/current")
def get_current_session(
    numidium_session: Optional[str] = Cookie(None),
    x_session_id: Optional[str] = Header(None)
):
    """Return the active session id; the X-Session-Id header wins over the cookie."""
    session_id = x_session_id or numidium_session
    if session_id:
        return {"session_id": session_id}
    return {"session_id": None, "message": "No session. Call POST /session/create"}
app/api/routes/timeline.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Timeline API Routes - Temporal view of entities and relationships
3
+ """
4
+ from fastapi import APIRouter, Depends, Query
5
+ from pydantic import BaseModel
6
+ from typing import Optional, List, Dict, Any
7
+ from datetime import datetime, timedelta
8
+ from collections import defaultdict
9
+ from sqlalchemy.orm import Session
10
+
11
+ from app.api.deps import get_scoped_db
12
+ from app.models.entity import Entity, Relationship
13
+
14
+
15
+ router = APIRouter(prefix="/timeline", tags=["Timeline"])
16
+
17
+
18
class TimelineEvent(BaseModel):
    # One item on the timeline: either a created entity or a relationship.
    id: str
    type: str  # "entity" or "relationship"
    entity_type: Optional[str] = None  # set only when type == "entity"
    name: str
    description: Optional[str] = None  # truncated description or relationship type
    date: str  # ISO-8601 timestamp string
    icon: str  # emoji used by the frontend
26
+
27
+
28
class TimelineGroup(BaseModel):
    # One day's worth of timeline events.
    date: str   # YYYY-MM-DD key
    label: str  # human-readable date label
    events: List[TimelineEvent]
32
+
33
+
34
class TimelineResponse(BaseModel):
    # Date-grouped timeline, newest group first.
    groups: List[TimelineGroup]
    total_events: int
38
+
39
@router.get("", response_model=TimelineResponse)
async def get_timeline(
    days: int = Query(default=30, ge=1, le=365),
    entity_type: Optional[str] = None,
    limit: int = Query(default=100, ge=1, le=500),
    db: Session = Depends(get_scoped_db)
):
    """
    Get timeline of recent entities and relationships, grouped by date.

    Args:
        days: Look-back window, filtered on created_at.
        entity_type: Optional filter on Entity.type.
        limit: Max entities returned (relationships capped at limit // 2).
    """
    # Calculate date range.
    # NOTE(review): naive local datetimes — confirm stored created_at uses
    # the same timezone convention before comparing.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    events = []

    # Get entities created inside the window
    query = db.query(Entity).filter(
        Entity.created_at >= start_date
    )

    if entity_type:
        query = query.filter(Entity.type == entity_type)

    entities = query.order_by(Entity.created_at.desc()).limit(limit).all()

    icon_map = {
        "person": "👤",
        "organization": "🏢",
        "location": "📍",
        "event": "📅",
        "concept": "💡",
        "product": "📦"
    }

    for e in entities:
        # Prefer the domain event_date over the row's created_at
        date = e.event_date if e.event_date else e.created_at
        events.append(TimelineEvent(
            id=e.id,
            type="entity",
            entity_type=e.type,
            name=e.name,
            description=e.description[:100] if e.description else None,
            date=date.isoformat() if date else datetime.now().isoformat(),
            icon=icon_map.get(e.type, "📄")
        ))

    # Get relationships (half the entity budget)
    relationships = db.query(Relationship).filter(
        Relationship.created_at >= start_date
    ).order_by(Relationship.created_at.desc()).limit(limit // 2).all()

    # NOTE(review): two lookups per relationship (N+1); acceptable at this
    # limit, consider eager loading if it grows.
    for r in relationships:
        source = db.query(Entity).filter(Entity.id == r.source_id).first()
        target = db.query(Entity).filter(Entity.id == r.target_id).first()

        if source and target:
            # Prefer event_date over created_at
            date = r.event_date if r.event_date else r.created_at
            events.append(TimelineEvent(
                id=r.id,
                type="relationship",
                name=f"{source.name} → {target.name}",
                description=r.type,
                date=date.isoformat() if date else datetime.now().isoformat(),
                icon="🔗"
            ))

    # Sort all events newest-first (ISO strings sort chronologically)
    events.sort(key=lambda x: x.date, reverse=True)

    # Group by calendar day
    groups_dict = defaultdict(list)
    for event in events:
        date_key = event.date[:10]  # YYYY-MM-DD
        groups_dict[date_key].append(event)

    # Format groups, newest day first
    groups = []
    for date_key in sorted(groups_dict.keys(), reverse=True):
        try:
            dt = datetime.fromisoformat(date_key)
            label = dt.strftime("%d %b %Y")
        except ValueError:
            # Only date parsing can fail here; fall back to the raw key.
            # (Was a bare `except:`, which would also swallow e.g. KeyboardInterrupt.)
            label = date_key

        groups.append(TimelineGroup(
            date=date_key,
            label=label,
            events=groups_dict[date_key]
        ))

    return TimelineResponse(
        groups=groups,
        total_events=len(events)
    )
137
+
138
+
139
@router.get("/stats")
async def get_timeline_stats(db: Session = Depends(get_scoped_db)):
    """Get statistics for timeline visualization."""
    # Per-type entity counts over a fixed, known set of types.
    entity_counts = {
        kind: db.query(Entity).filter(Entity.type == kind).count()
        for kind in ("person", "organization", "location", "event", "concept")
    }

    relationship_count = db.query(Relationship).count()

    # Activity over the last 7 days.
    cutoff = datetime.now() - timedelta(days=7)
    new_entities = db.query(Entity).filter(Entity.created_at >= cutoff).count()
    new_relationships = db.query(Relationship).filter(Relationship.created_at >= cutoff).count()

    return {
        "entity_counts": entity_counts,
        "relationship_count": relationship_count,
        "recent_activity": {
            "entities": new_entities,
            "relationships": new_relationships,
            "total": new_entities + new_relationships
        }
    }
app/config.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Numidium Backend Configuration
3
+ """
4
+ from pydantic_settings import BaseSettings
5
+ from functools import lru_cache
6
+ import os
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings, loaded from environment variables / .env file."""

    # App Info
    app_name: str = "Numidium"
    app_version: str = "0.1.0"
    debug: bool = False

    # Database
    database_url: str = "sqlite:///./data/numidium.db"

    # APIs (optional - can be configured later)
    newsapi_key: str = ""

    # Cerebras API for LLM-based entity extraction
    cerebras_api_key: str = ""

    # AetherMap API for semantic search and NER
    aethermap_url: str = "https://madras1-aethermap.hf.space"

    # CORS
    # NOTE(review): "*" allows any origin — tighten for production.
    cors_origins: list[str] = ["*"]

    # Session cookie (Secure + SameSite=None supports cross-site embedding;
    # session.py downgrades these when serving over plain HTTP)
    cookie_secure: bool = True
    cookie_samesite: str = "none"

    class Config:
        # Values are read from a UTF-8 .env file when present.
        env_file = ".env"
        env_file_encoding = "utf-8"
39
+
40
+
41
@lru_cache()
def get_settings() -> Settings:
    """Get cached settings (constructed once, reused for the process lifetime)."""
    return Settings()
45
+
46
+
47
+ settings = get_settings()
app/core/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Core module
2
+ from app.core.database import get_db, init_db, Base
app/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (270 Bytes). View file
 
app/core/__pycache__/database.cpython-311.pyc ADDED
Binary file (5.58 kB). View file
 
app/core/database.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database configuration and session management
3
+ Per-session databases - each user session gets its own SQLite file
4
+ """
5
+ from sqlalchemy import create_engine, text
6
+ from sqlalchemy.ext.declarative import declarative_base
7
+ from sqlalchemy.orm import sessionmaker, Session
8
+ from typing import Optional
9
+ import os
10
+ import uuid
11
+
12
# Ensure data directory exists (one SQLite file per user session lives here)
os.makedirs("data/sessions", exist_ok=True)

# Base class for models — all ORM models in app.models derive from this.
Base = declarative_base()

# Cache for session engines / sessionmakers, keyed by session id.
# NOTE(review): entries are never evicted, so long-running processes grow
# one engine per session ever seen — confirm this is acceptable.
_session_engines = {}
_session_makers = {}
21
+
22
+
23
def get_session_engine(session_id: str):
    """Return the cached engine for *session_id*, creating it on first use.

    First use also creates the per-session SQLite file, builds all mapped
    tables, and applies the ad-hoc column migrations.
    """
    cached = _session_engines.get(session_id)
    if cached is not None:
        return cached

    url = f"sqlite:///./data/sessions/{session_id}.db"
    fresh = create_engine(url, connect_args={"check_same_thread": False})
    _session_engines[session_id] = fresh
    _session_makers[session_id] = sessionmaker(autocommit=False, autoflush=False, bind=fresh)

    # First time we see this session: make sure its schema is up to date.
    Base.metadata.create_all(bind=fresh)
    _run_migrations(fresh)
    return fresh
39
+
40
+
41
def get_session_db(session_id: str):
    """Yield a DB session bound to this user session's private database."""
    get_session_engine(session_id)  # make sure the sessionmaker is registered
    session = _session_makers[session_id]()
    try:
        yield session
    finally:
        session.close()
50
+
51
+
52
def get_db_for_session(session_id: str) -> Session:
    """Open and return a Session for *session_id*; the caller must close it."""
    get_session_engine(session_id)  # ensure engine + sessionmaker exist
    factory = _session_makers[session_id]
    return factory()
57
+
58
+
59
# Legacy - default database for backwards compatibility.
# The import sits mid-module — presumably to avoid a circular import with
# app.config at module load time; TODO confirm before moving it to the top.
from app.config import settings
engine = create_engine(
    settings.database_url,
    # check_same_thread=False lets FastAPI's threadpool reuse SQLite
    # connections across threads.
    connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
66
+
67
+
68
def get_default_session() -> Session:
    """Open a fresh Session against the legacy/default database."""
    session = SessionLocal()
    return session
71
+
72
+
73
def get_db():
    """Legacy FastAPI dependency: yield a default-DB session, then close it."""
    session = get_default_session()
    try:
        yield session
    finally:
        # Always release the connection, even if the request handler raised.
        session.close()
80
+
81
+
82
# Ad-hoc DDL statements retrofitting columns added after the initial schema.
# SQLite has no "ADD COLUMN IF NOT EXISTS", so each statement is attempted
# and the "duplicate column" error is swallowed on databases that already
# have the column.
_MIGRATION_STATEMENTS = (
    "ALTER TABLE entities ADD COLUMN event_date DATETIME",
    "ALTER TABLE relationships ADD COLUMN event_date DATETIME",
    "ALTER TABLE entities ADD COLUMN project_id VARCHAR(36)",
    "ALTER TABLE relationships ADD COLUMN project_id VARCHAR(36)",
)


def _run_migrations(eng):
    """Apply the additive column migrations to *eng*.

    Each ALTER is attempted independently; failures (typically "duplicate
    column name") are ignored so the function is idempotent across restarts.
    """
    with eng.connect() as conn:
        for statement in _MIGRATION_STATEMENTS:
            try:
                conn.execute(text(statement))
                conn.commit()
            except Exception:
                # Column already exists (or table absent); safe to skip.
                pass
105
+
106
+
107
def init_db():
    """Initialize default database tables.

    Creates every table known to Base.metadata on the legacy/default engine,
    then applies the ad-hoc column migrations. Called once at app startup.
    """
    Base.metadata.create_all(bind=engine)
    _run_migrations(engine)
111
+
112
+
113
def create_new_session_id() -> str:
    """Mint a random, globally unique identifier for a new user session."""
    return f"{uuid.uuid4()}"
app/main.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Numidium Backend - Main Application
3
+ Plataforma de Inteligência e Análise de Dados
4
+ """
5
+ from fastapi import FastAPI
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from contextlib import asynccontextmanager
8
+
9
+ from app.config import settings
10
+ from app.core.database import init_db
11
+ from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate, dados_publicos, timeline, session, aethermap
12
+
13
+
14
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup and shutdown events.

    Runs once per application lifetime: initializes the default database
    (tables + migrations) before the app starts serving, then logs on
    shutdown after the `yield`.
    """
    # Startup: Initialize database
    init_db()
    print("🚀 Numidium Backend started!")
    print(f"📊 Database: {settings.database_url}")
    yield
    # Shutdown
    print("👋 Numidium Backend shutting down...")
24
+
25
+
26
# Create FastAPI app — the markdown description below is rendered on /docs.
app = FastAPI(
    title="Numidium API",
    description="""
    ## 🔮 Sistema de Inteligência e Análise de Dados

    Backend do VANTAGE - Uma plataforma para:
    - 📥 Ingestão de dados de múltiplas fontes (Wikipedia, News, Manual)
    - 🔗 Mapeamento de conexões entre entidades
    - 🗺️ Visualização geográfica
    - 📊 Análise de grafos e relacionamentos
    - 🔍 Busca global
    """,
    version=settings.app_version,
    lifespan=lifespan
)

# CORS middleware
# NOTE(review): settings.cors_origins defaults to ["*"]; combined with
# allow_credentials=True, browsers reject wildcard origins per the CORS
# spec — verify Starlette's handling or configure explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — every route group is mounted under the /api/v1 prefix
# (aethermap additionally nests under /api/v1/aethermap).
app.include_router(entities.router, prefix="/api/v1")
app.include_router(relationships.router, prefix="/api/v1")
app.include_router(events.router, prefix="/api/v1")
app.include_router(search.router, prefix="/api/v1")
app.include_router(ingest.router, prefix="/api/v1")
app.include_router(analyze.router, prefix="/api/v1")
app.include_router(graph.router, prefix="/api/v1")
app.include_router(research.router, prefix="/api/v1")
app.include_router(chat.router, prefix="/api/v1")
app.include_router(investigate.router, prefix="/api/v1")
app.include_router(dados_publicos.router, prefix="/api/v1")
app.include_router(timeline.router, prefix="/api/v1")
app.include_router(session.router, prefix="/api/v1")
app.include_router(aethermap.router, prefix="/api/v1/aethermap", tags=["aethermap"])
67
+
68
+
69
@app.get("/")
def root():
    """Root endpoint - API info"""
    payload = dict(
        name="Numidium",
        version=settings.app_version,
        status="online",
        docs="/docs",
        description="Sistema de Inteligência e Análise de Dados",
    )
    return payload
79
+
80
+
81
@app.get("/health")
def health_check():
    """Health check endpoint for HF Spaces"""
    return dict(status="healthy")
85
+
86
+
87
@app.get("/api/v1")
def api_info():
    """API v1 info"""
    # NOTE(review): the hardcoded "1.0.0" disagrees with settings.app_version
    # ("0.1.0") reported by the root endpoint, and this endpoint map omits
    # most of the mounted routers (analyze, graph, research, chat,
    # investigate, dados_publicos, timeline, session, aethermap) — confirm
    # the intended contents and keep them in sync with the include_router
    # calls above.
    return {
        "version": "1.0.0",
        "endpoints": {
            "entities": "/api/v1/entities",
            "relationships": "/api/v1/relationships",
            "events": "/api/v1/events",
            "search": "/api/v1/search",
            "ingest": "/api/v1/ingest"
        }
    }
app/models/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Models module
2
+ from app.models.entity import Entity, Relationship, Event, Document
3
+ from app.models.project import Project
app/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (367 Bytes). View file
 
app/models/__pycache__/entity.cpython-311.pyc ADDED
Binary file (6.45 kB). View file
 
app/models/__pycache__/project.cpython-311.pyc ADDED
Binary file (1.76 kB). View file
 
app/models/entity.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLAlchemy Models for Numidium
3
+ """
4
+ from sqlalchemy import Column, String, Text, DateTime, Float, JSON, ForeignKey, Table
5
+ from sqlalchemy.orm import relationship
6
+ from datetime import datetime
7
+ import uuid
8
+
9
+ from app.core.database import Base
10
+
11
+
12
def generate_uuid():
    """Produce a random UUID4 string, used as the primary-key default."""
    return f"{uuid.uuid4()}"
14
+
15
+
16
class Entity(Base):
    """
    Entity - anything trackable in the system.
    Can be: person, organization, location, vehicle, event, document, etc.
    """
    __tablename__ = "entities"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True)
    type = Column(String(50), nullable=False, index=True)  # person, organization, location, etc
    name = Column(String(255), nullable=False, index=True)
    description = Column(Text, nullable=True)
    properties = Column(JSON, default=dict)  # flexible, schema-less extra data

    # Geolocation (optional)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Historical date of the event/entity (when it happened, not when it was added)
    event_date = Column(DateTime, nullable=True)

    # Data provenance
    source = Column(String(100), nullable=True)  # wikipedia, newsapi, manual, etc
    source_url = Column(Text, nullable=True)

    # Timestamps
    # NOTE(review): datetime.utcnow stores naive UTC values and is deprecated
    # in Python 3.12; switching to timezone-aware values would change stored
    # data and comparisons elsewhere — confirm before migrating.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # ORM relationships: the two directions of the Relationship edge table.
    outgoing_relationships = relationship(
        "Relationship",
        foreign_keys="Relationship.source_id",
        back_populates="source_entity"
    )
    incoming_relationships = relationship(
        "Relationship",
        foreign_keys="Relationship.target_id",
        back_populates="target_entity"
    )
56
+
57
+
58
class Relationship(Base):
    """
    Relationship between two entities (a directed edge).
    Examples: works_for, knows, owns, located_at, participated_in
    """
    __tablename__ = "relationships"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    project_id = Column(String(36), ForeignKey("projects.id"), nullable=True, index=True)
    source_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
    target_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
    type = Column(String(50), nullable=False, index=True)  # works_for, knows, owns, etc
    properties = Column(JSON, default=dict)
    confidence = Column(Float, default=1.0)  # 0-1, how certain we are of this connection

    # Historical date of the relationship (when it happened)
    event_date = Column(DateTime, nullable=True)

    # Provenance
    source = Column(String(100), nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    # ORM links back to the Entity endpoints of this edge
    source_entity = relationship("Entity", foreign_keys=[source_id], back_populates="outgoing_relationships")
    target_entity = relationship("Entity", foreign_keys=[target_id], back_populates="incoming_relationships")
85
+
86
+
87
class Event(Base):
    """
    Event - something that happened involving entities.
    """
    __tablename__ = "events"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    type = Column(String(50), nullable=False, index=True)
    title = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # When it happened
    event_date = Column(DateTime, nullable=True)

    # Where it happened
    location_name = Column(String(255), nullable=True)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Entities involved (stored as a JSON array of entity IDs — not real
    # foreign keys, so the DB does not enforce referential integrity here)
    entity_ids = Column(JSON, default=list)

    # Provenance
    source = Column(String(100), nullable=True)
    source_url = Column(Text, nullable=True)

    # Metadata
    properties = Column(JSON, default=dict)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
118
+
119
+
120
class Document(Base):
    """
    Document - text/file for analysis.
    """
    __tablename__ = "documents"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    title = Column(String(255), nullable=False)
    content = Column(Text, nullable=True)
    summary = Column(Text, nullable=True)  # AI-generated summary

    # Document type
    doc_type = Column(String(50), default="text")  # text, news, report, etc

    # Mentioned entities (extracted via NLP; stored as a JSON list)
    mentioned_entities = Column(JSON, default=list)

    # Provenance
    source = Column(String(100), nullable=True)
    source_url = Column(Text, nullable=True)

    # Timestamps
    published_at = Column(DateTime, nullable=True)  # original publication date, if known
    created_at = Column(DateTime, default=datetime.utcnow)
app/models/project.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Project Model - Workspaces for organizing investigations
3
+ """
4
+ from sqlalchemy import Column, String, Text, DateTime
5
+ from datetime import datetime
6
+ import uuid
7
+
8
+ from app.core.database import Base
9
+
10
+
11
def generate_uuid():
    """Produce a random UUID4 string, used as the primary-key default."""
    return f"{uuid.uuid4()}"
13
+
14
+
15
class Project(Base):
    """
    Project/Workspace - groups entities and relationships per investigation.
    """
    __tablename__ = "projects"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)
    color = Column(String(7), default="#00d4ff")  # Hex color for UI
    icon = Column(String(50), default="folder")  # Icon name

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
app/schemas/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Schemas module
2
+ from app.schemas.schemas import (
3
+ EntityCreate, EntityUpdate, EntityResponse,
4
+ RelationshipCreate, RelationshipResponse,
5
+ EventCreate, EventResponse,
6
+ DocumentCreate, DocumentResponse,
7
+ GraphData, GraphNode, GraphEdge,
8
+ SearchQuery, SearchResult,
9
+ SystemStats
10
+ )
app/schemas/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (725 Bytes). View file
 
app/schemas/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (9.17 kB). View file
 
app/schemas/schemas.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic Schemas for API validation
3
+ """
4
+ from pydantic import BaseModel, Field
5
+ from typing import Optional, List, Any
6
+ from datetime import datetime
7
+
8
+
9
+ # ========== Entity Schemas ==========
10
+
11
class EntityBase(BaseModel):
    # Fields shared by entity create and read schemas.
    type: str = Field(..., description="Tipo da entidade: person, organization, location, etc")
    name: str = Field(..., description="Nome da entidade")
    description: Optional[str] = None
    properties: dict = Field(default_factory=dict)
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    source: Optional[str] = None
    source_url: Optional[str] = None


class EntityCreate(EntityBase):
    # Creation payload is identical to the base fields.
    pass


class EntityUpdate(BaseModel):
    # Partial update: every field optional; omitted/None fields are
    # presumably left unchanged by the route — verify against the handler.
    type: Optional[str] = None
    name: Optional[str] = None
    description: Optional[str] = None
    properties: Optional[dict] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None


class EntityResponse(EntityBase):
    # Entity as returned by the API: base fields plus server-assigned ones.
    id: str
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
42
+
43
+
44
+ # ========== Relationship Schemas ==========
45
+
46
class RelationshipBase(BaseModel):
    # Directed edge between two existing entity ids.
    source_id: str
    target_id: str
    type: str = Field(..., description="Tipo: works_for, knows, owns, located_at, etc")
    properties: dict = Field(default_factory=dict)
    confidence: float = Field(default=1.0, ge=0, le=1)  # certainty, validated to [0, 1]
    source: Optional[str] = None


class RelationshipCreate(RelationshipBase):
    # Creation payload is identical to the base fields.
    pass


class RelationshipResponse(RelationshipBase):
    # Adds server-assigned fields to the base schema.
    id: str
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
65
+
66
+
67
+ # ========== Event Schemas ==========
68
+
69
class EventBase(BaseModel):
    # Core event payload: what/when/where plus involved entity ids.
    type: str
    title: str
    description: Optional[str] = None
    event_date: Optional[datetime] = None
    location_name: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    entity_ids: List[str] = Field(default_factory=list)
    source: Optional[str] = None
    source_url: Optional[str] = None
    properties: dict = Field(default_factory=dict)


class EventCreate(EventBase):
    # Creation payload is identical to the base fields.
    pass


class EventResponse(EventBase):
    # Adds server-assigned fields to the base schema.
    id: str
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
93
+
94
+
95
+ # ========== Document Schemas ==========
96
+
97
class DocumentBase(BaseModel):
    # Core document payload shared by create/read schemas.
    title: str
    content: Optional[str] = None
    doc_type: str = "text"  # text, news, report, etc
    source: Optional[str] = None
    source_url: Optional[str] = None
    published_at: Optional[datetime] = None


class DocumentCreate(DocumentBase):
    # Creation payload is identical to the base fields.
    pass


class DocumentResponse(DocumentBase):
    # Adds server-assigned / derived fields to the base schema.
    id: str
    summary: Optional[str] = None  # AI-generated summary, if any
    # NOTE(review): a mutable default is safe here (pydantic copies defaults
    # per instance), but Field(default_factory=list) would match the style
    # used elsewhere in this module.
    mentioned_entities: List[str] = []
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
118
+
119
+
120
+ # ========== Graph Schemas ==========
121
+
122
class GraphNode(BaseModel):
    # A single vertex in the graph visualization payload.
    id: str
    type: str
    name: str
    # default_factory keeps this consistent with the other schemas in this
    # module; pydantic copies mutable defaults per instance, so behavior is
    # unchanged.
    properties: dict = Field(default_factory=dict)


class GraphEdge(BaseModel):
    # A directed edge between two GraphNode ids.
    source: str
    target: str
    type: str
    confidence: float = 1.0


class GraphData(BaseModel):
    # Complete node/edge payload consumed by the graph view.
    nodes: List[GraphNode]
    edges: List[GraphEdge]
139
+
140
+
141
+ # ========== Search Schemas ==========
142
+
143
class SearchQuery(BaseModel):
    # Global search request body.
    query: str
    entity_types: Optional[List[str]] = None  # None presumably means "all types" — verify against handler
    limit: int = Field(default=20, le=100)  # capped at 100 results


class SearchResult(BaseModel):
    # Combined hits across the three searchable collections.
    entities: List[EntityResponse]
    events: List[EventResponse]
    documents: List[DocumentResponse]
153
+
154
+
155
+ # ========== Stats Schemas ==========
156
+
157
class SystemStats(BaseModel):
    # Aggregate counters for dashboards / stats endpoints.
    total_entities: int
    total_relationships: int
    total_events: int
    total_documents: int
    entities_by_type: dict  # presumably keyed by entity type name — confirm against producer
    recent_activity: List[dict]
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Services module
app/services/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (167 Bytes). View file