Madras1 committed on
Commit
d2fb3fb
·
verified ·
1 Parent(s): 8f5a30e

Upload 79 files

Browse files
app/api/routes/aethermap.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AetherMap Routes - Document Mapping & Semantic Search
3
+ Integrates with AetherMap API for document clustering, NER, and semantic search.
4
+ """
5
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends
6
+ from pydantic import BaseModel, Field
7
+ from typing import Optional, List, Dict, Any
8
+ from sqlalchemy.orm import Session
9
+ import io
10
+
11
+ from app.api.deps import get_scoped_db
12
+ from app.services.aethermap_client import aethermap, ProcessResult, SearchResult, EntityGraphResult
13
+
14
+
15
+ router = APIRouter()
16
+
17
+
18
+ # ============================================================================
19
+ # Request/Response Models
20
+ # ============================================================================
21
+
22
class IndexDocumentsRequest(BaseModel):
    """Request body for POST /index: raw texts to embed, cluster and index."""
    # One string per document; must be non-empty (handler rejects [] with 400).
    documents: List[str] = Field(..., description="Lista de textos para indexar")
    # True -> PCA projection (fast); False -> UMAP (slower, more precise).
    fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)")
26
+
27
+
28
class IndexEntitiesRequest(BaseModel):
    """Request body for POST /index-entities: select NUMIDIUM entities to index."""
    # Optional whitelist of Entity.type values; None means all types.
    entity_types: Optional[List[str]] = Field(None, description="Filtrar por tipos de entidade")
    # Maximum number of entities pulled from the database.
    limit: int = Field(500, description="Limite de entidades")
    # New, backward-compatible field: the /index-entities handler already
    # probed `hasattr(request, 'fast_mode')`, but since the model never
    # declared the field that probe was always False and clients could not
    # choose the mode. Default True preserves the previous behavior.
    fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)")
32
+
33
+
34
class SemanticSearchRequest(BaseModel):
    """Request body for POST /search: semantic query over indexed documents."""
    # Free-text query answered by the hybrid RAG pipeline.
    query: str = Field(..., description="Termo de busca")
    # True -> faster, less precise search path.
    turbo_mode: bool = Field(True, description="Modo turbo (mais rápido)")
38
+
39
+
40
class IndexResponse(BaseModel):
    """Summary of an indexing run returned by AetherMap."""
    # Job identifier to reuse in follow-up search/NER calls.
    job_id: str
    num_documents: int
    num_clusters: int
    # Documents HDBSCAN left unclustered (noise points).
    num_noise: int
    # Pydantic deep-copies field defaults per instance, so {} is safe here.
    metrics: Dict[str, Any] = {}
    cluster_analysis: Dict[str, Any] = {}
48
+
49
+
50
class SearchResponse(BaseModel):
    """Response for POST /search: LLM summary plus supporting hits."""
    # RAG answer generated by the LLM.
    summary: str
    # Raw result entries as returned by AetherMap (schema owned upstream).
    results: List[Dict[str, Any]] = []
54
+
55
+
56
class EntityGraphResponse(BaseModel):
    """Response for POST /entities: NER graph summary."""
    # Most-connected entities, as dicts from the upstream API.
    hubs: List[Dict[str, Any]] = []
    insights: Dict[str, Any] = {}
    node_count: int = 0
    edge_count: int = 0
62
+
63
+
64
class StatusResponse(BaseModel):
    """Response for GET /status: AetherMap connection state."""
    connected: bool
    # Last processing job id cached on the client, if any.
    job_id: Optional[str] = None
    # Not tracked yet; always 0 (see TODO in the handler).
    documents_indexed: int = 0
69
+
70
+
71
+ # ============================================================================
72
+ # Endpoints
73
+ # ============================================================================
74
+
75
@router.get("/status", response_model=StatusResponse)
async def get_status():
    """Report the AetherMap connection status and the active job, if any."""
    # documents_indexed is not tracked yet (TODO), so it is reported as 0.
    status = StatusResponse(
        connected=True,
        job_id=aethermap.current_job_id,
        documents_indexed=0,
    )
    return status
85
+
86
+
87
@router.post("/index", response_model=IndexResponse)
async def index_documents(request: IndexDocumentsRequest):
    """
    Index a list of documents for semantic search.

    The documents will be:
    - Embedded using sentence transformers
    - Clustered using HDBSCAN
    - Indexed in FAISS + BM25 for hybrid search

    Raises:
        HTTPException 400: empty document list.
        HTTPException 500: AetherMap processing failure.
    """
    try:
        if not request.documents:
            raise HTTPException(status_code=400, detail="Nenhum documento fornecido")

        result = await aethermap.process_documents(
            texts=request.documents,
            fast_mode=request.fast_mode
        )

        return IndexResponse(
            job_id=result.job_id,
            num_documents=result.num_documents,
            num_clusters=result.num_clusters,
            num_noise=result.num_noise,
            metrics=result.metrics,
            cluster_analysis=result.cluster_analysis
        )

    except HTTPException:
        # Bug fix: without this re-raise the 400 above was swallowed by the
        # generic handler below and returned to clients as a 500. This also
        # matches the pattern used by the sibling endpoints in this file.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
117
+
118
+
119
@router.post("/index-entities", response_model=IndexResponse)
async def index_entities(
    request: IndexEntitiesRequest,
    db: Session = Depends(get_scoped_db)
):
    """
    Index entities from NUMIDIUM database.

    Collects entity names and descriptions (descriptions truncated to 1000
    chars) and sends them to AetherMap for embedding + clustering.

    Raises:
        HTTPException 404: no entities match the filter.
        HTTPException 500: AetherMap processing failure.
    """
    # Local import — presumably to avoid an import cycle at module load; confirm.
    from app.models.entity import Entity

    try:
        query = db.query(Entity)

        if request.entity_types:
            query = query.filter(Entity.type.in_(request.entity_types))

        entities = query.limit(request.limit).all()

        if not entities:
            raise HTTPException(status_code=404, detail="Nenhuma entidade encontrada")

        # Build one text document per entity: "Name (type): description…"
        documents = []
        for e in entities:
            text = f"{e.name} ({e.type})"
            if e.description:
                text += f": {e.description[:1000]}"
            documents.append(text)

        # Fix: the old `request.fast_mode if hasattr(request, 'fast_mode') else True`
        # probe was always False (the model declared no such field), making the
        # branch dead code. getattr keeps the same default (True) and works
        # whether or not the model declares the field.
        result = await aethermap.process_documents(
            texts=documents,
            fast_mode=getattr(request, 'fast_mode', True)
        )

        return IndexResponse(
            job_id=result.job_id,
            num_documents=result.num_documents,
            num_clusters=result.num_clusters,
            num_noise=result.num_noise,
            metrics=result.metrics,
            cluster_analysis=result.cluster_analysis
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
168
+
169
+
170
@router.post("/upload", response_model=IndexResponse)
async def upload_documents(
    file: UploadFile = File(...),
    fast_mode: bool = Form(True)
):
    """Upload a file (TXT or CSV) and index its contents.

    - TXT: one document per line
    - CSV: will use first text column found
    """
    try:
        raw = await file.read()
        decoded = raw.decode('utf-8', errors='ignore')

        # One document per non-blank line.
        docs = [ln.strip() for ln in decoded.splitlines() if ln.strip()]

        if not docs:
            raise HTTPException(status_code=400, detail="Arquivo vazio ou sem texto válido")

        outcome = await aethermap.process_documents(texts=docs, fast_mode=fast_mode)

        return IndexResponse(
            job_id=outcome.job_id,
            num_documents=outcome.num_documents,
            num_clusters=outcome.num_clusters,
            num_noise=outcome.num_noise,
            metrics=outcome.metrics,
            cluster_analysis=outcome.cluster_analysis,
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
209
+
210
+
211
@router.post("/search", response_model=SearchResponse)
async def semantic_search(request: SemanticSearchRequest):
    """Run a hybrid RAG search (FAISS + BM25 + reranking + LLM) over the
    indexed documents and return an LLM-generated summary with citations."""
    try:
        # A prior /index call is required: the client caches its job id.
        if not aethermap.current_job_id:
            raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")

        outcome = await aethermap.semantic_search(
            query=request.query,
            turbo_mode=request.turbo_mode,
        )
        return SearchResponse(summary=outcome.summary, results=outcome.results)

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
237
+
238
+
239
@router.post("/entities", response_model=EntityGraphResponse)
async def extract_entities():
    """Extract named entities (NER) from the indexed documents.

    Returns hub entities (most connected), relationship insights, and
    node/edge counts of the extracted graph.
    """
    try:
        # A prior /index call is required: the client caches its job id.
        if not aethermap.current_job_id:
            raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")

        graph = await aethermap.extract_entities()
        return EntityGraphResponse(
            hubs=graph.hubs,
            insights=graph.insights,
            node_count=len(graph.nodes),
            edge_count=len(graph.edges),
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
266
+
267
+
268
@router.post("/analyze")
async def analyze_graph():
    """Ask the LLM to analyze the entity graph.

    Returns semantic insights about relationships and patterns.
    """
    try:
        # A prior /index call is required: the client caches its job id.
        if not aethermap.current_job_id:
            raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")

        analysis = await aethermap.analyze_graph()
        return {
            "analysis": analysis.analysis,
            "key_entities": analysis.key_entities,
            "relationships": analysis.relationships,
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
291
+
292
+
293
@router.post("/describe-clusters")
async def describe_clusters():
    """
    Get LLM descriptions for each cluster found.

    Returns:
        The raw AetherMap payload (per-cluster insights).

    Raises:
        HTTPException 400: nothing indexed yet.
        HTTPException 500: AetherMap failure.
    """
    try:
        if not aethermap.current_job_id:
            raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")

        result = await aethermap.describe_clusters()

        return result

    except HTTPException:
        # Bug fix: without this re-raise the 400 above was swallowed by the
        # generic handler below and returned as a 500, unlike the sibling
        # endpoints which already re-raise HTTPException.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
app/config.py CHANGED
@@ -23,6 +23,9 @@ class Settings(BaseSettings):
23
  # Cerebras API for LLM-based entity extraction
24
  cerebras_api_key: str = ""
25
 
 
 
 
26
  # CORS
27
  cors_origins: list[str] = ["*"]
28
 
 
23
  # Cerebras API for LLM-based entity extraction
24
  cerebras_api_key: str = ""
25
 
26
+ # AetherMap API for semantic search and NER
27
+ aethermap_url: str = "https://madras1-aethermap.hf.space"
28
+
29
  # CORS
30
  cors_origins: list[str] = ["*"]
31
 
app/main.py CHANGED
@@ -8,7 +8,7 @@ from contextlib import asynccontextmanager
8
 
9
  from app.config import settings
10
  from app.core.database import init_db
11
- from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate, dados_publicos, timeline, session
12
 
13
 
14
  @asynccontextmanager
@@ -63,6 +63,7 @@ app.include_router(investigate.router, prefix="/api/v1")
63
  app.include_router(dados_publicos.router, prefix="/api/v1")
64
  app.include_router(timeline.router, prefix="/api/v1")
65
  app.include_router(session.router, prefix="/api/v1")
 
66
 
67
 
68
  @app.get("/")
 
8
 
9
  from app.config import settings
10
  from app.core.database import init_db
11
+ from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate, dados_publicos, timeline, session, aethermap
12
 
13
 
14
  @asynccontextmanager
 
63
  app.include_router(dados_publicos.router, prefix="/api/v1")
64
  app.include_router(timeline.router, prefix="/api/v1")
65
  app.include_router(session.router, prefix="/api/v1")
66
+ app.include_router(aethermap.router, prefix="/api/v1/aethermap", tags=["aethermap"])
67
 
68
 
69
  @app.get("/")
app/services/aethermap_client.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AetherMap Client
3
+ Client para integração com AetherMap API - busca semântica, NER e análise de grafos.
4
+ """
5
+ import httpx
6
+ import json
7
+ import io
8
+ from typing import List, Dict, Any, Optional
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+ import logging
12
+
13
+ from app.config import settings
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ # URL base do AetherMap (HuggingFace Space)
19
+ AETHERMAP_URL = getattr(settings, 'aethermap_url', 'https://madras1-aethermap.hf.space')
20
+
21
+
22
@dataclass
class ProcessResult:
    """Result of a document-processing run (embeddings + clustering)."""
    # AetherMap job identifier, reused by follow-up search/NER calls.
    job_id: str
    num_documents: int
    num_clusters: int
    # Documents the clusterer left unassigned (noise points).
    num_noise: int
    metrics: Dict[str, Any] = field(default_factory=dict)
    cluster_analysis: Dict[str, Any] = field(default_factory=dict)
31
+
32
+
33
@dataclass
class SearchResult:
    """Result of a semantic search."""
    summary: str  # RAG answer generated by the LLM
    results: List[Dict[str, Any]] = field(default_factory=list)
38
+
39
+
40
@dataclass
class EntityNode:
    """Entity node in the NER graph."""
    entity: str
    entity_type: str
    # Number of documents the entity appears in.
    docs: int
    degree: int = 0
    centrality: float = 0.0
    role: str = "peripheral"  # hub, connector, peripheral
49
+
50
+
51
@dataclass
class EntityEdge:
    """Edge of the entity graph."""
    source_entity: str
    target_entity: str
    # Co-occurrence strength as reported by the upstream API.
    weight: int
    # Human-readable justification for the connection.
    reason: str
58
+
59
+
60
@dataclass
class EntityGraphResult:
    """Result of entity (NER) extraction: full graph plus highlights."""
    nodes: List[EntityNode] = field(default_factory=list)
    edges: List[EntityEdge] = field(default_factory=list)
    # Most-connected entities, kept as raw dicts from the API.
    hubs: List[Dict[str, Any]] = field(default_factory=list)
    insights: Dict[str, Any] = field(default_factory=dict)
67
+
68
+
69
@dataclass
class GraphAnalysis:
    """LLM-generated analysis of the entity graph."""
    analysis: str
    key_entities: List[str] = field(default_factory=list)
    relationships: List[str] = field(default_factory=list)
75
+
76
+
77
class AetherMapClient:
    """
    Client for the AetherMap API.

    Capabilities:
    - Document processing (embeddings + clusters)
    - Hybrid RAG semantic search (FAISS + BM25 + reranking + LLM)
    - NER entity extraction
    - LLM-based graph analysis

    The job id of the most recent successful ``process_documents`` call is
    cached on the instance so follow-up calls may omit it.
    """

    def __init__(self, base_url: Optional[str] = None, timeout: float = 120.0):
        """
        Args:
            base_url: AetherMap base URL; defaults to AETHERMAP_URL from settings.
            timeout: per-request timeout in seconds (processing can be slow).
        """
        # Strip any trailing slash so the path concatenation below stays predictable.
        self.base_url = (base_url or AETHERMAP_URL).rstrip('/')
        self.timeout = timeout
        self._current_job_id: Optional[str] = None

    @property
    def current_job_id(self) -> Optional[str]:
        """Job id of the last successful process_documents() call, if any."""
        return self._current_job_id

    def _resolve_job_id(self, job_id: Optional[str]) -> str:
        """Return the explicit job_id or fall back to the cached one.

        Raises:
            ValueError: when neither is available.
        """
        resolved = job_id or self._current_job_id
        if not resolved:
            raise ValueError("Nenhum job_id disponível. Processe documentos primeiro.")
        return resolved

    async def _post_json(
        self,
        path: str,
        error_label: str,
        data: Dict[str, Any],
        files: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """POST form data to the AetherMap API and return the decoded JSON body.

        Shared plumbing for every endpoint (was duplicated per method).

        Raises:
            RuntimeError: on any non-200 response, carrying status and body text.
                (Was a bare ``Exception``; RuntimeError is narrower and still
                caught by callers' ``except Exception`` handlers.)
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}{path}",
                data=data,
                files=files,
            )
        if response.status_code != 200:
            raise RuntimeError(f"{error_label}: {response.status_code} - {response.text}")
        return response.json()

    async def process_documents(
        self,
        texts: List[str],
        fast_mode: bool = True,
        min_cluster_size: int = 0,
        min_samples: int = 0
    ) -> ProcessResult:
        """
        Process a list of texts, generating embeddings and clusters.

        Args:
            texts: documents to index.
            fast_mode: True uses PCA (fast); False uses UMAP (more precise).
            min_cluster_size: minimum cluster size (0 = auto).
            min_samples: minimum samples (0 = auto).

        Returns:
            ProcessResult with the new job_id and clustering metrics.
        """
        # The API expects an uploaded TXT file with one document per line.
        # NOTE(review): a document containing newlines would be split into
        # several documents here — callers should pre-sanitize if that matters.
        file_bytes = "\n".join(texts).encode('utf-8')

        files = {
            'file': ('documents.txt', io.BytesIO(file_bytes), 'text/plain')
        }
        data = {
            'n_samples': str(len(texts)),
            'fast_mode': 'true' if fast_mode else 'false',
            'min_cluster_size': str(min_cluster_size),
            'min_samples': str(min_samples)
        }

        # Lazy %-style args instead of f-strings, per logging best practice.
        logger.info("AetherMap: Processando %d documentos...", len(texts))

        result = await self._post_json("/process/", "AetherMap error", data, files=files)

        self._current_job_id = result.get('job_id')
        metadata = result.get('metadata', {})

        logger.info("AetherMap: Job criado %s", self._current_job_id)

        return ProcessResult(
            job_id=self._current_job_id,
            num_documents=metadata.get('num_documents_processed', 0),
            num_clusters=metadata.get('num_clusters_found', 0),
            num_noise=metadata.get('num_noise_points', 0),
            metrics=result.get('metrics', {}),
            cluster_analysis=result.get('cluster_analysis', {})
        )

    async def semantic_search(
        self,
        query: str,
        job_id: Optional[str] = None,
        turbo_mode: bool = False
    ) -> SearchResult:
        """
        Hybrid RAG semantic search over processed documents.

        Args:
            query: search term.
            job_id: job id (defaults to the last processed job).
            turbo_mode: True trades some precision for speed.

        Returns:
            SearchResult with the LLM summary and raw hits.

        Raises:
            ValueError: no job id available.
            RuntimeError: upstream API error.
        """
        job_id = self._resolve_job_id(job_id)

        logger.info("AetherMap: Buscando '%s'...", query)

        result = await self._post_json(
            "/search/",
            "AetherMap search error",
            {
                'query': query,
                'job_id': job_id,
                'turbo_mode': 'true' if turbo_mode else 'false'
            },
        )

        return SearchResult(
            summary=result.get('summary', ''),
            results=result.get('results', [])
        )

    async def extract_entities(self, job_id: Optional[str] = None) -> EntityGraphResult:
        """
        Extract named entities (NER) and build their connection graph.

        Args:
            job_id: job id (defaults to the last processed job).

        Returns:
            EntityGraphResult with nodes, edges, hubs and insights.

        Raises:
            ValueError: no job id available.
            RuntimeError: upstream API error.
        """
        job_id = self._resolve_job_id(job_id)

        logger.info("AetherMap: Extraindo entidades...")

        result = await self._post_json(
            "/entity_graph/", "AetherMap entity_graph error", {'job_id': job_id}
        )

        # Convert the raw payload into typed dataclasses.
        nodes = [
            EntityNode(
                entity=n.get('entity', ''),
                entity_type=n.get('type', ''),
                docs=n.get('docs', 0),
                degree=n.get('degree', 0),
                centrality=n.get('centrality', 0.0),
                role=n.get('role', 'peripheral')
            )
            for n in result.get('nodes', [])
        ]

        edges = [
            EntityEdge(
                source_entity=e.get('source_entity', ''),
                target_entity=e.get('target_entity', ''),
                weight=e.get('weight', 0),
                reason=e.get('reason', '')
            )
            for e in result.get('edges', [])
        ]

        return EntityGraphResult(
            nodes=nodes,
            edges=edges,
            hubs=result.get('hubs', []),
            insights=result.get('insights', {})
        )

    async def analyze_graph(self, job_id: Optional[str] = None) -> GraphAnalysis:
        """
        Use the LLM to analyze the knowledge graph and extract insights.

        Args:
            job_id: job id (defaults to the last processed job).

        Returns:
            GraphAnalysis with the textual analysis.

        Raises:
            ValueError: no job id available.
            RuntimeError: upstream API error.
        """
        job_id = self._resolve_job_id(job_id)

        logger.info("AetherMap: Analisando grafo com LLM...")

        result = await self._post_json(
            "/analyze_graph/", "AetherMap analyze_graph error", {'job_id': job_id}
        )

        return GraphAnalysis(
            analysis=result.get('analysis', ''),
            key_entities=result.get('key_entities', []),
            relationships=result.get('relationships', [])
        )

    async def describe_clusters(self, job_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Use the LLM to describe each discovered cluster.

        Args:
            job_id: job id (defaults to the last processed job).

        Returns:
            Dict of per-cluster insights (raw upstream payload).

        Raises:
            ValueError: no job id available.
            RuntimeError: upstream API error.
        """
        job_id = self._resolve_job_id(job_id)

        logger.info("AetherMap: Descrevendo clusters...")

        return await self._post_json(
            "/describe_clusters/", "AetherMap describe_clusters error", {'job_id': job_id}
        )
326
+
327
+
328
+ # Instância global do client
329
+ aethermap = AetherMapClient()
app/services/investigator_agent.py CHANGED
@@ -304,8 +304,6 @@ class InvestigatorAgent:
304
  elif tool_name == "lookup_cnpj":
305
  return await self._lookup_cnpj(arguments.get("cnpj", ""))
306
 
307
- elif tool_name == "lookup_phone":
308
- return await self._lookup_phone(arguments.get("phone", ""))
309
 
310
  elif tool_name == "web_search":
311
  return await self._web_search(
@@ -316,6 +314,12 @@ class InvestigatorAgent:
316
  elif tool_name == "deep_research":
317
  return await self._deep_research(arguments.get("topic", ""))
318
 
 
 
 
 
 
 
319
  elif tool_name == "save_finding":
320
  finding = Finding(
321
  title=arguments.get("title", ""),
@@ -474,6 +478,66 @@ class InvestigatorAgent:
474
  except Exception as e:
475
  return f"Erro na pesquisa: {str(e)}"
476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  async def investigate(
478
  self,
479
  mission: str,
 
304
  elif tool_name == "lookup_cnpj":
305
  return await self._lookup_cnpj(arguments.get("cnpj", ""))
306
 
 
 
307
 
308
  elif tool_name == "web_search":
309
  return await self._web_search(
 
314
  elif tool_name == "deep_research":
315
  return await self._deep_research(arguments.get("topic", ""))
316
 
317
+ elif tool_name == "aether_search":
318
+ return await self._aether_search(arguments.get("query", ""))
319
+
320
+ elif tool_name == "aether_entities":
321
+ return await self._aether_entities()
322
+
323
  elif tool_name == "save_finding":
324
  finding = Finding(
325
  title=arguments.get("title", ""),
 
478
  except Exception as e:
479
  return f"Erro na pesquisa: {str(e)}"
480
 
481
    async def _aether_search(self, query: str) -> str:
        """Semantic search via AetherMap.

        On first use (no cached job id on the shared client) this lazily
        indexes up to 500 entities from the local DB; process_documents()
        caches the resulting job_id on the client as a side effect, which the
        search below relies on. Errors are returned as strings, not raised.
        """
        try:
            # Check if we have a job_id cached
            if not aethermap.current_job_id:
                # Index entities from database first
                if self.db:
                    entities = self.db.query(Entity).limit(500).all()
                    if entities:
                        texts = []
                        for e in entities:
                            text = f"{e.name} ({e.type})"
                            if e.description:
                                text += f": {e.description[:500]}"
                            texts.append(text)

                        if texts:
                            # Return value unused on purpose: only the job_id
                            # cached on the shared client matters here.
                            result = await aethermap.process_documents(texts, fast_mode=True)
                            # Continue with search

            if aethermap.current_job_id:
                result = await aethermap.semantic_search(query, turbo_mode=True)
                return f"RAG Response:\n{result.summary}"
            else:
                return "Nenhum documento indexado no AetherMap."

        except Exception as e:
            return f"Erro no AetherMap search: {str(e)}"
509
+
510
    async def _aether_entities(self) -> str:
        """Extract NER entities via AetherMap.

        Requires a prior indexing run (job_id cached on the shared client).
        Formats hubs, insights and the top relationships as a Markdown-style
        string. Errors are returned as strings, not raised.
        """
        try:
            if not aethermap.current_job_id:
                return "Nenhum documento indexado. Use aether_search primeiro."

            result = await aethermap.extract_entities()

            # Format response
            output = []

            if result.hubs:
                output.append("**Entidades Centrais (Hubs):**")
                # Top 5 hubs only, to keep the tool output short.
                for hub in result.hubs[:5]:
                    output.append(f"- {hub.get('entity')} ({hub.get('type')}): {hub.get('degree')} conexões")

            if result.insights:
                output.append(f"\n**Insights:**")
                output.append(f"- Total de conexões: {result.insights.get('total_connections', 0)}")
                output.append(f"- Grau médio: {result.insights.get('avg_degree', 0)}")

            if result.edges:
                output.append(f"\n**Top 5 Relacionamentos:**")
                for edge in result.edges[:5]:
                    output.append(f"- {edge.source_entity} <-> {edge.target_entity}: {edge.reason}")

            return "\n".join(output) if output else "Nenhuma entidade significativa encontrada."

        except Exception as e:
            return f"Erro na extração de entidades: {str(e)}"
540
+
541
  async def investigate(
542
  self,
543
  mission: str,