Madras1 commited on
Commit
f07fc65
·
verified ·
1 Parent(s): 270c1c7

Upload 64 files

Browse files
app/api/routes/investigate.py CHANGED
@@ -1,9 +1,10 @@
1
  """
2
  Investigation API Routes - Build dossiers on companies and people
3
  """
4
- from fastapi import APIRouter, HTTPException
5
  from pydantic import BaseModel, Field
6
  from typing import Optional, List, Dict, Any
 
7
 
8
  from app.services.investigation import (
9
  investigar_empresa,
@@ -11,6 +12,8 @@ from app.services.investigation import (
11
  dossier_to_dict
12
  )
13
  from app.services.brazil_apis import consultar_cnpj
 
 
14
 
15
 
16
  router = APIRouter(prefix="/investigate", tags=["Investigation"])
@@ -121,3 +124,84 @@ async def lookup_cnpj(cnpj: str):
121
  raise
122
  except Exception as e:
123
  raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Investigation API Routes - Build dossiers on companies and people
3
  """
4
+ from fastapi import APIRouter, HTTPException, Depends
5
  from pydantic import BaseModel, Field
6
  from typing import Optional, List, Dict, Any
7
+ from sqlalchemy.orm import Session
8
 
9
  from app.services.investigation import (
10
  investigar_empresa,
 
12
  dossier_to_dict
13
  )
14
  from app.services.brazil_apis import consultar_cnpj
15
+ from app.services.investigator_agent import investigator_agent
16
+ from app.api.deps import get_db
17
 
18
 
19
  router = APIRouter(prefix="/investigate", tags=["Investigation"])
 
124
  raise
125
  except Exception as e:
126
  raise HTTPException(status_code=500, detail=str(e))
127
+
128
+
129
+ # ===========================================
130
+ # Autonomous Investigation Agent
131
+ # ===========================================
132
+
133
+ class AgentInvestigateRequest(BaseModel):
134
+ """Request for autonomous investigation"""
135
+ mission: str = Field(..., min_length=5, description="Missão de investigação em linguagem natural")
136
+ max_iterations: int = Field(10, ge=1, le=20, description="Máximo de iterações do agente")
137
+
138
+
139
+ class FindingResponse(BaseModel):
140
+ """A finding from investigation"""
141
+ title: str
142
+ content: str
143
+ source: str
144
+ timestamp: str
145
+
146
+
147
+ class AgentInvestigateResponse(BaseModel):
148
+ """Response from autonomous investigation"""
149
+ mission: str
150
+ status: str
151
+ report: str
152
+ findings: List[FindingResponse]
153
+ entities_discovered: int
154
+ connections_mapped: int
155
+ iterations: int
156
+ tools_used: List[str]
157
+
158
+
159
+ @router.post("/agent", response_model=AgentInvestigateResponse)
160
+ async def investigate_with_agent(
161
+ request: AgentInvestigateRequest,
162
+ db: Session = Depends(get_db)
163
+ ):
164
+ """
165
+ Autonomous investigation with AI agent.
166
+
167
+ The agent will:
168
+ 1. Search NUMIDIUM for existing entities
169
+ 2. Query CNPJ data for Brazilian companies
170
+ 3. Search the web for news and public info
171
+ 4. Follow leads and connections
172
+ 5. Generate a comprehensive report
173
+
174
+ Example missions:
175
+ - "Investigue a rede de empresas de João Silva"
176
+ - "Descubra os sócios da empresa CNPJ 11.222.333/0001-44"
177
+ - "Pesquise sobre a empresa XYZ e suas conexões"
178
+ """
179
+ try:
180
+ result = await investigator_agent.investigate(
181
+ mission=request.mission,
182
+ db=db,
183
+ max_iterations=request.max_iterations
184
+ )
185
+
186
+ return AgentInvestigateResponse(
187
+ mission=result.mission,
188
+ status=result.status,
189
+ report=result.report,
190
+ findings=[
191
+ FindingResponse(
192
+ title=f.title,
193
+ content=f.content,
194
+ source=f.source,
195
+ timestamp=f.timestamp
196
+ )
197
+ for f in result.findings
198
+ ],
199
+ entities_discovered=len(result.entities_discovered),
200
+ connections_mapped=len(result.connections_mapped),
201
+ iterations=result.iterations,
202
+ tools_used=result.tools_used
203
+ )
204
+
205
+ except Exception as e:
206
+ raise HTTPException(status_code=500, detail=str(e))
207
+
app/services/investigator_agent.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Investigator Agent - Autonomous Investigation with Tool Calling
3
+ Uses Cerebras native tool calling for multi-source investigations
4
+ """
5
+ import json
6
+ import httpx
7
+ from typing import Optional, List, Dict, Any
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+ from sqlalchemy.orm import Session
11
+
12
+ from app.config import settings
13
+ from app.services import lancer
14
+ from app.services.brazil_apis import consultar_cnpj
15
+ from app.models.entity import Entity, Relationship
16
+
17
+
18
+ @dataclass
19
+ class Finding:
20
+ """A discovery made during investigation"""
21
+ title: str
22
+ content: str
23
+ source: str
24
+ timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
25
+
26
+
27
+ @dataclass
28
+ class InvestigationResult:
29
+ """Complete investigation result"""
30
+ mission: str
31
+ findings: List[Finding]
32
+ entities_discovered: List[Dict[str, Any]]
33
+ connections_mapped: List[Dict[str, Any]]
34
+ report: str
35
+ iterations: int
36
+ tools_used: List[str]
37
+ status: str = "completed"
38
+
39
+
40
+ # Tool definitions for Cerebras API
41
+ TOOLS = [
42
+ {
43
+ "type": "function",
44
+ "function": {
45
+ "name": "search_entity",
46
+ "description": "Buscar entidade no NUMIDIUM (grafo de conhecimento) por nome. Use para encontrar pessoas, empresas ou locais já conhecidos.",
47
+ "parameters": {
48
+ "type": "object",
49
+ "properties": {
50
+ "query": {
51
+ "type": "string",
52
+ "description": "Nome ou termo para buscar"
53
+ },
54
+ "entity_type": {
55
+ "type": "string",
56
+ "enum": ["person", "organization", "location", "any"],
57
+ "description": "Tipo de entidade (opcional)"
58
+ }
59
+ },
60
+ "required": ["query"]
61
+ }
62
+ }
63
+ },
64
+ {
65
+ "type": "function",
66
+ "function": {
67
+ "name": "get_connections",
68
+ "description": "Obter a rede de conexões de uma entidade específica. Retorna entidades relacionadas.",
69
+ "parameters": {
70
+ "type": "object",
71
+ "properties": {
72
+ "entity_id": {
73
+ "type": "string",
74
+ "description": "ID da entidade no NUMIDIUM"
75
+ }
76
+ },
77
+ "required": ["entity_id"]
78
+ }
79
+ }
80
+ },
81
+ {
82
+ "type": "function",
83
+ "function": {
84
+ "name": "lookup_cnpj",
85
+ "description": "Consultar dados de uma empresa brasileira pelo CNPJ. Retorna razão social, sócios, endereço, CNAEs, etc.",
86
+ "parameters": {
87
+ "type": "object",
88
+ "properties": {
89
+ "cnpj": {
90
+ "type": "string",
91
+ "description": "CNPJ da empresa (com ou sem formatação)"
92
+ }
93
+ },
94
+ "required": ["cnpj"]
95
+ }
96
+ }
97
+ },
98
+ {
99
+ "type": "function",
100
+ "function": {
101
+ "name": "web_search",
102
+ "description": "Pesquisar informações na web. Use para buscar notícias, artigos e informações públicas.",
103
+ "parameters": {
104
+ "type": "object",
105
+ "properties": {
106
+ "query": {
107
+ "type": "string",
108
+ "description": "Termo de busca"
109
+ },
110
+ "freshness": {
111
+ "type": "string",
112
+ "enum": ["day", "week", "month", "any"],
113
+ "description": "Frescor dos resultados",
114
+ "default": "any"
115
+ }
116
+ },
117
+ "required": ["query"]
118
+ }
119
+ }
120
+ },
121
+ {
122
+ "type": "function",
123
+ "function": {
124
+ "name": "deep_research",
125
+ "description": "Pesquisa profunda e multi-dimensional sobre um tema. Use para tópicos complexos.",
126
+ "parameters": {
127
+ "type": "object",
128
+ "properties": {
129
+ "topic": {
130
+ "type": "string",
131
+ "description": "Tópico para pesquisa profunda"
132
+ }
133
+ },
134
+ "required": ["topic"]
135
+ }
136
+ }
137
+ },
138
+ {
139
+ "type": "function",
140
+ "function": {
141
+ "name": "save_finding",
142
+ "description": "Salvar uma descoberta importante da investigação.",
143
+ "parameters": {
144
+ "type": "object",
145
+ "properties": {
146
+ "title": {
147
+ "type": "string",
148
+ "description": "Título curto da descoberta"
149
+ },
150
+ "content": {
151
+ "type": "string",
152
+ "description": "Conteúdo detalhado"
153
+ },
154
+ "source": {
155
+ "type": "string",
156
+ "description": "Fonte da informação"
157
+ }
158
+ },
159
+ "required": ["title", "content", "source"]
160
+ }
161
+ }
162
+ },
163
+ {
164
+ "type": "function",
165
+ "function": {
166
+ "name": "finish_investigation",
167
+ "description": "Finalizar a investigação e gerar o relatório final.",
168
+ "parameters": {
169
+ "type": "object",
170
+ "properties": {
171
+ "summary": {
172
+ "type": "string",
173
+ "description": "Resumo das descobertas principais"
174
+ }
175
+ },
176
+ "required": ["summary"]
177
+ }
178
+ }
179
+ }
180
+ ]
181
+
182
+
183
+ SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD.
184
+
185
+ Sua missão é investigar temas usando múltiplas fontes de dados:
186
+ - NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos
187
+ - Consulta CNPJ: Dados oficiais de empresas brasileiras (BrasilAPI)
188
+ - Web Search: Pesquisa na internet via Lancer
189
+
190
+ ## Estratégia de Investigação:
191
+
192
+ 1. Comece buscando no NUMIDIUM se já temos informações sobre o alvo
193
+ 2. Para empresas brasileiras, consulte o CNPJ para obter sócios e dados
194
+ 3. Use web_search para buscar notícias e informações públicas
195
+ 4. Para cada sócio/conexão descoberta, considere investigar mais a fundo
196
+ 5. Use save_finding para registrar descobertas importantes
197
+ 6. Quando tiver informações suficientes, use finish_investigation
198
+
199
+ ## Regras:
200
+ - Seja metódico e siga pistas
201
+ - Não invente informações - use apenas dados das ferramentas
202
+ - Priorize qualidade sobre quantidade
203
+ - Cite sempre as fontes"""
204
+
205
+
206
+ class InvestigatorAgent:
207
+ """Autonomous investigation agent with tool calling"""
208
+
209
+ def __init__(self):
210
+ self.api_url = "https://api.cerebras.ai/v1/chat/completions"
211
+ self.api_key = settings.cerebras_api_key
212
+ self.model = "qwen-3-32b"
213
+
214
+ # Investigation state
215
+ self.findings: List[Finding] = []
216
+ self.entities_discovered: List[Dict[str, Any]] = []
217
+ self.connections_mapped: List[Dict[str, Any]] = []
218
+ self.tools_used: List[str] = []
219
+ self.messages: List[Dict[str, Any]] = []
220
+ self.db: Optional[Session] = None
221
+
222
+ def _reset_state(self):
223
+ """Reset investigation state"""
224
+ self.findings = []
225
+ self.entities_discovered = []
226
+ self.connections_mapped = []
227
+ self.tools_used = []
228
+ self.messages = []
229
+
230
+ async def _call_llm(
231
+ self,
232
+ messages: List[Dict[str, Any]],
233
+ tools: List[Dict] = None
234
+ ) -> Dict[str, Any]:
235
+ """Call Cerebras API with tool calling support"""
236
+ try:
237
+ payload = {
238
+ "model": self.model,
239
+ "messages": messages,
240
+ "temperature": 0.3,
241
+ "max_tokens": 2048,
242
+ }
243
+
244
+ if tools:
245
+ payload["tools"] = tools
246
+ payload["tool_choice"] = "auto"
247
+ payload["parallel_tool_calls"] = True
248
+
249
+ async with httpx.AsyncClient(timeout=60.0) as client:
250
+ response = await client.post(
251
+ self.api_url,
252
+ headers={
253
+ "Authorization": f"Bearer {self.api_key}",
254
+ "Content-Type": "application/json"
255
+ },
256
+ json=payload
257
+ )
258
+
259
+ if response.status_code != 200:
260
+ raise Exception(f"API error: {response.status_code} - {response.text}")
261
+
262
+ return response.json()
263
+
264
+ except Exception as e:
265
+ raise Exception(f"LLM call failed: {str(e)}")
266
+
267
+ async def _execute_tool(self, tool_name: str, arguments: Dict) -> str:
268
+ """Execute a tool and return the result"""
269
+ self.tools_used.append(tool_name)
270
+
271
+ try:
272
+ if tool_name == "search_entity":
273
+ return await self._search_entity(
274
+ arguments.get("query", ""),
275
+ arguments.get("entity_type")
276
+ )
277
+
278
+ elif tool_name == "get_connections":
279
+ return await self._get_connections(arguments.get("entity_id"))
280
+
281
+ elif tool_name == "lookup_cnpj":
282
+ return await self._lookup_cnpj(arguments.get("cnpj", ""))
283
+
284
+ elif tool_name == "web_search":
285
+ return await self._web_search(
286
+ arguments.get("query", ""),
287
+ arguments.get("freshness", "any")
288
+ )
289
+
290
+ elif tool_name == "deep_research":
291
+ return await self._deep_research(arguments.get("topic", ""))
292
+
293
+ elif tool_name == "save_finding":
294
+ finding = Finding(
295
+ title=arguments.get("title", ""),
296
+ content=arguments.get("content", ""),
297
+ source=arguments.get("source", "")
298
+ )
299
+ self.findings.append(finding)
300
+ return f"Descoberta salva: {finding.title}"
301
+
302
+ elif tool_name == "finish_investigation":
303
+ return f"INVESTIGATION_COMPLETE: {arguments.get('summary', '')}"
304
+
305
+ else:
306
+ return f"Ferramenta desconhecida: {tool_name}"
307
+
308
+ except Exception as e:
309
+ return f"Erro ao executar {tool_name}: {str(e)}"
310
+
311
+ async def _search_entity(self, query: str, entity_type: Optional[str]) -> str:
312
+ """Search entities in database"""
313
+ if not self.db:
314
+ return "Erro: Banco de dados não disponível"
315
+
316
+ q = self.db.query(Entity).filter(Entity.name.ilike(f"%{query}%"))
317
+ if entity_type and entity_type != "any":
318
+ q = q.filter(Entity.type == entity_type)
319
+
320
+ entities = q.limit(10).all()
321
+
322
+ if entities:
323
+ result = []
324
+ for e in entities:
325
+ self.entities_discovered.append({
326
+ "id": str(e.id),
327
+ "name": e.name,
328
+ "type": e.type
329
+ })
330
+ result.append({
331
+ "id": str(e.id),
332
+ "name": e.name,
333
+ "type": e.type,
334
+ "description": e.description[:200] if e.description else None
335
+ })
336
+ return json.dumps(result, ensure_ascii=False, indent=2)
337
+
338
+ return "Nenhuma entidade encontrada no NUMIDIUM."
339
+
340
+ async def _get_connections(self, entity_id: str) -> str:
341
+ """Get entity connections"""
342
+ if not self.db:
343
+ return "Erro: Banco de dados não disponível"
344
+
345
+ relationships = self.db.query(Relationship).filter(
346
+ (Relationship.source_id == entity_id) | (Relationship.target_id == entity_id)
347
+ ).limit(20).all()
348
+
349
+ if relationships:
350
+ connections = []
351
+ for rel in relationships:
352
+ source = self.db.query(Entity).filter(Entity.id == rel.source_id).first()
353
+ target = self.db.query(Entity).filter(Entity.id == rel.target_id).first()
354
+ if source and target:
355
+ connections.append({
356
+ "source": source.name,
357
+ "target": target.name,
358
+ "type": rel.type
359
+ })
360
+ return json.dumps(connections, ensure_ascii=False, indent=2)
361
+
362
+ return "Nenhuma conexão encontrada."
363
+
364
+ async def _lookup_cnpj(self, cnpj: str) -> str:
365
+ """Lookup CNPJ via BrasilAPI"""
366
+ cnpj_clean = cnpj.replace(".", "").replace("/", "").replace("-", "")
367
+ result = await consultar_cnpj(cnpj_clean)
368
+
369
+ if result:
370
+ data = {
371
+ "razao_social": result.razao_social,
372
+ "nome_fantasia": result.nome_fantasia,
373
+ "situacao": result.situacao,
374
+ "data_abertura": result.data_abertura,
375
+ "capital_social": result.capital_social,
376
+ "endereco": f"{result.logradouro}, {result.numero} - {result.cidade}/{result.uf}",
377
+ "cnae": f"{result.cnae_principal} - {result.cnae_descricao}",
378
+ "socios": result.socios
379
+ }
380
+ return json.dumps(data, ensure_ascii=False, indent=2)
381
+
382
+ return "CNPJ não encontrado."
383
+
384
+ async def _web_search(self, query: str, freshness: str) -> str:
385
+ """Web search via Lancer"""
386
+ try:
387
+ result = await lancer.search(query, max_results=5, freshness=freshness)
388
+ if result.answer:
389
+ return f"Resumo: {result.answer}\n\nFontes: {len(result.results)} resultados"
390
+ return "Nenhum resultado encontrado."
391
+ except Exception as e:
392
+ return f"Erro na busca web: {str(e)}"
393
+
394
+ async def _deep_research(self, topic: str) -> str:
395
+ """Deep research via Lancer"""
396
+ try:
397
+ result = await lancer.deep_research(topic, max_dimensions=3)
398
+ if result.answer:
399
+ return result.answer
400
+ return "Pesquisa profunda não retornou resultados."
401
+ except Exception as e:
402
+ return f"Erro na pesquisa: {str(e)}"
403
+
404
+ async def investigate(
405
+ self,
406
+ mission: str,
407
+ db: Session,
408
+ max_iterations: int = 10
409
+ ) -> InvestigationResult:
410
+ """Main investigation loop"""
411
+ self._reset_state()
412
+ self.db = db
413
+
414
+ self.messages = [
415
+ {"role": "system", "content": SYSTEM_PROMPT},
416
+ {"role": "user", "content": f"Missão de investigação: {mission}\n\nComece a investigação."}
417
+ ]
418
+
419
+ iteration = 0
420
+ final_summary = ""
421
+
422
+ while iteration < max_iterations:
423
+ iteration += 1
424
+
425
+ response = await self._call_llm(self.messages, TOOLS)
426
+
427
+ choice = response["choices"][0]
428
+ message = choice["message"]
429
+ self.messages.append(message)
430
+
431
+ tool_calls = message.get("tool_calls", [])
432
+
433
+ if not tool_calls:
434
+ if message.get("content"):
435
+ final_summary = message["content"]
436
+ break
437
+
438
+ for tool_call in tool_calls:
439
+ func = tool_call["function"]
440
+ tool_name = func["name"]
441
+
442
+ try:
443
+ arguments = json.loads(func["arguments"])
444
+ except:
445
+ arguments = {}
446
+
447
+ result = await self._execute_tool(tool_name, arguments)
448
+
449
+ if result.startswith("INVESTIGATION_COMPLETE:"):
450
+ final_summary = result.replace("INVESTIGATION_COMPLETE:", "").strip()
451
+ break
452
+
453
+ self.messages.append({
454
+ "role": "tool",
455
+ "tool_call_id": tool_call["id"],
456
+ "content": result
457
+ })
458
+
459
+ if final_summary:
460
+ break
461
+
462
+ if not final_summary:
463
+ final_summary = await self._generate_report(mission)
464
+
465
+ return InvestigationResult(
466
+ mission=mission,
467
+ findings=self.findings,
468
+ entities_discovered=self.entities_discovered,
469
+ connections_mapped=self.connections_mapped,
470
+ report=final_summary,
471
+ iterations=iteration,
472
+ tools_used=list(set(self.tools_used)),
473
+ status="completed"
474
+ )
475
+
476
+ async def _generate_report(self, mission: str) -> str:
477
+ """Generate final report"""
478
+ findings_text = "\n".join([
479
+ f"- {f.title}: {f.content} (Fonte: {f.source})"
480
+ for f in self.findings
481
+ ]) or "Nenhuma descoberta registrada."
482
+
483
+ entities_text = ", ".join([
484
+ e.get("name", "Unknown") for e in self.entities_discovered[:10]
485
+ ]) or "Nenhuma entidade."
486
+
487
+ prompt = f"""Gere um relatório de investigação:
488
+
489
+ Missão: {mission}
490
+
491
+ Descobertas:
492
+ {findings_text}
493
+
494
+ Entidades: {entities_text}
495
+
496
+ Ferramentas usadas: {', '.join(set(self.tools_used))}
497
+
498
+ Gere relatório estruturado com: Resumo Executivo, Descobertas, Entidades, Recomendações."""
499
+
500
+ response = await self._call_llm([
501
+ {"role": "system", "content": "Gere relatórios concisos."},
502
+ {"role": "user", "content": prompt}
503
+ ])
504
+
505
+ return response["choices"][0]["message"]["content"]
506
+
507
+
508
+ # Singleton
509
+ investigator_agent = InvestigatorAgent()