Spaces:

Madras1
/

Numidium

Runtime error

App Files Files Community

Madras1 commited on Jan 20

Commit

f07fc65

verified ·

1 Parent(s): 270c1c7

Upload 64 files

Browse files

Files changed (2) hide show

app/api/routes/investigate.py +85 -1
app/services/investigator_agent.py +509 -0

app/api/routes/investigate.py CHANGED Viewed

@@ -1,9 +1,10 @@
 """
 Investigation API Routes - Build dossiers on companies and people
 """
-from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 from typing import Optional, List, Dict, Any
 from app.services.investigation import (
     investigar_empresa,
@@ -11,6 +12,8 @@ from app.services.investigation import (
     dossier_to_dict
 )
 from app.services.brazil_apis import consultar_cnpj
 router = APIRouter(prefix="/investigate", tags=["Investigation"])
@@ -121,3 +124,84 @@ async def lookup_cnpj(cnpj: str):
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 """
 Investigation API Routes - Build dossiers on companies and people
 """
+from fastapi import APIRouter, HTTPException, Depends
 from pydantic import BaseModel, Field
 from typing import Optional, List, Dict, Any
+from sqlalchemy.orm import Session
 from app.services.investigation import (
     investigar_empresa,
     dossier_to_dict
 )
 from app.services.brazil_apis import consultar_cnpj
+from app.services.investigator_agent import investigator_agent
+from app.api.deps import get_db
 router = APIRouter(prefix="/investigate", tags=["Investigation"])
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+# ===========================================
+# Autonomous Investigation Agent
+# ===========================================
+class AgentInvestigateRequest(BaseModel):
+    """Request for autonomous investigation"""
+    mission: str = Field(..., min_length=5, description="Missão de investigação em linguagem natural")
+    max_iterations: int = Field(10, ge=1, le=20, description="Máximo de iterações do agente")
+class FindingResponse(BaseModel):
+    """A finding from investigation"""
+    title: str
+    content: str
+    source: str
+    timestamp: str
+class AgentInvestigateResponse(BaseModel):
+    """Response from autonomous investigation"""
+    mission: str
+    status: str
+    report: str
+    findings: List[FindingResponse]
+    entities_discovered: int
+    connections_mapped: int
+    iterations: int
+    tools_used: List[str]
+@router.post("/agent", response_model=AgentInvestigateResponse)
+async def investigate_with_agent(
+    request: AgentInvestigateRequest,
+    db: Session = Depends(get_db)
+):
+    """
+    Autonomous investigation with AI agent.
+    The agent will:
+    1. Search NUMIDIUM for existing entities
+    2. Query CNPJ data for Brazilian companies
+    3. Search the web for news and public info
+    4. Follow leads and connections
+    5. Generate a comprehensive report
+    Example missions:
+    - "Investigue a rede de empresas de João Silva"
+    - "Descubra os sócios da empresa CNPJ 11.222.333/0001-44"
+    - "Pesquise sobre a empresa XYZ e suas conexões"
+    """
+    try:
+        result = await investigator_agent.investigate(
+            mission=request.mission,
+            db=db,
+            max_iterations=request.max_iterations
+        )
+        return AgentInvestigateResponse(
+            mission=result.mission,
+            status=result.status,
+            report=result.report,
+            findings=[
+                FindingResponse(
+                    title=f.title,
+                    content=f.content,
+                    source=f.source,
+                    timestamp=f.timestamp
+                )
+                for f in result.findings
+            ],
+            entities_discovered=len(result.entities_discovered),
+            connections_mapped=len(result.connections_mapped),
+            iterations=result.iterations,
+            tools_used=result.tools_used
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

app/services/investigator_agent.py ADDED Viewed

	@@ -0,0 +1,509 @@

+"""
+Investigator Agent - Autonomous Investigation with Tool Calling
+Uses Cerebras native tool calling for multi-source investigations
+"""
+import json
+import httpx
+from typing import Optional, List, Dict, Any
+from dataclasses import dataclass, field
+from datetime import datetime
+from sqlalchemy.orm import Session
+from app.config import settings
+from app.services import lancer
+from app.services.brazil_apis import consultar_cnpj
+from app.models.entity import Entity, Relationship
+@dataclass
+class Finding:
+    """A discovery made during investigation"""
+    title: str
+    content: str
+    source: str
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+@dataclass
+class InvestigationResult:
+    """Complete investigation result"""
+    mission: str
+    findings: List[Finding]
+    entities_discovered: List[Dict[str, Any]]
+    connections_mapped: List[Dict[str, Any]]
+    report: str
+    iterations: int
+    tools_used: List[str]
+    status: str = "completed"
+# Tool definitions for Cerebras API
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "search_entity",
+            "description": "Buscar entidade no NUMIDIUM (grafo de conhecimento) por nome. Use para encontrar pessoas, empresas ou locais já conhecidos.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Nome ou termo para buscar"
+                    },
+                    "entity_type": {
+                        "type": "string",
+                        "enum": ["person", "organization", "location", "any"],
+                        "description": "Tipo de entidade (opcional)"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_connections",
+            "description": "Obter a rede de conexões de uma entidade específica. Retorna entidades relacionadas.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "entity_id": {
+                        "type": "string",
+                        "description": "ID da entidade no NUMIDIUM"
+                    }
+                },
+                "required": ["entity_id"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "lookup_cnpj",
+            "description": "Consultar dados de uma empresa brasileira pelo CNPJ. Retorna razão social, sócios, endereço, CNAEs, etc.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "cnpj": {
+                        "type": "string",
+                        "description": "CNPJ da empresa (com ou sem formatação)"
+                    }
+                },
+                "required": ["cnpj"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "web_search",
+            "description": "Pesquisar informações na web. Use para buscar notícias, artigos e informações públicas.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Termo de busca"
+                    },
+                    "freshness": {
+                        "type": "string",
+                        "enum": ["day", "week", "month", "any"],
+                        "description": "Frescor dos resultados",
+                        "default": "any"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "deep_research",
+            "description": "Pesquisa profunda e multi-dimensional sobre um tema. Use para tópicos complexos.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "topic": {
+                        "type": "string",
+                        "description": "Tópico para pesquisa profunda"
+                    }
+                },
+                "required": ["topic"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "save_finding",
+            "description": "Salvar uma descoberta importante da investigação.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string",
+                        "description": "Título curto da descoberta"
+                    },
+                    "content": {
+                        "type": "string",
+                        "description": "Conteúdo detalhado"
+                    },
+                    "source": {
+                        "type": "string",
+                        "description": "Fonte da informação"
+                    }
+                },
+                "required": ["title", "content", "source"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "finish_investigation",
+            "description": "Finalizar a investigação e gerar o relatório final.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "summary": {
+                        "type": "string",
+                        "description": "Resumo das descobertas principais"
+                    }
+                },
+                "required": ["summary"]
+            }
+        }
+    }
+]
+SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD.
+Sua missão é investigar temas usando múltiplas fontes de dados:
+- NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos
+- Consulta CNPJ: Dados oficiais de empresas brasileiras (BrasilAPI)
+- Web Search: Pesquisa na internet via Lancer
+## Estratégia de Investigação:
+1. Comece buscando no NUMIDIUM se já temos informações sobre o alvo
+2. Para empresas brasileiras, consulte o CNPJ para obter sócios e dados
+3. Use web_search para buscar notícias e informações públicas
+4. Para cada sócio/conexão descoberta, considere investigar mais a fundo
+5. Use save_finding para registrar descobertas importantes
+6. Quando tiver informações suficientes, use finish_investigation
+## Regras:
+- Seja metódico e siga pistas
+- Não invente informações - use apenas dados das ferramentas
+- Priorize qualidade sobre quantidade
+- Cite sempre as fontes"""
+class InvestigatorAgent:
+    """Autonomous investigation agent with tool calling"""
+    def __init__(self):
+        self.api_url = "https://api.cerebras.ai/v1/chat/completions"
+        self.api_key = settings.cerebras_api_key
+        self.model = "qwen-3-32b"
+        # Investigation state
+        self.findings: List[Finding] = []
+        self.entities_discovered: List[Dict[str, Any]] = []
+        self.connections_mapped: List[Dict[str, Any]] = []
+        self.tools_used: List[str] = []
+        self.messages: List[Dict[str, Any]] = []
+        self.db: Optional[Session] = None
+    def _reset_state(self):
+        """Reset investigation state"""
+        self.findings = []
+        self.entities_discovered = []
+        self.connections_mapped = []
+        self.tools_used = []
+        self.messages = []
+    async def _call_llm(
+        self,
+        messages: List[Dict[str, Any]],
+        tools: List[Dict] = None
+    ) -> Dict[str, Any]:
+        """Call Cerebras API with tool calling support"""
+        try:
+            payload = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": 0.3,
+                "max_tokens": 2048,
+            }
+            if tools:
+                payload["tools"] = tools
+                payload["tool_choice"] = "auto"
+                payload["parallel_tool_calls"] = True
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                response = await client.post(
+                    self.api_url,
+                    headers={
+                        "Authorization": f"Bearer {self.api_key}",
+                        "Content-Type": "application/json"
+                    },
+                    json=payload
+                )
+                if response.status_code != 200:
+                    raise Exception(f"API error: {response.status_code} - {response.text}")
+                return response.json()
+        except Exception as e:
+            raise Exception(f"LLM call failed: {str(e)}")
+    async def _execute_tool(self, tool_name: str, arguments: Dict) -> str:
+        """Execute a tool and return the result"""
+        self.tools_used.append(tool_name)
+        try:
+            if tool_name == "search_entity":
+                return await self._search_entity(
+                    arguments.get("query", ""),
+                    arguments.get("entity_type")
+                )
+            elif tool_name == "get_connections":
+                return await self._get_connections(arguments.get("entity_id"))
+            elif tool_name == "lookup_cnpj":
+                return await self._lookup_cnpj(arguments.get("cnpj", ""))
+            elif tool_name == "web_search":
+                return await self._web_search(
+                    arguments.get("query", ""),
+                    arguments.get("freshness", "any")
+                )
+            elif tool_name == "deep_research":
+                return await self._deep_research(arguments.get("topic", ""))
+            elif tool_name == "save_finding":
+                finding = Finding(
+                    title=arguments.get("title", ""),
+                    content=arguments.get("content", ""),
+                    source=arguments.get("source", "")
+                )
+                self.findings.append(finding)
+                return f"Descoberta salva: {finding.title}"
+            elif tool_name == "finish_investigation":
+                return f"INVESTIGATION_COMPLETE: {arguments.get('summary', '')}"
+            else:
+                return f"Ferramenta desconhecida: {tool_name}"
+        except Exception as e:
+            return f"Erro ao executar {tool_name}: {str(e)}"
+    async def _search_entity(self, query: str, entity_type: Optional[str]) -> str:
+        """Search entities in database"""
+        if not self.db:
+            return "Erro: Banco de dados não disponível"
+        q = self.db.query(Entity).filter(Entity.name.ilike(f"%{query}%"))
+        if entity_type and entity_type != "any":
+            q = q.filter(Entity.type == entity_type)
+        entities = q.limit(10).all()
+        if entities:
+            result = []
+            for e in entities:
+                self.entities_discovered.append({
+                    "id": str(e.id),
+                    "name": e.name,
+                    "type": e.type
+                })
+                result.append({
+                    "id": str(e.id),
+                    "name": e.name,
+                    "type": e.type,
+                    "description": e.description[:200] if e.description else None
+                })
+            return json.dumps(result, ensure_ascii=False, indent=2)
+        return "Nenhuma entidade encontrada no NUMIDIUM."
+    async def _get_connections(self, entity_id: str) -> str:
+        """Get entity connections"""
+        if not self.db:
+            return "Erro: Banco de dados não disponível"
+        relationships = self.db.query(Relationship).filter(
+            (Relationship.source_id == entity_id) | (Relationship.target_id == entity_id)
+        ).limit(20).all()
+        if relationships:
+            connections = []
+            for rel in relationships:
+                source = self.db.query(Entity).filter(Entity.id == rel.source_id).first()
+                target = self.db.query(Entity).filter(Entity.id == rel.target_id).first()
+                if source and target:
+                    connections.append({
+                        "source": source.name,
+                        "target": target.name,
+                        "type": rel.type
+                    })
+            return json.dumps(connections, ensure_ascii=False, indent=2)
+        return "Nenhuma conexão encontrada."
+    async def _lookup_cnpj(self, cnpj: str) -> str:
+        """Lookup CNPJ via BrasilAPI"""
+        cnpj_clean = cnpj.replace(".", "").replace("/", "").replace("-", "")
+        result = await consultar_cnpj(cnpj_clean)
+        if result:
+            data = {
+                "razao_social": result.razao_social,
+                "nome_fantasia": result.nome_fantasia,
+                "situacao": result.situacao,
+                "data_abertura": result.data_abertura,
+                "capital_social": result.capital_social,
+                "endereco": f"{result.logradouro}, {result.numero} - {result.cidade}/{result.uf}",
+                "cnae": f"{result.cnae_principal} - {result.cnae_descricao}",
+                "socios": result.socios
+            }
+            return json.dumps(data, ensure_ascii=False, indent=2)
+        return "CNPJ não encontrado."
+    async def _web_search(self, query: str, freshness: str) -> str:
+        """Web search via Lancer"""
+        try:
+            result = await lancer.search(query, max_results=5, freshness=freshness)
+            if result.answer:
+                return f"Resumo: {result.answer}\n\nFontes: {len(result.results)} resultados"
+            return "Nenhum resultado encontrado."
+        except Exception as e:
+            return f"Erro na busca web: {str(e)}"
+    async def _deep_research(self, topic: str) -> str:
+        """Deep research via Lancer"""
+        try:
+            result = await lancer.deep_research(topic, max_dimensions=3)
+            if result.answer:
+                return result.answer
+            return "Pesquisa profunda não retornou resultados."
+        except Exception as e:
+            return f"Erro na pesquisa: {str(e)}"
+    async def investigate(
+        self,
+        mission: str,
+        db: Session,
+        max_iterations: int = 10
+    ) -> InvestigationResult:
+        """Main investigation loop"""
+        self._reset_state()
+        self.db = db
+        self.messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": f"Missão de investigação: {mission}\n\nComece a investigação."}
+        ]
+        iteration = 0
+        final_summary = ""
+        while iteration < max_iterations:
+            iteration += 1
+            response = await self._call_llm(self.messages, TOOLS)
+            choice = response["choices"][0]
+            message = choice["message"]
+            self.messages.append(message)
+            tool_calls = message.get("tool_calls", [])
+            if not tool_calls:
+                if message.get("content"):
+                    final_summary = message["content"]
+                break
+            for tool_call in tool_calls:
+                func = tool_call["function"]
+                tool_name = func["name"]
+                try:
+                    arguments = json.loads(func["arguments"])
+                except:
+                    arguments = {}
+                result = await self._execute_tool(tool_name, arguments)
+                if result.startswith("INVESTIGATION_COMPLETE:"):
+                    final_summary = result.replace("INVESTIGATION_COMPLETE:", "").strip()
+                    break
+                self.messages.append({
+                    "role": "tool",
+                    "tool_call_id": tool_call["id"],
+                    "content": result
+                })
+            if final_summary:
+                break
+        if not final_summary:
+            final_summary = await self._generate_report(mission)
+        return InvestigationResult(
+            mission=mission,
+            findings=self.findings,
+            entities_discovered=self.entities_discovered,
+            connections_mapped=self.connections_mapped,
+            report=final_summary,
+            iterations=iteration,
+            tools_used=list(set(self.tools_used)),
+            status="completed"
+        )
+    async def _generate_report(self, mission: str) -> str:
+        """Generate final report"""
+        findings_text = "\n".join([
+            f"- {f.title}: {f.content} (Fonte: {f.source})"
+            for f in self.findings
+        ]) or "Nenhuma descoberta registrada."
+        entities_text = ", ".join([
+            e.get("name", "Unknown") for e in self.entities_discovered[:10]
+        ]) or "Nenhuma entidade."
+        prompt = f"""Gere um relatório de investigação:
+Missão: {mission}
+Descobertas:
+{findings_text}
+Entidades: {entities_text}
+Ferramentas usadas: {', '.join(set(self.tools_used))}
+Gere relatório estruturado com: Resumo Executivo, Descobertas, Entidades, Recomendações."""
+        response = await self._call_llm([
+            {"role": "system", "content": "Gere relatórios concisos."},
+            {"role": "user", "content": prompt}
+        ])
+        return response["choices"][0]["message"]["content"]
+# Singleton
+investigator_agent = InvestigatorAgent()