Madras1 committed on
Commit
f76a41f
·
verified ·
1 Parent(s): 9a10687

Upload 55 files

Browse files
app/api/routes/investigate.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Investigation API Routes - Build dossiers on companies and people
3
+ """
4
+ from fastapi import APIRouter, HTTPException
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, List, Dict, Any
7
+
8
+ from app.services.investigation import (
9
+ investigar_empresa,
10
+ investigar_pessoa,
11
+ dossier_to_dict
12
+ )
13
+ from app.services.brazil_apis import consultar_cnpj
14
+
15
+
16
+ router = APIRouter(prefix="/investigate", tags=["Investigation"])
17
+
18
+
19
class InvestigateCompanyRequest(BaseModel):
    """Body of the company-investigation request."""
    # Required identifier of the target company; must be at least 11 characters long.
    cnpj: str = Field(default=..., min_length=11, description="CNPJ da empresa")
22
+
23
+
24
class InvestigatePersonRequest(BaseModel):
    """Body of the person-investigation request."""
    # Required name of the person; must be at least 2 characters long.
    nome: str = Field(default=..., min_length=2, description="Nome da pessoa")
    # Optional CPF; defaults to None when the caller does not send it.
    cpf: Optional[str] = Field(default=None, description="CPF (opcional)")
28
+
29
+
30
class DossierResponse(BaseModel):
    """Response schema shared by the company and person investigation routes."""
    # Kind of target and its display name.
    tipo: str
    alvo: str
    # Document number associated with the target; may be None.
    cnpj_cpf: Optional[str]
    # Human-readable warnings and the aggregated risk score.
    red_flags: List[str]
    score_risco: int
    # Generation timestamp (as a string) and the names of the data sources used.
    data_geracao: str
    fonte_dados: List[str]
    # Dossier sections keyed by section name.
    secoes: Dict[str, Any]
40
+
41
+
42
class CNPJResponse(BaseModel):
    """Response schema of the quick CNPJ lookup route."""
    # Identification
    cnpj: str
    razao_social: str
    nome_fantasia: str
    # Registration status and opening date
    situacao: str
    data_abertura: str
    capital_social: float
    # Single-line address and contact details
    endereco: str
    telefone: str
    email: str
    # Main activity and the list of partners
    atividade: str
    socios: List[Dict[str, Any]]
55
+
56
+
57
@router.post("/company", response_model=DossierResponse)
async def investigate_company(request: InvestigateCompanyRequest):
    """
    Build a comprehensive dossier on a company.

    Collects:
    - Cadastral data from CNPJ
    - Partners/owners
    - Sanctions (CEIS, CNEP, CEPIM)
    - News and media mentions
    - Related entities

    Returns risk score and red flags.

    Raises:
        HTTPException: status 500 carrying the error message when the
            investigation or the response construction fails; an
            HTTPException raised further down is propagated unchanged.
    """
    try:
        dossier = await investigar_empresa(request.cnpj)
        return DossierResponse(**dossier_to_dict(dossier))

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500,
        # matching the behavior of the CNPJ lookup route.
        raise
    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
77
+
78
+
79
@router.post("/person", response_model=DossierResponse)
async def investigate_person(request: InvestigatePersonRequest):
    """
    Build a dossier on a person.

    Note: Due to LGPD, personal data is limited.
    Mainly uses web search for public information.

    Raises:
        HTTPException: status 500 carrying the error message when the
            investigation or the response construction fails; an
            HTTPException raised further down is propagated unchanged.
    """
    try:
        dossier = await investigar_pessoa(request.nome, request.cpf)
        return DossierResponse(**dossier_to_dict(dossier))

    except HTTPException:
        # Keep deliberate HTTP errors intact instead of rewrapping them as 500,
        # matching the behavior of the CNPJ lookup route.
        raise
    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
93
+
94
+
95
@router.get("/cnpj/{cnpj}", response_model=CNPJResponse)
async def lookup_cnpj(cnpj: str):
    """
    Return basic company data for a CNPJ.

    Responds 404 when the lookup yields nothing and 500 (with the error
    message as detail) on any other failure.
    """
    try:
        company = await consultar_cnpj(cnpj)
        if not company:
            raise HTTPException(status_code=404, detail="CNPJ não encontrado")

        # Assemble the response fields first; address and activity are
        # flattened into single display strings.
        payload = {
            "cnpj": company.cnpj,
            "razao_social": company.razao_social,
            "nome_fantasia": company.nome_fantasia,
            "situacao": company.situacao,
            "data_abertura": company.data_abertura,
            "capital_social": company.capital_social,
            "endereco": f"{company.logradouro}, {company.numero} - {company.bairro}, {company.cidade}/{company.uf}",
            "telefone": company.telefone,
            "email": company.email,
            "atividade": f"{company.cnae_principal} - {company.cnae_descricao}",
            "socios": company.socios,
        }
        return CNPJResponse(**payload)

    except HTTPException:
        # The 404 above must reach the client untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
app/main.py CHANGED
@@ -8,7 +8,7 @@ from contextlib import asynccontextmanager
8
 
9
  from app.config import settings
10
  from app.core.database import init_db
11
- from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat
12
 
13
 
14
  @asynccontextmanager
@@ -59,6 +59,7 @@ app.include_router(analyze.router, prefix="/api/v1")
59
  app.include_router(graph.router, prefix="/api/v1")
60
  app.include_router(research.router, prefix="/api/v1")
61
  app.include_router(chat.router, prefix="/api/v1")
 
62
 
63
 
64
  @app.get("/")
 
8
 
9
  from app.config import settings
10
  from app.core.database import init_db
11
+ from app.api.routes import entities, relationships, events, search, ingest, analyze, graph, research, chat, investigate
12
 
13
 
14
  @asynccontextmanager
 
59
  app.include_router(graph.router, prefix="/api/v1")
60
  app.include_router(research.router, prefix="/api/v1")
61
  app.include_router(chat.router, prefix="/api/v1")
62
+ app.include_router(investigate.router, prefix="/api/v1")
63
 
64
 
65
  @app.get("/")
app/services/brazil_apis.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Brazilian Data APIs Service
3
+ Consolidates access to public Brazilian data APIs for investigation
4
+ """
5
+ import httpx
6
+ from typing import Optional, Dict, Any, List
7
+ from dataclasses import dataclass, field
8
+ import re
9
+
10
+
11
+ # API URLs
12
+ CNPJA_URL = "https://api.cnpja.com.br/office"
13
+ OPENCNPJ_URL = "https://api.opencnpj.org/v1/cnpj"
14
+ BRASILAPI_CNPJ = "https://brasilapi.com.br/api/cnpj/v1"
15
+ BRASILAPI_CEP = "https://brasilapi.com.br/api/cep/v2"
16
+
17
+
18
@dataclass
class CompanyData:
    """Flat record of company registration data; only the CNPJ is mandatory."""

    # --- Identification -------------------------------------------------
    cnpj: str
    razao_social: str = ""
    nome_fantasia: str = ""
    situacao: str = ""
    data_abertura: str = ""
    natureza_juridica: str = ""
    capital_social: float = 0.0
    porte: str = ""

    # --- Address --------------------------------------------------------
    logradouro: str = ""
    numero: str = ""
    complemento: str = ""
    bairro: str = ""
    cidade: str = ""
    uf: str = ""
    cep: str = ""

    # --- Contact --------------------------------------------------------
    telefone: str = ""
    email: str = ""

    # --- Economic activity (CNAE) ---------------------------------------
    cnae_principal: str = ""
    cnae_descricao: str = ""
    # A fresh list per instance, never shared between records.
    cnaes_secundarios: List[str] = field(default_factory=list)

    # --- Partners / owners ----------------------------------------------
    socios: List[Dict[str, Any]] = field(default_factory=list)

    # --- Name of the API that produced the record -----------------------
    fonte: str = ""
53
+
54
+
55
def clean_cnpj(cnpj: str) -> str:
    """Return *cnpj* with every character other than ASCII 0-9 removed."""
    ascii_digits = "0123456789"
    return "".join(ch for ch in cnpj if ch in ascii_digits)
58
+
59
+
60
async def consultar_cnpj(cnpj: str) -> Optional[CompanyData]:
    """
    Look up a company by CNPJ, trying the available providers in order.

    The input is reduced to its digits first; anything that does not leave
    exactly 14 digits yields None without any lookup. BrasilAPI is queried
    first and OpenCNPJ is used only when BrasilAPI returns nothing.
    """
    digits = clean_cnpj(cnpj)
    if len(digits) != 14:
        return None

    # Primary provider.
    company = await _query_brasilapi(digits)
    if not company:
        # Fallback provider.
        company = await _query_opencnpj(digits)

    return company or None
81
+
82
+
83
async def _query_brasilapi(cnpj: str) -> Optional[CompanyData]:
    """
    Fetch company registration data for a CNPJ from BrasilAPI.

    Args:
        cnpj: CNPJ appended as-is to the BrasilAPI CNPJ endpoint URL.

    Returns:
        A CompanyData built from the JSON response (fonte="BrasilAPI"), or
        None when the status code is not 200, the payload is not a JSON
        object, or any error occurs (the error is printed, not raised).
    """
    def text(source: Dict[str, Any], key: str) -> str:
        # dict.get(key, "") still returns None when the key is present with a
        # JSON null, so normalize missing and null values to "" explicitly.
        value = source.get(key)
        return "" if value is None else str(value)

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{BRASILAPI_CNPJ}/{cnpj}")

            if response.status_code != 200:
                return None

            data = response.json()
            if not isinstance(data, dict):
                return None

            # Parse partners; a null "qsa" is treated as an empty list and
            # malformed (non-object) entries are skipped.
            socios = [
                {
                    "nome": text(socio, "nome_socio"),
                    "qualificacao": text(socio, "qualificacao_socio"),
                    "cpf_cnpj": text(socio, "cnpj_cpf_do_socio"),
                    "data_entrada": text(socio, "data_entrada_sociedade")
                }
                for socio in data.get("qsa") or []
                if isinstance(socio, dict)
            ]

            # Parse secondary CNAEs; entries may be objects or plain values.
            cnaes_sec = []
            for cnae in data.get("cnaes_secundarios") or []:
                if isinstance(cnae, dict):
                    cnaes_sec.append(f"{text(cnae, 'codigo')} - {text(cnae, 'descricao')}")
                else:
                    cnaes_sec.append(str(cnae))

            return CompanyData(
                cnpj=cnpj,
                razao_social=text(data, "razao_social"),
                nome_fantasia=text(data, "nome_fantasia"),
                situacao=text(data, "descricao_situacao_cadastral"),
                data_abertura=text(data, "data_inicio_atividade"),
                natureza_juridica=text(data, "natureza_juridica"),
                # "or 0" keeps a null capital from raising in float().
                capital_social=float(data.get("capital_social") or 0),
                porte=text(data, "porte"),
                logradouro=text(data, "logradouro"),
                numero=text(data, "numero"),
                complemento=text(data, "complemento"),
                bairro=text(data, "bairro"),
                cidade=text(data, "municipio"),
                uf=text(data, "uf"),
                cep=text(data, "cep"),
                telefone=text(data, "ddd_telefone_1"),
                email=text(data, "email"),
                cnae_principal=text(data, "cnae_fiscal"),
                cnae_descricao=text(data, "cnae_fiscal_descricao"),
                cnaes_secundarios=cnaes_sec,
                socios=socios,
                fonte="BrasilAPI"
            )

    except Exception as e:
        # Best-effort provider: report and let the caller try the fallback.
        print(f"BrasilAPI error: {e}")
        return None
140
+
141
+
142
async def _query_opencnpj(cnpj: str) -> Optional[CompanyData]:
    """
    Fetch company registration data for a CNPJ from the OpenCNPJ API.

    Args:
        cnpj: CNPJ appended as-is to the OpenCNPJ endpoint URL.

    Returns:
        A CompanyData built from the JSON response (fonte="OpenCNPJ"), or
        None when the status code is not 200, the payload is not a JSON
        object, or any error occurs (the error is printed, not raised).
        Partner documents and secondary CNAEs are always left empty.
    """
    def text(source: Dict[str, Any], key: str) -> str:
        # dict.get(key, "") still returns None when the key is present with a
        # JSON null, so normalize missing and null values to "" explicitly.
        value = source.get(key)
        return "" if value is None else str(value)

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{OPENCNPJ_URL}/{cnpj}")

            if response.status_code != 200:
                return None

            data = response.json()
            if not isinstance(data, dict):
                return None

            # Parse partners; a null "socios" is treated as an empty list and
            # malformed (non-object) entries are skipped.
            socios = [
                {
                    "nome": text(socio, "nome"),
                    "qualificacao": text(socio, "qualificacao"),
                    "cpf_cnpj": "",
                    "data_entrada": text(socio, "data_entrada")
                }
                for socio in data.get("socios") or []
                if isinstance(socio, dict)
            ]

            # The main CNAE is read as an object; anything else (null, plain
            # value) must not discard the rest of the record.
            cnae = data.get("cnae_principal")
            if not isinstance(cnae, dict):
                cnae = {}

            return CompanyData(
                cnpj=cnpj,
                razao_social=text(data, "razao_social"),
                nome_fantasia=text(data, "nome_fantasia"),
                situacao=text(data, "situacao_cadastral"),
                data_abertura=text(data, "data_inicio_atividade"),
                natureza_juridica=text(data, "natureza_juridica"),
                capital_social=float(data.get("capital_social") or 0),
                porte=text(data, "porte"),
                logradouro=text(data, "logradouro"),
                numero=text(data, "numero"),
                complemento=text(data, "complemento"),
                bairro=text(data, "bairro"),
                cidade=text(data, "municipio"),
                uf=text(data, "uf"),
                cep=text(data, "cep"),
                telefone=text(data, "telefone"),
                email=text(data, "email"),
                cnae_principal=text(cnae, "codigo"),
                cnae_descricao=text(cnae, "descricao"),
                cnaes_secundarios=[],
                socios=socios,
                fonte="OpenCNPJ"
            )

    except Exception as e:
        # Best-effort provider: report and signal "no data" to the caller.
        print(f"OpenCNPJ error: {e}")
        return None
191
+
192
+
193
async def consultar_cep(cep: str) -> Optional[Dict[str, Any]]:
    """
    Fetch address data for a CEP from BrasilAPI.

    Non-digit characters are stripped from the CEP before the request.
    Returns the decoded JSON body on HTTP 200, otherwise None; any error is
    printed and also results in None.
    """
    digits_only = re.sub(r'[^0-9]', '', cep)
    url = f"{BRASILAPI_CEP}/{digits_only}"

    try:
        async with httpx.AsyncClient(timeout=15.0) as http:
            reply = await http.get(url)
            return reply.json() if reply.status_code == 200 else None
    except Exception as exc:
        print(f"CEP query error: {exc}")
        return None
209
+
210
+
211
async def buscar_empresas_por_nome(nome: str, uf: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Placeholder for a company search by name.

    Intended to be backed by web search (via Lancer), since direct
    name-search APIs are paid. Not implemented yet: both arguments are
    ignored and an empty list is always returned.
    """
    # No Lancer integration here yet, so there is nothing to report.
    matches: List[Dict[str, Any]] = []
    return matches
app/services/investigation.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Investigation Service - Builds comprehensive dossiers
3
+ Combines CNPJ data, transparency/sanctions, Lancer web search, and NER
4
+ """
5
+ import httpx
6
+ from typing import Optional, Dict, Any, List
7
+ from dataclasses import dataclass, field, asdict
8
+ import asyncio
9
+
10
+ from app.services.brazil_apis import consultar_cnpj, CompanyData
11
+ from app.services.transparencia_api import verificar_sancoes
12
+ from app.services import lancer
13
+ from app.services.nlp import entity_extractor
14
+ from app.core.database import get_db
15
+ from app.models.entity import Entity, Relationship
16
+
17
+
18
+ LANCER_URL = "https://madras1-lancer.hf.space/api/v1"
19
+
20
+
21
@dataclass
class DossierSection:
    """One titled block of a dossier, carrying arbitrary content."""
    # Heading shown for the section.
    titulo: str
    # Section payload; its shape depends on the section.
    conteudo: Any
    # Severity marker: one of "ok", "warning", "danger", "info".
    status: str = "ok"
    # Emoji displayed next to the title.
    icone: str = "📋"
28
+
29
+
30
@dataclass
class Dossier:
    """Investigation result: target identity, optional sections and risk metadata."""
    # Target kind: "organization" or "person".
    tipo: str
    # Display name of the target.
    alvo: str
    # Document number tied to the target, when known.
    cnpj_cpf: Optional[str] = None

    # --- Sections (each stays None until it is filled in) ---------------
    dados_cadastrais: Optional[DossierSection] = None
    socios: Optional[DossierSection] = None
    sancoes: Optional[DossierSection] = None
    noticias: Optional[DossierSection] = None
    entidades_relacionadas: Optional[DossierSection] = None

    # --- Metadata -------------------------------------------------------
    # Warning messages collected during the investigation.
    red_flags: List[str] = field(default_factory=list)
    # Risk score on a 0-100 scale.
    score_risco: int = 0
    # Generation timestamp as a string.
    data_geracao: str = ""
    # Names of the data sources that contributed.
    fonte_dados: List[str] = field(default_factory=list)
49
+
50
+
51
async def investigar_empresa(nome_ou_cnpj: str) -> Dossier:
    """
    Investigate a company and build a comprehensive dossier.

    Steps:
        1. When the input contains exactly 14 digits it is treated as a CNPJ
           and the registration data and partners are fetched.
        2. When a CNPJ is known, the sanction lists are checked.
        3. A web search collects news; a fixed list of negative keywords is
           matched against the search summary.
        4. Named entities are extracted from the search summary.

    Web-search and NER failures are printed and skipped; the dossier is
    still returned without those sections.

    Args:
        nome_ou_cnpj: Company name or CNPJ (formatted or digits only).

    Returns:
        The dossier, with score_risco capped at 100.
    """
    import re
    from datetime import datetime

    dossier = Dossier(
        tipo="organization",
        alvo=nome_ou_cnpj,
        data_geracao=datetime.now().isoformat()
    )

    # Check if input is CNPJ
    cnpj_clean = re.sub(r'[^0-9]', '', nome_ou_cnpj)
    is_cnpj = len(cnpj_clean) == 14

    company_data = None

    # 1. Get company data from CNPJ
    if is_cnpj:
        dossier.cnpj_cpf = cnpj_clean
        company_data = await consultar_cnpj(cnpj_clean)

        if company_data:
            dossier.alvo = company_data.razao_social or company_data.nome_fantasia or nome_ou_cnpj
            dossier.fonte_dados.append(company_data.fonte)

            # Build cadastral section
            dossier.dados_cadastrais = DossierSection(
                titulo="Dados Cadastrais",
                icone="🏢",
                conteudo={
                    "cnpj": company_data.cnpj,
                    "razao_social": company_data.razao_social,
                    "nome_fantasia": company_data.nome_fantasia,
                    "situacao": company_data.situacao,
                    "data_abertura": company_data.data_abertura,
                    "natureza_juridica": company_data.natureza_juridica,
                    "capital_social": company_data.capital_social,
                    "porte": company_data.porte,
                    "endereco": f"{company_data.logradouro}, {company_data.numero} - {company_data.bairro}, {company_data.cidade}/{company_data.uf}",
                    "cep": company_data.cep,
                    "telefone": company_data.telefone,
                    "email": company_data.email,
                    "atividade_principal": f"{company_data.cnae_principal} - {company_data.cnae_descricao}"
                }
            )

            # Any registration status that does not contain "ATIVA" is a red flag
            if company_data.situacao and "ATIVA" not in company_data.situacao.upper():
                dossier.red_flags.append(f"⚠️ Situação cadastral: {company_data.situacao}")
                dossier.dados_cadastrais.status = "warning"

            # Build partners section
            if company_data.socios:
                dossier.socios = DossierSection(
                    titulo=f"Sócios ({len(company_data.socios)})",
                    icone="👥",
                    conteudo=company_data.socios
                )

    # 2. Check sanctions/transparency
    if dossier.cnpj_cpf:
        sancoes = await verificar_sancoes(dossier.cnpj_cpf)
        dossier.fonte_dados.append("Portal da Transparência")

        if sancoes["tem_sancoes"]:
            dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções")
            dossier.score_risco += 40

            dossier.sancoes = DossierSection(
                titulo=f"Sanções ({sancoes['total_sancoes']})",
                icone="⚠️",
                status="danger",
                conteudo=sancoes
            )
        else:
            dossier.sancoes = DossierSection(
                titulo="Sanções",
                icone="✅",
                status="ok",
                conteudo={"mensagem": "Nenhuma sanção encontrada nos cadastros públicos"}
            )

    # 3. Web search for news and context
    search_query = dossier.alvo
    if company_data and company_data.nome_fantasia:
        search_query = company_data.nome_fantasia

    # Real summary text returned by the search. It stays empty when only the
    # placeholder is shown, so the NER step never analyzes placeholder text.
    news_summary = ""

    try:
        web_result = await lancer.search(f"{search_query} notícias escândalos processos", max_results=8)

        if web_result.answer or web_result.results:
            dossier.fonte_dados.append("Lancer Web Search")

            news_content = {
                "resumo": web_result.answer or "Sem resumo disponível",
                "fontes": [
                    # "or ''" keeps a result without content from aborting the section
                    {"titulo": r.title, "url": r.url, "snippet": (r.content or "")[:200]}
                    for r in web_result.results[:5]
                ]
            }

            dossier.noticias = DossierSection(
                titulo="Notícias e Mídia",
                icone="📰",
                conteudo=news_content
            )
            news_summary = web_result.answer or ""

            # Check for negative keywords in news; only the first hit is reported
            negative_keywords = ["escândalo", "fraude", "corrupção", "prisão", "investigado", "denúncia", "irregularidade"]
            raw_text = news_summary.lower()
            for kw in negative_keywords:
                if kw in raw_text:
                    dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada nas notícias")
                    dossier.noticias.status = "warning"
                    dossier.score_risco += 10
                    break
    except Exception as e:
        print(f"Web search error: {e}")

    # 4. Extract related entities using NER (only from a real summary)
    if news_summary:
        try:
            text_to_analyze = news_summary[:3000]
            ner_result = await entity_extractor.extract(text_to_analyze)

            if ner_result.entities:
                entities = [
                    {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role}
                    for e in ner_result.entities[:10]
                ]

                dossier.entidades_relacionadas = DossierSection(
                    titulo=f"Entidades Relacionadas ({len(entities)})",
                    icone="🔗",
                    conteudo=entities
                )
        except Exception as e:
            print(f"NER error: {e}")

    # Calculate final risk score: 5 extra points per red flag, capped at 100
    dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5)

    return dossier
197
+
198
+
199
async def investigar_pessoa(nome: str, cpf: Optional[str] = None) -> Dossier:
    """
    Investigate a person and build a dossier.
    Note: CPF data is heavily protected by LGPD, so mainly uses web search.

    Steps:
        1. When a CPF is given, the sanction lists are checked using its
           digits only (the same normalization the company flow applies).
        2. A web search collects public information; a fixed list of
           negative keywords is matched against the search summary.
        3. Named entities other than the target are extracted from the
           search summary.

    Web-search and NER failures are printed and skipped; the dossier is
    still returned without those sections.

    Args:
        nome: Name of the person.
        cpf: Optional CPF, formatted or digits only. It is stored on the
            dossier exactly as given.

    Returns:
        The dossier, with score_risco capped at 100.
    """
    import re
    from datetime import datetime

    dossier = Dossier(
        tipo="person",
        alvo=nome,
        cnpj_cpf=cpf,
        data_geracao=datetime.now().isoformat()
    )

    # 1. Check sanctions if CPF provided
    # Strip punctuation so "123.456.789-00" is queried as "12345678900".
    cpf_digits = re.sub(r'[^0-9]', '', cpf) if cpf else ""
    if cpf_digits:
        sancoes = await verificar_sancoes(cpf_digits)
        dossier.fonte_dados.append("Portal da Transparência")

        if sancoes["tem_sancoes"]:
            dossier.red_flags.append(f"🚨 Encontrado em {sancoes['total_sancoes']} lista(s) de sanções")
            dossier.score_risco += 50

            dossier.sancoes = DossierSection(
                titulo=f"Sanções ({sancoes['total_sancoes']})",
                icone="⚠️",
                status="danger",
                conteudo=sancoes
            )

    # Real summary text returned by the search. It stays empty when only the
    # placeholder is shown, so the NER step never analyzes placeholder text.
    public_summary = ""

    # 2. Web search for information
    try:
        web_result = await lancer.search(f'"{nome}" biografia cargo empresa', max_results=10)

        if web_result.answer or web_result.results:
            dossier.fonte_dados.append("Lancer Web Search")

            dossier.noticias = DossierSection(
                titulo="Informações Públicas",
                icone="🌐",
                conteudo={
                    "resumo": web_result.answer or "Informações limitadas",
                    "fontes": [
                        # "or ''" keeps a result without content from aborting the section
                        {"titulo": r.title, "url": r.url, "snippet": (r.content or "")[:200]}
                        for r in web_result.results[:5]
                    ]
                }
            )
            public_summary = web_result.answer or ""

            # Check for negative keywords; only the first hit is reported
            negative_keywords = ["preso", "condenado", "investigado", "acusado", "escândalo", "fraude"]
            raw_text = public_summary.lower()
            for kw in negative_keywords:
                if kw in raw_text:
                    dossier.red_flags.append(f"📰 Menção a '{kw}' encontrada")
                    dossier.noticias.status = "warning"
                    dossier.score_risco += 15
                    break
    except Exception as e:
        print(f"Web search error: {e}")

    # 3. Extract related entities (only from a real summary)
    if public_summary:
        try:
            ner_result = await entity_extractor.extract(public_summary[:2000])

            if ner_result.entities:
                entities = [
                    {"nome": e.name, "tipo": e.type, "descricao": e.description or e.role}
                    for e in ner_result.entities[:10]
                    if e.name.lower() != nome.lower()  # Exclude the target
                ]

                if entities:
                    dossier.entidades_relacionadas = DossierSection(
                        titulo=f"Conexões ({len(entities)})",
                        icone="🔗",
                        conteudo=entities
                    )
        except Exception as e:
            print(f"NER error: {e}")

    # Final risk score: 5 extra points per red flag, capped at 100
    dossier.score_risco = min(100, dossier.score_risco + len(dossier.red_flags) * 5)

    return dossier
284
+
285
+
286
def dossier_to_dict(dossier: Dossier) -> Dict[str, Any]:
    """
    Flatten a dossier into a plain dictionary for the JSON response.

    Metadata is copied to top-level keys; every section that is set is
    placed under "secoes", keyed by its attribute name.
    """
    section_names = (
        "dados_cadastrais",
        "socios",
        "sancoes",
        "noticias",
        "entidades_relacionadas",
    )

    payload: Dict[str, Any] = {
        "tipo": dossier.tipo,
        "alvo": dossier.alvo,
        "cnpj_cpf": dossier.cnpj_cpf,
        "red_flags": dossier.red_flags,
        "score_risco": dossier.score_risco,
        "data_geracao": dossier.data_geracao,
        "fonte_dados": dossier.fonte_dados,
        "secoes": {},
    }

    secoes = payload["secoes"]
    for name in section_names:
        sec = getattr(dossier, name)
        if not sec:
            # Sections that were never filled in are left out entirely.
            continue
        secoes[name] = dict(
            titulo=sec.titulo,
            icone=sec.icone,
            status=sec.status,
            conteudo=sec.conteudo,
        )

    return payload
app/services/transparencia_api.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Portal da Transparência APIs
3
+ Access to Brazilian government transparency data
4
+ """
5
+ import httpx
6
+ from typing import Optional, Dict, Any, List
7
+ from dataclasses import dataclass
8
+
9
+
10
+ # Portal da Transparência base URL
11
+ TRANSPARENCIA_URL = "https://api.portaldatransparencia.gov.br/api-de-dados"
12
+
13
+
14
@dataclass
class SanctionRecord:
    """One entry found in a sanctions registry."""
    # Registry the record came from: CEIS, CNEP or CEPIM.
    tipo: str
    # Document number and name of the sanctioned party.
    cpf_cnpj: str
    nome: str
    # 'F' or 'J'.
    tipo_pessoa: str

    # --- Sanction details (all optional, empty by default) --------------
    tipo_sancao: str = ""
    data_inicio: str = ""
    data_fim: str = ""
    orgao_sancionador: str = ""
    uf_orgao: str = ""
    fundamentacao_legal: str = ""

    # --- Link to the source registry ------------------------------------
    fonte_url: str = ""
32
+
33
+
34
async def consultar_ceis(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """
    Look up a CNPJ/CPF in CEIS (Cadastro de Empresas Inidôneas e Suspensas).

    Note: the Portal da Transparência requires an authentication token.
    The request is attempted even when no token is given.
    """
    records = await _query_sanctions(endpoint="ceis", cnpj_cpf=cnpj_cpf, token=token)
    return records
41
+
42
+
43
async def consultar_cnep(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """Look up a CNPJ/CPF in CNEP (Cadastro Nacional de Empresas Punidas)."""
    records = await _query_sanctions(endpoint="cnep", cnpj_cpf=cnpj_cpf, token=token)
    return records
48
+
49
+
50
async def consultar_cepim(cnpj_cpf: str, token: Optional[str] = None) -> List[SanctionRecord]:
    """
    Look up a CNPJ/CPF in CEPIM (Cadastro de Entidades Privadas sem Fins
    Lucrativos Impedidas).
    """
    records = await _query_sanctions(endpoint="cepim", cnpj_cpf=cnpj_cpf, token=token)
    return records
55
+
56
+
57
async def _query_sanctions(
    endpoint: str,
    cnpj_cpf: str,
    token: Optional[str] = None
) -> List[SanctionRecord]:
    """
    Query one sanctions endpoint of the Portal da Transparência API.

    Args:
        endpoint: Path segment appended to the base URL ("ceis", "cnep" or
            "cepim" for the callers in this module); its upper-cased form
            becomes the record type.
        cnpj_cpf: Document number sent as the "cnpjCpf" query parameter.
        token: Optional API key, sent in the "chave-api-dados" header.

    Returns:
        One SanctionRecord per JSON object in the response. An empty list
        is returned on HTTP 401 (a notice is printed), on any other non-200
        status, and on any error (the error is printed, not raised).
    """
    def text(value: Any) -> str:
        # JSON null must become "", not None or the literal string "None".
        return "" if value is None else str(value)

    def nested(value: Any, key: str) -> str:
        # Some fields arrive either as an object holding the label under
        # *key* or as a plain value; accept both shapes.
        if isinstance(value, dict):
            return text(value.get(key))
        return text(value)

    try:
        headers = {}
        if token:
            headers["chave-api-dados"] = token

        params = {"cnpjCpf": cnpj_cpf}

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                f"{TRANSPARENCIA_URL}/{endpoint}",
                params=params,
                headers=headers
            )

            if response.status_code == 401:
                # Need authentication - return empty for now
                print(f"Portal da Transparência requires authentication for {endpoint}")
                return []

            if response.status_code != 200:
                return []

            data = response.json()
            if not isinstance(data, list):
                # A single object is wrapped; an empty/null payload means no records.
                data = [data] if data else []

            records = []
            for item in data:
                if not isinstance(item, dict):
                    # Skip malformed entries instead of discarding every record.
                    continue

                nome = item.get("nomeRazaoSocial")
                if nome is None:
                    nome = item.get("nome")

                records.append(SanctionRecord(
                    tipo=endpoint.upper(),
                    cpf_cnpj=text(item.get("cpfCnpj")),
                    nome=text(nome),
                    tipo_pessoa=text(item.get("tipoPessoa")),
                    tipo_sancao=nested(item.get("tipoSancao"), "descricao"),
                    data_inicio=text(item.get("dataInicioSancao")),
                    data_fim=text(item.get("dataFimSancao")),
                    orgao_sancionador=nested(item.get("orgaoSancionador"), "nome"),
                    uf_orgao=text(item.get("ufOrgaoSancionador")),
                    fundamentacao_legal=text(item.get("fundamentacaoLegal")),
                    fonte_url=f"https://portaldatransparencia.gov.br/{endpoint}"
                ))

            return records

    except Exception as e:
        # Best-effort lookup: report and treat the registry as having no hits.
        print(f"Transparência API error ({endpoint}): {e}")
        return []
110
+
111
+
112
async def verificar_sancoes(cnpj_cpf: str, token: Optional[str] = None) -> Dict[str, Any]:
    """
    Look a CNPJ/CPF up in CEIS, CNEP and CEPIM and consolidate the result.

    The three lookups are awaited together. The returned dictionary holds
    the queried document, a flag and total count, the per-registry counts
    and a simplified list of the records found.
    """
    import asyncio

    # Run the three registry lookups concurrently.
    ceis, cnep, cepim = await asyncio.gather(
        consultar_ceis(cnpj_cpf, token),
        consultar_cnep(cnpj_cpf, token),
        consultar_cepim(cnpj_cpf, token),
    )

    all_sanctions = ceis + cnep + cepim
    total = len(all_sanctions)

    # Keep only the fields that are exposed for each record.
    registros = []
    for record in all_sanctions:
        registros.append({
            "tipo": record.tipo,
            "tipo_sancao": record.tipo_sancao,
            "orgao": record.orgao_sancionador,
            "inicio": record.data_inicio,
            "fim": record.data_fim,
            "fundamentacao": record.fundamentacao_legal,
        })

    return {
        "cnpj_cpf": cnpj_cpf,
        "tem_sancoes": total > 0,
        "total_sancoes": total,
        "ceis": len(ceis),
        "cnep": len(cnep),
        "cepim": len(cepim),
        "registros": registros,
    }