Madras1 committed on
Commit
1f9b97a
·
verified ·
1 Parent(s): 30854a8

Upload 77 files

Browse files
Files changed (1) hide show
  1. app/api/routes/entities.py +170 -170
app/api/routes/entities.py CHANGED
@@ -1,57 +1,162 @@
1
- """
2
- Entity CRUD Routes
3
- """
4
- from fastapi import APIRouter, Depends, HTTPException, Query
5
- from sqlalchemy.orm import Session
6
- from sqlalchemy import or_
7
- from typing import List, Optional
8
-
9
- from app.api.deps import get_scoped_db
10
- from app.models import Entity, Relationship
11
- from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge
12
-
13
- router = APIRouter(prefix="/entities", tags=["Entities"])
14
-
15
-
16
- @router.get("", response_model=List[EntityResponse])
17
- def list_entities(
18
- type: Optional[str] = None,
19
- search: Optional[str] = None,
20
- project_id: Optional[str] = None,
21
- limit: int = Query(default=50, le=200),
22
- offset: int = 0,
23
- db: Session = Depends(get_scoped_db)
24
- ):
25
- """Lista todas as entidades com filtros opcionais"""
26
- query = db.query(Entity)
27
-
28
- if project_id:
29
- query = query.filter(Entity.project_id == project_id)
30
-
31
- if type:
32
- query = query.filter(Entity.type == type)
33
-
34
- if search:
35
- query = query.filter(
36
- or_(
37
- Entity.name.ilike(f"%{search}%"),
38
- Entity.description.ilike(f"%{search}%")
39
- )
40
- )
41
-
42
- query = query.order_by(Entity.created_at.desc())
43
- return query.offset(offset).limit(limit).all()
44
 
45
 
46
- @router.get("/types")
47
- def get_entity_types(db: Session = Depends(get_scoped_db)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  """Retorna todos os tipos de entidade únicos"""
49
  types = db.query(Entity.type).distinct().all()
50
  return [t[0] for t in types]
51
 
52
 
53
- @router.get("/{entity_id}", response_model=EntityResponse)
54
- def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  """Busca uma entidade por ID"""
56
  entity = db.query(Entity).filter(Entity.id == entity_id).first()
57
  if not entity:
@@ -59,8 +164,8 @@ def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
59
  return entity
60
 
61
 
62
- @router.post("", response_model=EntityResponse, status_code=201)
63
- def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
64
  """Cria uma nova entidade"""
65
  db_entity = Entity(**entity.model_dump())
66
  db.add(db_entity)
@@ -69,8 +174,8 @@ def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
69
  return db_entity
70
 
71
 
72
- @router.put("/{entity_id}", response_model=EntityResponse)
73
- def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_scoped_db)):
74
  """Atualiza uma entidade existente"""
75
  db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
76
  if not db_entity:
@@ -85,8 +190,8 @@ def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(ge
85
  return db_entity
86
 
87
 
88
- @router.delete("/{entity_id}")
89
- def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
90
  """Deleta uma entidade"""
91
  db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
92
  if not db_entity:
@@ -105,12 +210,12 @@ def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
105
  return {"message": "Entity deleted"}
106
 
107
 
108
- @router.get("/{entity_id}/connections", response_model=GraphData)
109
- def get_entity_connections(
110
- entity_id: str,
111
- depth: int = Query(default=1, le=3),
112
- db: Session = Depends(get_scoped_db)
113
- ):
114
  """
115
  Retorna o grafo de conexões de uma entidade
116
  Usado para visualização de rede no frontend
@@ -167,12 +272,12 @@ def get_entity_connections(
167
  )
168
 
169
 
170
- @router.post("/merge")
171
- def merge_entities(
172
- primary_id: str,
173
- secondary_id: str,
174
- db: Session = Depends(get_scoped_db)
175
- ):
176
  """
177
  Merge two entities into one.
178
  The primary entity is kept, the secondary is deleted.
@@ -246,108 +351,3 @@ def merge_entities(
246
  }
247
  }
248
 
249
-
250
- @router.get("/suggest-merge")
251
- async def suggest_merge_candidates(
252
- limit: int = Query(default=10, le=50),
253
- db: Session = Depends(get_scoped_db)
254
- ):
255
- """
256
- Use LLM to find potential duplicate entities that could be merged.
257
- Returns pairs of entities that might be the same.
258
- """
259
- import httpx
260
- from app.config import settings
261
-
262
- # Get all entities
263
- entities = db.query(Entity).order_by(Entity.name).limit(200).all()
264
-
265
- if len(entities) < 2:
266
- return {"candidates": [], "message": "Not enough entities to compare"}
267
-
268
- # Build entity list for LLM
269
- entity_list = []
270
- for e in entities:
271
- aliases = (e.properties or {}).get("aliases", [])
272
- entity_list.append({
273
- "id": e.id,
274
- "name": e.name,
275
- "type": e.type,
276
- "aliases": aliases[:5] if aliases else [] # Limit aliases
277
- })
278
-
279
- # Ask LLM to find duplicates
280
- prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes).
281
-
282
- Entidades:
283
- {entity_list[:100]}
284
-
285
- Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade:
286
- ```json
287
- {{
288
- "duplicates": [
289
- {{
290
- "id1": "uuid1",
291
- "id2": "uuid2",
292
- "confidence": 0.95,
293
- "reason": "Mesmo nome com variação"
294
- }}
295
- ]
296
- }}
297
- ```
298
-
299
- Se não houver duplicatas, retorne: {{"duplicates": []}}
300
- """
301
-
302
- try:
303
- async with httpx.AsyncClient(timeout=30.0) as client:
304
- response = await client.post(
305
- "https://api.cerebras.ai/v1/chat/completions",
306
- headers={
307
- "Authorization": f"Bearer {settings.cerebras_api_key}",
308
- "Content-Type": "application/json"
309
- },
310
- json={
311
- "model": "qwen-3-235b-a22b-instruct-2507",
312
- "messages": [
313
- {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. Responda apenas em JSON válido."},
314
- {"role": "user", "content": prompt}
315
- ],
316
- "temperature": 0.1,
317
- "max_tokens": 1024
318
- }
319
- )
320
-
321
- if response.status_code != 200:
322
- return {"candidates": [], "error": "LLM API error"}
323
-
324
- data = response.json()
325
- content = data["choices"][0]["message"]["content"]
326
-
327
- # Parse JSON from response
328
- import json
329
- import re
330
- json_match = re.search(r'\{.*\}', content, re.DOTALL)
331
- if json_match:
332
- result = json.loads(json_match.group(0))
333
-
334
- # Enrich with entity names
335
- candidates = []
336
- for dup in result.get("duplicates", [])[:limit]:
337
- e1 = next((e for e in entities if e.id == dup.get("id1")), None)
338
- e2 = next((e for e in entities if e.id == dup.get("id2")), None)
339
- if e1 and e2:
340
- candidates.append({
341
- "entity1": {"id": e1.id, "name": e1.name, "type": e1.type},
342
- "entity2": {"id": e2.id, "name": e2.name, "type": e2.type},
343
- "confidence": dup.get("confidence", 0.5),
344
- "reason": dup.get("reason", "Possível duplicata")
345
- })
346
-
347
- return {"candidates": candidates}
348
-
349
- return {"candidates": [], "message": "No duplicates found"}
350
-
351
- except Exception as e:
352
- return {"candidates": [], "error": str(e)}
353
-
 
1
+ """
2
+ Entity CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_
7
+ from typing import List, Optional
8
+
9
+ from app.api.deps import get_scoped_db
10
+ from app.models import Entity, Relationship
11
+ from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge
12
+
13
+ router = APIRouter(prefix="/entities", tags=["Entities"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
@router.get("", response_model=List[EntityResponse])
def list_entities(
    # NOTE: `type` shadows the builtin, but it is the public query-parameter
    # name, so it cannot be renamed without breaking API clients.
    type: Optional[str] = None,
    search: Optional[str] = None,
    project_id: Optional[str] = None,
    limit: int = Query(default=50, ge=1, le=200),
    offset: int = Query(default=0, ge=0),
    db: Session = Depends(get_scoped_db)
):
    """
    List entities, newest first, with optional filters.

    Args:
        type: Keep only entities whose ``type`` equals this value.
        search: Text matched case-insensitively (``ILIKE``) anywhere inside
            the entity name or description.
        project_id: Keep only entities whose ``project_id`` equals this value.
        limit: Page size, between 1 and 200 (default 50).
        offset: Number of rows to skip; must not be negative.
        db: Session injected through ``get_scoped_db``.

    Returns:
        The matching entities ordered by ``created_at`` descending.
    """
    query = db.query(Entity)

    # Empty strings are treated the same as "filter not supplied".
    if project_id:
        query = query.filter(Entity.project_id == project_id)

    if type:
        query = query.filter(Entity.type == type)

    if search:
        # NOTE(review): `%` and `_` inside `search` are passed through to
        # ILIKE unescaped, so they act as wildcards.
        pattern = f"%{search}%"
        query = query.filter(
            or_(
                Entity.name.ilike(pattern),
                Entity.description.ilike(pattern)
            )
        )

    query = query.order_by(Entity.created_at.desc())
    return query.offset(offset).limit(limit).all()
44
+
45
+
46
@router.get("/types")
def get_entity_types(db: Session = Depends(get_scoped_db)):
    """Return every distinct entity type present in the database."""
    distinct_rows = db.query(Entity.type).distinct().all()
    # Each row holds exactly one column (the type), so unpack it directly.
    return [entity_type for (entity_type,) in distinct_rows]
51
 
52
 
53
@router.get("/suggest-merge")
async def suggest_merge_candidates(
    limit: int = Query(default=10, ge=1, le=50),
    db: Session = Depends(get_scoped_db)
):
    """
    Ask an LLM for pairs of entities that are probably duplicates.

    Up to 200 entities (ordered by name) are loaded; the first 100 are sent
    to the Cerebras chat-completions API, and the pairs it returns are
    resolved back to the loaded entities.

    Args:
        limit: Maximum number of candidate pairs to return (1-50, default 10).
        db: Session injected through ``get_scoped_db``.

    Returns:
        A dict that always contains ``"candidates"``:
        - on success, a list of ``{"entity1", "entity2", "confidence",
          "reason"}`` items, where each entity is ``{"id", "name", "type"}``;
        - an empty list plus ``"message"`` when fewer than two entities exist
          or the LLM reply contains no JSON object;
        - an empty list plus ``"error"`` when the API does not answer 200 or
          any exception is raised while calling it or parsing its reply.
    """
    import httpx
    import json
    import re
    from app.config import settings

    # Get all entities (capped at 200, alphabetical by name)
    entities = db.query(Entity).order_by(Entity.name).limit(200).all()

    if len(entities) < 2:
        return {"candidates": [], "message": "Not enough entities to compare"}

    # Build a compact description of each entity for the LLM
    entity_list = []
    for e in entities:
        aliases = (e.properties or {}).get("aliases", [])
        entity_list.append({
            "id": e.id,
            "name": e.name,
            "type": e.type,
            "aliases": aliases[:5] if aliases else []  # keep the prompt short
        })

    # Serialize as real JSON (double quotes, readable accents) instead of a
    # Python repr, since the model is asked to answer in JSON with these ids.
    # default=str keeps a non-JSON-native value from raising here.
    entities_json = json.dumps(entity_list[:100], ensure_ascii=False, default=str)

    # Ask LLM to find duplicates
    prompt = f"""Analise esta lista de entidades e encontre possíveis DUPLICATAS (mesma pessoa/organização/local com nomes diferentes).

Entidades:
{entities_json}

Retorne APENAS um JSON válido com pares de IDs que são provavelmente a mesma entidade:
```json
{{
    "duplicates": [
        {{
            "id1": "uuid1",
            "id2": "uuid2",
            "confidence": 0.95,
            "reason": "Mesmo nome com variação"
        }}
    ]
}}
```

Se não houver duplicatas, retorne: {{"duplicates": []}}
"""

    # Best-effort endpoint: every failure below is reported in-band through
    # the "error" key instead of surfacing as an unhandled exception.
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.cerebras.ai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.cerebras_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "zai-glm-4.7",
                    "messages": [
                        {"role": "system", "content": "Você é um especialista em detecção de entidades duplicadas. Responda apenas em JSON válido."},
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "max_tokens": 1024
                }
            )

        if response.status_code != 200:
            return {"candidates": [], "error": "LLM API error"}

        data = response.json()
        content = data["choices"][0]["message"]["content"]

        # The reply may wrap the JSON in prose or a code fence, so grab the
        # outermost {...} span.
        json_match = re.search(r'\{.*\}', content, re.DOTALL)
        if not json_match:
            return {"candidates": [], "message": "No duplicates found"}

        result = json.loads(json_match.group(0))

        # Index once instead of scanning the entity list for every pair.
        entities_by_id = {e.id: e for e in entities}

        def _lookup(entity_id):
            """Return the loaded entity with this id, or None."""
            try:
                return entities_by_id.get(entity_id)
            except TypeError:
                # The LLM returned an unhashable value (list/dict) as an id.
                return None

        # Enrich with entity names; `limit` counts valid pairs only, so
        # malformed entries do not eat into the caller's quota.
        candidates = []
        for dup in result.get("duplicates") or []:
            if len(candidates) >= limit:
                break
            if not isinstance(dup, dict):
                continue
            first = _lookup(dup.get("id1"))
            second = _lookup(dup.get("id2"))
            # Skip unknown ids and pairs that name the same entity twice.
            if first is None or second is None or first is second:
                continue
            candidates.append({
                "entity1": {"id": first.id, "name": first.name, "type": first.type},
                "entity2": {"id": second.id, "name": second.name, "type": second.type},
                "confidence": dup.get("confidence", 0.5),
                "reason": dup.get("reason", "Possível duplicata")
            })

        return {"candidates": candidates}

    except Exception as e:
        return {"candidates": [], "error": str(e)}
156
+
157
+
158
+ @router.get("/{entity_id}", response_model=EntityResponse)
159
+ def get_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
160
  """Busca uma entidade por ID"""
161
  entity = db.query(Entity).filter(Entity.id == entity_id).first()
162
  if not entity:
 
164
  return entity
165
 
166
 
167
+ @router.post("", response_model=EntityResponse, status_code=201)
168
+ def create_entity(entity: EntityCreate, db: Session = Depends(get_scoped_db)):
169
  """Cria uma nova entidade"""
170
  db_entity = Entity(**entity.model_dump())
171
  db.add(db_entity)
 
174
  return db_entity
175
 
176
 
177
+ @router.put("/{entity_id}", response_model=EntityResponse)
178
+ def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_scoped_db)):
179
  """Atualiza uma entidade existente"""
180
  db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
181
  if not db_entity:
 
190
  return db_entity
191
 
192
 
193
+ @router.delete("/{entity_id}")
194
+ def delete_entity(entity_id: str, db: Session = Depends(get_scoped_db)):
195
  """Deleta uma entidade"""
196
  db_entity = db.query(Entity).filter(Entity.id == entity_id).first()
197
  if not db_entity:
 
210
  return {"message": "Entity deleted"}
211
 
212
 
213
+ @router.get("/{entity_id}/connections", response_model=GraphData)
214
+ def get_entity_connections(
215
+ entity_id: str,
216
+ depth: int = Query(default=1, le=3),
217
+ db: Session = Depends(get_scoped_db)
218
+ ):
219
  """
220
  Retorna o grafo de conexões de uma entidade
221
  Usado para visualização de rede no frontend
 
272
  )
273
 
274
 
275
+ @router.post("/merge")
276
+ def merge_entities(
277
+ primary_id: str,
278
+ secondary_id: str,
279
+ db: Session = Depends(get_scoped_db)
280
+ ):
281
  """
282
  Merge two entities into one.
283
  The primary entity is kept, the secondary is deleted.
 
351
  }
352
  }
353