Madras1 committed on
Commit
53d0eb2
·
verified ·
1 Parent(s): 672666d

Upload 59 files

Browse files
app/api/routes/analyze.py CHANGED
@@ -91,6 +91,21 @@ async def analyze_text(request: AnalyzeRequest):
91
 
92
  db = next(get_db())
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # Process entities
95
  for entity in result.entities:
96
  entity_data = ExtractedEntityResponse(
@@ -116,6 +131,9 @@ async def analyze_text(request: AnalyzeRequest):
116
  if coords:
117
  lat, lng = coords
118
 
 
 
 
119
  # Create new entity
120
  new_entity = Entity(
121
  name=entity.name,
@@ -124,6 +142,7 @@ async def analyze_text(request: AnalyzeRequest):
124
  source="llm_extraction",
125
  latitude=lat,
126
  longitude=lng,
 
127
  properties={"role": entity.role, "aliases": entity.aliases}
128
  )
129
  db.add(new_entity)
@@ -166,10 +185,14 @@ async def analyze_text(request: AnalyzeRequest):
166
  ).first()
167
 
168
  if not existing_rel:
 
 
 
169
  new_rel = Relationship(
170
  source_id=source_entity.id,
171
  target_id=target_entity.id,
172
  type=rel.relationship_type,
 
173
  properties={"context": rel.context}
174
  )
175
  db.add(new_rel)
 
91
 
92
  db = next(get_db())
93
 
94
# Helper function to parse date strings
def parse_date(date_str):
    """Parse a date string into a ``datetime``, or return ``None``.

    Accepts ``YYYY-MM-DD`` prefixes (any trailing characters such as a
    time component are ignored via the slice) and bare ``YYYY`` years.
    Returns ``None`` for empty/missing or unparseable input instead of
    raising, so callers can pass it straight into a model field.
    """
    if not date_str:
        return None
    from datetime import datetime
    try:
        # Try YYYY-MM-DD format; [:10] tolerates e.g. "2023-05-01T12:00:00".
        return datetime.strptime(date_str[:10], "%Y-%m-%d")
    except (ValueError, TypeError):
        # Narrowed from a bare ``except``: ValueError covers malformed
        # dates, TypeError covers non-string input. Anything else (e.g.
        # KeyboardInterrupt) must not be swallowed here.
        try:
            # Fall back to a year-only value ("1999" -> 1999-01-01).
            return datetime.strptime(date_str[:4], "%Y")
        except (ValueError, TypeError):
            return None
108
+
109
  # Process entities
110
  for entity in result.entities:
111
  entity_data = ExtractedEntityResponse(
 
131
  if coords:
132
  lat, lng = coords
133
 
134
+ # Parse event_date if available
135
+ event_date = parse_date(getattr(entity, 'event_date', None))
136
+
137
  # Create new entity
138
  new_entity = Entity(
139
  name=entity.name,
 
142
  source="llm_extraction",
143
  latitude=lat,
144
  longitude=lng,
145
+ event_date=event_date,
146
  properties={"role": entity.role, "aliases": entity.aliases}
147
  )
148
  db.add(new_entity)
 
185
  ).first()
186
 
187
  if not existing_rel:
188
+ # Parse event_date if available
189
+ rel_event_date = parse_date(getattr(rel, 'event_date', None))
190
+
191
  new_rel = Relationship(
192
  source_id=source_entity.id,
193
  target_id=target_entity.id,
194
  type=rel.relationship_type,
195
+ event_date=rel_event_date,
196
  properties={"context": rel.context}
197
  )
198
  db.add(new_rel)
app/api/routes/ingest.py CHANGED
@@ -5,6 +5,7 @@ Endpoints para importar dados de fontes externas
5
  from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
6
  from sqlalchemy.orm import Session
7
  from typing import Optional, List
 
8
  import asyncio
9
 
10
  from app.core.database import get_db
@@ -17,6 +18,21 @@ from app.services.geocoding import geocode
17
  router = APIRouter(prefix="/ingest", tags=["Data Ingestion"])
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # ========== Wikipedia ==========
21
 
22
  @router.get("/wikipedia/search")
@@ -96,6 +112,9 @@ async def import_from_wikipedia(
96
  if coords:
97
  lat, lng = coords
98
 
 
 
 
99
  new_ent = Entity(
100
  name=ext_entity.name,
101
  type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
@@ -103,6 +122,7 @@ async def import_from_wikipedia(
103
  source="wikipedia_extraction",
104
  latitude=lat,
105
  longitude=lng,
 
106
  properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "extracted_from": title}
107
  )
108
  db.add(new_ent)
@@ -124,10 +144,14 @@ async def import_from_wikipedia(
124
  ).first()
125
 
126
  if not existing_rel:
 
 
 
127
  new_rel = Relationship(
128
  source_id=source_ent.id,
129
  target_id=target_ent.id,
130
  type=rel.relationship_type,
 
131
  properties={"context": rel.context, "extracted_from": title}
132
  )
133
  db.add(new_rel)
 
5
  from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
6
  from sqlalchemy.orm import Session
7
  from typing import Optional, List
8
+ from datetime import datetime
9
  import asyncio
10
 
11
  from app.core.database import get_db
 
18
  router = APIRouter(prefix="/ingest", tags=["Data Ingestion"])
19
 
20
 
21
def parse_event_date(date_str):
    """Parse date string to datetime object.

    Handles ``YYYY-MM-DD`` prefixes (trailing time components are dropped
    by the slice) and bare ``YYYY`` years; returns ``None`` for empty or
    unparseable input instead of raising, mirroring ``parse_date`` in
    the analyze route.
    """
    if not date_str:
        return None
    try:
        # Try YYYY-MM-DD format first; [:10] drops any time suffix.
        return datetime.strptime(date_str[:10], "%Y-%m-%d")
    except (ValueError, TypeError):
        # Narrowed from a bare ``except``: only parse failures
        # (ValueError) and non-string input (TypeError) should map to
        # None — other exceptions must surface as real bugs.
        try:
            # Fall back to year-only format ("1999" -> 1999-01-01).
            return datetime.strptime(date_str[:4], "%Y")
        except (ValueError, TypeError):
            return None
34
+
35
+
36
  # ========== Wikipedia ==========
37
 
38
  @router.get("/wikipedia/search")
 
112
  if coords:
113
  lat, lng = coords
114
 
115
+ # Parse event_date
116
+ event_date = parse_event_date(getattr(ext_entity, 'event_date', None))
117
+
118
  new_ent = Entity(
119
  name=ext_entity.name,
120
  type=ext_entity.type if ext_entity.type in ["person", "organization", "location", "event"] else "person",
 
122
  source="wikipedia_extraction",
123
  latitude=lat,
124
  longitude=lng,
125
+ event_date=event_date,
126
  properties={"role": ext_entity.role, "aliases": ext_entity.aliases, "extracted_from": title}
127
  )
128
  db.add(new_ent)
 
144
  ).first()
145
 
146
  if not existing_rel:
147
+ # Parse relationship event_date
148
+ rel_event_date = parse_event_date(getattr(rel, 'event_date', None))
149
+
150
  new_rel = Relationship(
151
  source_id=source_ent.id,
152
  target_id=target_ent.id,
153
  type=rel.relationship_type,
154
+ event_date=rel_event_date,
155
  properties={"context": rel.context, "extracted_from": title}
156
  )
157
  db.add(new_rel)