# last_edit / server/entity_extractor.py
# Moharek
# Deploy Moharek GEO Platform
# a74b879
import re
from server.db import SessionLocal, Entity, EntityRelation, ActionEntity
def extract_entities_basic(text: str):
    """Find known topic and competitor names mentioned in ``text``.

    Matching is case-insensitive and anchored at token boundaries, so a
    known name embedded inside a longer word is not reported (``'moz'``
    does not match "mozilla", ``'ai seo'`` does not match "thai seo").
    Topic phrases may carry a suffix ("growth hacking" matches
    ``'growth hack'``); competitor names must match as a whole token.

    Args:
        text: Free text to scan. Empty or ``None`` yields no entities.

    Returns:
        A list of ``{'name': ..., 'type': ...}`` dicts, topics first and
        competitors second, each group in catalogue order.
    """
    # In production: Use spaCy NER or LLM call
    if not text:
        return []
    haystack = text.lower()

    # Mock some known entities for the GEO niche.
    # Each row: (entity type, known names, require whole-token match).
    catalogue = (
        ('topic',
         ('seo tools', 'backlinks', 'content marketing', 'ai seo', 'growth hack'),
         False),
        ('competitor',
         ('ahrefs', 'semrush', 'moz', 'brightedge', 'brand24'),
         True),
    )

    found = []
    for kind, names, whole_token in catalogue:
        for name in names:
            # `[^\W_]` is "letter or digit"; the underscore is deliberately
            # treated as a separator so keys like `ahrefs_score` still match.
            pattern = r'(?<![^\W_])' + re.escape(name)
            if whole_token:
                pattern += r'(?![^\W_])'
            if re.search(pattern, haystack):
                found.append({'name': name, 'type': kind})
    return found
def build_knowledge_graph(job_id: int, audit_data: dict):
    """Store the entities mentioned in an audit and relate each pair of them.

    The audit payload is stringified as a whole and scanned with
    ``extract_entities_basic``. Every entity found is looked up by name and
    created when missing; then each ordered pair (earlier entity -> later
    entity) gets a ``"related_to"`` relation unless one already exists.

    Args:
        job_id: Accepted for the caller's convenience; not used here.
        audit_data: Audit payload; converted with ``str()`` before scanning.

    Returns:
        The ids of the entities found, in extraction order.
    """
    session = SessionLocal()
    try:
        # Extract from main content or keywords: the whole payload is scanned.
        found = extract_entities_basic(str(audit_data))

        ids = []
        for item in found:
            name = item['name']
            # Get or create the entity, keyed by name only.
            record = session.query(Entity).filter(Entity.name == name).first()
            if not record:
                record = Entity(name=name, type=item['type'])
                session.add(record)
                # Flush before the id is read below (presumably so the new
                # row's primary key is populated).
                session.flush()
            ids.append(record.id)

        # Create relations (simplified: everything found in one audit is
        # 'related_to'). In production: use an LLM to define
        # 'competes_with', 'targets'...
        for position, source in enumerate(ids):
            for target in ids[position + 1:]:
                existing = session.query(EntityRelation).filter(
                    EntityRelation.source_id == source,
                    EntityRelation.target_id == target
                ).first()
                if existing:
                    continue
                session.add(
                    EntityRelation(
                        source_id=source,
                        target_id=target,
                        relation_type="related_to"
                    )
                )

        session.commit()
        return ids
    finally:
        session.close()
def link_action_to_entity(action_id: int, entity_name: str):
    """Link an action to the entity whose name matches ``entity_name``.

    The name is lower-cased before the lookup. The link is created only
    when no link between this action and entity exists yet, so repeated
    calls do not insert duplicate rows.

    Args:
        action_id: Id of the action to link.
        entity_name: Name of the entity; empty or ``None`` links nothing.

    Returns:
        ``True`` when the entity exists (the link was created or was
        already present), ``False`` when no such entity was found or no
        name was given.
    """
    # Nothing to look up: bail out before opening a session.
    if not entity_name:
        return False

    db = SessionLocal()
    try:
        ent = db.query(Entity).filter(Entity.name == entity_name.lower()).first()
        if not ent:
            return False

        # Same get-or-create approach build_knowledge_graph uses for relations.
        already_linked = db.query(ActionEntity).filter(
            ActionEntity.action_id == action_id,
            ActionEntity.entity_id == ent.id,
        ).first()
        if not already_linked:
            db.add(ActionEntity(action_id=action_id, entity_id=ent.id))
            db.commit()
        return True
    finally:
        db.close()