import re
from itertools import combinations

from server.db import SessionLocal, Entity, EntityRelation, ActionEntity

# Mock vocabularies of known entities for the GEO niche.
# In production: replace with spaCy NER or an LLM call.
_KNOWN_TOPICS = (
    'seo tools',
    'backlinks',
    'content marketing',
    'ai seo',
    'growth hack',
)
_KNOWN_COMPETITORS = ('ahrefs', 'semrush', 'moz', 'brightedge', 'brand24')


def extract_entities_basic(text: str) -> list:
    """Return the known topics and competitors mentioned in *text*.

    Matching is a case-insensitive substring test against the fixed
    vocabularies above; no NER or LLM is involved yet.

    Args:
        text: Free text to scan. Empty or ``None`` yields an empty list.

    Returns:
        A list of ``{'name': ..., 'type': ...}`` dicts, where ``type`` is
        ``'topic'`` or ``'competitor'``. Topics come first, each group in
        vocabulary order.
    """
    if not text:
        return []

    haystack = text.lower()
    entities = [
        {'name': topic, 'type': 'topic'}
        for topic in _KNOWN_TOPICS
        if topic in haystack
    ]
    entities.extend(
        {'name': competitor, 'type': 'competitor'}
        for competitor in _KNOWN_COMPETITORS
        if competitor in haystack
    )
    return entities


def build_knowledge_graph(job_id: int, audit_data: dict):
    """Transform audit text into a graph of related entities.

    The audit payload is stringified and scanned with
    ``extract_entities_basic``. Each entity found is fetched by name or
    created, and every pair of entities found in the same audit is linked
    with a ``'related_to'`` relation unless that (source, target) relation
    already exists.

    Args:
        job_id: Identifier of the audit job. Currently not used here.
        audit_data: Audit payload; its ``str()`` form is the text scanned.

    Returns:
        The ids of the entities found, in extraction order.

    Raises:
        Exception: Any database error is re-raised after a rollback.
    """
    db = SessionLocal()
    try:
        # Extract from main content or keywords.
        full_text = str(audit_data)

        entity_ids = []
        for found in extract_entities_basic(full_text):
            # Get or create the entity (looked up by name only).
            ent = db.query(Entity).filter(Entity.name == found['name']).first()
            if not ent:
                ent = Entity(name=found['name'], type=found['type'])
                db.add(ent)
                # Flush so the new row gets its id before we record it.
                db.flush()
            entity_ids.append(ent.id)

        # Simplified: everything found in one audit is 'related_to'.
        # In production: use an LLM to define 'competes_with', 'targets'...
        for source_id, target_id in combinations(entity_ids, 2):
            rel = db.query(EntityRelation).filter(
                EntityRelation.source_id == source_id,
                EntityRelation.target_id == target_id,
            ).first()
            if not rel:
                db.add(EntityRelation(
                    source_id=source_id,
                    target_id=target_id,
                    relation_type="related_to",
                ))

        db.commit()
        return entity_ids
    except Exception:
        # Leave the session clean before propagating the failure.
        db.rollback()
        raise
    finally:
        db.close()


def link_action_to_entity(action_id: int, entity_name: str):
    """Link an action to the entity named *entity_name*.

    The entity is looked up by its lower-cased name. Linking is idempotent:
    if the (action, entity) link already exists, no second row is added.

    Args:
        action_id: Id of the action to link.
        entity_name: Name of the entity; matched after lower-casing.

    Returns:
        ``True`` if the link exists after the call, ``False`` if
        *entity_name* is empty or no entity with that name was found.

    Raises:
        Exception: Any database error is re-raised after a rollback.
    """
    if not entity_name:
        return False

    db = SessionLocal()
    try:
        ent = db.query(Entity).filter(
            Entity.name == entity_name.lower()
        ).first()
        if not ent:
            return False

        # Avoid inserting a duplicate link when called repeatedly.
        existing = db.query(ActionEntity).filter(
            ActionEntity.action_id == action_id,
            ActionEntity.entity_id == ent.id,
        ).first()
        if not existing:
            db.add(ActionEntity(action_id=action_id, entity_id=ent.id))
            db.commit()
        return True
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()