GraphResearcher / app /graph /relation_extractor.py
yugbirla's picture
Sync GraphRAG fusion quality cleanup and evaluation files
b7d0804
Raw
History Blame Contribute Delete
2.96 kB
import itertools
import re
from typing import List, Dict, Any
from app.graph.entity_extractor import split_sentences
from app.graph.graph_quality import is_noisy_entity_name
VERB_RELATION_MAP = {
"stands for": "STANDS_FOR",
"refers to": "REFERS_TO",
"uses": "USES",
"use": "USES",
"retrieves": "RETRIEVES",
"retrieve": "RETRIEVES",
"generates": "GENERATES",
"generate": "GENERATES",
"provides": "PROVIDES",
"provide": "PROVIDES",
"reduces": "REDUCES",
"reduce": "REDUCES",
"improves": "IMPROVES",
"improve": "IMPROVES",
"contains": "CONTAINS",
"include": "INCLUDES",
"includes": "INCLUDES"
}
def relation_id(source_id: str, relation_type: str, target_id: str) -> str:
return f"{source_id}__{relation_type.lower()}__{target_id}"[:160]
def entity_appears_in_sentence(entity_name: str, sentence: str) -> bool:
pattern = r"\b" + re.escape(entity_name) + r"\b"
return re.search(pattern, sentence, flags=re.IGNORECASE) is not None
def extract_relations_from_text(
text: str,
entities: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
if not text or len(entities) < 2:
return []
relations = []
sentences = split_sentences(text)
clean_entities = [
entity for entity in entities
if not is_noisy_entity_name(entity.get("name", ""))
]
if len(clean_entities) < 2:
return []
for sentence in sentences:
present_entities = [
entity for entity in clean_entities
if entity_appears_in_sentence(entity["name"], sentence)
]
# Avoid relation explosion
present_entities = present_entities[:5]
if len(present_entities) < 2:
continue
relation_type = detect_relation_type(sentence)
for source, target in itertools.combinations(present_entities, 2):
if source["entity_id"] == target["entity_id"]:
continue
if is_noisy_entity_name(source["name"]) or is_noisy_entity_name(target["name"]):
continue
relations.append(
{
"relation_id": relation_id(
source["entity_id"],
relation_type,
target["entity_id"]
),
"source_entity_id": source["entity_id"],
"target_entity_id": target["entity_id"],
"source_name": source["name"],
"target_name": target["name"],
"relation_type": relation_type,
"evidence_sentence": sentence
}
)
return relations
def detect_relation_type(sentence: str) -> str:
sentence_lower = sentence.lower()
for phrase, relation_type in VERB_RELATION_MAP.items():
if phrase in sentence_lower:
return relation_type
return "RELATED_TO"