Spaces:
Sleeping
Sleeping
File size: 2,957 Bytes
b7d0804 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import itertools
import re
from typing import List, Dict, Any
from app.graph.entity_extractor import split_sentences
from app.graph.graph_quality import is_noisy_entity_name
# Maps a lowercase verb phrase found in a sentence to the relation type it
# signals. Insertion order matters: code that scans this mapping and stops at
# the first phrase present in the sentence will prefer earlier entries, so
# multi-word phrases are listed first.
VERB_RELATION_MAP = {
    "stands for": "STANDS_FOR",
    "refers to": "REFERS_TO",
    "uses": "USES",
    "use": "USES",
    "retrieves": "RETRIEVES",
    "retrieve": "RETRIEVES",
    "generates": "GENERATES",
    "generate": "GENERATES",
    "provides": "PROVIDES",
    "provide": "PROVIDES",
    "reduces": "REDUCES",
    "reduce": "REDUCES",
    "improves": "IMPROVES",
    "improve": "IMPROVES",
    "contains": "CONTAINS",
    # Base form added so plural subjects ("documents contain chunks") are
    # covered, matching every other verb in this table.
    "contain": "CONTAINS",
    "include": "INCLUDES",
    "includes": "INCLUDES",
}
def relation_id(source_id: str, relation_type: str, target_id: str) -> str:
    """Build the identifier for a relation between two entities.

    The identifier has the form
    ``<source_id>__<relation_type lowercased>__<target_id>`` and is cut to at
    most 160 characters.

    Args:
        source_id: Identifier of the source entity.
        relation_type: Relation label; it is lowercased in the result.
        target_id: Identifier of the target entity.

    Returns:
        The joined identifier, truncated to 160 characters.
    """
    max_length = 160
    joined = "{}__{}__{}".format(source_id, relation_type.lower(), target_id)
    return joined[:max_length]
def entity_appears_in_sentence(entity_name: str, sentence: str) -> bool:
    """Report whether an entity name occurs in a sentence as a whole token.

    Matching is case-insensitive. The name must not be directly preceded or
    followed by a word character, so "RAG" does not match inside "storage" or
    "RAGs".

    Args:
        entity_name: Name to look for; it is matched literally.
        sentence: Text to search.

    Returns:
        True if the name is found, False otherwise. An empty or
        whitespace-only name, or an empty sentence, is never a match.
    """
    if not entity_name or not entity_name.strip() or not sentence:
        return False

    # Lookarounds instead of \b: \b needs a word character on one side, so it
    # can never match next to names that start or end with punctuation such
    # as "C++" or ".NET". For names made of word characters the two forms
    # behave the same.
    pattern = r"(?<!\w)" + re.escape(entity_name) + r"(?!\w)"
    return re.search(pattern, sentence, flags=re.IGNORECASE) is not None
def extract_relations_from_text(
    text: str,
    entities: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Derive pairwise relations between entities mentioned in the same sentence.

    The text is split with ``split_sentences``. Entities whose name is flagged
    by ``is_noisy_entity_name`` are dropped. For each sentence, the entities
    whose name appears in it are collected (at most the first five, in the
    order given by ``entities``), and one relation is emitted for every pair.
    The relation type is detected once per sentence and shared by all of its
    pairs.

    Args:
        text: Raw text to scan.
        entities: Entity dicts; each is read through its ``"name"`` and
            ``"entity_id"`` keys.

    Returns:
        A list of relation dicts with the keys ``relation_id``,
        ``source_entity_id``, ``target_entity_id``, ``source_name``,
        ``target_name``, ``relation_type`` and ``evidence_sentence``. The list
        is empty when there is no text or fewer than two usable entities.
    """
    if not text or len(entities) < 2:
        return []

    sentence_list = split_sentences(text)

    usable_entities = []
    for candidate in entities:
        if is_noisy_entity_name(candidate.get("name", "")):
            continue
        usable_entities.append(candidate)
    if len(usable_entities) < 2:
        return []

    # Cap on entities considered per sentence, to avoid relation explosion.
    max_entities_per_sentence = 5
    found: List[Dict[str, Any]] = []

    for sentence in sentence_list:
        mentioned = [
            candidate for candidate in usable_entities
            if entity_appears_in_sentence(candidate["name"], sentence)
        ][:max_entities_per_sentence]
        if len(mentioned) < 2:
            continue

        kind = detect_relation_type(sentence)
        for left, right in itertools.combinations(mentioned, 2):
            left_id = left["entity_id"]
            right_id = right["entity_id"]
            if left_id == right_id:
                # Two list entries describing the same entity: no self-relation.
                continue
            if is_noisy_entity_name(left["name"]) or is_noisy_entity_name(right["name"]):
                continue

            record = {
                "relation_id": relation_id(left_id, kind, right_id),
                "source_entity_id": left_id,
                "target_entity_id": right_id,
                "source_name": left["name"],
                "target_name": right["name"],
                "relation_type": kind,
                "evidence_sentence": sentence,
            }
            found.append(record)

    return found
def detect_relation_type(sentence: str) -> str:
    """Return the relation type signalled by a verb phrase in the sentence.

    The phrases of ``VERB_RELATION_MAP`` are tried in the mapping's insertion
    order, case-insensitively, and the type of the first phrase that occurs
    in the sentence is returned.

    A phrase must occur as a whole word or phrase, optionally followed by an
    "s" or "d" ending, so "use" matches "use", "uses" and "used" but not the
    letters inside "because", "house" or "user".

    Args:
        sentence: Sentence to inspect.

    Returns:
        The mapped relation type, or ``"RELATED_TO"`` when no phrase occurs
        or the sentence is empty.
    """
    if not sentence:
        return "RELATED_TO"

    sentence_lower = sentence.lower()
    for phrase, relation_type in VERB_RELATION_MAP.items():
        words = phrase.split()
        if not words:
            continue
        # Allow any run of whitespace between the words of a multi-word
        # phrase such as "stands for".
        body = r"\s+".join(re.escape(word) for word in words)
        # Plain substring tests fire inside unrelated words; anchor both ends
        # on non-word characters instead, keeping simple inflections.
        pattern = r"(?<!\w)" + body + r"(?:s|d)?(?!\w)"
        if re.search(pattern, sentence_lower):
            return relation_type
    return "RELATED_TO"
|