LitBench-UI / src /utils /graph_utils.py
Andreas99's picture
Upload 22 files
908351f verified
import regex
import re
def retrieve_text_cite(text, command):
base_pattern = (
r'\\' + command + r"(?:\[(?:.*?)\])*\{((?:[^{}]+|\{(?1)\})*)\}(?:\[(?:.*?)\])*"
)
def extract_text_inside_curly_braces(text):
pattern = r"\{((?:[^{}]|(?R))*)\}"
match = regex.search(pattern, text)
if match:
return match.group(1)
else:
return ""
found_texts = []
for match in regex.finditer(base_pattern, text):
temp_substring = text[match.span()[0] : match.span()[1]]
found_texts.append(extract_text_inside_curly_braces(temp_substring))
return found_texts
def get_citing_sentences(content):
content_new = re.sub(r'[\n]+', ' ', content) # keep only one \n
content_new = re.sub(r'e\.g\.' , 'eg', content_new)
content_new = re.sub(r'i\.e\.' , 'eg', content_new)
content_new = re.sub(r'etc\.' , 'etc', content_new)
content_new = re.sub(r' +', ' ', content_new)
sentences = [sentence + '.' for sentence in content_new.split('.')]
citing_sentences = [s for s in sentences if '\\cite' in s]
results = {}
for s in citing_sentences:
citations = retrieve_text_cite(s, 'cite')
final_citations = []
for cite in citations:
final_citations.extend(cite.split(','))
results[s] = final_citations
return results
def get_intro(content):
sections = retrieve_text_cite(content, 'section')
if sections == []:
return ''
try_intro = [x for x in sections if x.strip().lower() == 'introduction']
if try_intro == []:
return ''
else:
to_find = try_intro[0]
ind = sections.index(to_find)
if ind + 1 < len(sections):
start_marker = f'\\section{{{sections[ind]}}}'
end_marker = f'\\section{{{sections[ind+1]}}}'
start_point = content.find(start_marker)
end_point = content.find(end_marker)
return content[start_point+len(start_marker):end_point]
else:
return ''
def get_related_works(content):
sections = retrieve_text_cite(content, 'section')
if sections == []:
return ''
possible_related = [
"Literature Review",
"Related Work",
"Related Works",
"Prior Work",
"Prior Works",
"Related Research",
"Research Overview",
"Previous Work",
"Previous Works",
"Review of the Literature",
"Review of Related Literature",
"Survey of Related Work",
"Survey of Related Works",
"Background",
"Research Background",
"Review of Prior Research",
"Literature Survey",
"Overview of Literature",
"Existing Literature",
"Review of Existing Work",
"Review of Existing Works",
"Review of Previous Studies",
"Review of Prior Literature",
"Summary of Related Research",
"Survey of Existing Literature",
"Survey of Literature",
"Existing Research Overview",
"Prior Literature Review"
]
possible_sections = [x for x in sections if any([True for y in possible_related if y.lower() == x.strip().lower()])]
if possible_sections == []:
return ''
else:
to_find = possible_sections[0]
ind = sections.index(to_find)
if ind + 1 < len(sections):
start_marker = f'\\section{{{sections[ind]}}}'
end_marker = f'\\section{{{sections[ind+1]}}}'
start_point = content.find(start_marker)
end_point = content.find(end_marker)
return content[start_point+len(start_marker):end_point]
else:
return ''