# rough_work / indexing_search.py
# jkushwaha's picture
# Create indexing_search.py
# 316f3f2 verified
def neighbor_keywords_with_threshold(text, threshold=8):
    """Find context keywords near every "mmr" / "mismatch" token in *text*.

    The text is lower-cased and split on whitespace; all positions in the
    result are indices into that token list. Leading and trailing ASCII
    punctuation is stripped from each token before comparison, so tokens
    such as "MMR," or "intact." are still recognised (interior characters,
    e.g. the hyphen in "msi-high", are left alone).

    Args:
        text: The string to search.
        threshold: Maximum distance, in tokens, between a target word and
            a keyword on either side for the keyword to be reported.

    Returns:
        A dict keyed by the token index of each target word. Each value
        has the form ``{target_word: {"left": {...}, "right": {...}}}``,
        where the inner dicts map each keyword found before / after the
        target to its token index. When a keyword occurs more than once on
        the same side, the occurrence nearest to the target is reported.
    """
    # Function-scope import keeps this block self-contained.
    import string

    keywords = {"no", "not", "normal", "intact", "deficient"}
    target_words = {"mmr", "mismatch"}

    # Strip punctuation per token (rather than re-tokenising) so indices
    # remain plain whitespace-token positions.
    words = [tok.strip(string.punctuation) for tok in text.lower().split()]

    result = {}
    for i, word in enumerate(words):
        if word not in target_words:
            continue

        left, right = {}, {}
        start = max(0, i - threshold)
        stop = min(i + threshold + 1, len(words))
        for j in range(start, stop):
            neighbor = words[j]
            if neighbor not in keywords:
                continue
            if j < i:
                # Scanning towards the target: a later hit is closer, so
                # overwriting leaves the nearest occurrence.
                left[neighbor] = j
            else:
                # Scanning away from the target: the first hit is the
                # closest, so never overwrite it.
                right.setdefault(neighbor, j)

        result[i] = {word: {"left": left, "right": right}}
    return result
# Demo run: search a sample text containing several "MMR" / "mismatch"
# mentions and print the keyword positions found around each one.
# The literal is split across lines only for readability; adjacent string
# literals are concatenated into a single string.
text = (
    "PMS-2 Positive Tumors displaying loss of any MMR protein are mismatch "
    "repair deficient and considered to be MSI-High (MSI-H), whereas those "
    "with intact MMR proteins are expected to be microsatellite stable (MSS) "
    "or MSI-low (MSI-L). $$ IHC shows normal expression of MLH-1, MSH-2, "
    "MSH-6, and PMS-2. $$ The results of the IC analysis suggest the presence "
    "of normal DNA mismatch $$ Positive MSH-6. Another MMR protein is also "
    "involved."
)
output = neighbor_keywords_with_threshold(text, threshold=8)
print(output)