# rough_work / indexing.py
# jkushwaha's picture
# Create indexing.py
# a7bc9d6 verified
import re
import re
def word_index_multiple_mmr(text):
    """Locate a fixed set of keywords in *text* and flag those preceding "mmr".

    The text is split on whitespace and each token is lower-cased before
    being compared against the keywords ("no", "not", "normal", "intact",
    "deficient", "mmr", "mismatch"). Matching is exact, so a token with
    attached punctuation such as "MMR." is not counted.

    After collecting positions, the i-th occurrence of every keyword other
    than "mmr" is compared with the i-th occurrence of "mmr": when the
    keyword comes first, its stored position is negated. Occurrences with
    no corresponding "mmr" occurrence are left as they are.

    Args:
        text: The string to scan.

    Returns:
        A dict mapping every keyword to the list of its zero-based token
        positions (in keyword-definition order). Negative entries mark
        occurrences that precede their paired "mmr" occurrence.

    Note:
        Position 0 cannot carry the negative marker because ``-0 == 0``.
    """
    keywords = ("no", "not", "normal", "intact", "deficient", "mmr", "mismatch")
    word_indices = {keyword: [] for keyword in keywords}

    for position, token in enumerate(text.split()):
        # One dict lookup both tests membership and fetches the target list.
        hits = word_indices.get(token.lower())
        if hits is not None:
            hits.append(position)

    mmr_indices = word_indices["mmr"]
    for keyword, positions in word_indices.items():
        if keyword == "mmr":
            continue
        # zip stops at the shorter list, so keyword occurrences beyond the
        # last "mmr" occurrence are never touched.
        for i, (position, mmr_position) in enumerate(zip(positions, mmr_indices)):
            if position < mmr_position:
                positions[i] = -position

    return word_indices
# Demo: index the keywords in a sample multi-sentence text and print the
# resulting keyword -> positions mapping.
text = (
    "PMS-2 Positive Tumors displaying loss of any MMR protein are mismatch "
    "repair deficient and considered to be MSI-High (MSI-H), whereas those "
    "with intact MMR proteins are expected to be microsatellite stable (MSS) "
    "or MSI-low (MSI-L). $$ IHC shows normal expression of MLH-1, MSH-2, "
    "MSH-6, and PMS-2. $$ The results of the IC analysis suggest the presence "
    "of normal DNA mismatch $$ Positive MSH-6. Another MMR protein is also "
    "involved."
)
result = word_index_multiple_mmr(text)
print(result)