jkushwaha committed on
Commit
316f3f2
·
verified ·
1 Parent(s): a7bc9d6

Create indexing_search.py

Browse files
Files changed (1) hide show
  1. indexing_search.py +22 -0
indexing_search.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def neighbor_keywords_with_threshold(text, threshold=8, keywords=None, target_words=None):
    """Find context keywords located near each target word in *text*.

    The text is lower-cased and split on whitespace; leading and trailing
    punctuation is stripped from every token so that "MMR," or "deficient."
    still match. Token positions are the whitespace-split indices.

    Args:
        text: The text to scan.
        threshold: Maximum distance, in tokens, between a target word and a
            keyword for the keyword to be reported.
        keywords: Optional iterable of context keywords. Defaults to
            ``("no", "not", "normal", "intact", "deficient")``.
        target_words: Optional iterable of words to anchor the search on.
            Defaults to ``("mmr", "mismatch")``.

    Returns:
        A dict mapping the index of each target-word occurrence to
        ``{target_word: {"left": {...}, "right": {...}}}``, where each inner
        dict maps a keyword to the index of its nearest occurrence on that
        side of the target, within ``threshold`` tokens.
    """
    # Function-scope import: the file has no import block to extend.
    import string

    if keywords is None:
        keywords = ("no", "not", "normal", "intact", "deficient")
    if target_words is None:
        target_words = ("mmr", "mismatch")

    # Sets give O(1) membership tests; lower-casing mirrors the tokenisation.
    keyword_set = frozenset(k.lower() for k in keywords)
    target_set = frozenset(t.lower() for t in target_words)

    # Strip only surrounding punctuation so hyphenated tokens such as
    # "msi-h" stay intact and token positions remain the split() positions.
    words = [token.strip(string.punctuation) for token in text.lower().split()]

    result = {}
    for i, word in enumerate(words):
        if word not in target_set:
            continue

        left, right = {}, {}
        window_start = max(0, i - threshold)
        window_stop = min(i + threshold + 1, len(words))
        for j in range(window_start, window_stop):
            neighbor = words[j]
            # The target itself is never its own neighbour.
            if j == i or neighbor not in keyword_set:
                continue
            if j < i:
                # Ascending scan: the last write is the occurrence closest
                # to the target.
                left[neighbor] = j
            else:
                # Keep the first hit so the right side also reports the
                # closest occurrence rather than the farthest one.
                right.setdefault(neighbor, j)

        result[i] = {word: {"left": left, "right": right}}

    return result
18
+
19
# Demo: run the neighbour-keyword search on a sample report excerpt and
# print the resulting index structure.
text = (
    "PMS-2 Positive Tumors displaying loss of any MMR protein are mismatch "
    "repair deficient and considered to be MSI-High (MSI-H), whereas those "
    "with intact MMR proteins are expected to be microsatellite stable (MSS) "
    "or MSI-low (MSI-L). $$ IHC shows normal expression of MLH-1, MSH-2, "
    "MSH-6, and PMS-2. $$ The results of the IC analysis suggest the "
    "presence of normal DNA mismatch $$ Positive MSH-6. Another MMR protein "
    "is also involved."
)

output = neighbor_keywords_with_threshold(text, threshold=8)
print(output)