File size: 654 Bytes
b316eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import nltk
from nltk.tokenize import word_tokenize
import re

# Fixed list of clinical key phrases to look for in the document.
KEY_WORDS = ['chronic kidney disease', 'heart failure', 'cirrhosis', 'ascites', 'ESRD', 'liver disease']


def find_key_words(text, key_words=KEY_WORDS):
    """Return the key phrases from *key_words* found in *text*.

    Each phrase is matched case-insensitively as a whole word/phrase
    (bounded by ``\\b``), so e.g. 'ascites' does not match inside a
    longer token. Phrases are escaped with ``re.escape`` so any regex
    metacharacters in them are treated literally.

    Returns the matching phrases in the order they appear in
    *key_words* (not the order they appear in the text).
    """
    found = []
    for key_word in key_words:
        # IGNORECASE: clinical text may capitalize terms ('Chronic Kidney
        # Disease') or lowercase acronyms ('esrd'); the original
        # case-sensitive search silently missed those.
        pattern = re.compile(r'\b' + re.escape(key_word) + r'\b', re.IGNORECASE)
        if pattern.search(text):
            found.append(key_word)
    return found


if __name__ == '__main__':
    # Load the document to scan. (The previous nltk word_tokenize step
    # was dead code — its output was never used by the search.)
    with open('document.txt', 'r') as f:
        text = f.read()

    # Print the list of key phrases found in the document.
    print(find_key_words(text))