Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -107,10 +107,28 @@ def FrontRevSentChunk (Chunkmode, Translate, Text, langdest):
|
|
| 107 |
FinalOutput += "\n" + translated.text
|
| 108 |
return FinalOutput
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def SepHypandSynExpansion(text):
|
| 111 |
# Tokenize the text
|
| 112 |
tokens = nltk.word_tokenize(text)
|
| 113 |
-
NoHits = "
|
| 114 |
FinalOutput = ""
|
| 115 |
|
| 116 |
# Find synonyms and hypernyms of each word in the text
|
|
@@ -121,11 +139,13 @@ def SepHypandSynExpansion(text):
|
|
| 121 |
synonyms += synset.lemma_names()
|
| 122 |
hypernyms += [hypernym.name() for hypernym in synset.hypernyms()]
|
| 123 |
if not synonyms and not hypernyms:
|
| 124 |
-
NoHits +=
|
| 125 |
-
NoHits = set(NoHits)
|
| 126 |
else:
|
| 127 |
-
FinalOutput += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms}"
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
def WikiSearch(term):
|
|
|
|
| 107 |
FinalOutput += "\n" + translated.text
|
| 108 |
return FinalOutput
|
| 109 |
|
| 110 |
+
# Keep only the words whose part of speech is a verb, noun, or adjective.
def filter_words(words):
    """Return the subset of *words* tagged as verbs, nouns, or adjectives.

    Runs NLTK's part-of-speech tagger over the input sequence and drops
    every token whose tag falls outside the verb/noun/adjective families.
    """
    # Penn Treebank tags to retain, grouped by word class.
    wanted_tags = {
        'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',   # verb forms
        'NN', 'NNS', 'NNP', 'NNPS',                # noun forms
        'JJ', 'JJR', 'JJS',                        # adjective forms
    }

    kept = []
    for word, tag in nltk.pos_tag(words):
        if tag in wanted_tags:
            kept.append(word)
    return kept
|
| 122 |
+
|
| 123 |
+
# Call the function to get the filtered list of words
# NOTE(review): `words` is not defined anywhere in this view — presumably it
# is assigned earlier in app.py; confirm, otherwise this module-level call
# raises NameError at import time.
filtered_words = filter_words(words)

print(filtered_words)
|
| 127 |
+
|
| 128 |
def SepHypandSynExpansion(text):
|
| 129 |
# Tokenize the text
|
| 130 |
tokens = nltk.word_tokenize(text)
|
| 131 |
+
NoHits = ""
|
| 132 |
FinalOutput = ""
|
| 133 |
|
| 134 |
# Find synonyms and hypernyms of each word in the text
|
|
|
|
| 139 |
synonyms += synset.lemma_names()
|
| 140 |
hypernyms += [hypernym.name() for hypernym in synset.hypernyms()]
|
| 141 |
if not synonyms and not hypernyms:
|
| 142 |
+
NoHits += f"{token} | "
|
|
|
|
| 143 |
else:
|
| 144 |
+
FinalOutput += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms} \n"
|
| 145 |
+
NoHits = set(NoHits.split(" | "))
|
| 146 |
+
NoHits = filter_words(NoHits)
|
| 147 |
+
NoHits = "Words to pay special attention to: \n" + str(NoHits)
|
| 148 |
+
return NoHits, FinalOutput
|
| 149 |
|
| 150 |
|
| 151 |
def WikiSearch(term):
|