Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import spacy
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import networkx as nx
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
|
| 7 |
+
# Initialize spaCy NLP model
# NOTE(review): requires the "en_core_web_sm" model to be installed
# (python -m spacy download en_core_web_sm); raises OSError otherwise.
nlp = spacy.load("en_core_web_sm")

# Import Lexicon
# NOTE(review): 'link_cues.xlsx' must sit next to this script; a missing file
# raises FileNotFoundError at import time, before the UI ever starts.
cues = pd.read_excel('link_cues.xlsx')
# Column of causal marker cues from the lexicon.
# NOTE(review): this variable is never referenced below — the hard-coded
# words_list is used instead. Confirm which lexicon is intended.
list_causalmarkers = cues['causal_markers']
|
| 13 |
+
|
| 14 |
+
def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    The comparison is case-insensitive (both sides are lower-cased) and is a
    plain substring match, so a cue like "so" also matches inside "social".
    NOTE(review): if whole-word matching is intended, a regex with word
    boundaries would be needed.

    :param words_list: list of words or phrases to check
    :param sentence: the input sentence where to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Normalize the sentence to lower case to make the search case insensitive.
    normalized_sentence = sentence.lower()

    # any() short-circuits on the first hit, matching the original early return.
    # (Fixed the docstring: the function returns a bool, not "Entities"/None.)
    return any(word_or_phrase.lower() in normalized_sentence
               for word_or_phrase in words_list)
|
| 32 |
+
|
| 33 |
+
class NounExtractor:
    """Extract noun phrases from text and tag each with a dependency label,
    merging adjacent phrases joined by prepositions ('of', 'in', ...)."""

    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)

    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using
        dependency labels accurately.

        Returns a list of (phrase_text, dep_label) tuples. Phrases linked by a
        preposition are merged into one span; merged indexes are recorded in
        skip_indexes so they are not emitted twice.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they have been merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # List of dependency labels for each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue

            current = noun_phrases[i]
            # Collect dependency labels for the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge logic based on 'of' construction: if the token right after
            # the current chunk is a linking preposition, fold the following
            # chunk(s) into one larger phrase. The i+1 guard also ensures
            # doc[current.end] is a valid index (a next chunk exists).
            if i + 1 < len(noun_phrases) and (doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for']):
                next_phrase = noun_phrases[i + 1]
                # A 'pcomp' after the next chunk signals a second prepositional
                # link, so merge three chunks instead of two.
                if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            # Unmerged chunk: label it from its own tokens' dependencies.
            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))

        return merged_phrases

    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on
        internal dependencies.

        Subjects (active or passive) are promoted to 'ROOT'.
        """
        if 'nsubj' in deps_in_phrase or 'nsubjpass' in deps_in_phrase:
            return 'ROOT'
        else:
            # Choose a representative dependency if no clear subject is present.
            # NOTE(review): set.pop() returns an arbitrary element, so the
            # chosen label is not deterministic when several deps are present.
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'

    def extract(self, sentence, action_verb):
        """
        Extracts and returns noun phrases with their detailed dependency tags
        from the sentence.

        :param sentence: raw text to analyse
        :param action_verb: verbs whose presence swaps ROOT <-> dobj roles
        :return: dict mapping phrase text -> dependency label
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        # NOTE(review): duplicate phrase texts collapse here — the last
        # occurrence's label wins.
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of any actionable verbs in the sentence
        # (case-insensitive substring match, same caveat as the cue search).
        found_verbs = [v for v in action_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Adjust dependency labels for noun phrases based on the presence
            # of an actionable verb: swap ROOT and dobj roles.
            for phrase, dep in list(result_dict.items()):  # Work on a copy of items to safely modify the dict
                if dep == 'ROOT':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'

        return result_dict
|
| 116 |
+
|
| 117 |
+
def format_results(results):
    """
    Turn a {phrase: dep_label} mapping into "root <- phrase" relation strings.

    Every phrase whose label is 'ROOT' or 'nsubjpass' is treated as a hub
    ("root"); every other phrase is linked to each hub.

    :param results: dict mapping phrase text -> dependency label
    :return: de-duplicated list of relation strings, in first-seen order
    """
    formatted = []
    # Find all roots or central subjects to structure the phrases around them.
    root_keys = [key for key, value in results.items() if value == 'ROOT' or value == 'nsubjpass']

    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            # NOTE(review): this branch is unreachable — any key whose value is
            # 'nsubjpass' is in root_keys and was skipped above. Kept to
            # preserve the original intent for passive subjects.
            if value == 'nsubjpass':  # If the dependency indicates a passive subject
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")

    # Fix: de-duplicate while preserving first-seen order. The original
    # list(set(...)) returned the relations in a nondeterministic order.
    return list(dict.fromkeys(formatted))
|
| 134 |
+
|
| 135 |
+
def wrap_label(label):
    """Break *label* onto a new line after every third word."""
    tokens = label.split()
    lines = []
    for start in range(0, len(tokens), 3):
        lines.append(' '.join(tokens[start:start + 3]))
    return '\n'.join(lines)
|
| 140 |
+
|
| 141 |
+
def visualize_cognitive_map(formatted_results):
    """
    Build and draw a directed graph (cognitive map) from relation strings.

    Each entry is either "A <- B" (edge B -> A) or "A -> B" (edge A -> B),
    using the exact " <- " / " -> " separators emitted by format_results().

    :param formatted_results: list of "X -> Y" / "X <- Y" relation strings
    :return: the matplotlib Figure containing the drawn graph
    """
    G = nx.DiGraph()  # Directed graph to show direction of relationships

    # Add edges based on formatted results
    for result in formatted_results:
        if '<-' in result:
            # Extract nodes and add edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # Extract nodes and add edge in the specified direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout.
    # NOTE(review): spring_layout is randomized; pass seed=... for a
    # reproducible layout between calls.
    pos = nx.spring_layout(G, k=0.50)

    # Setup the plot with a larger size; keep a handle on the Figure so it can
    # be returned to gr.Plot directly.
    fig = plt.figure(figsize=(12, 8))  # Larger figure size for better visibility

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)

    # Fix: the original called plt.show() — a no-op (or a blocker) on a
    # headless server — and returned the pyplot *module*; gr.Plot expects a
    # Figure. NOTE(review): figures are never closed, so repeated calls
    # accumulate open figures; consider plt.close(fig) after rendering.
    return fig
|
| 171 |
+
|
| 172 |
+
# Shared extractor instance built on the module-level spaCy pipeline.
extractor = NounExtractor(nlp=nlp)

# Example of how to use this function
# Causal cue words/phrases that gate the analysis in CogMapAnalysis().
words_list = ["so", "because", "increase", "contribute", "due to"]
# Verbs whose presence swaps ROOT/dobj roles in NounExtractor.extract().
action_verb = ['affect', 'influence', 'increase', 'against']
|
| 177 |
+
|
| 178 |
+
# Define the callback function for the GUI
|
| 179 |
+
def CogMapAnalysis(text):
    """
    Gradio callback: analyse *text* and return (relations, plot).

    If no causal cue from words_list occurs in the text, returns an error
    message string and None instead of a relation list and a figure.
    """
    # Guard clause: bail out early when no causal cue word is present.
    if not contains_words_or_phrases(words_list, text):
        return "❌ No causal expression was identified.", None  # None, not "", for a non-existent plot

    extracted = extractor.extract(text, action_verb)
    relations = format_results(extracted)
    figure = visualize_cognitive_map(relations)
    return relations, figure
|
| 189 |
+
|
| 190 |
+
# Create the GUI using the 'gr' library
|
| 191 |
+
# NOTE(review): nesting below reconstructed from a whitespace-mangled source —
# confirm the intended Row/Column layout against the running app.
with gr.Blocks() as demo:
    with gr.Column():
        # Centered page title.
        gr.Markdown('<div style="text-align: center;"><h1><strong>CogMap</strong></h1></div> <div style="text-align: center;"><h3></h3></div>')

        with gr.Row():
            inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
            # Clickable example sentences shown under the input box.
            examples = [
                "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
                "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
                "CogMap is a tool that lets you create cognitive maps from text."
            ]

            output = gr.Textbox(label="CogMap", lines=1, placeholder=".............")
            cogmap_plot = gr.Plot(label="Visualization")
            # Embeds a full Interface (with its own submit button) inside the
            # Blocks layout, wiring CogMapAnalysis to the two outputs above.
            interface = gr.Interface(fn=CogMapAnalysis, examples=examples, inputs=inputs, outputs=[output, cogmap_plot])

        with gr.Row():
            gr.Markdown("⚠️ Feel free to flag me if you find any errors. :)")

        with gr.Column():
            gr.Markdown('<p style="text-align: center; ">Demo made with ❤ by P.K. Ningrum (2024) | Contact: panggih_kusuma.ningrum@univ-fcomte.fr</p>')

if __name__ == "__main__":
    # NOTE(review): share=True is ignored on Hugging Face Spaces (already
    # public) and only matters for local runs — confirm it is intentional.
    demo.launch(show_api=False, share=True)
|