Update modules/text_analysis/semantic_analysis.py
Browse files
modules/text_analysis/semantic_analysis.py
CHANGED
|
@@ -68,33 +68,36 @@ def identify_key_concepts(doc, top_n=10):
|
|
| 68 |
word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
|
| 69 |
return word_freq.most_common(top_n)
|
| 70 |
|
| 71 |
-
def create_concept_graph(
|
| 72 |
-
vectorizer = TfidfVectorizer()
|
| 73 |
-
tfidf_matrix = vectorizer.fit_transform([text])
|
| 74 |
-
concept_vectors = vectorizer.transform([c[0] for c in concepts])
|
| 75 |
-
similarity_matrix = cosine_similarity(concept_vectors, concept_vectors)
|
| 76 |
-
|
| 77 |
G = nx.Graph()
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
return G
|
| 85 |
|
| 86 |
def visualize_concept_graph(G, lang):
|
| 87 |
-
fig, ax = plt.subplots(figsize=(
|
| 88 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
| 89 |
|
| 90 |
node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
|
| 91 |
nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
|
| 92 |
nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
|
| 93 |
-
nx.draw_networkx_edges(G, pos, width=1, alpha=0.5, ax=ax)
|
| 94 |
|
| 95 |
-
|
| 96 |
-
nx.
|
| 97 |
-
|
| 98 |
title = {
|
| 99 |
'es': "Relaciones entre Conceptos Clave",
|
| 100 |
'en': "Key Concept Relations",
|
|
@@ -102,18 +105,18 @@ def visualize_concept_graph(G, lang):
|
|
| 102 |
}
|
| 103 |
ax.set_title(title[lang], fontsize=16)
|
| 104 |
ax.axis('off')
|
| 105 |
-
|
| 106 |
plt.tight_layout()
|
| 107 |
return fig
|
| 108 |
|
| 109 |
def perform_semantic_analysis(text, nlp, lang):
|
| 110 |
doc = nlp(text)
|
| 111 |
-
|
| 112 |
# Identificar conceptos clave
|
| 113 |
key_concepts = identify_key_concepts(doc)
|
| 114 |
-
|
| 115 |
# Crear y visualizar grafo de conceptos
|
| 116 |
-
concept_graph = create_concept_graph(
|
| 117 |
relations_graph = visualize_concept_graph(concept_graph, lang)
|
| 118 |
|
| 119 |
return {
|
|
|
|
| 68 |
word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ'] and not token.is_stop])
|
| 69 |
return word_freq.most_common(top_n)
|
| 70 |
|
| 71 |
+
def create_concept_graph(doc, key_concepts):
    """Build a weighted co-occurrence graph of the key concepts in ``doc``.

    Each key concept becomes a node whose ``weight`` attribute is its
    frequency. Two distinct concepts are connected when their lemmas appear
    in the same sentence; the edge ``weight`` counts how many times the pair
    was seen together.

    Args:
        doc: Parsed document exposing ``sents``; every token of a sentence
            must provide ``lemma_``.
        key_concepts: Sequence of ``(concept, frequency)`` pairs.

    Returns:
        The populated ``nx.Graph``.
    """
    G = nx.Graph()

    # Add one node per key concept and collect the concept lemmas once, so
    # the per-token membership test below does not rebuild a lookup table.
    concept_lemmas = set()
    for concept, freq in key_concepts:
        G.add_node(concept, weight=freq)
        concept_lemmas.add(concept)

    # Add edges based on co-occurrence within each sentence.
    for sent in doc.sents:
        lemmas = (token.lemma_.lower() for token in sent)
        sent_concepts = [lemma for lemma in lemmas if lemma in concept_lemmas]
        for i, concept1 in enumerate(sent_concepts):
            for concept2 in sent_concepts[i + 1:]:
                if concept1 == concept2:
                    # A concept repeated in the sentence must not be linked
                    # to itself (that would create a self-loop edge).
                    continue
                if G.has_edge(concept1, concept2):
                    G[concept1][concept2]['weight'] += 1
                else:
                    G.add_edge(concept1, concept2, weight=1)

    return G
|
| 89 |
|
| 90 |
def visualize_concept_graph(G, lang):
|
| 91 |
+
fig, ax = plt.subplots(figsize=(12, 8))
|
| 92 |
pos = nx.spring_layout(G, k=0.5, iterations=50)
|
| 93 |
|
| 94 |
node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]
|
| 95 |
nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='lightblue', alpha=0.8, ax=ax)
|
| 96 |
nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)
|
|
|
|
| 97 |
|
| 98 |
+
edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
|
| 99 |
+
nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.5, ax=ax)
|
| 100 |
+
|
| 101 |
title = {
|
| 102 |
'es': "Relaciones entre Conceptos Clave",
|
| 103 |
'en': "Key Concept Relations",
|
|
|
|
| 105 |
}
|
| 106 |
ax.set_title(title[lang], fontsize=16)
|
| 107 |
ax.axis('off')
|
| 108 |
+
|
| 109 |
plt.tight_layout()
|
| 110 |
return fig
|
| 111 |
|
| 112 |
def perform_semantic_analysis(text, nlp, lang):
|
| 113 |
doc = nlp(text)
|
| 114 |
+
|
| 115 |
# Identificar conceptos clave
|
| 116 |
key_concepts = identify_key_concepts(doc)
|
| 117 |
+
|
| 118 |
# Crear y visualizar grafo de conceptos
|
| 119 |
+
concept_graph = create_concept_graph(doc, key_concepts)
|
| 120 |
relations_graph = visualize_concept_graph(concept_graph, lang)
|
| 121 |
|
| 122 |
return {
|