Update modules/text_analysis/semantic_analysis.py
Browse files
modules/text_analysis/semantic_analysis.py
CHANGED
|
@@ -6,8 +6,6 @@ import matplotlib.pyplot as plt
|
|
| 6 |
from collections import Counter
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
-
# Remove the global nlp model loading
|
| 10 |
-
|
| 11 |
# Define colors for grammatical categories
|
| 12 |
POS_COLORS = {
|
| 13 |
'ADJ': '#FFA07A', # Light Salmon
|
|
@@ -215,9 +213,39 @@ def visualize_semantic_relations(doc, lang):
|
|
| 215 |
|
| 216 |
return fig
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
############################################################################################################################################
|
| 219 |
def perform_semantic_analysis(text, nlp, lang):
|
| 220 |
doc = nlp(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
# Imprimir entidades para depuración
|
| 223 |
print(f"Entidades encontradas ({lang}):")
|
|
@@ -225,6 +253,10 @@ def perform_semantic_analysis(text, nlp, lang):
|
|
| 225 |
print(f"{ent.text} - {ent.label_}")
|
| 226 |
|
| 227 |
relations_graph = visualize_semantic_relations(doc, lang)
|
| 228 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS']
|
|
|
|
| 6 |
from collections import Counter
|
| 7 |
from collections import defaultdict
|
| 8 |
|
|
|
|
|
|
|
| 9 |
# Define colors for grammatical categories
|
| 10 |
POS_COLORS = {
|
| 11 |
'ADJ': '#FFA07A', # Light Salmon
|
|
|
|
| 213 |
|
| 214 |
return fig
|
| 215 |
|
| 216 |
+
############################################################################################################################################
|
| 217 |
+
def identify_and_contextualize_entities(doc, lang):
    """Extract named entities with local context plus the top key concepts.

    Args:
        doc: A spaCy ``Doc`` (text already processed by an ``nlp`` pipeline).
        lang: Language code of the document. Currently unused in this helper,
            but kept for interface consistency with the sibling analysis
            functions that do take it.

    Returns:
        A tuple ``(entities, key_concepts)``:
        - ``entities``: list of dicts with keys ``text``, ``label``,
          ``start``, ``end`` and ``context`` (the entity plus up to 3
          tokens on each side).
        - ``key_concepts``: list of ``(lemma, count)`` pairs for the 10 most
          frequent non-stopword NOUN/VERB lemmas.
    """
    entities = []
    for ent in doc.ents:
        # Context window: up to 3 tokens before and after the entity,
        # clamped to the document boundaries.
        start = max(0, ent.start - 3)
        end = min(len(doc), ent.end + 3)
        context = doc[start:end].text

        entities.append({
            'text': ent.text,
            'label': ent.label_,
            'start': ent.start,
            'end': ent.end,
            'context': context
        })

    # Key concepts: most frequent content-word lemmas (nouns and verbs,
    # stopwords excluded). Feed Counter a generator instead of building a
    # throwaway list.
    word_freq = Counter(
        token.lemma_.lower()
        for token in doc
        if token.pos_ in ('NOUN', 'VERB') and not token.is_stop
    )
    key_concepts = word_freq.most_common(10)  # top 10 key concepts

    return entities, key_concepts
|
| 238 |
+
|
| 239 |
+
|
| 240 |
############################################################################################################################################
|
| 241 |
def perform_semantic_analysis(text, nlp, lang):
    """Run the full semantic analysis pipeline over *text*.

    Args:
        text: Raw input text to analyze.
        nlp: A loaded spaCy pipeline used to produce the ``Doc``.
        lang: Language code, forwarded to the helper functions.

    Returns:
        A dict with keys:
        - ``entities``: entity dicts from identify_and_contextualize_entities.
        - ``key_concepts``: top (lemma, count) pairs.
        - ``relations_graph``: matplotlib figure from
          visualize_semantic_relations.
    """
    doc = nlp(text)

    # Identify entities and key concepts.
    entities, key_concepts = identify_and_contextualize_entities(doc, lang)

    # Print entities for debugging.
    print(f"Entidades encontradas ({lang}):")
    for ent in doc.ents:
        print(f"{ent.text} - {ent.label_}")

    # Build the semantic-relations graph exactly once. The previous version
    # called visualize_semantic_relations twice, discarding the first
    # (potentially expensive) result.
    relations_graph = visualize_semantic_relations(doc, lang)

    return {
        'entities': entities,
        'key_concepts': key_concepts,
        'relations_graph': relations_graph
    }
|
| 261 |
|
| 262 |
+
# Public API of this module. perform_semantic_analysis is the main entry
# point and now returns the analysis results, so it belongs in __all__ too.
__all__ = ['perform_semantic_analysis', 'visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS', 'identify_and_contextualize_entities']
|