# CO3 / src/modules/consecutio.py
# amaisto's picture
# Update src/modules/consecutio.py
# 9d2f453 verified
import logging
import math

from spacy.tokens import Doc, Token
class ConsecutioAnalyzer:
    """Compute tense-sequence ("consecutio") and dependency-depth metrics.

    The analyzer walks every sentence of a parsed spaCy document, rebuilds
    the dependency tree as ``TokenNode`` objects, infers a (compound) tense
    for each verb group and accumulates document-level counters.  According
    to the original comments the logic is a port of ``consecutio.java``.
    """

    # Shared logger used to report sentences that had to be skipped.
    _logger = logging.getLogger(__name__)

    def __init__(self) -> None:
        self.reset_metrics()

    def reset_metrics(self) -> None:
        """Reset every per-document counter to its initial value."""
        self.total_sentences = 0
        self.sum_max_depth = 0
        self.total_verb_pairs = 0
        self.valid_consecutio_pairs = 0
        self.total_words = 0
        self.total_verb_groups = 0
        self.root_tenses_list = []

    class TokenNode:
        """Lightweight tree node wrapping the fields read from one token."""

        # Fine-grained tag -> simple tense label.
        _SIMPLE_TENSES = {
            "VBD": "Past",
            "VBN": "Past",
            "VBP": "Pres",
            "VBZ": "Pres",
            "VBG": "Ger",
            "MD": "Mod",
            "VB": "Inf",
        }

        def __init__(self, spacy_token) -> None:
            self.id = str(spacy_token.i)
            self.form = spacy_token.text
            # The fine-grained tag (VBD, VBP, ...) carries the tense.
            self.pos_tag = spacy_token.tag_
            self.dep_rel = spacy_token.dep_
            # In spaCy the root is its own head.  Token indices are 0-based,
            # so the Java-style "0" head id is ambiguous (it is also the id
            # of the first document token); ``is_root`` is the reliable flag.
            self.is_root = spacy_token.head.i == spacy_token.i
            # Kept for backward compatibility: roots still expose "0".
            self.head_id = "0" if self.is_root else str(spacy_token.head.i)
            self.children = []
            self.compound_tense = ""
            self.is_compound_head = False

        def is_verb(self) -> bool:
            """Return True for verb-like tags (``V*``, ``MD*``, ``AUX``)."""
            return self.pos_tag.startswith(("V", "MD")) or self.pos_tag == "AUX"

        def is_auxiliary_rel(self) -> bool:
            """Return True when the dependency label contains ``aux``."""
            return "aux" in self.dep_rel.lower()

        def get_simple_tense(self) -> str:
            """Map the tag to a simple tense label, ``"N/A"`` if unknown."""
            return self._SIMPLE_TENSES.get(self.pos_tag, "N/A")

    def analyze(self, doc) -> dict:
        """Analyze a parsed document and return the rounded metrics.

        Args:
            doc: Object exposing ``sents`` (iterable of token sequences);
                each token must provide ``i``, ``text``, ``tag_``, ``dep_``,
                ``head`` and ``is_punct``.

        Returns:
            Dict with the keys ``tense_stability``, ``avg_depth``,
            ``consecutio_index``, ``verb_density`` and ``sentence_depths``,
            each rounded to four decimals.
        """
        self.reset_metrics()

        for sent in doc.sents:
            self.total_sentences += 1
            # Word count excludes punctuation.
            self.total_words += sum(1 for tok in sent if not tok.is_punct)

            try:
                node_map = {str(tok.i): self.TokenNode(tok) for tok in sent}
                root = None

                # Rebuild the parent -> children hierarchy.
                for node in node_map.values():
                    # Use the explicit root flag: comparing head_id with "0"
                    # would also match dependents of document token 0.
                    if node.is_root or node.dep_rel.lower() == "root":
                        root = node
                    elif node.head_id in node_map:
                        node_map[node.head_id].children.append(node)

                if root:
                    self._process_compound_tenses(root)
                    self.root_tenses_list.append(root.compound_tense)
                    self.sum_max_depth += self._calculate_tree_depth(root)
                    self._check_consecutio_recursively(root)
                else:
                    self.root_tenses_list.append("N/A")
            except Exception:
                # Best effort: one bad sentence must not abort the document,
                # but the failure is reported instead of silently dropped.
                self._logger.warning(
                    "Skipping sentence %d: dependency analysis failed",
                    self.total_sentences,
                    exc_info=True,
                )
                continue

        stability = self._calculate_dominant_stability()
        avg_depth = (
            self.sum_max_depth / self.total_sentences
            if self.total_sentences > 0
            else 0.0
        )
        # With no verb pairs there is nothing to violate, hence 1.0.
        consecutio_score = (
            self.valid_consecutio_pairs / self.total_verb_pairs
            if self.total_verb_pairs > 0
            else 1.0
        )
        verb_density = (
            self.total_verb_groups / self.total_words
            if self.total_words > 0
            else 0.0
        )
        sentence_depths = self.calculate_average_graph_depth(doc)

        return {
            "tense_stability": round(stability, 4),
            "avg_depth": round(avg_depth, 4),
            "consecutio_index": round(consecutio_score, 4),
            "verb_density": round(verb_density, 4),
            "sentence_depths": round(sentence_depths, 4),
        }

    def _process_compound_tenses(self, node) -> None:
        """Mark verb-group heads below ``node`` and infer their tense."""
        aux_children = []
        for child in node.children:
            if child.is_auxiliary_rel():
                aux_children.append(child)
            self._process_compound_tenses(child)

        # A verb that is not itself an auxiliary heads a verb group.
        if node.is_verb() and not node.is_auxiliary_rel():
            node.is_compound_head = True
            self.total_verb_groups += 1
            group = [node] + aux_children
            node.compound_tense = self._infer_compound_tense(group, node)

    def _infer_compound_tense(self, group, head) -> str:
        """Infer the tense of a verb group from its auxiliaries and head.

        Precedence: other modal -> ``"Mod"``; will/'ll -> ``"Fut"``;
        have/has/had with a ``VBN`` head -> ``"Perf"``; otherwise the head's
        simple tense.
        """
        has_will = has_have = has_modal = False
        for member in group:
            if member is head:
                continue
            form = member.form.lower()
            if "will" in form or "'ll" in form:
                has_will = True
            elif any(aux in form for aux in ("have", "has", "had")):
                has_have = True
            elif member.pos_tag == "MD":
                has_modal = True

        if has_modal:
            return "Mod"
        if has_will:
            return "Fut"
        if has_have and head.pos_tag == "VBN":
            return "Perf"
        return head.get_simple_tense()

    def _calculate_tree_depth(self, node) -> int:
        """Return the height of the subtree at ``node`` (a leaf counts 1).

        Children attached through an auxiliary relation are ignored.
        """
        if not node.children:
            return 1
        deepest = 0
        for child in node.children:
            if child.is_auxiliary_rel():
                continue
            deepest = max(deepest, self._calculate_tree_depth(child))
        return 1 + deepest

    def _check_consecutio_recursively(self, parent) -> None:
        """Count parent/child verb-group pairs and how many are valid."""
        for child in parent.children:
            if child.is_auxiliary_rel():
                continue
            if parent.is_compound_head and child.is_compound_head:
                self.total_verb_pairs += 1
                if self._is_consecutio_valid(
                    parent.compound_tense, child.compound_tense
                ):
                    self.valid_consecutio_pairs += 1
            self._check_consecutio_recursively(child)

    def _is_consecutio_valid(self, p, c) -> bool:
        """Return False only for a past-group parent with a Pres/Ger child."""
        if self._is_pres_group(p):
            return True
        if self._is_past_group(p) and c in ("Pres", "Ger"):
            return False
        return True

    def _is_past_group(self, t) -> bool:
        """Return True if the label contains Past, Perf or Mod."""
        return "Past" in t or "Perf" in t or "Mod" in t

    def _is_pres_group(self, t) -> bool:
        """Return True if the label contains Pres, Fut or Ger."""
        return "Pres" in t or "Fut" in t or "Ger" in t

    def _calculate_dominant_stability(self) -> float:
        """Return the share of root tenses that belong to the dominant group.

        ``"N/A"`` roots are ignored; ties between past and present favour
        the past group.  Returns 0.0 when there is no usable root.
        """
        valid_roots = [t for t in self.root_tenses_list if t != "N/A"]
        if not valid_roots:
            return 0.0

        past_count = sum(1 for t in valid_roots if self._is_past_group(t))
        pres_count = sum(1 for t in valid_roots if self._is_pres_group(t))
        in_dominant = (
            self._is_past_group if past_count >= pres_count else self._is_pres_group
        )
        aligned = sum(1 for t in valid_roots if in_dominant(t))
        return aligned / len(valid_roots)

    # Annotations are quoted so they are not evaluated when the class is
    # defined; they still refer to spaCy's Doc / Token types.
    @staticmethod
    def calculate_average_graph_depth(doc: "Doc") -> float:
        """Return the mean token depth over all sentences of ``doc``.

        A sentence root has depth 1 and every other token has its head's
        depth plus one.  Returns 0.0 when the document has no tokens.
        """
        total_depth = 0
        total_nodes = 0

        for sent in doc.sents:
            # Per-sentence memo keyed by the token's document index.
            depths = {}
            for token in sent:
                depths[token.i] = ConsecutioAnalyzer._get_token_depth(token, depths)
                total_depth += depths[token.i]
            total_nodes += len(sent)

        return total_depth / total_nodes if total_nodes > 0 else 0.0

    @staticmethod
    def _get_token_depth(token: "Token", depths: dict) -> int:
        """Return the depth of ``token``, memoizing results in ``depths``."""
        if token.i in depths:
            return depths[token.i]

        # The root is its own head.
        if token.head == token:
            depths[token.i] = 1
            return 1

        depth = ConsecutioAnalyzer._get_token_depth(token.head, depths) + 1
        depths[token.i] = depth
        return depth