import logging
import math

try:
    from spacy.tokens import Doc, Token
except ImportError:
    # The analyzer only duck-types the token attributes it reads (i, text,
    # tag_, dep_, head, is_punct); Doc/Token are needed for annotations only,
    # so the module stays importable when spaCy is not installed.
    Doc = Token = object

logger = logging.getLogger(__name__)


class ConsecutioAnalyzer:
    """Compute tense-agreement and syntactic-depth metrics for a parsed document.

    Python port of ``consecutio.java``. Each sentence of the document is
    rebuilt as a tree of :class:`TokenNode` objects, every verb group gets a
    compound tense, and governor/dependent verb pairs are checked for
    sequence-of-tenses ("consecutio temporum") compatibility.
    """

    def __init__(self):
        self.reset_metrics()

    def reset_metrics(self):
        """Reset every accumulator so the instance can analyze a new document."""
        self.total_sentences = 0
        self.sum_max_depth = 0
        self.total_verb_pairs = 0
        self.valid_consecutio_pairs = 0
        self.total_words = 0
        self.total_verb_groups = 0
        self.root_tenses_list = []

    class TokenNode:
        """Tree node wrapping one token (same field mapping as consecutio.java)."""

        # Fine-grained tag -> simple tense label; any other tag maps to "N/A".
        _SIMPLE_TENSES = {
            "VBD": "Past",
            "VBN": "Past",
            "VBP": "Pres",
            "VBZ": "Pres",
            "VBG": "Ger",
            "MD": "Mod",
            "VB": "Inf",
        }

        def __init__(self, spacy_token):
            self.id = str(spacy_token.i)
            self.form = spacy_token.text
            # The fine-grained tag (VBD, VBP, ...) is what carries the tense.
            self.pos_tag = spacy_token.tag_
            self.dep_rel = spacy_token.dep_
            # A root token is its own head; the Java code marks it with head "0".
            self.is_root = spacy_token.head.i == spacy_token.i
            # NOTE: ids are 0-based document indices, so "0" is also the real
            # id of the first token of the document. `head_id == "0"` is
            # therefore ambiguous between "root" and "child of token 0";
            # `is_root` is the unambiguous flag.
            self.head_id = "0" if self.is_root else str(spacy_token.head.i)
            self.children = []
            self.compound_tense = ""
            self.is_compound_head = False

        def is_verb(self):
            """Return True for verbal tags (``V*``, ``MD*``) or the ``AUX`` tag."""
            return self.pos_tag.startswith(("V", "MD")) or self.pos_tag == "AUX"

        def is_auxiliary_rel(self):
            """Return True when the dependency label contains ``aux``."""
            return "aux" in self.dep_rel.lower()

        def get_simple_tense(self):
            """Map the tag to Past/Pres/Ger/Mod/Inf, or ``"N/A"`` if unmapped."""
            return self._SIMPLE_TENSES.get(self.pos_tag, "N/A")

    def analyze(self, doc):
        """Analyze a parsed document and return its metrics.

        Mirrors ``analyzeSingleDocument`` in consecutio.java. All counters are
        reset first, so the instance attributes describe only this document
        afterwards.

        Args:
            doc: A spaCy ``Doc`` (or any object exposing ``sents`` as an
                iterable of token sequences).

        Returns:
            dict with the keys ``tense_stability``, ``avg_depth``,
            ``consecutio_index``, ``verb_density`` and ``sentence_depths``,
            every value rounded to 4 decimals.
        """
        self.reset_metrics()

        for sent in doc.sents:
            self.total_sentences += 1
            # Word count excludes punctuation.
            self.total_words += sum(1 for tok in sent if not tok.is_punct)

            try:
                root = self._build_tree(sent)
                if root is None:
                    self.root_tenses_list.append("N/A")
                    continue
                self._process_compound_tenses(root)
                self.root_tenses_list.append(root.compound_tense)
                self.sum_max_depth += self._calculate_tree_depth(root)
                self._check_consecutio_recursively(root)
            except Exception:
                # Best effort: a sentence that cannot be processed is skipped,
                # but the failure is logged instead of being hidden.
                logger.exception(
                    "Skipping sentence %d: analysis failed", self.total_sentences
                )
                continue

        stability = self._calculate_dominant_stability()
        avg_depth = (
            self.sum_max_depth / self.total_sentences
            if self.total_sentences > 0
            else 0.0
        )
        # With no verb pairs at all there is nothing to violate, hence 1.0.
        consecutio_score = (
            self.valid_consecutio_pairs / self.total_verb_pairs
            if self.total_verb_pairs > 0
            else 1.0
        )
        verb_density = (
            self.total_verb_groups / self.total_words
            if self.total_words > 0
            else 0.0
        )
        sentence_depths = self.calculate_average_graph_depth(doc)

        return {
            "tense_stability": round(stability, 4),
            "avg_depth": round(avg_depth, 4),
            "consecutio_index": round(consecutio_score, 4),
            "verb_density": round(verb_density, 4),
            "sentence_depths": round(sentence_depths, 4),
        }

    def _build_tree(self, sent):
        """Link the sentence's tokens into a tree; return its root or None."""
        node_map = {str(tok.i): self.TokenNode(tok) for tok in sent}
        root = None
        for node in node_map.values():
            # Use the explicit flag rather than `head_id == "0"`: a dependent
            # of the document's first token also has head_id "0" and must be
            # attached as a child, not mistaken for the root.
            if node.is_root or node.dep_rel.lower() == "root":
                root = node
            elif node.head_id in node_map:
                node_map[node.head_id].children.append(node)
        return root

    def _process_compound_tenses(self, node):
        """Assign a compound tense to every non-auxiliary verb under ``node``.

        Port of ``processCompoundTenses``: the subtree is visited depth-first;
        each verb that is not itself an auxiliary becomes the head of a verb
        group made of the verb plus its auxiliary children.
        """
        aux_children = []
        for child in node.children:
            if child.is_auxiliary_rel():
                aux_children.append(child)
            self._process_compound_tenses(child)

        if node.is_verb() and not node.is_auxiliary_rel():
            node.is_compound_head = True
            self.total_verb_groups += 1
            node.compound_tense = self._infer_compound_tense(
                [node] + aux_children, node
            )

    def _infer_compound_tense(self, group, head):
        """Return the tense label of a verb group (port of ``inferCompoundTense``).

        Precedence: any other modal -> "Mod"; will/'ll -> "Fut"; have/has/had
        with a VBN head -> "Perf"; otherwise the head's simple tense.
        """
        has_will = has_have = has_modal = False
        for member in group:
            if member is head:
                continue
            form = member.form.lower()
            if "will" in form or "'ll" in form:
                has_will = True
            elif any(stem in form for stem in ("have", "has", "had")):
                has_have = True
            elif member.pos_tag == "MD":
                has_modal = True

        if has_modal:
            return "Mod"
        if has_will:
            return "Fut"
        if has_have and head.pos_tag == "VBN":
            return "Perf"
        return head.get_simple_tense()

    def _calculate_tree_depth(self, node):
        """Return the height of the subtree at ``node``, ignoring auxiliaries.

        Port of ``calculateTreeDepth``; a node without (non-auxiliary)
        children has depth 1.
        """
        deepest_child = max(
            (
                self._calculate_tree_depth(child)
                for child in node.children
                if not child.is_auxiliary_rel()
            ),
            default=0,
        )
        return 1 + deepest_child

    def _check_consecutio_recursively(self, parent):
        """Count verb pairs and valid pairs below ``parent``.

        Port of ``checkConsecutioRecursively``: a pair is a verb-group head
        whose direct, non-auxiliary child is also a verb-group head.
        """
        for child in parent.children:
            if child.is_auxiliary_rel():
                continue
            if parent.is_compound_head and child.is_compound_head:
                self.total_verb_pairs += 1
                if self._is_consecutio_valid(
                    parent.compound_tense, child.compound_tense
                ):
                    self.valid_consecutio_pairs += 1
            self._check_consecutio_recursively(child)

    def _is_consecutio_valid(self, p, c):
        """Return whether dependent tense ``c`` is allowed under governor ``p``.

        Port of ``isConsecutioValid``: a present-group governor accepts
        anything; a past-group governor rejects a "Pres" or "Ger" dependent;
        every other combination is accepted.
        """
        if self._is_pres_group(p):
            return True
        if self._is_past_group(p) and c in ("Pres", "Ger"):
            return False
        return True

    def _is_past_group(self, t):
        """Return True when the label contains "Past", "Perf" or "Mod"."""
        return "Past" in t or "Perf" in t or "Mod" in t

    def _is_pres_group(self, t):
        """Return True when the label contains "Pres", "Fut" or "Ger"."""
        return "Pres" in t or "Fut" in t or "Ger" in t

    def _calculate_dominant_stability(self):
        """Return the share of root tenses that belong to the dominant group.

        Port of ``calculateDominantStability``. Roots labelled "N/A" are
        ignored; ties between past and present count as past-dominant. Roots
        in neither group stay in the denominator but are never aligned.
        Returns 0.0 when there is no usable root.
        """
        valid_roots = [t for t in self.root_tenses_list if t != "N/A"]
        if not valid_roots:
            return 0.0

        past_count = sum(1 for t in valid_roots if self._is_past_group(t))
        pres_count = sum(1 for t in valid_roots if self._is_pres_group(t))
        in_dominant_group = (
            self._is_past_group
            if past_count >= pres_count
            else self._is_pres_group
        )
        aligned = sum(1 for t in valid_roots if in_dominant_group(t))
        return aligned / len(valid_roots)

    @staticmethod
    def calculate_average_graph_depth(doc: Doc) -> float:
        """Return the mean dependency depth over all tokens of the document.

        A sentence root has depth 1 and every other token is one level deeper
        than its head. Returns 0.0 for a document without tokens.
        """
        total_depth = 0
        total_nodes = 0
        for sent in doc.sents:
            # Per-sentence memo keyed by the token's document index.
            depths = {}
            for token in sent:
                total_depth += ConsecutioAnalyzer._get_token_depth(token, depths)
                total_nodes += 1
        return total_depth / total_nodes if total_nodes > 0 else 0.0

    @staticmethod
    def _get_token_depth(token: Token, depths: dict) -> int:
        """Return the depth of ``token`` (root = 1), memoizing into ``depths``."""
        if token.i in depths:
            return depths[token.i]

        # A root token is its own head; compared by index, as TokenNode does.
        if token.head.i == token.i:
            depth = 1
        else:
            depth = ConsecutioAnalyzer._get_token_depth(token.head, depths) + 1
        depths[token.i] = depth
        return depth