File size: 8,114 Bytes
9d2f453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b47539a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import logging
import math

from spacy.tokens import Doc, Token

class ConsecutioAnalyzer:
    """Compute tense-agreement ("consecutio") and tree-depth metrics for a parsed document.

    For every sentence the dependency parse is rebuilt as a tree of
    :class:`TokenNode` objects; verb groups are labelled with a coarse tense,
    and parent/child verb-group pairs are checked for tense agreement.
    According to the original comments, the logic mirrors ``consecutio.java``.

    The counters set by :meth:`reset_metrics` hold the state of the most
    recent :meth:`analyze` call.
    """

    _logger = logging.getLogger(__name__)

    def __init__(self):
        self.reset_metrics()

    def reset_metrics(self):
        """Reset every accumulator to its initial (empty) value."""
        self.total_sentences = 0
        self.sum_max_depth = 0
        self.total_verb_pairs = 0
        self.valid_consecutio_pairs = 0
        self.total_words = 0
        self.total_verb_groups = 0
        self.root_tenses_list = []

    class TokenNode:
        """Tree node wrapping the attributes of one parsed token that the analysis needs."""

        # Fine-grained tag -> coarse tense label used by get_simple_tense().
        _SIMPLE_TENSES = {
            "VBD": "Past",
            "VBN": "Past",
            "VBP": "Pres",
            "VBZ": "Pres",
            "VBG": "Ger",
            "MD": "Mod",
            "VB": "Inf",
        }

        def __init__(self, spacy_token):
            """Copy index, text, fine-grained tag, dependency label and head from *spacy_token*."""
            self.id = str(spacy_token.i)
            self.form = spacy_token.text
            # The fine-grained tag (VBD, VBP, ...) is what carries tense information.
            self.pos_tag = spacy_token.tag_
            self.dep_rel = spacy_token.dep_
            # A spaCy root token is its own head.  Keep that fact as an explicit
            # flag: the "0" head sentinel below is indistinguishable from a real
            # reference to the token at document index 0.
            self.is_root = spacy_token.head.i == spacy_token.i
            self.head_id = "0" if self.is_root else str(spacy_token.head.i)
            self.children = []
            self.compound_tense = ""
            self.is_compound_head = False

        def is_verb(self):
            """Return True if the tag starts with ``V`` or ``MD``, or equals ``AUX``."""
            return self.pos_tag.startswith(("V", "MD")) or self.pos_tag == "AUX"

        def is_auxiliary_rel(self):
            """Return True if the dependency label contains ``aux`` (case-insensitive)."""
            return "aux" in self.dep_rel.lower()

        def get_simple_tense(self):
            """Map the tag to ``Past``/``Pres``/``Ger``/``Mod``/``Inf``, or ``"N/A"`` if unmapped."""
            return self._SIMPLE_TENSES.get(self.pos_tag, "N/A")

    def analyze(self, doc):
        """Analyze a parsed document and return its aggregate metrics.

        Args:
            doc: A parsed spaCy ``Doc`` (any object exposing ``sents`` whose
                tokens provide ``i``, ``text``, ``tag_``, ``dep_``, ``head``
                and ``is_punct`` works).

        Returns:
            A dict of values rounded to 4 decimals:

            * ``tense_stability`` - share of sentence roots whose tense falls
              in the dominant (past or present) group.
            * ``avg_depth`` - mean maximum tree depth per sentence.
            * ``consecutio_index`` - share of parent/child verb-group pairs
              judged valid; ``1.0`` when there are no pairs.
            * ``verb_density`` - verb groups per non-punctuation token.
            * ``sentence_depths`` - mean depth of all tokens (root = 1).

        A sentence whose processing raises is logged and skipped; it still
        counts towards the sentence and word totals.
        """
        self.reset_metrics()

        for sent in doc.sents:
            self.total_sentences += 1
            # Word count excludes punctuation.
            self.total_words += sum(1 for tok in sent if not tok.is_punct)

            try:
                root = self._build_sentence_tree(sent)
                if root is None:
                    self.root_tenses_list.append("N/A")
                    continue
                self._process_compound_tenses(root)
                self.root_tenses_list.append(root.compound_tense)
                self.sum_max_depth += self._calculate_tree_depth(root)
                self._check_consecutio_recursively(root)
            except Exception:
                # Best effort: one bad sentence must not abort the whole
                # document, but the failure should not vanish silently.
                self._logger.warning(
                    "Skipping sentence %d: dependency analysis failed",
                    self.total_sentences,
                    exc_info=True,
                )
                continue

        stability = self._calculate_dominant_stability()
        avg_depth = (
            self.sum_max_depth / self.total_sentences
            if self.total_sentences > 0
            else 0.0
        )
        consecutio_score = (
            self.valid_consecutio_pairs / self.total_verb_pairs
            if self.total_verb_pairs > 0
            else 1.0
        )
        verb_density = (
            self.total_verb_groups / self.total_words
            if self.total_words > 0
            else 0.0
        )
        sentence_depths = self.calculate_average_graph_depth(doc)
        return {
            "tense_stability": round(stability, 4),
            "avg_depth": round(avg_depth, 4),
            "consecutio_index": round(consecutio_score, 4),
            "verb_density": round(verb_density, 4),
            "sentence_depths": round(sentence_depths, 4),
        }

    def _build_sentence_tree(self, sent):
        """Build the TokenNode tree for one sentence and return its root (or None)."""
        node_map = {str(tok.i): self.TokenNode(tok) for tok in sent}
        root = None
        for node in node_map.values():
            # Root detection relies on the explicit flag, not on head_id == "0":
            # a dependent of document token 0 also has head_id "0" and must be
            # attached to that token instead of replacing the root.
            if node.is_root or node.dep_rel.lower() == "root":
                root = node
            elif node.head_id in node_map:
                node_map[node.head_id].children.append(node)
        return root

    def _process_compound_tenses(self, node):
        """Label every non-auxiliary verb in the subtree with its compound tense.

        Each such verb is marked as a compound head, counted as one verb
        group, and its tense is inferred from itself plus its direct
        auxiliary children.
        """
        aux_children = []
        for child in node.children:
            if child.is_auxiliary_rel():
                aux_children.append(child)
            self._process_compound_tenses(child)

        if node.is_verb() and not node.is_auxiliary_rel():
            node.is_compound_head = True
            self.total_verb_groups += 1
            node.compound_tense = self._infer_compound_tense(
                [node] + aux_children, node
            )

    def _infer_compound_tense(self, group, head):
        """Infer the tense label of a verb group.

        Args:
            group: The head node together with its auxiliary nodes.
            head: The main verb of the group.

        Returns:
            ``"Mod"`` if a modal other than will/'ll is present, else
            ``"Fut"`` for will/'ll, else ``"Perf"`` for have/has/had with a
            ``VBN`` head, else the head's simple tense.
        """
        has_will = has_have = has_modal = False
        for member in group:
            if member is head:
                continue
            form = member.form.lower()
            # Substring checks are intentional: they also catch contracted or
            # inflected surface forms of the auxiliaries.
            if "will" in form or "'ll" in form:
                has_will = True
            elif any(stem in form for stem in ("have", "has", "had")):
                has_have = True
            elif member.pos_tag == "MD":
                has_modal = True

        if has_modal:
            return "Mod"
        if has_will:
            return "Fut"
        if has_have and head.pos_tag == "VBN":
            return "Perf"
        return head.get_simple_tense()

    def _calculate_tree_depth(self, node):
        """Return the depth of the subtree at *node* (a leaf is 1), ignoring auxiliary children."""
        deepest_child = max(
            (
                self._calculate_tree_depth(child)
                for child in node.children
                if not child.is_auxiliary_rel()
            ),
            default=0,
        )
        return 1 + deepest_child

    def _check_consecutio_recursively(self, parent):
        """Count and validate every (parent verb group, child verb group) pair in the subtree.

        Auxiliary children are skipped entirely, including their subtrees.
        """
        for child in parent.children:
            if child.is_auxiliary_rel():
                continue
            if parent.is_compound_head and child.is_compound_head:
                self.total_verb_pairs += 1
                if self._is_consecutio_valid(
                    parent.compound_tense, child.compound_tense
                ):
                    self.valid_consecutio_pairs += 1
            self._check_consecutio_recursively(child)

    def _is_consecutio_valid(self, p, c):
        """Return False only when a past-group parent governs a ``Pres`` or ``Ger`` child."""
        if self._is_pres_group(p):
            return True
        if self._is_past_group(p) and c in ("Pres", "Ger"):
            return False
        return True

    def _is_past_group(self, t):
        """Return True if the tense label contains ``Past``, ``Perf`` or ``Mod``."""
        return "Past" in t or "Perf" in t or "Mod" in t

    def _is_pres_group(self, t):
        """Return True if the tense label contains ``Pres``, ``Fut`` or ``Ger``."""
        return "Pres" in t or "Fut" in t or "Ger" in t

    def _calculate_dominant_stability(self):
        """Return the share of root tenses belonging to the dominant tense group.

        Roots labelled ``"N/A"`` are ignored; ties between the past and
        present groups resolve to past.  Returns 0.0 when no root has a tense.
        """
        valid_roots = [t for t in self.root_tenses_list if t != "N/A"]
        if not valid_roots:
            return 0.0
        past_count = sum(1 for t in valid_roots if self._is_past_group(t))
        pres_count = sum(1 for t in valid_roots if self._is_pres_group(t))
        in_dominant = (
            self._is_past_group if past_count >= pres_count else self._is_pres_group
        )
        aligned = sum(1 for t in valid_roots if in_dominant(t))
        return aligned / len(valid_roots)

    @staticmethod
    def calculate_average_graph_depth(doc: "Doc") -> float:
        """Return the mean depth of all tokens in the document (a sentence root has depth 1).

        Returns 0.0 for a document without tokens.
        """
        total_depth = 0
        total_nodes = 0

        for sent in doc.sents:
            # Per-sentence memo of token depths, keyed by document index.
            depths = {}
            for token in sent:
                total_depth += ConsecutioAnalyzer._get_token_depth(token, depths)
            total_nodes += len(sent)

        return total_depth / total_nodes if total_nodes > 0 else 0.0

    @staticmethod
    def _get_token_depth(token: "Token", depths: dict) -> int:
        """Return the depth of *token*, memoising results in *depths*.

        The root (a token that is its own head) has depth 1; any other token
        is one level deeper than its head.
        """
        if token.i in depths:
            return depths[token.i]

        if token.head == token:
            depth = 1
        else:
            depth = ConsecutioAnalyzer._get_token_depth(token.head, depths) + 1
        depths[token.i] = depth
        return depth