# Hugging Face Space: amaisto/CO3 (status: Sleeping)
# File size: 8,114 bytes

import logging
import math

from spacy.tokens import Doc, Token

class ConsecutioAnalyzer:
    """Compute tense-consistency and syntactic-depth metrics for a parsed document.

    The analyzer walks the dependency tree of every sentence, groups each main
    verb with its auxiliaries into a "compound tense", and derives:

    * how consistently the sentence roots stick to one tense family,
    * how often a verb and a verb it governs form an accepted tense pair,
    * the density of verb groups per word, and
    * two depth measures of the dependency trees.

    According to the original author's notes the logic is a port of
    ``consecutio.java``.
    """

    def __init__(self):
        self.reset_metrics()

    def reset_metrics(self):
        """Zero every per-document accumulator (called at the start of ``analyze``)."""
        self.total_sentences = 0
        self.sum_max_depth = 0
        self.total_verb_pairs = 0
        self.valid_consecutio_pairs = 0
        self.total_words = 0
        self.total_verb_groups = 0
        self.root_tenses_list = []

    class TokenNode:
        """Lightweight tree node built from one parsed token."""

        # Fine-grained tag -> simple tense label used by get_simple_tense().
        _SIMPLE_TENSES = {
            "VBD": "Past",
            "VBN": "Past",
            "VBP": "Pres",
            "VBZ": "Pres",
            "VBG": "Ger",
            "MD": "Mod",
            "VB": "Inf",
        }

        def __init__(self, spacy_token):
            self.id = str(spacy_token.i)
            self.form = spacy_token.text
            # The fine-grained tag (VBD, VBP, ...) carries the tense information.
            self.pos_tag = spacy_token.tag_
            self.dep_rel = spacy_token.dep_
            # The root token is its own head. Keep an explicit flag: ids are
            # 0-based here, so head_id == "0" alone cannot tell "this is the
            # root" apart from "my head is the first token of the document".
            self.is_root = spacy_token.head.i == spacy_token.i
            # head_id keeps the "0 means root" convention noted for the Java original.
            self.head_id = "0" if self.is_root else str(spacy_token.head.i)
            self.children = []
            self.compound_tense = ""
            self.is_compound_head = False

        def is_verb(self):
            """Return True for verb-like tags (V*, MD*, or the literal 'AUX')."""
            return self.pos_tag.startswith(("V", "MD")) or self.pos_tag == "AUX"

        def is_auxiliary_rel(self):
            """Return True when the dependency label contains 'aux' (case-insensitive)."""
            return "aux" in self.dep_rel.lower()

        def get_simple_tense(self):
            """Map the fine-grained tag to a simple tense label, or 'N/A' if unmapped."""
            return self._SIMPLE_TENSES.get(self.pos_tag, "N/A")

    def analyze(self, doc):
        """Analyze every sentence of ``doc`` and return the rounded metrics.

        Args:
            doc: a parsed document exposing ``sents``; every token must provide
                ``i``, ``text``, ``tag_``, ``dep_``, ``head`` and ``is_punct``.

        Returns:
            dict with the keys ``tense_stability``, ``avg_depth``,
            ``consecutio_index``, ``verb_density`` and ``sentence_depths``,
            each rounded to four decimals.
        """
        self.reset_metrics()

        for sent in doc.sents:
            self.total_sentences += 1
            # Word count excludes punctuation.
            self.total_words += sum(1 for tok in sent if not tok.is_punct)

            try:
                root = self._build_sentence_tree(sent)
                if root is None:
                    self.root_tenses_list.append("N/A")
                    continue
                self._process_compound_tenses(root)
                self.root_tenses_list.append(root.compound_tense)
                self.sum_max_depth += self._calculate_tree_depth(root)
                self._check_consecutio_recursively(root)
            except Exception:
                # Best effort: one bad sentence must not abort the document,
                # but the failure is reported instead of vanishing silently.
                logging.getLogger(__name__).exception(
                    "Skipping tense analysis for a sentence that could not be processed"
                )

        stability = self._calculate_dominant_stability()
        avg_depth = (
            self.sum_max_depth / self.total_sentences
            if self.total_sentences > 0
            else 0.0
        )
        # With no verb pairs at all, nothing is violated: score defaults to 1.0.
        consecutio_score = (
            self.valid_consecutio_pairs / self.total_verb_pairs
            if self.total_verb_pairs > 0
            else 1.0
        )
        verb_density = (
            self.total_verb_groups / self.total_words
            if self.total_words > 0
            else 0.0
        )
        sentence_depths = self.calculate_average_graph_depth(doc)
        return {
            "tense_stability": round(stability, 4),
            "avg_depth": round(avg_depth, 4),
            "consecutio_index": round(consecutio_score, 4),
            "verb_density": round(verb_density, 4),
            "sentence_depths": round(sentence_depths, 4),
        }

    def _build_sentence_tree(self, sent):
        """Link the sentence's tokens into TokenNode objects; return the root or None."""
        node_map = {}
        for tok in sent:
            node = self.TokenNode(tok)
            node_map[node.id] = node

        root = None
        for node in node_map.values():
            if node.is_root or node.dep_rel.lower() == "root":
                root = node
            elif node.head_id in node_map:
                node_map[node.head_id].children.append(node)
        return root

    def _process_compound_tenses(self, node):
        """Mark every non-auxiliary verb under ``node`` as a compound-tense head.

        Each head gets its tense inferred from itself plus its auxiliary
        children, and ``total_verb_groups`` is incremented once per head.
        The walk is iterative so deep trees cannot hit the recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            pending.extend(current.children)
            if current.is_verb() and not current.is_auxiliary_rel():
                auxiliaries = [c for c in current.children if c.is_auxiliary_rel()]
                current.is_compound_head = True
                self.total_verb_groups += 1
                current.compound_tense = self._infer_compound_tense(
                    [current] + auxiliaries, current
                )

    def _infer_compound_tense(self, group, head):
        """Infer the tense label of a verb group from its auxiliaries.

        Precedence: any non-'will'/'have' modal -> 'Mod'; 'will'/''ll' -> 'Fut';
        a 'have' form with a VBN head -> 'Perf'; otherwise the head's simple tense.
        """
        has_will = has_have = has_modal = False
        for member in group:
            if member is head:
                continue
            form = member.form.lower()
            if "will" in form or "'ll" in form:
                has_will = True
            elif any(stem in form for stem in ("have", "has", "had")):
                has_have = True
            elif member.pos_tag == "MD":
                has_modal = True

        if has_modal:
            return "Mod"
        if has_will:
            return "Fut"
        if has_have and head.pos_tag == "VBN":
            return "Perf"
        return head.get_simple_tense()

    def _calculate_tree_depth(self, node):
        """Return the longest root-to-leaf path length, ignoring auxiliary subtrees.

        A lone node has depth 1. Implemented with an explicit stack so that
        very deep trees do not raise RecursionError.
        """
        deepest = 0
        pending = [(node, 1)]
        while pending:
            current, level = pending.pop()
            if level > deepest:
                deepest = level
            pending.extend(
                (child, level + 1)
                for child in current.children
                if not child.is_auxiliary_rel()
            )
        return deepest

    def _check_consecutio_recursively(self, parent):
        """Count verb-head parent/child pairs and how many of them are valid.

        Auxiliary children (and everything below them) are skipped. Despite
        the historical name the traversal is iterative.
        """
        pending = [parent]
        while pending:
            current = pending.pop()
            for child in current.children:
                if child.is_auxiliary_rel():
                    continue
                if current.is_compound_head and child.is_compound_head:
                    self.total_verb_pairs += 1
                    if self._is_consecutio_valid(
                        current.compound_tense, child.compound_tense
                    ):
                        self.valid_consecutio_pairs += 1
                pending.append(child)

    def _is_consecutio_valid(self, p, c):
        """Return False only for a past-group parent governing a 'Pres' or 'Ger' child."""
        if self._is_pres_group(p):
            return True
        if self._is_past_group(p) and c in ("Pres", "Ger"):
            return False
        return True

    def _is_past_group(self, t):
        """Return True when the label contains 'Past', 'Perf' or 'Mod'."""
        return "Past" in t or "Perf" in t or "Mod" in t

    def _is_pres_group(self, t):
        """Return True when the label contains 'Pres', 'Fut' or 'Ger'."""
        return "Pres" in t or "Fut" in t or "Ger" in t

    def _calculate_dominant_stability(self):
        """Return the share of root tenses that belong to the dominant tense family.

        Roots recorded as 'N/A' are ignored; ties favour the past family.
        Returns 0.0 when no usable root tense was recorded.
        """
        valid_roots = [t for t in self.root_tenses_list if t != "N/A"]
        if not valid_roots:
            return 0.0
        past_count = sum(1 for t in valid_roots if self._is_past_group(t))
        pres_count = sum(1 for t in valid_roots if self._is_pres_group(t))
        in_dominant = (
            self._is_past_group if past_count >= pres_count else self._is_pres_group
        )
        aligned = sum(1 for t in valid_roots if in_dominant(t))
        return aligned / len(valid_roots)

    @staticmethod
    def calculate_average_graph_depth(doc: "Doc") -> float:
        """Return the mean depth of all tokens in ``doc`` (a root has depth 1).

        Returns 0.0 for a document without tokens.
        """
        total_depth = 0
        total_nodes = 0

        for sent in doc.sents:
            # Per-sentence memo keyed by the token's document index.
            depths = {}
            for token in sent:
                total_depth += ConsecutioAnalyzer._get_token_depth(token, depths)
                total_nodes += 1

        return total_depth / total_nodes if total_nodes > 0 else 0.0

    @staticmethod
    def _get_token_depth(token: "Token", depths: dict) -> int:
        """Return the depth of ``token`` (root = 1), filling ``depths`` as a memo.

        Walks up the head chain iteratively, so arbitrarily long chains do not
        hit the recursion limit.

        Raises:
            ValueError: if the head links form a cycle that never reaches a root.
        """
        chain = []
        on_path = set()
        current = token
        # Climb until we meet a memoized token or the root (its own head).
        while current.i not in depths and current.head.i != current.i:
            if current.i in on_path:
                raise ValueError("cycle detected in dependency heads")
            on_path.add(current.i)
            chain.append(current)
            current = current.head

        # Either already memoized, or the root, whose depth is 1.
        depth = depths.setdefault(current.i, 1)
        for node in reversed(chain):
            depth += 1
            depths[node.i] = depth
        return depth