Spaces:

amaisto
/

CO3

Sleeping

App Files Files Community

CO3 / src /modules /consecutio.py

amaisto

Update src/modules/consecutio.py

9d2f453 verified 24 days ago

raw

history blame contribute delete

8.11 kB

	import math
	from spacy.tokens import Doc, Token

	class ConsecutioAnalyzer:
	    """Compute tense-sequence ("consecutio") metrics over a parsed spaCy document.

	    For every sentence a small dependency tree of ``TokenNode`` objects is
	    built, a compound tense is assigned to each non-auxiliary verb, and the
	    governing/dependent verb pairs are checked for tense agreement.  The
	    aggregated figures are returned by :meth:`analyze`.
	    """

	    def __init__(self):
	        self.reset_metrics()

	    def reset_metrics(self):
	        """Zero every accumulator so the instance can analyze a new document."""
	        self.total_sentences = 0
	        self.sum_max_depth = 0
	        self.total_verb_pairs = 0
	        self.valid_consecutio_pairs = 0
	        self.total_words = 0
	        self.total_verb_groups = 0
	        self.root_tenses_list = []

	    class TokenNode:
	        """Dependency-tree node wrapping the fields of one spaCy token."""

	        # Fine-grained tag -> simple tense label; any other tag maps to "N/A".
	        _SIMPLE_TENSES = {
	            "VBD": "Past",
	            "VBN": "Past",
	            "VBP": "Pres",
	            "VBZ": "Pres",
	            "VBG": "Ger",
	            "MD": "Mod",
	            "VB": "Inf",
	        }

	        def __init__(self, spacy_token):
	            own_index = spacy_token.i
	            head_index = spacy_token.head.i
	            self.id = str(own_index)
	            self.form = spacy_token.text
	            # The fine-grained tag (VBD, VBP, ...) is what carries the tense.
	            self.pos_tag = spacy_token.tag_
	            self.dep_rel = spacy_token.dep_
	            # A spaCy root is its own head.  ``head_id`` keeps the "0"
	            # sentinel for that case, but "0" is ALSO the id of the first
	            # token of the document, so ``head_id == "0"`` alone cannot
	            # identify a root.  ``is_root`` is the unambiguous flag.
	            self.is_root = (
	                head_index == own_index or self.dep_rel.lower() == "root"
	            )
	            self.head_id = "0" if head_index == own_index else str(head_index)
	            self.children = []
	            self.compound_tense = ""
	            self.is_compound_head = False

	        def is_verb(self):
	            """Return True for tags starting with "V" or "MD", or equal to "AUX"."""
	            return self.pos_tag.startswith(("V", "MD")) or self.pos_tag == "AUX"

	        def is_auxiliary_rel(self):
	            """Return True when the dependency label contains "aux" (case-insensitive)."""
	            return "aux" in self.dep_rel.lower()

	        def get_simple_tense(self):
	            """Map the token's tag to "Past", "Pres", "Ger", "Mod", "Inf" or "N/A"."""
	            return self._SIMPLE_TENSES.get(self.pos_tag, "N/A")

	    def analyze(self, doc):
	        """Analyze a parsed document and return its tense metrics.

	        Args:
	            doc: object exposing ``sents`` (an iterable of sentences); each
	                sentence yields tokens with ``i``, ``text``, ``tag_``,
	                ``dep_``, ``head`` and ``is_punct``.

	        Returns:
	            dict with the keys ``tense_stability``, ``avg_depth``,
	            ``consecutio_index``, ``verb_density`` and ``sentence_depths``,
	            each rounded to four decimals.
	        """
	        self.reset_metrics()

	        for sent in doc.sents:
	            self.total_sentences += 1
	            # Punctuation does not count as a word.
	            self.total_words += sum(1 for tok in sent if not tok.is_punct)

	            try:
	                root = self._build_sentence_tree(sent)
	                if root is None:
	                    self.root_tenses_list.append("N/A")
	                    continue
	                self._process_compound_tenses(root)
	                self.root_tenses_list.append(root.compound_tense)
	                self.sum_max_depth += self._calculate_tree_depth(root)
	                self._check_consecutio_recursively(root)
	            except Exception:
	                # Best effort: one unparsable sentence must not abort the
	                # whole document, but the failure should stay visible.
	                import logging

	                logging.getLogger(__name__).warning(
	                    "Skipping sentence %d: tense analysis failed",
	                    self.total_sentences,
	                    exc_info=True,
	                )

	        stability = self._calculate_dominant_stability()
	        if self.total_sentences > 0:
	            avg_depth = self.sum_max_depth / self.total_sentences
	        else:
	            avg_depth = 0.0
	        # With no verb pair at all there is nothing to violate: score 1.0.
	        if self.total_verb_pairs > 0:
	            consecutio_score = self.valid_consecutio_pairs / self.total_verb_pairs
	        else:
	            consecutio_score = 1.0
	        if self.total_words > 0:
	            verb_density = self.total_verb_groups / self.total_words
	        else:
	            verb_density = 0.0
	        sentence_depths = self.calculate_average_graph_depth(doc)

	        return {
	            "tense_stability": round(stability, 4),
	            "avg_depth": round(avg_depth, 4),
	            "consecutio_index": round(consecutio_score, 4),
	            "verb_density": round(verb_density, 4),
	            "sentence_depths": round(sentence_depths, 4),
	        }

	    def _build_sentence_tree(self, sent):
	        """Build the TokenNode tree for one sentence and return its root (or None)."""
	        node_map = {str(tok.i): self.TokenNode(tok) for tok in sent}
	        root = None
	        for node in node_map.values():
	            if node.is_root:
	                # If several tokens qualify, the last one wins.
	                root = node
	            elif node.head_id in node_map:
	                node_map[node.head_id].children.append(node)
	        return root

	    def _process_compound_tenses(self, node):
	        """Assign a compound tense to every non-auxiliary verb below ``node``.

	        Each such verb is flagged as a compound head and counted in
	        ``total_verb_groups``; its auxiliary children take part in the
	        tense inference.
	        """
	        aux_children = []
	        for child in node.children:
	            if child.is_auxiliary_rel():
	                aux_children.append(child)
	            self._process_compound_tenses(child)

	        if node.is_verb() and not node.is_auxiliary_rel():
	            node.is_compound_head = True
	            self.total_verb_groups += 1
	            node.compound_tense = self._infer_compound_tense(
	                [node] + aux_children, node
	            )

	    def _infer_compound_tense(self, group, head):
	        """Infer the tense of a verb group from its auxiliaries.

	        Priority: another modal -> "Mod"; will/'ll -> "Fut"; have/has/had
	        with a VBN head -> "Perf"; otherwise the head's simple tense.
	        """
	        has_will = has_have = has_modal = False
	        for member in group:
	            if member is head:
	                continue
	            form = member.form.lower()
	            if "will" in form or "'ll" in form:
	                has_will = True
	            elif any(stem in form for stem in ("have", "has", "had")):
	                has_have = True
	            elif member.pos_tag == "MD":
	                has_modal = True

	        if has_modal:
	            return "Mod"
	        if has_will:
	            return "Fut"
	        if has_have and head.pos_tag == "VBN":
	            return "Perf"
	        return head.get_simple_tense()

	    def _calculate_tree_depth(self, node):
	        """Return the height of the subtree at ``node``, ignoring auxiliary children."""
	        if not node.children:
	            return 1
	        deepest = 0
	        for child in node.children:
	            if child.is_auxiliary_rel():
	                continue
	            deepest = max(deepest, self._calculate_tree_depth(child))
	        return 1 + deepest

	    def _check_consecutio_recursively(self, parent):
	        """Count parent/child verb pairs and how many of them agree in tense."""
	        for child in parent.children:
	            if child.is_auxiliary_rel():
	                continue
	            if parent.is_compound_head and child.is_compound_head:
	                self.total_verb_pairs += 1
	                if self._is_consecutio_valid(
	                    parent.compound_tense, child.compound_tense
	                ):
	                    self.valid_consecutio_pairs += 1
	            self._check_consecutio_recursively(child)

	    def _is_consecutio_valid(self, p, c):
	        """Return False only for a past-group parent governing a "Pres" or "Ger" child."""
	        if self._is_pres_group(p):
	            return True
	        if self._is_past_group(p):
	            return c not in ("Pres", "Ger")
	        return True

	    def _is_past_group(self, t):
	        """Return True when the label contains "Past", "Perf" or "Mod"."""
	        return "Past" in t or "Perf" in t or "Mod" in t

	    def _is_pres_group(self, t):
	        """Return True when the label contains "Pres", "Fut" or "Ger"."""
	        return "Pres" in t or "Fut" in t or "Ger" in t

	    def _calculate_dominant_stability(self):
	        """Return the share of root tenses that belong to the dominant group.

	        Roots recorded as "N/A" are ignored; ties favor the past group.
	        """
	        valid_roots = [t for t in self.root_tenses_list if t != "N/A"]
	        if not valid_roots:
	            return 0.0
	        past_count = sum(1 for t in valid_roots if self._is_past_group(t))
	        pres_count = sum(1 for t in valid_roots if self._is_pres_group(t))
	        in_dominant = (
	            self._is_past_group if past_count >= pres_count else self._is_pres_group
	        )
	        aligned = sum(1 for t in valid_roots if in_dominant(t))
	        return aligned / len(valid_roots)

	    @staticmethod
	    def calculate_average_graph_depth(doc: "Doc") -> float:
	        """Return the mean token depth over all sentences of ``doc``.

	        A root has depth 1 and every other token has its head's depth plus
	        one.  Returns 0.0 when the document has no tokens.
	        """
	        total_depth = 0
	        total_nodes = 0

	        for sent in doc.sents:
	            # Per-sentence memo keyed by the token's index in the document.
	            depths = {}
	            for token in sent:
	                depths[token.i] = ConsecutioAnalyzer._get_token_depth(token, depths)
	                total_depth += depths[token.i]
	            total_nodes += len(sent)

	        return total_depth / total_nodes if total_nodes > 0 else 0.0

	    @staticmethod
	    def _get_token_depth(token: "Token", depths: dict) -> int:
	        """Return the depth of ``token``, memoizing results in ``depths``."""
	        if token.i in depths:
	            return depths[token.i]

	        # The root is the token that is its own head.
	        if token.head == token:
	            depths[token.i] = 1
	            return 1

	        depth = ConsecutioAnalyzer._get_token_depth(token.head, depths) + 1
	        depths[token.i] = depth
	        return depth