# analysis_tool / app.py
# (Hugging Face Space file — author: devusman, revision 4f5a1e9, 8.64 kB)
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy

# --- MODEL LOADING ---
# The Italian model is loaded by its package name: it is installed as a
# regular dependency from requirements.txt, so no model folder has to be
# placed next to this script.
try:
    nlp = spacy.load("it_core_news_sm")
except OSError:
    raise RuntimeError(
        "Could not find the 'it_core_news_sm' model. "
        "Please ensure it is listed and installed from your requirements.txt file."
    )

# Flask application, with CORS enabled so a frontend served from another
# origin can call this API.
app = Flask(__name__)
CORS(app)
# A mapping from spaCy dependency labels to our logical analysis labels
# Translation table: Universal Dependencies relation labels (as produced by
# spaCy's Italian parser) -> the traditional Italian "analisi logica" role
# names shown to the user.
DEP_MAP = {
    "nsubj": "Soggetto",
    "ROOT": "Predicato Verbale",
    "obj": "Complemento Oggetto",
    "iobj": "Complemento di Termine",
    "obl": "Complemento Indiretto",
    "nmod": "Complemento di Specificazione",
    "amod": "Attributo",
    "advmod": "Complemento Avverbiale",
    "appos": "Apposizione",
    "acl:relcl": "Proposizione Subordinata Relativa",
    "advcl": "Proposizione Subordinata Avverbiale",
    "ccomp": "Proposizione Subordinata Oggettiva",
    "csubj": "Proposizione Subordinata Soggettiva",
}
def get_complement_type(token):
    """Classify an oblique complement by the preposition that introduces it.

    Looks for a ``case`` dependent of *token* (or, failing that, of its
    head when the head itself is an ``obl`` — this covers complex
    prepositional phrases) and maps the preposition to the traditional
    Italian complement name.  Falls back to the generic
    "Complemento Indiretto" when no preposition is recognised.
    """
    prep = next((c.text.lower() for c in token.children if c.dep_ == "case"), "")
    if not prep and token.head.dep_ == 'obl':
        prep = next(
            (c.text.lower() for c in token.head.children if c.dep_ == "case"), ""
        )

    if prep in ("di", "del", "dello", "della", "dei", "degli", "delle"):
        return "Complemento di Specificazione"
    if prep in ("a", "al", "allo", "alla", "ai", "agli", "alle"):
        return "Complemento di Termine"
    if prep in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        # In a passive sentence "da" introduces the agent; otherwise it is
        # read as motion-from-place.
        is_passive = any(c.dep_ == 'aux:pass' for c in token.head.children)
        return "Complemento d'Agente" if is_passive else "Complemento di Moto da Luogo"
    if prep in ("in", "nel", "nello", "nella", "nei", "negli", "nelle"):
        return "Complemento di Stato in Luogo"
    if prep in ("con", "col", "coi"):
        return "Complemento di Compagnia o Mezzo"
    if prep in ("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"):
        return "Complemento di Argomento o Luogo"
    if prep == "per":
        return "Complemento di Fine o Causa"
    if prep in ("tra", "fra"):
        return "Complemento di Luogo o Tempo (Partitivo)"
    return "Complemento Indiretto"
def get_full_text(token):
    """Return *token* and its function-word dependents as one phrase string.

    Only direct children carrying the ``det``, ``amod``, ``case`` or
    ``advmod`` relation (articles, adjectives, prepositions, adverbs) are
    folded in; the pieces are sorted by document position so the phrase
    reads in its original word order.
    """
    keep = ("det", "amod", "case", "advmod")
    parts = [token, *(child for child in token.children if child.dep_ in keep)]
    parts.sort(key=lambda t: t.i)
    return " ".join(part.text for part in parts)
def build_phrases(tokens):
    """Group *tokens* into labelled phrases for the logical analysis.

    Function words (articles, prepositions, auxiliaries, copulas, markers,
    punctuation) are absorbed into their head's phrase; every remaining
    content head is then assigned a role label from its dependency
    relation.  Returns a list of ``{"text": ..., "label": ...}`` dicts.
    """
    FUNCTION_DEPS = ('det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark')

    # Pass 1: one phrase entry per content head, keyed by doc position.
    heads = {}
    for tok in tokens:
        if tok.dep_ not in FUNCTION_DEPS:
            heads[tok.i] = {
                "text": get_full_text(tok),
                "label": "",  # assigned in pass 2
                "token": tok,
            }

    # Pass 2: assign a role label to each phrase.
    analysis = []
    seen = set()
    for idx, entry in heads.items():
        if idx in seen:
            continue
        tok = entry['token']
        rel = tok.dep_
        label = ""
        if rel == "ROOT":
            # A copular child means a nominal predicate (e.g. "è bello"):
            # emit the copula and the nominal part as two separate items.
            copulas = [c for c in tok.children if c.dep_ == 'cop']
            if copulas:
                analysis.append({
                    "text": copulas[0].text,
                    "label": "Copula"
                })
                analysis.append({
                    "text": get_full_text(tok),
                    "label": "Parte Nominale del Predicato"
                })
            else:
                label = "Predicato Verbale"
        elif rel == 'obl':
            # Obliques get a finer-grained label from their preposition.
            label = get_complement_type(tok)
        elif rel in DEP_MAP:
            label = DEP_MAP[rel]
        if label:
            analysis.append({"text": entry['text'], "label": label})
        seen.add(idx)
    return analysis
def analyze_clause(clause_tokens):
    """Run the phrase-level analysis on a single clause.

    The introducing conjunction (dependency ``mark``, e.g. "che") belongs
    to the sentence structure rather than to the clause itself, so it is
    dropped before the remaining tokens are grouped into phrases.
    """
    relevant = [tok for tok in clause_tokens if tok.dep_ != 'mark']
    return build_phrases(relevant)
@app.route("/")
def home():
    """Root endpoint: confirm the API is up and point at /api/analyze."""
    payload = {"message": "API is running. Use the /api/analyze endpoint with a POST request."}
    return jsonify(payload)
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Main endpoint: receive a sentence and return its logical analysis.

    Expects a JSON body like ``{"sentence": "..."}``.  Returns the analysis
    of the main clause plus one entry per detected subordinate clause;
    responds 400 when the payload is missing and 500 on internal failure.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided in JSON payload"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        subordinate_indices = set()

        # Identify subordinate clauses first; whatever is left over
        # constitutes the main clause.
        for token in doc:
            # Subordinate clauses are identified by specific dependency relations
            if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
                # The subtree of the token constitutes the subordinate clause
                sub_clause_tokens = list(token.subtree)
                sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")
                # Find the introducing element (e.g., 'che', 'quando', 'perché')
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""
                subordinate_clauses.append({
                    "type": sub_clause_type,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause(sub_clause_tokens)
                })
                # BUG FIX: record the original doc-level indices of the
                # subtree tokens directly.  The previous code re-parsed the
                # clause text with nlp(...) and used the indices of that NEW
                # doc, which start at 0 and never line up with the original
                # document — so the main clause kept/dropped the wrong tokens.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        # Final structured result
        final_analysis = {
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses
        }
        return jsonify(final_analysis)
    except Exception as e:
        # Boundary handler: log the full traceback, return an opaque 500.
        print(f"An error occurred during analysis: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
# Local development entry point only; under Gunicorn this block never runs.
if __name__ == '__main__':
    # Default to port 8080 (not Flask's 5000) to avoid local conflicts;
    # the PORT environment variable overrides it.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)