# analysis_tool / app.py
# (Hugging Face Space file — author: devusman, revision 4f5a1e9, 8.64 kB)
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy

# --- MODEL LOADING ---
# The Italian model is loaded by its package name: it is installed as a
# regular dependency from requirements.txt, so no model folder has to be
# placed next to this script.
try:
    nlp = spacy.load("it_core_news_sm")
except OSError:
    raise RuntimeError(
        "Could not find the 'it_core_news_sm' model. "
        "Please ensure it is listed and installed from your requirements.txt file."
    )

# Flask application, with CORS enabled so a frontend served from another
# origin can call this API.
app = Flask(__name__)
CORS(app)
# A mapping from spaCy dependency labels to our logical analysis labels
# Translation table: Universal Dependencies relation labels (as produced by
# spaCy's Italian parser) -> the traditional Italian "analisi logica" role
# names shown to the user.
DEP_MAP = {
    "nsubj": "Soggetto",
    "ROOT": "Predicato Verbale",
    "obj": "Complemento Oggetto",
    "iobj": "Complemento di Termine",
    "obl": "Complemento Indiretto",
    "nmod": "Complemento di Specificazione",
    "amod": "Attributo",
    "advmod": "Complemento Avverbiale",
    "appos": "Apposizione",
    "acl:relcl": "Proposizione Subordinata Relativa",
    "advcl": "Proposizione Subordinata Avverbiale",
    "ccomp": "Proposizione Subordinata Oggettiva",
    "csubj": "Proposizione Subordinata Soggettiva",
}
def get_complement_type(token):
    """Classify an oblique complement by the preposition that introduces it.

    Looks for a ``case`` dependent of *token* (or, failing that, of its
    head when the head itself is an ``obl`` — this covers complex
    prepositional phrases) and maps the preposition to the traditional
    Italian complement name.  Falls back to the generic
    "Complemento Indiretto" when no preposition is recognised.
    """
    prep = next((c.text.lower() for c in token.children if c.dep_ == "case"), "")
    if not prep and token.head.dep_ == 'obl':
        prep = next(
            (c.text.lower() for c in token.head.children if c.dep_ == "case"), ""
        )

    if prep in ("di", "del", "dello", "della", "dei", "degli", "delle"):
        return "Complemento di Specificazione"
    if prep in ("a", "al", "allo", "alla", "ai", "agli", "alle"):
        return "Complemento di Termine"
    if prep in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        # In a passive sentence "da" introduces the agent; otherwise it is
        # read as motion-from-place.
        is_passive = any(c.dep_ == 'aux:pass' for c in token.head.children)
        return "Complemento d'Agente" if is_passive else "Complemento di Moto da Luogo"
    if prep in ("in", "nel", "nello", "nella", "nei", "negli", "nelle"):
        return "Complemento di Stato in Luogo"
    if prep in ("con", "col", "coi"):
        return "Complemento di Compagnia o Mezzo"
    if prep in ("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"):
        return "Complemento di Argomento o Luogo"
    if prep == "per":
        return "Complemento di Fine o Causa"
    if prep in ("tra", "fra"):
        return "Complemento di Luogo o Tempo (Partitivo)"
    return "Complemento Indiretto"
def get_full_text(token):
    """Return *token* and its function-word dependents as one phrase string.

    Only direct children carrying the ``det``, ``amod``, ``case`` or
    ``advmod`` relation (articles, adjectives, prepositions, adverbs) are
    folded in; the pieces are sorted by document position so the phrase
    reads in its original word order.
    """
    keep = ("det", "amod", "case", "advmod")
    parts = [token, *(child for child in token.children if child.dep_ in keep)]
    parts.sort(key=lambda t: t.i)
    return " ".join(part.text for part in parts)
def build_phrases(tokens):
    """Group *tokens* into labelled phrases for the logical analysis.

    Function words (articles, prepositions, auxiliaries, copulas, markers,
    punctuation) are absorbed into their head's phrase; every remaining
    content head is then assigned a role label from its dependency
    relation.  Returns a list of ``{"text": ..., "label": ...}`` dicts.
    """
    FUNCTION_DEPS = ('det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark')

    # Pass 1: one phrase entry per content head, keyed by doc position.
    heads = {}
    for tok in tokens:
        if tok.dep_ not in FUNCTION_DEPS:
            heads[tok.i] = {
                "text": get_full_text(tok),
                "label": "",  # assigned in pass 2
                "token": tok,
            }

    # Pass 2: assign a role label to each phrase.
    analysis = []
    seen = set()
    for idx, entry in heads.items():
        if idx in seen:
            continue
        tok = entry['token']
        rel = tok.dep_
        label = ""
        if rel == "ROOT":
            # A copular child means a nominal predicate (e.g. "è bello"):
            # emit the copula and the nominal part as two separate items.
            copulas = [c for c in tok.children if c.dep_ == 'cop']
            if copulas:
                analysis.append({
                    "text": copulas[0].text,
                    "label": "Copula"
                })
                analysis.append({
                    "text": get_full_text(tok),
                    "label": "Parte Nominale del Predicato"
                })
            else:
                label = "Predicato Verbale"
        elif rel == 'obl':
            # Obliques get a finer-grained label from their preposition.
            label = get_complement_type(tok)
        elif rel in DEP_MAP:
            label = DEP_MAP[rel]
        if label:
            analysis.append({"text": entry['text'], "label": label})
        seen.add(idx)
    return analysis
def analyze_clause(clause_tokens):
    """Run the phrase-level analysis on a single clause.

    The introducing conjunction (dependency ``mark``, e.g. "che") belongs
    to the sentence structure rather than to the clause itself, so it is
    dropped before the remaining tokens are grouped into phrases.
    """
    relevant = [tok for tok in clause_tokens if tok.dep_ != 'mark']
    return build_phrases(relevant)
@app.route("/")
def home():
    """Root endpoint: confirm the API is up and point at /api/analyze."""
    payload = {"message": "API is running. Use the /api/analyze endpoint with a POST request."}
    return jsonify(payload)
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Main endpoint: receive a sentence and return its logical analysis.

    Expects a JSON body like ``{"sentence": "..."}``.  Returns the analysis
    of the main clause plus one entry per detected subordinate clause;
    responds 400 when the payload is missing and 500 on internal failure.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided in JSON payload"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        subordinate_indices = set()

        # Identify subordinate clauses first; whatever is left over
        # constitutes the main clause.
        for token in doc:
            # Subordinate clauses are identified by specific dependency relations
            if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
                # The subtree of the token constitutes the subordinate clause
                sub_clause_tokens = list(token.subtree)
                sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")
                # Find the introducing element (e.g., 'che', 'quando', 'perché')
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""
                subordinate_clauses.append({
                    "type": sub_clause_type,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause(sub_clause_tokens)
                })
                # BUG FIX: record the original doc-level indices of the
                # subtree tokens directly.  The previous code re-parsed the
                # clause text with nlp(...) and used the indices of that NEW
                # doc, which start at 0 and never line up with the original
                # document — so the main clause kept/dropped the wrong tokens.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        # Final structured result
        final_analysis = {
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses
        }
        return jsonify(final_analysis)
    except Exception as e:
        # Boundary handler: log the full traceback, return an opaque 500.
        print(f"An error occurred during analysis: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
# Local development entry point only; under Gunicorn this block never runs.
if __name__ == '__main__':
    # Default to port 8080 (not Flask's 5000) to avoid local conflicts;
    # the PORT environment variable overrides it.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)