# app.py
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy
# --- MODEL LOADING SECTION ---
# Loads the pruned Italian spaCy model from the 'it_core_news_pruned'
# directory located in the same folder as this 'app.py' script.
#
# The path is computed *before* the try-block: the original code assigned
# model_path inside the try, so a failure on an earlier line would have
# raised NameError in the handler instead of the intended RuntimeError.
base_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(base_dir, 'it_core_news_pruned')
try:
    # Load the model from the specified path
    nlp = spacy.load(model_path)
except Exception as e:
    # Chain the original exception ('from e') so the full traceback of the
    # underlying loading failure is preserved alongside the friendly message.
    raise RuntimeError(f"Error loading spaCy model from {model_path}: {e}") from e
# --- END SECTION ---
# Initialize the Flask app
app = Flask(__name__)
# Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
# NOTE(review): this enables CORS for *all* origins and routes — fine for a
# demo; restrict with CORS(app, origins=[...]) before exposing publicly.
CORS(app)
# Translates Universal Dependencies relation labels (as emitted by the
# Italian spaCy model) into the Italian grammatical-analysis labels that
# the API returns to the frontend. Looked up via .get(), so entry order
# carries no meaning; entries are grouped here by grammatical role.
DEP_MAP = {
    # Core arguments
    "nsubj": "Soggetto",
    "obj": "Complemento Oggetto",
    "iobj": "Complemento di Termine",
    # Predicate and its parts
    "ROOT": "Predicato Verbale",
    "cop": "Copula (parte del Predicato Nominale)",
    "aux": "Ausiliare (parte del Predicato)",
    # Oblique / nominal modifiers
    "obl": "Complemento Indiretto",
    "nmod": "Complemento di Specificazione",
    "amod": "Attributo",
    "advmod": "Complemento Avverbiale",
    "appos": "Apposizione",
    # Function words
    "case": "Preposizione (introduce un complemento)",
}
def get_complement_type(token):
    """Classify an oblique ('obl') token into an Italian complement type.

    The preposition is taken from the token's own 'case' child; when the
    token has none and its head is also 'obl' (multi-word complements),
    the head's 'case' child is used instead.

    Args:
        token: a spaCy Token (expected dep_ == 'obl').

    Returns:
        str: the Italian complement label; "Complemento Indiretto" when
        no known preposition is found.
    """
    def _find_case(tok):
        # First 'case' dependent of tok, lower-cased; "" when absent.
        for child in tok.children:
            if child.dep_ == "case":
                return child.text.lower()
        return ""

    preposition = _find_case(token)
    if not preposition and token.head.dep_ == 'obl':
        # Multi-word complements: the preposition may attach to the head.
        preposition = _find_case(token.head)

    # Each preposition family — simple, articulated, and elided forms —
    # maps to one complement label. Elided forms ("d'", "all'", "nell'",
    # ...) appear before vowel-initial words; the original code missed
    # them, so e.g. "all'amico" fell through to the generic default.
    # Checked in order, matching the original if-chain.
    prep_labels = (
        (("di", "del", "dello", "della", "dei", "degli", "delle",
          "d'", "dell'"), "Complemento di Specificazione"),
        (("a", "al", "allo", "alla", "ai", "agli", "alle",
          "all'"), "Complemento di Termine"),
        (("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle",
          "dall'"), "Complemento (introdotto da 'da')"),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle",
          "nell'"), "Complemento di Luogo/Tempo"),
        (("con", "col", "coi"), "Complemento di Compagnia/Mezzo"),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle",
          "sull'"), "Complemento di Argomento/Luogo"),
        (("per",), "Complemento di Fine/Causa"),
        (("tra", "fra"), "Complemento di Luogo/Tempo (Partitivo)"),
    )
    for preps, label in prep_labels:
        if preposition in preps:
            return label
    return "Complemento Indiretto"
@app.route("/")
def home():
    """Health-check endpoint pointing clients at the analysis route."""
    payload = {"message": "API is running. Use the /api/analyze endpoint."}
    return jsonify(payload)
def _token_label(token):
    """Return the Italian analysis label for *token*, or None to skip it."""
    dep = token.dep_
    if dep == "ROOT":
        # A copula child (e.g. "è bello") marks a nominal predicate.
        is_nominal = any(c.dep_ == 'cop' for c in token.children)
        return "Predicato Nominale" if is_nominal else "Predicato Verbale"
    if dep == 'obl':
        return get_complement_type(token)
    return DEP_MAP.get(dep)


def _merge_phrases(analysis):
    """Collapse consecutive entries sharing a label and head into phrases.

    Args:
        analysis: non-empty list of {'text', 'label', 'head'} dicts in
            sentence order.

    Returns:
        list of {'text', 'label'} dicts with merged phrase text.
    """
    merged = []
    current = analysis[0]
    for entry in analysis[1:]:
        if entry['label'] == current['label'] and entry['head'] == current['head']:
            current['text'] += " " + entry['text']
        else:
            merged.append({'text': current['text'], 'label': current['label']})
            current = entry
    merged.append({'text': current['text'], 'label': current['label']})
    return merged


@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """POST {'sentence': str} -> JSON list of {'text', 'label'} phrases.

    Returns 400 when no sentence is supplied, 500 on any internal error.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided"}), 400
        doc = nlp(data['sentence'])
        # Token-based analysis: label each content token, skipping
        # punctuation and purely functional words (prepositions,
        # determiners, auxiliaries, subordinating markers), which belong
        # to the phrases around them rather than standing alone.
        analysis = []
        for token in doc:
            if token.is_punct or token.dep_ in ['case', 'det', 'aux', 'mark']:
                continue
            label = _token_label(token)
            if label:
                analysis.append({"text": token.text, "label": label, "head": token.head.text})
        if not analysis:
            return jsonify([])
        return jsonify(_merge_phrases(analysis))
    except Exception as e:
        # Boundary handler: log the real error server-side, return an
        # opaque message to the client.
        print(f"An error occurred: {e}")
        return jsonify({"error": "An internal error occurred. See server logs for details."}), 500
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the server is reachable from outside the host.
    # The 'PORT' environment variable (set by the hosting platform)
    # overrides the local-testing default of 10000.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 10000)))