# app.py
import os
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy
# --- MODEL LOADING SECTION ---
# Loads the pruned Italian spaCy model from the 'it_core_news_pruned'
# directory located in the same folder as this 'app.py' script.
#
# The path is computed *before* the try-block: the original code assigned
# model_path inside the try, so a failure on an earlier line would have
# raised NameError in the handler instead of the intended RuntimeError.
base_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(base_dir, 'it_core_news_pruned')
try:
    # Load the model from the specified path
    nlp = spacy.load(model_path)
except Exception as e:
    # Chain the original exception ('from e') so the full traceback of the
    # underlying loading failure is preserved alongside the friendly message.
    raise RuntimeError(f"Error loading spaCy model from {model_path}: {e}") from e
# --- END SECTION ---
# Initialize the Flask app
app = Flask(__name__)
# Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
# NOTE(review): this enables CORS for *all* origins and routes — fine for a
# demo; restrict with CORS(app, origins=[...]) before exposing publicly.
CORS(app)
# Translates Universal Dependencies relation labels (as emitted by the
# Italian spaCy model) into the Italian grammatical-analysis labels that
# the API returns to the frontend. Looked up via .get(), so entry order
# carries no meaning; entries are grouped here by grammatical role.
DEP_MAP = {
    # Core arguments
    "nsubj": "Soggetto",
    "obj": "Complemento Oggetto",
    "iobj": "Complemento di Termine",
    # Predicate and its parts
    "ROOT": "Predicato Verbale",
    "cop": "Copula (parte del Predicato Nominale)",
    "aux": "Ausiliare (parte del Predicato)",
    # Oblique / nominal modifiers
    "obl": "Complemento Indiretto",
    "nmod": "Complemento di Specificazione",
    "amod": "Attributo",
    "advmod": "Complemento Avverbiale",
    "appos": "Apposizione",
    # Function words
    "case": "Preposizione (introduce un complemento)",
}
def get_complement_type(token):
    """Classify an oblique ('obl') token into an Italian complement type.

    The preposition is taken from the token's own 'case' child; when the
    token has none and its head is also 'obl' (multi-word complements),
    the head's 'case' child is used instead.

    Args:
        token: a spaCy Token (expected dep_ == 'obl').

    Returns:
        str: the Italian complement label; "Complemento Indiretto" when
        no known preposition is found.
    """
    def _find_case(tok):
        # First 'case' dependent of tok, lower-cased; "" when absent.
        for child in tok.children:
            if child.dep_ == "case":
                return child.text.lower()
        return ""

    preposition = _find_case(token)
    if not preposition and token.head.dep_ == 'obl':
        # Multi-word complements: the preposition may attach to the head.
        preposition = _find_case(token.head)

    # Each preposition family — simple, articulated, and elided forms —
    # maps to one complement label. Elided forms ("d'", "all'", "nell'",
    # ...) appear before vowel-initial words; the original code missed
    # them, so e.g. "all'amico" fell through to the generic default.
    # Checked in order, matching the original if-chain.
    prep_labels = (
        (("di", "del", "dello", "della", "dei", "degli", "delle",
          "d'", "dell'"), "Complemento di Specificazione"),
        (("a", "al", "allo", "alla", "ai", "agli", "alle",
          "all'"), "Complemento di Termine"),
        (("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle",
          "dall'"), "Complemento (introdotto da 'da')"),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle",
          "nell'"), "Complemento di Luogo/Tempo"),
        (("con", "col", "coi"), "Complemento di Compagnia/Mezzo"),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle",
          "sull'"), "Complemento di Argomento/Luogo"),
        (("per",), "Complemento di Fine/Causa"),
        (("tra", "fra"), "Complemento di Luogo/Tempo (Partitivo)"),
    )
    for preps, label in prep_labels:
        if preposition in preps:
            return label
    return "Complemento Indiretto"
@app.route("/")
def home():
    """Health-check endpoint pointing clients at the analysis route."""
    payload = {"message": "API is running. Use the /api/analyze endpoint."}
    return jsonify(payload)
def _token_label(token):
    """Return the Italian analysis label for *token*, or None to skip it."""
    dep = token.dep_
    if dep == "ROOT":
        # A copula child (e.g. "è bello") marks a nominal predicate.
        is_nominal = any(c.dep_ == 'cop' for c in token.children)
        return "Predicato Nominale" if is_nominal else "Predicato Verbale"
    if dep == 'obl':
        return get_complement_type(token)
    return DEP_MAP.get(dep)


def _merge_phrases(analysis):
    """Collapse consecutive entries sharing a label and head into phrases.

    Args:
        analysis: non-empty list of {'text', 'label', 'head'} dicts in
            sentence order.

    Returns:
        list of {'text', 'label'} dicts with merged phrase text.
    """
    merged = []
    current = analysis[0]
    for entry in analysis[1:]:
        if entry['label'] == current['label'] and entry['head'] == current['head']:
            current['text'] += " " + entry['text']
        else:
            merged.append({'text': current['text'], 'label': current['label']})
            current = entry
    merged.append({'text': current['text'], 'label': current['label']})
    return merged


@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """POST {'sentence': str} -> JSON list of {'text', 'label'} phrases.

    Returns 400 when no sentence is supplied, 500 on any internal error.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided"}), 400
        doc = nlp(data['sentence'])
        # Token-based analysis: label each content token, skipping
        # punctuation and purely functional words (prepositions,
        # determiners, auxiliaries, subordinating markers), which belong
        # to the phrases around them rather than standing alone.
        analysis = []
        for token in doc:
            if token.is_punct or token.dep_ in ['case', 'det', 'aux', 'mark']:
                continue
            label = _token_label(token)
            if label:
                analysis.append({"text": token.text, "label": label, "head": token.head.text})
        if not analysis:
            return jsonify([])
        return jsonify(_merge_phrases(analysis))
    except Exception as e:
        # Boundary handler: log the real error server-side, return an
        # opaque message to the client.
        print(f"An error occurred: {e}")
        return jsonify({"error": "An internal error occurred. See server logs for details."}), 500
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the server is reachable from outside the host.
    # The 'PORT' environment variable (set by the hosting platform)
    # overrides the local-testing default of 10000.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 10000)))