# Flask + spaCy web API performing "analisi logica" (logical/grammatical
# analysis) of Italian sentences.
| import os | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| import spacy | |
# --- MODEL LOADING SECTION ---
# Load the Italian pipeline by its package name. This is robust because the
# model is managed as a dependency in requirements.txt, removing the need to
# manually place a model folder next to the script.
try:
    nlp = spacy.load("it_core_news_sm")
except OSError as exc:
    # Chain the original OSError so the underlying cause (missing package,
    # broken install, ...) remains visible in the traceback.
    raise RuntimeError(
        "Could not find the 'it_core_news_sm' model. "
        "Please ensure it is listed and installed from your requirements.txt file."
    ) from exc
# --- END SECTION ---
# Initialize the Flask application object; Gunicorn (or any WSGI server)
# imports this `app` directly.
app = Flask(__name__)
# Enable Cross-Origin Resource Sharing (CORS) so a browser frontend served
# from a different origin can call this API.
CORS(app)
# Map Universal Dependencies relation labels (as emitted by spaCy's Italian
# model) onto the Italian grammar-school terms used in "analisi logica".
DEP_MAP = {
    "nsubj": "Soggetto",
    "ROOT": "Predicato Verbale",
    "obj": "Complemento Oggetto",
    "iobj": "Complemento di Termine",
    "obl": "Complemento Indiretto",
    "nmod": "Complemento di Specificazione",
    "amod": "Attributo",
    "advmod": "Complemento Avverbiale",
    "appos": "Apposizione",
    # Clause-level relations map onto subordinate-clause types.
    "acl:relcl": "Proposizione Subordinata Relativa",
    "advcl": "Proposizione Subordinata Avverbiale",
    "ccomp": "Proposizione Subordinata Oggettiva",
    "csubj": "Proposizione Subordinata Soggettiva"
}
def get_complement_type(token):
    """Classify an oblique *token* into an Italian complement label.

    The label is derived from the (simple or articulated) preposition that
    governs the token, i.e. its UD ``case`` dependent.
    """

    def _case_text(node):
        # Lower-cased text of the node's first `case` dependent, "" if none.
        for dependent in node.children:
            if dependent.dep_ == "case":
                return dependent.text.lower()
        return ""

    preposition = _case_text(token)
    # Complex prepositional phrases sometimes attach the `case` marker to the
    # oblique head instead of this token; fall back to the head's marker.
    if not preposition and token.head.dep_ == 'obl':
        preposition = _case_text(token.head)

    if preposition in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        # "da" introduces the agent only in a passive clause.
        is_passive = any(c.dep_ == 'aux:pass' for c in token.head.children)
        return "Complemento d'Agente" if is_passive else "Complemento di Moto da Luogo"

    # Table-driven dispatch for the remaining preposition families.
    groups = (
        (("di", "del", "dello", "della", "dei", "degli", "delle"),
         "Complemento di Specificazione"),
        (("a", "al", "allo", "alla", "ai", "agli", "alle"),
         "Complemento di Termine"),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle"),
         "Complemento di Stato in Luogo"),
        (("con", "col", "coi"),
         "Complemento di Compagnia o Mezzo"),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"),
         "Complemento di Argomento o Luogo"),
        (("per",),
         "Complemento di Fine o Causa"),
        (("tra", "fra"),
         "Complemento di Luogo o Tempo (Partitivo)"),
    )
    for forms, label in groups:
        if preposition in forms:
            return label
    return "Complemento Indiretto"
def get_full_text(token):
    """Return *token* together with its function-word children as one string.

    Articles, adjectives, prepositions and adverbial modifiers attached to
    the token are included, in their original sentence order.
    """
    dependents = (c for c in token.children
                  if c.dep_ in ('det', 'amod', 'case', 'advmod'))
    # Sort by document index so the phrase reads in sentence order.
    ordered = sorted([token, *dependents], key=lambda t: t.i)
    return " ".join(t.text for t in ordered)
def build_phrases(tokens):
    """Group *tokens* into labelled grammatical phrases for a single clause.

    Returns a list of {"text": ..., "label": ...} dicts, one per recognized
    phrase, in sentence order.
    """
    # Function-word dependencies never head a phrase of their own; they are
    # absorbed into their head's phrase by get_full_text().
    non_head_deps = ('det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark')

    analysis_result = []
    for token in tokens:
        if token.dep_ in non_head_deps:
            continue
        dep = token.dep_

        if dep == "ROOT":
            copulas = [c for c in token.children if c.dep_ == 'cop']
            if copulas:
                # Nominal predicate (e.g. "è bello"): emit the copula and the
                # nominal part as two separate entries.
                analysis_result.append({
                    "text": copulas[0].text,
                    "label": "Copula"
                })
                analysis_result.append({
                    "text": get_full_text(token),
                    "label": "Parte Nominale del Predicato"
                })
            else:
                analysis_result.append({
                    "text": get_full_text(token),
                    "label": "Predicato Verbale"
                })
        elif dep == 'obl':
            # Oblique nominals get refined by their governing preposition.
            analysis_result.append({
                "text": get_full_text(token),
                "label": get_complement_type(token)
            })
        elif dep in DEP_MAP:
            analysis_result.append({
                "text": get_full_text(token),
                "label": DEP_MAP[dep]
            })
        # Any other dependency yields no labelled phrase, as before.
    return analysis_result
def analyze_clause(clause_tokens):
    """Run phrase analysis on one clause (main or subordinate).

    The introducing conjunction (`mark`, e.g. "che", "quando") belongs to the
    sentence structure rather than to the clause content, so it is dropped.
    """
    content_tokens = [tok for tok in clause_tokens if tok.dep_ != 'mark']
    return build_phrases(content_tokens)
@app.route("/")
def home():
    """GET / — liveness message pointing clients at the analysis endpoint.

    FIX: the function was never registered with Flask (missing @app.route),
    so the documented root endpoint was unreachable.
    """
    return jsonify({"message": "API is running. Use the /api/analyze endpoint with a POST request."})
@app.route("/api/analyze", methods=["POST"])
def analyze_sentence():
    """POST /api/analyze — full logical analysis of an Italian sentence.

    Expects JSON ``{"sentence": "..."}``. Returns the main-clause analysis
    plus one entry per subordinate clause, or a 400/500 error payload.

    FIX 1: registered the route (missing @app.route decorator).
    FIX 2: subordinate-token indices were previously computed by re-parsing
    the clause text with ``nlp(clause["text"])`` — those indices restart at 0
    in the new doc, so the *first* tokens of the sentence were wrongly
    excluded from the main clause. We now record the original doc indices of
    each subtree directly.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided in JSON payload"}), 400

        doc = nlp(data['sentence'])

        subordinate_clauses = []
        subordinate_indices = set()  # doc-level indices of all subordinate tokens
        # Identify subordinate clauses first.
        for token in doc:
            # Subordinate clauses are rooted at these dependency relations.
            if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
                # The token's subtree constitutes the subordinate clause.
                sub_clause_tokens = list(token.subtree)
                subordinate_indices.update(t.i for t in sub_clause_tokens)
                # Find the introducing element (e.g. 'che', 'quando', 'perché').
                marker = [child for child in token.children if child.dep_ == 'mark']
                subordinate_clauses.append({
                    "type": DEP_MAP.get(token.dep_, "Proposizione Subordinata"),
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": marker[0].text if marker else "",
                    "analysis": analyze_clause(sub_clause_tokens)
                })

        # Tokens outside every subordinate subtree form the main clause.
        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        final_analysis = {
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses
        }
        return jsonify(final_analysis)
    except Exception as e:
        # Boundary handler: log the full traceback server-side and return an
        # opaque 500 to the client.
        print(f"An error occurred during analysis: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
# Local development entry point only: production deployments (e.g. Gunicorn)
# import `app` directly and never execute this block.
if __name__ == '__main__':
    # Default to 8080 instead of Flask's usual 5000 to avoid local conflicts;
    # the PORT environment variable overrides it.
    port = int(os.environ.get("PORT", 8080))
    app.run(host="0.0.0.0", port=port, debug=True)