devusman committed on
Commit
4f5a1e9
·
1 Parent(s): 870c988

updated this

Browse files
Files changed (1) hide show
  1. app.py +142 -63
app.py CHANGED
@@ -14,10 +14,12 @@ except OSError:
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
 
17
  # --- END SECTION ---
18
 
19
  # Initialize the Flask app
20
  app = Flask(__name__)
 
21
  # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
22
  CORS(app)
23
 
@@ -32,107 +34,184 @@ DEP_MAP = {
32
  "amod": "Attributo",
33
  "advmod": "Complemento Avverbiale",
34
  "appos": "Apposizione",
35
- "cop": "Copula (parte del Predicato Nominale)",
36
- "aux": "Ausiliare (parte del Predicato)",
37
- "case": "Preposizione (introduce un complemento)"
 
38
  }
39
 
40
  def get_complement_type(token):
41
  """Refine the complement type based on the preceding preposition."""
42
  preposition = ""
43
- # Look for a preposition attached to this token
44
  for child in token.children:
45
  if child.dep_ == "case":
46
  preposition = child.text.lower()
47
  break
48
-
49
- # If no preposition found, check if the token's head has one (for multi-word complements)
50
- if not preposition:
51
- if token.head.dep_ == 'obl':
52
- for child in token.head.children:
53
- if child.dep_ == "case":
54
- preposition = child.text.lower()
55
- break
56
 
57
  if preposition in ["di", "del", "dello", "della", "dei", "degli", "delle"]:
58
  return "Complemento di Specificazione"
59
  if preposition in ["a", "al", "allo", "alla", "ai", "agli", "alle"]:
60
  return "Complemento di Termine"
61
  if preposition in ["da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"]:
62
- return "Complemento (introdotto da 'da')"
 
 
 
63
  if preposition in ["in", "nel", "nello", "nella", "nei", "negli", "nelle"]:
64
- return "Complemento di Luogo/Tempo"
65
  if preposition in ["con", "col", "coi"]:
66
- return "Complemento di Compagnia/Mezzo"
67
  if preposition in ["su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"]:
68
- return "Complemento di Argomento/Luogo"
69
  if preposition in ["per"]:
70
- return "Complemento di Fine/Causa"
71
  if preposition in ["tra", "fra"]:
72
- return "Complemento di Luogo/Tempo (Partitivo)"
73
 
74
  return "Complemento Indiretto"
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  @app.route("/")
77
  def home():
78
- return jsonify({"message": "API is running. Use the /api/analyze endpoint."})
 
79
 
80
  @app.route('/api/analyze', methods=['POST'])
81
  def analyze_sentence():
 
82
  try:
83
  data = request.get_json()
84
  if not data or 'sentence' not in data:
85
- return jsonify({"error": "Sentence not provided"}), 400
86
 
87
  sentence = data['sentence']
88
  doc = nlp(sentence)
89
 
90
- # This token-based analysis logic is more robust
91
- analysis = []
 
 
92
  for token in doc:
93
- if token.is_punct or token.dep_ in ['case', 'det', 'aux', 'mark']:
94
- continue
95
-
96
- # Determine the label for the token
97
- dep = token.dep_
98
- label = ""
99
-
100
- if dep == "ROOT":
101
- # Check for nominal predicate (e.g., "è bello")
102
- is_nominal = any(c.dep_ == 'cop' for c in token.children)
103
- label = "Predicato Nominale" if is_nominal else "Predicato Verbale"
104
- elif dep == 'obl':
105
- label = get_complement_type(token)
106
- else:
107
- label = DEP_MAP.get(dep)
108
-
109
- if label:
110
- analysis.append({ "text": token.text, "label": label, "head": token.head.text })
111
-
112
- # Simple merging logic
113
- if not analysis:
114
- return jsonify([])
115
-
116
- final_analysis = []
117
- current_phrase = analysis[0]
118
-
119
- for i in range(1, len(analysis)):
120
- # If the current token belongs to the same phrase (same head and label), merge them
121
- if analysis[i]['label'] == current_phrase['label'] and analysis[i]['head'] == current_phrase['head']:
122
- current_phrase['text'] += " " + analysis[i]['text']
123
- else:
124
- final_analysis.append({'text': current_phrase['text'], 'label': current_phrase['label']})
125
- current_phrase = analysis[i]
126
 
127
- final_analysis.append({'text': current_phrase['text'], 'label': current_phrase['label']})
128
-
129
  return jsonify(final_analysis)
130
 
131
  except Exception as e:
132
  # Log the full error to the console for debugging
133
- print(f"An error occurred: {e}")
134
- return jsonify({"error": "An internal error occurred. See server logs for details."}), 500
135
-
136
- # The __main__ block has been removed because it is not used by Gunicorn.
137
- # The Dockerfile's CMD instruction is the single source of truth for running the app,
138
- # which prevents confusion about which host and port are being used.
 
 
 
 
 
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
17
+
18
  # --- END SECTION ---
19
 
20
  # Initialize the Flask app
21
  app = Flask(__name__)
22
+
23
  # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
24
  CORS(app)
25
 
 
34
  "amod": "Attributo",
35
  "advmod": "Complemento Avverbiale",
36
  "appos": "Apposizione",
37
+ "acl:relcl": "Proposizione Subordinata Relativa",
38
+ "advcl": "Proposizione Subordinata Avverbiale",
39
+ "ccomp": "Proposizione Subordinata Oggettiva",
40
+ "csubj": "Proposizione Subordinata Soggettiva"
41
  }
42
 
43
def get_complement_type(token):
    """Classify an oblique ('obl') token as a specific Italian complement.

    The decision is driven by the preposition attached to the token via a
    "case" dependency.  When the token itself carries no "case" child, the
    head token's children are inspected as a fallback (this covers complex
    prepositional phrases where the marker attaches one level up).

    Parameters
    ----------
    token : spaCy ``Token`` (duck-typed: uses ``.children``, ``.head``,
        ``.dep_`` and ``.text`` only).

    Returns
    -------
    str
        The Italian grammatical label; "Complemento Indiretto" when the
        preposition is missing or unrecognised.
    """
    preposition = ""
    # Preposition (`case`) attached directly to this token.
    for child in token.children:
        if child.dep_ == "case":
            preposition = child.text.lower()
            break

    # Fallback: no preposition among the children — look at the head's
    # children instead (helps with complex prepositional phrases).
    if not preposition and token.head.dep_ == 'obl':
        for child in token.head.children:
            if child.dep_ == "case":
                preposition = child.text.lower()
                break

    # "da" is special-cased: in a passive sentence it marks the agent.
    if preposition in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        if any(child.dep_ == 'aux:pass' for child in token.head.children):
            return "Complemento d'Agente"
        return "Complemento di Moto da Luogo"

    # Table-driven lookup for the remaining simple preposition groups.
    preposition_labels = (
        (("di", "del", "dello", "della", "dei", "degli", "delle"),
         "Complemento di Specificazione"),
        (("a", "al", "allo", "alla", "ai", "agli", "alle"),
         "Complemento di Termine"),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle"),
         "Complemento di Stato in Luogo"),
        (("con", "col", "coi"),
         "Complemento di Compagnia o Mezzo"),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"),
         "Complemento di Argomento o Luogo"),
        (("per",),
         "Complemento di Fine o Causa"),
        (("tra", "fra"),
         "Complemento di Luogo o Tempo (Partitivo)"),
    )
    for group, label in preposition_labels:
        if preposition in group:
            return label

    return "Complemento Indiretto"
81
 
82
def get_full_text(token):
    """Build the surface text of the phrase headed by *token*.

    Includes the head plus its DIRECT modifier children (articles,
    adjectives, prepositions, adverbs); deeper descendants are not
    collected.  Tokens are emitted in original sentence order.
    """
    modifier_deps = ('det', 'amod', 'case', 'advmod')
    members = [child for child in token.children if child.dep_ in modifier_deps]
    members.append(token)
    # Sort by document index so the phrase reads in original word order.
    ordered = sorted(members, key=lambda tok: tok.i)
    return " ".join(tok.text for tok in ordered)
90
+
91
def build_phrases(tokens):
    """Merge tokens into labelled grammatical phrases.

    Parameters
    ----------
    tokens : iterable of spaCy tokens belonging to one clause.

    Returns
    -------
    list[dict]
        Entries of the form ``{"text": ..., "label": ...}`` in token
        order.  Function words (articles, prepositions, auxiliaries,
        copulas, markers) and punctuation never head an entry; tokens
        whose dependency has no known label are silently skipped.
    """
    # Dependencies that can never head a phrase of their own.
    function_deps = ('det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark')

    # NOTE: the previous implementation kept a `processed_indices` set
    # keyed by token.i, but the keys were already unique so the skip
    # branch could never fire — that dead bookkeeping is removed here.
    analysis_result = []
    for token in tokens:
        if token.dep_ in function_deps:
            continue

        dep = token.dep_
        text = get_full_text(token)

        if dep == "ROOT":
            # A 'cop' child means a nominal predicate (e.g. "è bello"):
            # report the copula and the nominal part as separate entries.
            copula = next((c for c in token.children if c.dep_ == 'cop'), None)
            if copula is not None:
                analysis_result.append({"text": copula.text, "label": "Copula"})
                analysis_result.append({
                    "text": text,
                    "label": "Parte Nominale del Predicato"
                })
            else:
                analysis_result.append({"text": text, "label": "Predicato Verbale"})
        elif dep == 'obl':
            analysis_result.append({"text": text, "label": get_complement_type(token)})
        elif dep in DEP_MAP:
            analysis_result.append({"text": text, "label": DEP_MAP[dep]})

    return analysis_result
144
+
145
+
146
def analyze_clause(clause_tokens):
    """Run phrase analysis over one clause (main or subordinate).

    The introducing conjunction (dependency ``mark``, e.g. "che",
    "quando") is dropped first: it belongs to the sentence structure,
    not to the clause content itself.
    """
    return build_phrases([tok for tok in clause_tokens if tok.dep_ != 'mark'])
151
+
152
+
153
@app.route("/")
def home():
    """Landing endpoint: confirms the API is up and points at /api/analyze."""
    payload = {"message": "API is running. Use the /api/analyze endpoint with a POST request."}
    return jsonify(payload)
157
 
158
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Analyze an Italian sentence into main and subordinate clauses.

    Expects a JSON payload ``{"sentence": "..."}``.

    Returns
    -------
    200: ``{"main_clause": {...}, "subordinate_clauses": [...]}``
    400: ``{"error": ...}`` when the payload is missing or malformed.
    500: ``{"error": ...}`` on any unexpected failure (logged server-side).
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided in JSON payload"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        # Indices (into `doc`) of every token claimed by a subordinate clause.
        subordinate_indices = set()

        # Identify subordinate clauses first.
        for token in doc:
            # Subordinate clauses are identified by specific dependency relations.
            if token.dep_ in ("acl:relcl", "advcl", "ccomp", "csubj"):
                # The subtree of the token constitutes the subordinate clause.
                sub_clause_tokens = list(token.subtree)

                # BUGFIX: record the ORIGINAL doc indices of the subtree.
                # The previous code re-parsed the clause text with nlp() and
                # used token.i from that fresh parse; those indices start at 0
                # in the new Doc and do not correspond to positions in `doc`,
                # so the main clause was computed from the wrong token set.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

                sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")

                # Find the introducing element (e.g. 'che', 'quando', 'perché').
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""

                subordinate_clauses.append({
                    "type": sub_clause_type,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause(sub_clause_tokens)
                })

        # Tokens not claimed by any subordinate clause form the main clause.
        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        final_analysis = {
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses
        }

        return jsonify(final_analysis)

    except Exception as e:
        # Log the full error (with traceback) to the console for debugging.
        print(f"An error occurred during analysis: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
212
+
213
# The following block is for local development and testing;
# it won't be used when deployed with Gunicorn.
if __name__ == '__main__':
    # Local import: `os` is not visibly imported at module top in this
    # chunk — importing here is harmless if it already is. TODO confirm
    # the top-of-file imports.
    import os

    # Use a port that is not the default 5000 to avoid conflicts.
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # Werkzeug debugger on all interfaces — never run this configuration
    # in production (Gunicorn ignores this block entirely).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)