devusman committed on
Commit
4f5a1e9
·
1 Parent(s): 870c988

updated this

Browse files
Files changed (1) hide show
  1. app.py +142 -63
app.py CHANGED
@@ -14,10 +14,12 @@ except OSError:
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
 
17
  # --- END SECTION ---
18
 
19
  # Initialize the Flask app
20
  app = Flask(__name__)
 
21
  # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
22
  CORS(app)
23
 
@@ -32,107 +34,184 @@ DEP_MAP = {
32
  "amod": "Attributo",
33
  "advmod": "Complemento Avverbiale",
34
  "appos": "Apposizione",
35
- "cop": "Copula (parte del Predicato Nominale)",
36
- "aux": "Ausiliare (parte del Predicato)",
37
- "case": "Preposizione (introduce un complemento)"
 
38
  }
39
 
40
  def get_complement_type(token):
41
  """Refine the complement type based on the preceding preposition."""
42
  preposition = ""
43
- # Look for a preposition attached to this token
44
  for child in token.children:
45
  if child.dep_ == "case":
46
  preposition = child.text.lower()
47
  break
48
-
49
- # If no preposition found, check if the token's head has one (for multi-word complements)
50
- if not preposition:
51
- if token.head.dep_ == 'obl':
52
- for child in token.head.children:
53
- if child.dep_ == "case":
54
- preposition = child.text.lower()
55
- break
56
 
57
  if preposition in ["di", "del", "dello", "della", "dei", "degli", "delle"]:
58
  return "Complemento di Specificazione"
59
  if preposition in ["a", "al", "allo", "alla", "ai", "agli", "alle"]:
60
  return "Complemento di Termine"
61
  if preposition in ["da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"]:
62
- return "Complemento (introdotto da 'da')"
 
 
 
63
  if preposition in ["in", "nel", "nello", "nella", "nei", "negli", "nelle"]:
64
- return "Complemento di Luogo/Tempo"
65
  if preposition in ["con", "col", "coi"]:
66
- return "Complemento di Compagnia/Mezzo"
67
  if preposition in ["su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"]:
68
- return "Complemento di Argomento/Luogo"
69
  if preposition in ["per"]:
70
- return "Complemento di Fine/Causa"
71
  if preposition in ["tra", "fra"]:
72
- return "Complemento di Luogo/Tempo (Partitivo)"
73
 
74
  return "Complemento Indiretto"
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  @app.route("/")
77
  def home():
78
- return jsonify({"message": "API is running. Use the /api/analyze endpoint."})
 
79
 
80
  @app.route('/api/analyze', methods=['POST'])
81
  def analyze_sentence():
 
82
  try:
83
  data = request.get_json()
84
  if not data or 'sentence' not in data:
85
- return jsonify({"error": "Sentence not provided"}), 400
86
 
87
  sentence = data['sentence']
88
  doc = nlp(sentence)
89
 
90
- # This token-based analysis logic is more robust
91
- analysis = []
 
 
92
  for token in doc:
93
- if token.is_punct or token.dep_ in ['case', 'det', 'aux', 'mark']:
94
- continue
95
-
96
- # Determine the label for the token
97
- dep = token.dep_
98
- label = ""
99
-
100
- if dep == "ROOT":
101
- # Check for nominal predicate (e.g., "è bello")
102
- is_nominal = any(c.dep_ == 'cop' for c in token.children)
103
- label = "Predicato Nominale" if is_nominal else "Predicato Verbale"
104
- elif dep == 'obl':
105
- label = get_complement_type(token)
106
- else:
107
- label = DEP_MAP.get(dep)
108
-
109
- if label:
110
- analysis.append({ "text": token.text, "label": label, "head": token.head.text })
111
-
112
- # Simple merging logic
113
- if not analysis:
114
- return jsonify([])
115
-
116
- final_analysis = []
117
- current_phrase = analysis[0]
118
-
119
- for i in range(1, len(analysis)):
120
- # If the current token belongs to the same phrase (same head and label), merge them
121
- if analysis[i]['label'] == current_phrase['label'] and analysis[i]['head'] == current_phrase['head']:
122
- current_phrase['text'] += " " + analysis[i]['text']
123
- else:
124
- final_analysis.append({'text': current_phrase['text'], 'label': current_phrase['label']})
125
- current_phrase = analysis[i]
126
 
127
- final_analysis.append({'text': current_phrase['text'], 'label': current_phrase['label']})
128
-
129
  return jsonify(final_analysis)
130
 
131
  except Exception as e:
132
  # Log the full error to the console for debugging
133
- print(f"An error occurred: {e}")
134
- return jsonify({"error": "An internal error occurred. See server logs for details."}), 500
135
-
136
- # The __main__ block has been removed because it is not used by Gunicorn.
137
- # The Dockerfile's CMD instruction is the single source of truth for running the app,
138
- # which prevents confusion about which host and port are being used.
 
 
 
 
 
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
17
+
18
  # --- END SECTION ---
19
 
20
  # Initialize the Flask app
21
  app = Flask(__name__)
22
+
23
  # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
24
  CORS(app)
25
 
 
34
  "amod": "Attributo",
35
  "advmod": "Complemento Avverbiale",
36
  "appos": "Apposizione",
37
+ "acl:relcl": "Proposizione Subordinata Relativa",
38
+ "advcl": "Proposizione Subordinata Avverbiale",
39
+ "ccomp": "Proposizione Subordinata Oggettiva",
40
+ "csubj": "Proposizione Subordinata Soggettiva"
41
  }
42
 
43
def get_complement_type(token):
    """Classify an oblique ('obl') token as a specific Italian complement.

    The decision is driven by the preposition attached to the token via a
    "case" dependency.  When the token itself carries no "case" child, the
    head token's children are inspected as a fallback (this covers complex
    prepositional phrases where the marker attaches one level up).

    Parameters
    ----------
    token : spaCy ``Token`` (duck-typed: uses ``.children``, ``.head``,
        ``.dep_`` and ``.text`` only).

    Returns
    -------
    str
        The Italian grammatical label; "Complemento Indiretto" when the
        preposition is missing or unrecognised.
    """
    preposition = ""
    # Preposition (`case`) attached directly to this token.
    for child in token.children:
        if child.dep_ == "case":
            preposition = child.text.lower()
            break

    # Fallback: no preposition among the children — look at the head's
    # children instead (helps with complex prepositional phrases).
    if not preposition and token.head.dep_ == 'obl':
        for child in token.head.children:
            if child.dep_ == "case":
                preposition = child.text.lower()
                break

    # "da" is special-cased: in a passive sentence it marks the agent.
    if preposition in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        if any(child.dep_ == 'aux:pass' for child in token.head.children):
            return "Complemento d'Agente"
        return "Complemento di Moto da Luogo"

    # Table-driven lookup for the remaining simple preposition groups.
    preposition_labels = (
        (("di", "del", "dello", "della", "dei", "degli", "delle"),
         "Complemento di Specificazione"),
        (("a", "al", "allo", "alla", "ai", "agli", "alle"),
         "Complemento di Termine"),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle"),
         "Complemento di Stato in Luogo"),
        (("con", "col", "coi"),
         "Complemento di Compagnia o Mezzo"),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"),
         "Complemento di Argomento o Luogo"),
        (("per",),
         "Complemento di Fine o Causa"),
        (("tra", "fra"),
         "Complemento di Luogo o Tempo (Partitivo)"),
    )
    for group, label in preposition_labels:
        if preposition in group:
            return label

    return "Complemento Indiretto"
81
 
82
def get_full_text(token):
    """Build the surface text of the phrase headed by *token*.

    Includes the head plus its DIRECT modifier children (articles,
    adjectives, prepositions, adverbs); deeper descendants are not
    collected.  Tokens are emitted in original sentence order.
    """
    modifier_deps = ('det', 'amod', 'case', 'advmod')
    members = [child for child in token.children if child.dep_ in modifier_deps]
    members.append(token)
    # Sort by document index so the phrase reads in original word order.
    ordered = sorted(members, key=lambda tok: tok.i)
    return " ".join(tok.text for tok in ordered)
90
+
91
def build_phrases(tokens):
    """Merge tokens into labelled grammatical phrases.

    Parameters
    ----------
    tokens : iterable of spaCy tokens belonging to one clause.

    Returns
    -------
    list[dict]
        Entries of the form ``{"text": ..., "label": ...}`` in token
        order.  Function words (articles, prepositions, auxiliaries,
        copulas, markers) and punctuation never head an entry; tokens
        whose dependency has no known label are silently skipped.
    """
    # Dependencies that can never head a phrase of their own.
    function_deps = ('det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark')

    # NOTE: the previous implementation kept a `processed_indices` set
    # keyed by token.i, but the keys were already unique so the skip
    # branch could never fire — that dead bookkeeping is removed here.
    analysis_result = []
    for token in tokens:
        if token.dep_ in function_deps:
            continue

        dep = token.dep_
        text = get_full_text(token)

        if dep == "ROOT":
            # A 'cop' child means a nominal predicate (e.g. "è bello"):
            # report the copula and the nominal part as separate entries.
            copula = next((c for c in token.children if c.dep_ == 'cop'), None)
            if copula is not None:
                analysis_result.append({"text": copula.text, "label": "Copula"})
                analysis_result.append({
                    "text": text,
                    "label": "Parte Nominale del Predicato"
                })
            else:
                analysis_result.append({"text": text, "label": "Predicato Verbale"})
        elif dep == 'obl':
            analysis_result.append({"text": text, "label": get_complement_type(token)})
        elif dep in DEP_MAP:
            analysis_result.append({"text": text, "label": DEP_MAP[dep]})

    return analysis_result
144
+
145
+
146
def analyze_clause(clause_tokens):
    """Run phrase analysis over one clause (main or subordinate).

    The introducing conjunction (dependency ``mark``, e.g. "che",
    "quando") is dropped first: it belongs to the sentence structure,
    not to the clause content itself.
    """
    return build_phrases([tok for tok in clause_tokens if tok.dep_ != 'mark'])
151
+
152
+
153
@app.route("/")
def home():
    """Landing endpoint: confirms the API is up and points at /api/analyze."""
    payload = {"message": "API is running. Use the /api/analyze endpoint with a POST request."}
    return jsonify(payload)
157
 
158
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Analyze an Italian sentence into main and subordinate clauses.

    Expects a JSON payload ``{"sentence": "..."}``.

    Returns
    -------
    200: ``{"main_clause": {...}, "subordinate_clauses": [...]}``
    400: ``{"error": ...}`` when the payload is missing or malformed.
    500: ``{"error": ...}`` on any unexpected failure (logged server-side).
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided in JSON payload"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        # Indices (into `doc`) of every token claimed by a subordinate clause.
        subordinate_indices = set()

        # Identify subordinate clauses first.
        for token in doc:
            # Subordinate clauses are identified by specific dependency relations.
            if token.dep_ in ("acl:relcl", "advcl", "ccomp", "csubj"):
                # The subtree of the token constitutes the subordinate clause.
                sub_clause_tokens = list(token.subtree)

                # BUGFIX: record the ORIGINAL doc indices of the subtree.
                # The previous code re-parsed the clause text with nlp() and
                # used token.i from that fresh parse; those indices start at 0
                # in the new Doc and do not correspond to positions in `doc`,
                # so the main clause was computed from the wrong token set.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

                sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")

                # Find the introducing element (e.g. 'che', 'quando', 'perché').
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""

                subordinate_clauses.append({
                    "type": sub_clause_type,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause(sub_clause_tokens)
                })

        # Tokens not claimed by any subordinate clause form the main clause.
        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        final_analysis = {
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses
        }

        return jsonify(final_analysis)

    except Exception as e:
        # Log the full error (with traceback) to the console for debugging.
        print(f"An error occurred during analysis: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
212
+
213
# The following block is for local development and testing;
# it won't be used when deployed with Gunicorn.
if __name__ == '__main__':
    # Local import: `os` is not visibly imported at module top in this
    # chunk — importing here is harmless if it already is. TODO confirm
    # the top-of-file imports.
    import os

    # Use a port that is not the default 5000 to avoid conflicts.
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # Werkzeug debugger on all interfaces — never run this configuration
    # in production (Gunicorn ignores this block entirely).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)