sbompolas committed on
Commit
08f1dd3
·
verified ·
1 Parent(s): 383a058

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -34
app.py CHANGED
@@ -6,6 +6,8 @@ import requests
6
  import traceback
7
  from pathlib import Path
8
 
 
 
9
  LESBIAN_MODELS = {}
10
  MODEL_VARIANTS = {
11
  "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
@@ -59,6 +61,9 @@ def initialize_models():
59
 
60
  loaded, load_status = initialize_models()
61
 
 
 
 
62
  def stanza_doc_to_conllu(doc) -> str:
63
  lines = []
64
  for sid, sent in enumerate(doc.sentences, 1):
@@ -78,22 +83,37 @@ def stanza_doc_to_conllu(doc) -> str:
78
 
79
  def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
80
  rows = []
81
- for L in conllu.splitlines():
82
- if not L:
 
83
  if rows and rows[-1] != {}:
84
  rows.append({})
85
  continue
86
- if L.startswith("#"):
87
- if "=" in L:
88
- key, val = L[2:].split("=", 1)
89
- rows.append({'ID': f"# {key.strip()} =", 'FORM': val.strip()})
 
 
 
 
 
 
90
  continue
91
- parts = L.split("\t")
 
92
  if len(parts) >= 10:
93
  rows.append({
94
- 'ID': parts[0], 'FORM': parts[1], 'LEMMA': parts[2],
95
- 'UPOS': parts[3], 'XPOS': parts[4], 'FEATS': parts[5],
96
- 'HEAD': parts[6], 'DEPREL': parts[7], 'DEPS': parts[8], 'MISC': parts[9]
 
 
 
 
 
 
 
97
  })
98
  return pd.DataFrame(rows)
99
 
@@ -101,14 +121,19 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
101
  if df.empty:
102
  return "No data to visualize"
103
  viz = []
104
- for i, row in df.iterrows():
105
- if pd.isna(row["ID"]):
 
 
106
  continue
107
- if isinstance(row["ID"], str) and row["ID"].startswith("#"):
 
 
108
  if viz:
109
- viz.append("")
110
- viz.append(f"{row['ID']} {row['FORM']}")
111
  continue
 
112
  w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], row['HEAD']
113
  if h != '0':
114
  try:
@@ -120,8 +145,159 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
120
  viz.append(f"{w} ({p}) --{d}--> ROOT")
121
  return "\n".join(viz)
122
 
123
- # Keep your create_single_sentence_svg as-is; it already includes annotation rendering
124
- # Be sure ann = [...] block includes: upos, lemma, and all feats, which it does in your version
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  def process_text(text, variant):
127
  if not text.strip():
@@ -146,12 +322,16 @@ def process_text(text, variant):
146
 
147
  sentences = []
148
  for sent in doc.sentences:
149
- payload = [ {
150
- 'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
151
- 'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
152
- 'FEATS': w.feats or "_", 'HEAD': w.head or 0,
 
 
 
 
153
  'DEPREL': w.deprel or "_"
154
- } for w in sent.words ]
155
  sentences.append(payload)
156
 
157
  sent_ids = [str(i+1) for i in range(len(sentences))]
@@ -169,11 +349,14 @@ def process_text(text, variant):
169
 
170
  def update_svg(selected_id, sentences):
171
  try:
172
- idx = int(selected_id)-1
173
  return create_single_sentence_svg(sentences[idx])
174
  except:
175
  return "<p>Invalid selection</p>"
176
 
 
 
 
177
  def create_app():
178
  with gr.Blocks(title="Lesbian Greek Parser") as app:
179
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
@@ -183,11 +366,16 @@ def create_app():
183
 
184
  with gr.Row():
185
  with gr.Column():
186
- txt = gr.Textbox(label="Input Text", lines=4,
187
- placeholder="Εισάγετε κείμενο…")
188
- mdl = gr.Radio(choices=list(MODEL_VARIANTS.keys()),
189
- value="Lesbian-only",
190
- label="Model Variant")
 
 
 
 
 
191
  btn = gr.Button("Parse", variant="primary")
192
 
193
  with gr.Row():
@@ -198,13 +386,17 @@ def create_app():
198
 
199
  with gr.Row():
200
  with gr.Column():
201
- conllu_out = gr.Textbox(label="CoNLL-U",
202
- lines=10,
203
- show_copy_button=True)
 
 
204
  table_out = gr.Dataframe(label="Token Table")
205
- text_out = gr.Textbox(label="Text-based Dependencies",
206
- lines=8,
207
- show_copy_button=True)
 
 
208
 
209
  btn.click(
210
  fn=process_text,
 
6
  import traceback
7
  from pathlib import Path
8
 
9
+ # 1. MODEL VARIANTS & INITIALIZATION
10
+
11
  LESBIAN_MODELS = {}
12
  MODEL_VARIANTS = {
13
  "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
 
61
 
62
  loaded, load_status = initialize_models()
63
 
64
+
65
+ # 2. CONLL-U / DATAFRAME / TEXT‐VIZ
66
+
67
  def stanza_doc_to_conllu(doc) -> str:
68
  lines = []
69
  for sid, sent in enumerate(doc.sentences, 1):
 
83
 
84
def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
    """Convert CoNLL-U text into a flat DataFrame of header and token rows.

    Each input line is mapped as follows:

    * blank (or whitespace-only) line -> one empty row acting as a sentence
      separator; consecutive separators are collapsed into one and a
      separator is never emitted as the very first row;
    * comment line containing ``=`` (e.g. ``# sent_id = 2``) -> a header row
      with ``ID`` set to ``"# <key> ="`` and ``FORM`` set to the value;
      comment lines without ``=`` are dropped;
    * tab-separated line with at least ten fields -> a token row holding the
      first ten fields as strings; shorter lines are dropped.

    Args:
        conllu: CoNLL-U formatted text.

    Returns:
        A DataFrame with one row per kept line. Separator rows contain only
        missing values. The result is empty when nothing was kept.
    """
    columns = ('ID', 'FORM', 'LEMMA', 'UPOS', 'XPOS',
               'FEATS', 'HEAD', 'DEPREL', 'DEPS', 'MISC')
    rows = []
    for line in conllu.splitlines():
        # Sentence boundary: whitespace-only lines count as blank too.
        if not line.strip():
            if rows and rows[-1] != {}:
                rows.append({})
            continue

        # Sentence-level comment such as "# sent_id = 2".
        if line.startswith("#"):
            if "=" in line:
                # Strip the marker itself rather than a fixed two characters,
                # so "#key = value" (no space after '#') keeps its full key.
                key, val = line.lstrip("#").split("=", 1)
                rows.append({
                    'ID': f"# {key.strip()} =",
                    'FORM': val.strip(),
                })
            continue

        parts = line.split("\t")
        if len(parts) >= 10:
            # zip() stops after the ten named columns; extra fields are ignored.
            rows.append(dict(zip(columns, parts)))
    return pd.DataFrame(rows)
119
 
 
121
  if df.empty:
122
  return "No data to visualize"
123
  viz = []
124
+ for _, row in df.iterrows():
125
+ rid = row.get("ID")
126
+ # skip outright blank-rows skeleton
127
+ if pd.isna(rid):
128
  continue
129
+
130
+ # sentence header
131
+ if isinstance(rid, str) and rid.startswith("#"):
132
  if viz:
133
+ viz.append("") # blank line before new sentence
134
+ viz.append(f"{rid} {row.get('FORM')}")
135
  continue
136
+
137
  w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], row['HEAD']
138
  if h != '0':
139
  try:
 
145
  viz.append(f"{w} ({p}) --{d}--> ROOT")
146
  return "\n".join(viz)
147
 
148
+
149
+ # 3. FULL SVG BUILDER
150
+
151
def _svg_marker_id(rel):
    """Return an id-safe ``<marker>`` name for dependency relation *rel*."""
    # Relation subtypes such as "nsubj:pass" contain characters that are
    # awkward inside an id / url(#...) reference, so map them to "_".
    safe = "".join(ch if ch.isalnum() else "_" for ch in str(rel))
    return f"arrow_{safe}"


def _token_annotations(row):
    """Collect the (unescaped) annotation strings drawn beneath one token."""
    word = row['FORM']
    pos = row['UPOS']
    lemma = row['LEMMA']
    xpos = row['XPOS']
    feats = row['FEATS']

    ann = []
    if pos and pos != '_':
        ann.append(f"upos={pos}")
    # The lemma is only shown when it differs from the surface form.
    if lemma and lemma not in ('_', word):
        ann.append(f"lemma={lemma}")
    if xpos and xpos != '_':
        ann.append(f"xpos={xpos}")
    if feats and feats not in ('', '_'):
        ann.extend(fpair for fpair in feats.split('|') if '=' in fpair)
    return ann


def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
    """Render one parsed sentence as an SVG dependency diagram.

    Args:
        sentence_data: Either a list of per-token dicts or a DataFrame, with
            the fields ``ID``, ``FORM``, ``LEMMA``, ``UPOS``, ``XPOS``,
            ``FEATS``, ``HEAD`` and ``DEPREL``. ``ID`` and ``HEAD`` must be
            convertible with ``int()``; a head of 0 marks the root.
        sentence_num: Accepted for interface compatibility; not used.
        total_sentences: Accepted for interface compatibility; not used.

    Returns:
        The SVG markup as a single string. For an empty sentence a short
        ``<p>`` notice is returned instead, and any exception raised while
        building the drawing is reported as ``<p>Error creating SVG: ...</p>``
        rather than propagated.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    try:
        df = pd.DataFrame(sentence_data) if isinstance(sentence_data, list) else sentence_data
        word_count = len(df)
        if word_count == 0:
            # Nothing to lay out; avoids dividing by zero below.
            return "<p>No tokens to display</p>"

        base_word_width = 100
        min_spacing = 30
        word_spacing = max(
            base_word_width,
            (word_count * base_word_width + min_spacing * (word_count - 1)) / word_count
        )

        # Parse every token once: (numeric id, row, annotation lines).
        tokens = [
            (int(row['ID']), row, _token_annotations(row))
            for _, row in df.iterrows()
        ]

        width = max(800, word_count * word_spacing + 100)

        # The word baseline stays where the fixed 500px layout put it; only
        # the canvas grows downward when a token has many annotation lines.
        word_y = 380
        pos_y = word_y + 20
        features_start_y = pos_y + 15
        line_height = 12
        max_lines = max(len(ann) for _, _, ann in tokens)
        height = max(500, features_start_y + max_lines * line_height)

        deprel_colors = {
            'root': '#000000', 'nsubj': '#2980b9', 'obj': '#27ae60', 'det': '#e67e22',
            'amod': '#8e44ad', 'nmod': '#16a085', 'case': '#34495e', 'punct': '#7f8c8d',
            'cc': '#d35400', 'conj': '#2c3e50', 'cop': '#e74c3c', 'mark': '#9b59b6',
            'csubj': '#3498db', 'xcomp': '#1abc9c', 'ccomp': '#f39c12', 'advcl': '#e91e63',
            'advmod': '#9c27b0', 'obl': '#795548', 'iobj': '#607d8b', 'fixed': '#ff5722',
            'aux': '#ff9800', 'acl': '#4caf50', 'appos': '#673ab7', 'compound': '#009688'
        }
        fallback_color = '#000'

        # One arrowhead marker per known relation, plus one for every relation
        # in this sentence that has no entry in the colour table; otherwise
        # marker-end would reference an id that does not exist.
        marker_colors = {_svg_marker_id(rel): color for rel, color in deprel_colors.items()}
        for rel in df['DEPREL']:
            marker_colors.setdefault(_svg_marker_id(rel), fallback_color)

        svg = [
            f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" '
            'style="background: white; border: 1px solid #eee;">',
            '<defs>'
        ]
        for marker_id, color in marker_colors.items():
            svg.append(
                f'<marker id="{marker_id}" markerWidth="4" markerHeight="4" '
                'markerUnits="userSpaceOnUse" orient="auto" refX="3.5" refY="2">'
                f'<path d="M0,0 L4,2 L0,4 Z" fill="{color}"/>'
                '</marker>'
            )
        svg.append('</defs><g>')

        # Horizontal centre of each token, keyed by its numeric id.
        word_positions = {wid: 50 + (wid - 1) * word_spacing for wid, _, _ in tokens}

        # Draw the root line and the dependency arcs.
        used_spans = []
        for wid, row, _ in tokens:
            hid = int(row['HEAD'])
            rel = row['DEPREL']
            rel_text = escape(str(rel))

            x1 = word_positions[wid]
            col = deprel_colors.get(rel, fallback_color)

            if hid == 0:
                # Root: vertical line above the word with a boxed ROOT label.
                svg.append(
                    f'<line x1="{x1}" y1="{word_y-15}" x2="{x1}" y2="50" '
                    f'stroke="{col}" stroke-width="1.5"/>'
                )
                mid = (word_y-15 + 50) / 2
                svg.append(
                    f'<rect x="{x1-15}" y="{mid-8}" width="30" height="14" '
                    f'fill="white" stroke="{col}" rx="2"/>'
                )
                svg.append(
                    f'<text x="{x1}" y="{mid+2}" text-anchor="middle" '
                    f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>'
                )
            else:
                # Unknown head ids fall back to the token's own position.
                x2 = word_positions.get(hid, x1)
                span = (min(wid, hid), max(wid, hid))

                # Pick the lowest level on which this span does not overlap
                # an arc that was already placed.
                lvl = 0
                conflict = True
                while conflict:
                    conflict = False
                    for (es, el), used_lvl in used_spans:
                        if used_lvl == lvl and not (span[1] < es or span[0] > el):
                            lvl += 1
                            conflict = True
                            break
                used_spans.append((span, lvl))

                dist = abs(x2 - x1)
                base_h = min(40 + dist * 0.15, 100)
                arc_h = base_h + lvl * 35
                midx = (x1 + x2) / 2
                cty = word_y - arc_h
                # Quadratic curve from this token to its head; the arrowhead
                # marker sits at the head end of the path.
                svg.append(
                    f'<path d="M {x1} {word_y-15} Q {midx} {cty} {x2} {word_y-15}" '
                    f'stroke="{col}" stroke-width="1.5" fill="none" '
                    f'marker-end="url(#{_svg_marker_id(rel)})"/>'
                )
                # Label box centred on the curve's midpoint (t = 0.5).
                amx = 0.25*x1 + 0.5*midx + 0.25*x2
                amy = 0.25*(word_y-15) + 0.5*cty + 0.25*(word_y-15)
                lw = len(str(rel))*6 + 8
                svg.append(
                    f'<rect x="{amx-lw/2}" y="{amy-8}" width="{lw}" height="14" '
                    f'fill="white" stroke="{col}" rx="2"/>'
                )
                svg.append(
                    f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
                    f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">{rel_text}</text>'
                )

        # Draw the words and their annotations.
        for wid, row, ann in tokens:
            x = word_positions[wid]

            # Token text comes from user input, so it is escaped before
            # being embedded in the markup.
            svg.append(
                f'<text x="{x}" y="{word_y}" text-anchor="middle" '
                'font-family="Arial" font-size="13" font-weight="bold">'
                f'{escape(str(row["FORM"]))}</text>'
            )

            for i, a in enumerate(ann):
                y0 = features_start_y + i*line_height
                svg.append(
                    f'<text x="{x}" y="{y0}" text-anchor="middle" '
                    f'font-family="Arial" font-size="7" fill="#666">{escape(str(a))}</text>'
                )

        svg.append('</g></svg>')
        return "".join(svg)

    except Exception as e:
        # UI boundary: report the problem in the output pane instead of
        # raising; the message is escaped because it may echo input text.
        return f"<p>Error creating SVG: {escape(str(e))}</p>"
298
+
299
+
300
+ # 4. PROCESS & DROPDOWN-UPDATES
301
 
302
  def process_text(text, variant):
303
  if not text.strip():
 
322
 
323
  sentences = []
324
  for sent in doc.sentences:
325
+ payload = [{
326
+ 'ID': w.id,
327
+ 'FORM': w.text,
328
+ 'LEMMA': w.lemma or "_",
329
+ 'UPOS': w.upos or "_",
330
+ 'XPOS': w.xpos or "_",
331
+ 'FEATS': w.feats or "_",
332
+ 'HEAD': w.head or 0,
333
  'DEPREL': w.deprel or "_"
334
+ } for w in sent.words]
335
  sentences.append(payload)
336
 
337
  sent_ids = [str(i+1) for i in range(len(sentences))]
 
349
 
350
def update_svg(selected_id, sentences):
    """Return the SVG for the sentence chosen by its 1-based id.

    Args:
        selected_id: 1-based sentence number; anything ``int()`` accepts.
        sentences: Indexable collection of per-sentence token payloads.

    Returns:
        The markup produced by ``create_single_sentence_svg`` for the chosen
        sentence, or ``"<p>Invalid selection</p>"`` when the id is missing,
        not numeric, less than 1, or beyond the available sentences.
    """
    try:
        idx = int(selected_id) - 1
        # Reject 0 and negative ids explicitly; otherwise Python's negative
        # indexing would silently show a sentence counted from the end.
        if idx < 0:
            raise IndexError(selected_id)
        sentence = sentences[idx]
    except (TypeError, ValueError, IndexError, KeyError):
        return "<p>Invalid selection</p>"
    return create_single_sentence_svg(sentence)
356
 
357
+
358
+ # 5. BUILD GRADIO UI
359
+
360
  def create_app():
361
  with gr.Blocks(title="Lesbian Greek Parser") as app:
362
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
 
366
 
367
  with gr.Row():
368
  with gr.Column():
369
+ txt = gr.Textbox(
370
+ label="Input Text",
371
+ lines=4,
372
+ placeholder="Εισάγετε κείμενο…"
373
+ )
374
+ mdl = gr.Radio(
375
+ choices=list(MODEL_VARIANTS.keys()),
376
+ value="Lesbian-only",
377
+ label="Model Variant"
378
+ )
379
  btn = gr.Button("Parse", variant="primary")
380
 
381
  with gr.Row():
 
386
 
387
  with gr.Row():
388
  with gr.Column():
389
+ conllu_out = gr.Textbox(
390
+ label="CoNLL-U",
391
+ lines=10,
392
+ show_copy_button=True
393
+ )
394
  table_out = gr.Dataframe(label="Token Table")
395
+ text_out = gr.Textbox(
396
+ label="Text-based Dependencies",
397
+ lines=8,
398
+ show_copy_button=True
399
+ )
400
 
401
  btn.click(
402
  fn=process_text,