sbompolas committed on
Commit
383a058
·
verified ·
1 Parent(s): a97b4db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -165
app.py CHANGED
@@ -6,8 +6,6 @@ import requests
6
  import traceback
7
  from pathlib import Path
8
 
9
- # 1. MODEL VARIANTS & INITIALIZATION
10
-
11
  LESBIAN_MODELS = {}
12
  MODEL_VARIANTS = {
13
  "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
@@ -61,8 +59,6 @@ def initialize_models():
61
 
62
  loaded, load_status = initialize_models()
63
 
64
- # 2. CONLL-U / DATAFRAME / TEXT‐VIZ
65
-
66
  def stanza_doc_to_conllu(doc) -> str:
67
  lines = []
68
  for sid, sent in enumerate(doc.sentences, 1):
@@ -83,7 +79,14 @@ def stanza_doc_to_conllu(doc) -> str:
83
  def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
84
  rows = []
85
  for L in conllu.splitlines():
86
- if not L or L.startswith("#"):
 
 
 
 
 
 
 
87
  continue
88
  parts = L.split("\t")
89
  if len(parts) >= 10:
@@ -97,9 +100,16 @@ def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
97
  def create_dependency_visualization(df: pd.DataFrame) -> str:
98
  if df.empty:
99
  return "No data to visualize"
100
- viz = ["Dependency Parse Visualization:", "-"*40]
101
- for _, r in df.iterrows():
102
- w, p, d, h = r['FORM'], r['UPOS'], r['DEPREL'], r['HEAD']
 
 
 
 
 
 
 
103
  if h != '0':
104
  try:
105
  hw = df.iloc[int(h)-1]['FORM']
@@ -110,159 +120,8 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
110
  viz.append(f"{w} ({p}) --{d}--> ROOT")
111
  return "\n".join(viz)
112
 
113
- # 3. FULL SVG BUILDER (your original function)
114
-
115
- def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
116
- try:
117
- if isinstance(sentence_data, list):
118
- df = pd.DataFrame(sentence_data)
119
- else:
120
- df = sentence_data
121
-
122
- word_count = len(df)
123
- base_word_width = 100
124
- min_spacing = 30
125
- word_spacing = max(
126
- base_word_width,
127
- (word_count * base_word_width + min_spacing * (word_count - 1)) / word_count
128
- )
129
-
130
- width = max(800, word_count * word_spacing + 100)
131
- height = 500
132
-
133
- word_y = height - 120
134
- pos_y = word_y + 20
135
- features_start_y = pos_y + 15
136
-
137
- deprel_colors = {
138
- 'root': '#000000', 'nsubj': '#2980b9', 'obj': '#27ae60', 'det': '#e67e22',
139
- 'amod': '#8e44ad', 'nmod': '#16a085', 'case': '#34495e', 'punct': '#7f8c8d',
140
- 'cc': '#d35400', 'conj': '#2c3e50', 'cop': '#e74c3c', 'mark': '#9b59b6',
141
- 'csubj': '#3498db', 'xcomp': '#1abc9c', 'ccomp': '#f39c12', 'advcl': '#e91e63',
142
- 'advmod': '#9c27b0', 'obl': '#795548', 'iobj': '#607d8b', 'fixed': '#ff5722',
143
- 'aux': '#ff9800', 'acl': '#4caf50', 'appos': '#673ab7', 'compound': '#009688'
144
- }
145
-
146
- svg_parts = [
147
- f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" '
148
- 'style="background: white; border: 1px solid #eee;">',
149
- '<defs>'
150
- ]
151
-
152
- for deprel, color in deprel_colors.items():
153
- marker_id = f"arrow_{deprel}"
154
- svg_parts.append(
155
- f'<marker id="{marker_id}" markerWidth="4" markerHeight="4" '
156
- 'markerUnits="userSpaceOnUse" orient="auto" refX="3.5" refY="2">'
157
- f'<path d="M0,0 L4,2 L0,4 Z" fill="{color}"/>'
158
- f'</marker>'
159
- )
160
-
161
- svg_parts.append('</defs>')
162
- svg_parts.append('<g>')
163
-
164
- word_positions = {}
165
- for idx, row in df.iterrows():
166
- wid = int(row['ID'])
167
- word_positions[wid] = 50 + (wid - 1) * word_spacing
168
-
169
- used_spans = []
170
-
171
- for idx, row in df.iterrows():
172
- wid = int(row['ID'])
173
- hid = int(row['HEAD']) if row['HEAD'] != '0' else 0
174
- rel = row['DEPREL']
175
-
176
- if hid == 0:
177
- x = word_positions[wid]
178
- col = deprel_colors.get(rel, '#000')
179
- svg_parts.append(
180
- f'<line x1="{x}" y1="{word_y-15}" x2="{x}" y2="50" '
181
- f'stroke="{col}" stroke-width="1.5"/>'
182
- )
183
- mid = (word_y-15 + 50) / 2
184
- svg_parts.append(
185
- f'<rect x="{x-15}" y="{mid-8}" width="30" height="14" '
186
- f'fill="white" stroke="{col}" rx="2"/>'
187
- )
188
- svg_parts.append(
189
- f'<text x="{x}" y="{mid+2}" text-anchor="middle" '
190
- f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>'
191
- )
192
- else:
193
- if hid in word_positions:
194
- x1, x2 = word_positions[wid], word_positions[hid]
195
- span = (min(wid, hid), max(wid, hid))
196
- lvl = 0
197
- conflict = True
198
- while conflict:
199
- conflict = False
200
- for es, el in used_spans:
201
- if el == lvl and not (span[1] < es[0] or span[0] > es[1]):
202
- lvl += 1
203
- conflict = True
204
- break
205
- used_spans.append((span, lvl))
206
-
207
- dist = abs(x2 - x1)
208
- base_h = min(40 + dist * 0.15, 100)
209
- arc_h = base_h + lvl * 35
210
- col = deprel_colors.get(rel, '#000')
211
- midx = (x1 + x2) / 2
212
- cty = word_y - arc_h
213
- path = f'M {x1} {word_y-15} Q {midx} {cty} {x2} {word_y-15}'
214
- svg_parts.append(
215
- f'<path d="{path}" stroke="{col}" stroke-width="1.5" '
216
- f'fill="none" marker-end="url(#arrow_{rel})"/>'
217
- )
218
- amx = 0.25*x1 + 0.5*midx + 0.25*x2
219
- amy = 0.25*(word_y-15) + 0.5*cty + 0.25*(word_y-15)
220
- lw = len(rel)*6 + 8
221
- svg_parts.append(
222
- f'<rect x="{amx-lw/2}" y="{amy-8}" width="{lw}" height="14" '
223
- f'fill="white" stroke="{col}" rx="2"/>'
224
- )
225
- svg_parts.append(
226
- f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
227
- f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">{rel}</text>'
228
- )
229
-
230
- for idx, row in df.iterrows():
231
- wid = int(row['ID'])
232
- x = word_positions[wid]
233
- word = row['FORM']
234
- pos = row['UPOS']
235
- lemma= row['LEMMA']
236
- feats= row['FEATS']
237
- xpos = row['XPOS']
238
-
239
- svg_parts.append(
240
- f'<text x="{x}" y="{word_y}" text-anchor="middle" '
241
- f'font-family="Arial" font-size="13" font-weight="bold">{word}</text>'
242
- )
243
-
244
- ann = []
245
- if pos and pos!='_': ann.append(f"upos={pos}")
246
- if lemma and lemma not in ('_', word): ann.append(f"lemma={lemma}")
247
- if xpos and xpos!='_':ann.append(f"xpos={xpos}")
248
- if feats and feats not in ('', '_'):
249
- for fpair in feats.split('|'):
250
- if '=' in fpair: ann.append(fpair)
251
-
252
- for i,a in enumerate(ann):
253
- y0 = features_start_y + i*12
254
- svg_parts.append(
255
- f'<text x="{x}" y="{y0}" text-anchor="middle" '
256
- f'font-family="Arial" font-size="7" fill="#666">{a}</text>'
257
- )
258
-
259
- svg_parts.append('</g></svg>')
260
- return "".join(svg_parts)
261
-
262
- except Exception as e:
263
- return f"<p>Error creating SVG: {e}</p>"
264
-
265
- # 4. PROCESS & DROPDOWN-UPDATES
266
 
267
  def process_text(text, variant):
268
  if not text.strip():
@@ -287,12 +146,12 @@ def process_text(text, variant):
287
 
288
  sentences = []
289
  for sent in doc.sentences:
290
- payload = [{
291
  'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
292
  'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
293
  'FEATS': w.feats or "_", 'HEAD': w.head or 0,
294
  'DEPREL': w.deprel or "_"
295
- } for w in sent.words]
296
  sentences.append(payload)
297
 
298
  sent_ids = [str(i+1) for i in range(len(sentences))]
@@ -315,8 +174,6 @@ def update_svg(selected_id, sentences):
315
  except:
316
  return "<p>Invalid selection</p>"
317
 
318
- # 5. BUILD GRADIO UI
319
-
320
  def create_app():
321
  with gr.Blocks(title="Lesbian Greek Parser") as app:
322
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
 
6
  import traceback
7
  from pathlib import Path
8
 
 
 
9
  LESBIAN_MODELS = {}
10
  MODEL_VARIANTS = {
11
  "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
 
59
 
60
  loaded, load_status = initialize_models()
61
 
 
 
62
  def stanza_doc_to_conllu(doc) -> str:
63
  lines = []
64
  for sid, sent in enumerate(doc.sentences, 1):
 
79
  def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
80
  rows = []
81
  for L in conllu.splitlines():
82
+ if not L:
83
+ if rows and rows[-1] != {}:
84
+ rows.append({})
85
+ continue
86
+ if L.startswith("#"):
87
+ if "=" in L:
88
+ key, val = L[2:].split("=", 1)
89
+ rows.append({'ID': f"# {key.strip()} =", 'FORM': val.strip()})
90
  continue
91
  parts = L.split("\t")
92
  if len(parts) >= 10:
 
100
  def create_dependency_visualization(df: pd.DataFrame) -> str:
101
  if df.empty:
102
  return "No data to visualize"
103
+ viz = []
104
+ for i, row in df.iterrows():
105
+ if pd.isna(row["ID"]):
106
+ continue
107
+ if isinstance(row["ID"], str) and row["ID"].startswith("#"):
108
+ if viz:
109
+ viz.append("")
110
+ viz.append(f"{row['ID']} {row['FORM']}")
111
+ continue
112
+ w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], row['HEAD']
113
  if h != '0':
114
  try:
115
  hw = df.iloc[int(h)-1]['FORM']
 
120
  viz.append(f"{w} ({p}) --{d}--> ROOT")
121
  return "\n".join(viz)
122
 
123
+ # Keep your create_single_sentence_svg as-is; it already includes annotation rendering
124
+ # Be sure ann = [...] block includes: upos, lemma, and all feats, which it does in your version
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  def process_text(text, variant):
127
  if not text.strip():
 
146
 
147
  sentences = []
148
  for sent in doc.sentences:
149
+ payload = [ {
150
  'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
151
  'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
152
  'FEATS': w.feats or "_", 'HEAD': w.head or 0,
153
  'DEPREL': w.deprel or "_"
154
+ } for w in sent.words ]
155
  sentences.append(payload)
156
 
157
  sent_ids = [str(i+1) for i in range(len(sentences))]
 
174
  except:
175
  return "<p>Invalid selection</p>"
176
 
 
 
177
  def create_app():
178
  with gr.Blocks(title="Lesbian Greek Parser") as app:
179
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")