Update app.py
Browse files
app.py
CHANGED
|
@@ -6,8 +6,6 @@ import requests
|
|
| 6 |
import traceback
|
| 7 |
from pathlib import Path
|
| 8 |
|
| 9 |
-
# 1. MODEL VARIANTS & INITIALIZATION
|
| 10 |
-
|
| 11 |
LESBIAN_MODELS = {}
|
| 12 |
MODEL_VARIANTS = {
|
| 13 |
"Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
|
|
@@ -61,8 +59,6 @@ def initialize_models():
|
|
| 61 |
|
| 62 |
loaded, load_status = initialize_models()
|
| 63 |
|
| 64 |
-
# 2. CONLL-U / DATAFRAME / TEXT‐VIZ
|
| 65 |
-
|
| 66 |
def stanza_doc_to_conllu(doc) -> str:
|
| 67 |
lines = []
|
| 68 |
for sid, sent in enumerate(doc.sentences, 1):
|
|
@@ -83,7 +79,14 @@ def stanza_doc_to_conllu(doc) -> str:
|
|
| 83 |
def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
| 84 |
rows = []
|
| 85 |
for L in conllu.splitlines():
|
| 86 |
-
if not L
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
continue
|
| 88 |
parts = L.split("\t")
|
| 89 |
if len(parts) >= 10:
|
|
@@ -97,9 +100,16 @@ def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
|
| 97 |
def create_dependency_visualization(df: pd.DataFrame) -> str:
|
| 98 |
if df.empty:
|
| 99 |
return "No data to visualize"
|
| 100 |
-
viz = [
|
| 101 |
-
for
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
if h != '0':
|
| 104 |
try:
|
| 105 |
hw = df.iloc[int(h)-1]['FORM']
|
|
@@ -110,159 +120,8 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
|
|
| 110 |
viz.append(f"{w} ({p}) --{d}--> ROOT")
|
| 111 |
return "\n".join(viz)
|
| 112 |
|
| 113 |
-
#
|
| 114 |
-
|
| 115 |
-
# Stroke/fill colour per dependency relation; relations not listed here are
# drawn with _DEFAULT_ARC_COLOR.
_DEPREL_COLORS = {
    'root': '#000000', 'nsubj': '#2980b9', 'obj': '#27ae60', 'det': '#e67e22',
    'amod': '#8e44ad', 'nmod': '#16a085', 'case': '#34495e', 'punct': '#7f8c8d',
    'cc': '#d35400', 'conj': '#2c3e50', 'cop': '#e74c3c', 'mark': '#9b59b6',
    'csubj': '#3498db', 'xcomp': '#1abc9c', 'ccomp': '#f39c12', 'advcl': '#e91e63',
    'advmod': '#9c27b0', 'obl': '#795548', 'iobj': '#607d8b', 'fixed': '#ff5722',
    'aux': '#ff9800', 'acl': '#4caf50', 'appos': '#673ab7', 'compound': '#009688',
}
_DEFAULT_ARC_COLOR = '#000'


def _arrow_marker_id(rel):
    """Return the arrowhead marker id for relation *rel*, restricted to [A-Za-z0-9_-]."""
    import re

    # Relation labels may contain characters such as ':' (e.g. "nsubj:pass");
    # replacing them keeps the id usable inside url(#...) references.
    return "arrow_" + re.sub(r'[^A-Za-z0-9_-]', '_', str(rel))


def _claim_arc_level(span, used_spans):
    """Pick the lowest stacking level on which *span* overlaps no earlier arc.

    *span* is a ``(low_id, high_id)`` pair. *used_spans* holds
    ``(span, level)`` pairs for arcs already placed and is extended in place.
    Spans that merely share an endpoint count as overlapping.
    """
    level = 0
    conflict = True
    while conflict:
        conflict = False
        for other, other_level in used_spans:
            if other_level == level and not (span[1] < other[0] or span[0] > other[1]):
                level += 1
                conflict = True
                break
    used_spans.append((span, level))
    return level


def _token_annotations(row):
    """Collect the small annotation lines (upos, lemma, xpos, feats) shown under a token."""
    word = row['FORM']
    pos, lemma, xpos, feats = row['UPOS'], row['LEMMA'], row['XPOS'], row['FEATS']

    notes = []
    if pos and pos != '_':
        notes.append(f"upos={pos}")
    # The lemma is only shown when it differs from the surface form.
    if lemma and lemma not in ('_', word):
        notes.append(f"lemma={lemma}")
    if xpos and xpos != '_':
        notes.append(f"xpos={xpos}")
    if isinstance(feats, str) and feats not in ('', '_'):
        notes.extend(pair for pair in feats.split('|') if '=' in pair)
    return notes


def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
    """Render one parsed sentence as an SVG dependency diagram.

    Args:
        sentence_data: Either a list of per-token dicts or a DataFrame, with
            the columns ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD and DEPREL.
            ID and HEAD must be convertible with ``int()``; HEAD 0 marks the
            root token.
        sentence_num: Accepted for interface compatibility; not used.
        total_sentences: Accepted for interface compatibility; not used.

    Returns:
        The SVG markup as a string. If rendering fails (including an empty
        sentence) an HTML ``<p>Error creating SVG: ...</p>`` fragment is
        returned instead of raising.
    """
    from html import escape

    def text(value):
        # Token text comes from user input; escape it so '&' or '<' cannot
        # break the markup or inject elements.
        return escape(str(value), quote=False)

    try:
        if isinstance(sentence_data, list):
            df = pd.DataFrame(sentence_data)
        else:
            df = sentence_data

        word_count = len(df)
        if word_count == 0:
            # Avoids the division by zero in the spacing computation below.
            return "<p>Error creating SVG: empty sentence</p>"

        base_word_width = 100
        min_spacing = 30
        word_spacing = max(
            base_word_width,
            (word_count * base_word_width + min_spacing * (word_count - 1)) / word_count,
        )

        width = max(800, word_count * word_spacing + 100)
        height = 500

        word_y = height - 120
        arc_base_y = word_y - 15          # arcs start/end just above the word
        features_start_y = word_y + 35    # first annotation line below the word

        # One arrowhead marker per relation: the colour table plus every
        # relation that actually occurs in this sentence, so arcs with a
        # label outside the table still get an arrowhead.
        marker_colors = {
            _arrow_marker_id(rel): color for rel, color in _DEPREL_COLORS.items()
        }
        for rel in df['DEPREL']:
            marker_colors.setdefault(_arrow_marker_id(rel), _DEFAULT_ARC_COLOR)

        svg_parts = [
            f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" '
            'style="background: white; border: 1px solid #eee;">',
            '<defs>',
        ]
        for marker_id, color in marker_colors.items():
            svg_parts.append(
                f'<marker id="{marker_id}" markerWidth="4" markerHeight="4" '
                'markerUnits="userSpaceOnUse" orient="auto" refX="3.5" refY="2">'
                f'<path d="M0,0 L4,2 L0,4 Z" fill="{color}"/>'
                '</marker>'
            )
        svg_parts.append('</defs>')
        svg_parts.append('<g>')

        # Horizontal centre of every token, keyed by its integer ID.
        word_positions = {}
        for _, row in df.iterrows():
            wid = int(row['ID'])
            word_positions[wid] = 50 + (wid - 1) * word_spacing

        used_spans = []
        for _, row in df.iterrows():
            wid = int(row['ID'])
            hid = int(row['HEAD'])
            rel = str(row['DEPREL'])
            col = _DEPREL_COLORS.get(rel, _DEFAULT_ARC_COLOR)

            if hid == 0:
                # Root token: vertical line to the top with a ROOT label.
                x = word_positions[wid]
                svg_parts.append(
                    f'<line x1="{x}" y1="{arc_base_y}" x2="{x}" y2="50" '
                    f'stroke="{col}" stroke-width="1.5"/>'
                )
                mid = (arc_base_y + 50) / 2
                svg_parts.append(
                    f'<rect x="{x-15}" y="{mid-8}" width="30" height="14" '
                    f'fill="white" stroke="{col}" rx="2"/>'
                )
                svg_parts.append(
                    f'<text x="{x}" y="{mid+2}" text-anchor="middle" '
                    f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>'
                )
                continue

            if hid not in word_positions:
                # Head refers to a token that is not in this sentence: no arc.
                continue

            x1, x2 = word_positions[wid], word_positions[hid]
            lvl = _claim_arc_level((min(wid, hid), max(wid, hid)), used_spans)

            dist = abs(x2 - x1)
            arc_h = min(40 + dist * 0.15, 100) + lvl * 35
            midx = (x1 + x2) / 2
            cty = word_y - arc_h
            path = f'M {x1} {arc_base_y} Q {midx} {cty} {x2} {arc_base_y}'
            svg_parts.append(
                f'<path d="{path}" stroke="{col}" stroke-width="1.5" '
                f'fill="none" marker-end="url(#{_arrow_marker_id(rel)})"/>'
            )

            # Label position: the quadratic Bezier curve evaluated at t = 0.5.
            amx = 0.25 * x1 + 0.5 * midx + 0.25 * x2
            amy = 0.25 * arc_base_y + 0.5 * cty + 0.25 * arc_base_y
            lw = len(rel) * 6 + 8
            svg_parts.append(
                f'<rect x="{amx-lw/2}" y="{amy-8}" width="{lw}" height="14" '
                f'fill="white" stroke="{col}" rx="2"/>'
            )
            svg_parts.append(
                f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
                f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">{text(rel)}</text>'
            )

        # Word forms and their annotation lines.
        for _, row in df.iterrows():
            x = word_positions[int(row['ID'])]
            svg_parts.append(
                f'<text x="{x}" y="{word_y}" text-anchor="middle" '
                f'font-family="Arial" font-size="13" font-weight="bold">{text(row["FORM"])}</text>'
            )
            for i, note in enumerate(_token_annotations(row)):
                y0 = features_start_y + i * 12
                svg_parts.append(
                    f'<text x="{x}" y="{y0}" text-anchor="middle" '
                    f'font-family="Arial" font-size="7" fill="#666">{text(note)}</text>'
                )

        svg_parts.append('</g></svg>')
        return "".join(svg_parts)

    except Exception as e:
        # UI boundary: report the failure as HTML rather than raising.
        return f"<p>Error creating SVG: {text(e)}</p>"
|
| 264 |
-
|
| 265 |
-
# 4. PROCESS & DROPDOWN-UPDATES
|
| 266 |
|
| 267 |
def process_text(text, variant):
|
| 268 |
if not text.strip():
|
|
@@ -287,12 +146,12 @@ def process_text(text, variant):
|
|
| 287 |
|
| 288 |
sentences = []
|
| 289 |
for sent in doc.sentences:
|
| 290 |
-
payload = [{
|
| 291 |
'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
|
| 292 |
'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
|
| 293 |
'FEATS': w.feats or "_", 'HEAD': w.head or 0,
|
| 294 |
'DEPREL': w.deprel or "_"
|
| 295 |
-
} for w in sent.words]
|
| 296 |
sentences.append(payload)
|
| 297 |
|
| 298 |
sent_ids = [str(i+1) for i in range(len(sentences))]
|
|
@@ -315,8 +174,6 @@ def update_svg(selected_id, sentences):
|
|
| 315 |
except:
|
| 316 |
return "<p>Invalid selection</p>"
|
| 317 |
|
| 318 |
-
# 5. BUILD GRADIO UI
|
| 319 |
-
|
| 320 |
def create_app():
|
| 321 |
with gr.Blocks(title="Lesbian Greek Parser") as app:
|
| 322 |
gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
|
|
|
|
| 6 |
import traceback
|
| 7 |
from pathlib import Path
|
| 8 |
|
|
|
|
|
|
|
| 9 |
LESBIAN_MODELS = {}
|
| 10 |
MODEL_VARIANTS = {
|
| 11 |
"Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
|
|
|
|
| 59 |
|
| 60 |
loaded, load_status = initialize_models()
|
| 61 |
|
|
|
|
|
|
|
| 62 |
def stanza_doc_to_conllu(doc) -> str:
|
| 63 |
lines = []
|
| 64 |
for sid, sent in enumerate(doc.sentences, 1):
|
|
|
|
| 79 |
def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
| 80 |
rows = []
|
| 81 |
for L in conllu.splitlines():
|
| 82 |
+
if not L:
|
| 83 |
+
if rows and rows[-1] != {}:
|
| 84 |
+
rows.append({})
|
| 85 |
+
continue
|
| 86 |
+
if L.startswith("#"):
|
| 87 |
+
if "=" in L:
|
| 88 |
+
key, val = L[2:].split("=", 1)
|
| 89 |
+
rows.append({'ID': f"# {key.strip()} =", 'FORM': val.strip()})
|
| 90 |
continue
|
| 91 |
parts = L.split("\t")
|
| 92 |
if len(parts) >= 10:
|
|
|
|
| 100 |
def create_dependency_visualization(df: pd.DataFrame) -> str:
|
| 101 |
if df.empty:
|
| 102 |
return "No data to visualize"
|
| 103 |
+
viz = []
|
| 104 |
+
for i, row in df.iterrows():
|
| 105 |
+
if pd.isna(row["ID"]):
|
| 106 |
+
continue
|
| 107 |
+
if isinstance(row["ID"], str) and row["ID"].startswith("#"):
|
| 108 |
+
if viz:
|
| 109 |
+
viz.append("")
|
| 110 |
+
viz.append(f"{row['ID']} {row['FORM']}")
|
| 111 |
+
continue
|
| 112 |
+
w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], row['HEAD']
|
| 113 |
if h != '0':
|
| 114 |
try:
|
| 115 |
hw = df.iloc[int(h)-1]['FORM']
|
|
|
|
| 120 |
viz.append(f"{w} ({p}) --{d}--> ROOT")
|
| 121 |
return "\n".join(viz)
|
| 122 |
|
| 123 |
+
# Keep your create_single_sentence_svg as-is; it already includes annotation rendering
|
| 124 |
+
# Be sure ann = [...] block includes: upos, lemma, and all feats, which it does in your version
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
def process_text(text, variant):
|
| 127 |
if not text.strip():
|
|
|
|
| 146 |
|
| 147 |
sentences = []
|
| 148 |
for sent in doc.sentences:
|
| 149 |
+
payload = [ {
|
| 150 |
'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
|
| 151 |
'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
|
| 152 |
'FEATS': w.feats or "_", 'HEAD': w.head or 0,
|
| 153 |
'DEPREL': w.deprel or "_"
|
| 154 |
+
} for w in sent.words ]
|
| 155 |
sentences.append(payload)
|
| 156 |
|
| 157 |
sent_ids = [str(i+1) for i in range(len(sentences))]
|
|
|
|
| 174 |
except:
|
| 175 |
return "<p>Invalid selection</p>"
|
| 176 |
|
|
|
|
|
|
|
| 177 |
def create_app():
|
| 178 |
with gr.Blocks(title="Lesbian Greek Parser") as app:
|
| 179 |
gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
|