Update app.py
Browse files
app.py
CHANGED
|
@@ -6,15 +6,19 @@ import requests
|
|
| 6 |
import traceback
|
| 7 |
from pathlib import Path
|
| 8 |
|
|
|
|
| 9 |
# 1. MODEL VARIANTS & INITIALIZATION
|
|
|
|
| 10 |
|
| 11 |
MODEL_VARIANTS = {
|
| 12 |
-
"Lesbian-only (UD_Greek-Lesbian)":
|
| 13 |
-
"Lesbian-augmented (UD_Greek-NGUD+Lesbian)":
|
| 14 |
-
"Standard Modern Greek (UD_Greek-GUD)":
|
| 15 |
}
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
try:
|
| 19 |
resp = requests.get(url, stream=True)
|
| 20 |
resp.raise_for_status()
|
|
@@ -23,26 +27,33 @@ def download_model_file(url, filename):
|
|
| 23 |
f.write(chunk)
|
| 24 |
return True
|
| 25 |
except Exception as e:
|
| 26 |
-
print(f"Download failed {filename}: {e}")
|
| 27 |
return False
|
| 28 |
|
| 29 |
def initialize_models():
|
| 30 |
try:
|
| 31 |
base = Path("./models")
|
| 32 |
base.mkdir(exist_ok=True)
|
| 33 |
-
for
|
| 34 |
-
out = base /
|
| 35 |
out.mkdir(exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
files = {
|
| 37 |
-
"tokenizer.pt": f"https://huggingface.co/{repo}/resolve/main/tokenizer.pt",
|
| 38 |
-
"lemmatizer.pt": f"https://huggingface.co/{repo}/resolve/main/lemmatizer.pt",
|
| 39 |
-
"pos.pt": f"https://huggingface.co/{repo}/resolve/main/pos.pt",
|
| 40 |
-
"depparse.pt": f"https://huggingface.co/{repo}/resolve/main/depparse.pt",
|
| 41 |
}
|
|
|
|
| 42 |
for fn, url in files.items():
|
| 43 |
tgt = out / fn
|
| 44 |
if not tgt.exists() and not download_model_file(url, str(tgt)):
|
| 45 |
-
return False, f"Failed to download {fn} for {
|
|
|
|
| 46 |
cfg = {
|
| 47 |
'processors': 'tokenize,pos,lemma,depparse',
|
| 48 |
'lang': 'el',
|
|
@@ -51,9 +62,10 @@ def initialize_models():
|
|
| 51 |
'tokenize_model_path': str(out/"tokenizer.pt"),
|
| 52 |
'pos_model_path': str(out/"pos.pt"),
|
| 53 |
'lemma_model_path': str(out/"lemmatizer.pt"),
|
| 54 |
-
'depparse_model_path': str(out/"depparse.pt")
|
| 55 |
}
|
| 56 |
-
|
|
|
|
| 57 |
return True, "Models loaded"
|
| 58 |
except Exception as e:
|
| 59 |
traceback.print_exc()
|
|
@@ -62,7 +74,9 @@ def initialize_models():
|
|
| 62 |
loaded, load_status = initialize_models()
|
| 63 |
|
| 64 |
|
| 65 |
-
#
|
|
|
|
|
|
|
| 66 |
|
| 67 |
def stanza_doc_to_conllu(doc) -> str:
|
| 68 |
lines = []
|
|
@@ -71,22 +85,20 @@ def stanza_doc_to_conllu(doc) -> str:
|
|
| 71 |
lines.append(f"# text = {sent.text}")
|
| 72 |
for w in sent.words:
|
| 73 |
fields = [
|
| 74 |
-
str(w.id), w.text,
|
| 75 |
-
w.lemma or "_", w.upos or "_",
|
| 76 |
w.xpos or "_", w.feats or "_",
|
| 77 |
-
str(w.head
|
| 78 |
w.deprel or "_", "_", "_"
|
| 79 |
]
|
| 80 |
lines.append("\t".join(fields))
|
| 81 |
-
lines.append("") #
|
| 82 |
return "\n".join(lines)
|
| 83 |
|
| 84 |
def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
| 85 |
cols = ['ID','FORM','LEMMA','UPOS','XPOS','FEATS','HEAD','DEPREL','DEPS','MISC']
|
| 86 |
rows = []
|
| 87 |
for line in conllu.splitlines():
|
| 88 |
-
if not line:
|
| 89 |
-
# empty separator row
|
| 90 |
rows.append({c: "" for c in cols})
|
| 91 |
continue
|
| 92 |
if line.startswith("#"):
|
|
@@ -103,7 +115,9 @@ def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
|
| 103 |
return pd.DataFrame(rows, columns=cols).fillna("")
|
| 104 |
|
| 105 |
|
| 106 |
-
#
|
|
|
|
|
|
|
| 107 |
|
| 108 |
def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
|
| 109 |
try:
|
|
@@ -112,16 +126,17 @@ def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1)
|
|
| 112 |
base_w, min_sp = 100, 30
|
| 113 |
spacing = max(base_w, (n*base_w + (n-1)*min_sp)/n)
|
| 114 |
width = max(800, n*spacing + 100)
|
| 115 |
-
orig_height = 500
|
| 116 |
-
crop_top = 30 # px to remove from top
|
| 117 |
-
bottom_pad = 30 # px to add at bottom
|
| 118 |
-
height = orig_height - crop_top + bottom_pad
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
colors = {
|
| 124 |
-
'root':'#
|
| 125 |
'amod':'#8e44ad','nmod':'#16a085','case':'#34495e','punct':'#7f8c8d',
|
| 126 |
'cc':'#d35400','conj':'#2c3e50','cop':'#e74c3c','mark':'#9b59b6',
|
| 127 |
'csubj':'#3498db','xcomp':'#1abc9c','ccomp':'#f39c12','advcl':'#e91e63',
|
|
@@ -130,8 +145,10 @@ def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1)
|
|
| 130 |
}
|
| 131 |
|
| 132 |
svg = [
|
| 133 |
-
f'<svg width="{width}" height="{height}"
|
| 134 |
-
'
|
|
|
|
|
|
|
| 135 |
]
|
| 136 |
for rel, c in colors.items():
|
| 137 |
svg.append(
|
|
@@ -139,98 +156,81 @@ def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1)
|
|
| 139 |
'markerUnits="userSpaceOnUse" orient="auto-start-reverse" refX="3.5" refY="2">'
|
| 140 |
f'<path d="M0,0 L4,2 L0,4Z" fill="{c}"/></marker>'
|
| 141 |
)
|
| 142 |
-
svg.append(
|
| 143 |
|
| 144 |
-
# compute x positions
|
| 145 |
xpos = {
|
| 146 |
-
int(r['ID']): 50 + (int(r['ID'])
|
| 147 |
-
for _,
|
| 148 |
}
|
| 149 |
|
| 150 |
used_spans = []
|
| 151 |
for _, r in df.iterrows():
|
| 152 |
-
if not str(r['ID']).isdigit():
|
| 153 |
-
continue
|
| 154 |
i, h = int(r['ID']), int(r['HEAD'])
|
| 155 |
rel, c = r['DEPREL'], colors.get(r['DEPREL'], '#000')
|
| 156 |
x1 = xpos[i]
|
| 157 |
if h == 0:
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
f'<rect x="{x1-15}" y="{mid-8}" width="30" height="14" '
|
| 166 |
-
f'fill="white" stroke="{c}" rx="2"/>'
|
| 167 |
-
)
|
| 168 |
-
svg.append(
|
| 169 |
-
f'<text x="{x1}" y="{mid+2}" text-anchor="middle" '
|
| 170 |
-
f'fill="{c}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>'
|
| 171 |
-
)
|
| 172 |
else:
|
| 173 |
x2 = xpos.get(h, x1)
|
| 174 |
-
span = (min(i,
|
| 175 |
-
lvl
|
| 176 |
conflict = True
|
| 177 |
while conflict:
|
| 178 |
conflict = False
|
| 179 |
-
for (es,
|
| 180 |
-
if
|
| 181 |
lvl += 1
|
| 182 |
conflict = True
|
| 183 |
break
|
| 184 |
-
used_spans.append((span,
|
| 185 |
-
dist
|
| 186 |
-
arc_h = min(40
|
| 187 |
-
midx, cty = (x1
|
| 188 |
-
|
| 189 |
svg.append(
|
| 190 |
-
f'<path d="{
|
| 191 |
f'marker-start="url(#m_{rel})"/>'
|
| 192 |
)
|
| 193 |
amx = 0.25*x1 + 0.5*midx + 0.25*x2
|
| 194 |
-
amy = 0.25*(word_y-15)
|
| 195 |
-
lw
|
| 196 |
-
svg.append(
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
svg.append(
|
| 201 |
-
f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
|
| 202 |
-
f'fill="{c}" font-family="Arial" font-size="8" font-weight="bold">{rel}</text>'
|
| 203 |
-
)
|
| 204 |
|
| 205 |
-
# draw tokens + annotations
|
| 206 |
for _, r in df.iterrows():
|
| 207 |
-
if not str(r['ID']).isdigit():
|
| 208 |
-
continue
|
| 209 |
x = xpos[int(r['ID'])]
|
| 210 |
-
svg.append(
|
| 211 |
-
|
| 212 |
-
f'font-family="Arial" font-size="13" font-weight="bold">{r["FORM"]}</text>'
|
| 213 |
-
)
|
| 214 |
ann = []
|
| 215 |
-
if r['UPOS'] and r['UPOS']
|
| 216 |
-
if r['LEMMA'] not in (
|
| 217 |
-
if r['FEATS'] and r['FEATS']
|
| 218 |
-
for f in r['FEATS'].split(
|
| 219 |
-
if
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
)
|
| 226 |
-
|
| 227 |
-
svg.append('</g></svg>')
|
| 228 |
return "".join(svg)
|
|
|
|
| 229 |
except Exception as e:
|
| 230 |
return f"<p>Error creating SVG: {e}</p>"
|
| 231 |
|
| 232 |
|
| 233 |
-
#
|
|
|
|
|
|
|
| 234 |
|
| 235 |
def process_text(text, variant):
|
| 236 |
if not text.strip():
|
|
@@ -239,33 +239,34 @@ def process_text(text, variant):
|
|
| 239 |
gr.Dropdown.update(choices=[], value=None),
|
| 240 |
[], "", pd.DataFrame()
|
| 241 |
)
|
| 242 |
-
pipe =
|
| 243 |
if pipe is None:
|
| 244 |
return (
|
| 245 |
gr.HTML.update(value="<p>Error: model not loaded</p>"),
|
| 246 |
gr.Dropdown.update(choices=[], value=None),
|
| 247 |
[], "", pd.DataFrame()
|
| 248 |
)
|
| 249 |
-
|
|
|
|
| 250 |
conllu = stanza_doc_to_conllu(doc)
|
| 251 |
-
df
|
| 252 |
|
| 253 |
sentences = []
|
| 254 |
for sent in doc.sentences:
|
| 255 |
payload = [{
|
| 256 |
-
'ID':
|
| 257 |
-
'FORM':
|
| 258 |
-
'LEMMA':
|
| 259 |
-
'UPOS':
|
| 260 |
-
'XPOS':
|
| 261 |
-
'FEATS':
|
| 262 |
-
'HEAD':
|
| 263 |
-
'DEPREL':
|
| 264 |
} for w in sent.words]
|
| 265 |
sentences.append(payload)
|
| 266 |
|
| 267 |
sent_ids = [str(i+1) for i in range(len(sentences))]
|
| 268 |
-
dd_upd
|
| 269 |
init_svg = create_single_sentence_svg(sentences[0]) if sentences else "<p>No data</p>"
|
| 270 |
|
| 271 |
return init_svg, dd_upd, sentences, conllu, df
|
|
@@ -278,38 +279,41 @@ def update_svg(selected_id, sentences):
|
|
| 278 |
return "<p>Invalid selection</p>"
|
| 279 |
|
| 280 |
|
| 281 |
-
#
|
|
|
|
|
|
|
| 282 |
|
| 283 |
def create_app():
|
| 284 |
with gr.Blocks(title="Lesbian Greek Parser") as app:
|
| 285 |
gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
|
| 286 |
-
|
| 287 |
if not loaded:
|
| 288 |
gr.Markdown(f"❌ Load error: {load_status}")
|
| 289 |
|
| 290 |
with gr.Row():
|
| 291 |
with gr.Column():
|
| 292 |
-
txt
|
| 293 |
-
mdl
|
| 294 |
-
|
|
|
|
|
|
|
| 295 |
|
| 296 |
with gr.Row():
|
| 297 |
with gr.Column():
|
| 298 |
-
svg_out
|
| 299 |
-
sentence_dd
|
| 300 |
-
|
| 301 |
|
| 302 |
with gr.Row():
|
| 303 |
with gr.Column():
|
| 304 |
conllu_out = gr.Textbox(label="CoNLL-U", lines=10, show_copy_button=True)
|
| 305 |
table_out = gr.Dataframe(label="Token Table")
|
| 306 |
|
| 307 |
-
btn.click(
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
|
| 314 |
return app
|
| 315 |
|
|
|
|
| 6 |
import traceback
|
| 7 |
from pathlib import Path
|
| 8 |
|
| 9 |
+
# -----------------------------------------------------------------------------
|
| 10 |
# 1. MODEL VARIANTS & INITIALIZATION
|
| 11 |
+
# -----------------------------------------------------------------------------
|
| 12 |
|
| 13 |
MODEL_VARIANTS = {
|
| 14 |
+
"Lesbian-only (UD_Greek-Lesbian)": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
|
| 15 |
+
"Lesbian-augmented (UD_Greek-NGUD+Lesbian)": "sbompolas/NGUD-Lesbian-Morphosyntactic-Model",
|
| 16 |
+
"Standard Modern Greek (UD_Greek-GUD)": "viv/UD_Greek-GUD"
|
| 17 |
}
|
| 18 |
|
| 19 |
+
MODELS = {}
|
| 20 |
+
|
| 21 |
+
def download_model_file(url: str, filename: str) -> bool:
|
| 22 |
try:
|
| 23 |
resp = requests.get(url, stream=True)
|
| 24 |
resp.raise_for_status()
|
|
|
|
| 27 |
f.write(chunk)
|
| 28 |
return True
|
| 29 |
except Exception as e:
|
| 30 |
+
print(f"⛔ Download failed {filename}: {e}")
|
| 31 |
return False
|
| 32 |
|
| 33 |
def initialize_models():
|
| 34 |
try:
|
| 35 |
base = Path("./models")
|
| 36 |
base.mkdir(exist_ok=True)
|
| 37 |
+
for label, repo in MODEL_VARIANTS.items():
|
| 38 |
+
out = base / label
|
| 39 |
out.mkdir(exist_ok=True)
|
| 40 |
+
|
| 41 |
+
# GUD stores its .pt files under "models/" subfolder
|
| 42 |
+
is_gud = repo.endswith("UD_Greek-GUD")
|
| 43 |
+
subdir = "models" if is_gud else ""
|
| 44 |
+
|
| 45 |
files = {
|
| 46 |
+
"tokenizer.pt": f"https://huggingface.co/{repo}/resolve/main/{subdir}/tokenizer.pt",
|
| 47 |
+
"lemmatizer.pt": f"https://huggingface.co/{repo}/resolve/main/{subdir}/lemmatizer.pt",
|
| 48 |
+
"pos.pt": f"https://huggingface.co/{repo}/resolve/main/{subdir}/pos.pt",
|
| 49 |
+
"depparse.pt": f"https://huggingface.co/{repo}/resolve/main/{subdir}/depparse.pt",
|
| 50 |
}
|
| 51 |
+
|
| 52 |
for fn, url in files.items():
|
| 53 |
tgt = out / fn
|
| 54 |
if not tgt.exists() and not download_model_file(url, str(tgt)):
|
| 55 |
+
return False, f"Failed to download {fn} for {label}"
|
| 56 |
+
|
| 57 |
cfg = {
|
| 58 |
'processors': 'tokenize,pos,lemma,depparse',
|
| 59 |
'lang': 'el',
|
|
|
|
| 62 |
'tokenize_model_path': str(out/"tokenizer.pt"),
|
| 63 |
'pos_model_path': str(out/"pos.pt"),
|
| 64 |
'lemma_model_path': str(out/"lemmatizer.pt"),
|
| 65 |
+
'depparse_model_path': str(out/"depparse.pt"),
|
| 66 |
}
|
| 67 |
+
MODELS[label] = stanza.Pipeline(**cfg)
|
| 68 |
+
|
| 69 |
return True, "Models loaded"
|
| 70 |
except Exception as e:
|
| 71 |
traceback.print_exc()
|
|
|
|
| 74 |
loaded, load_status = initialize_models()
|
| 75 |
|
| 76 |
|
| 77 |
+
# -----------------------------------------------------------------------------
|
| 78 |
+
# 2. CONLL-U OUTPUT & TOKEN TABLE
|
| 79 |
+
# -----------------------------------------------------------------------------
|
| 80 |
|
| 81 |
def stanza_doc_to_conllu(doc) -> str:
|
| 82 |
lines = []
|
|
|
|
| 85 |
lines.append(f"# text = {sent.text}")
|
| 86 |
for w in sent.words:
|
| 87 |
fields = [
|
| 88 |
+
str(w.id), w.text, w.lemma or "_", w.upos or "_",
|
|
|
|
| 89 |
w.xpos or "_", w.feats or "_",
|
| 90 |
+
str(w.head if w.head is not None else 0),
|
| 91 |
w.deprel or "_", "_", "_"
|
| 92 |
]
|
| 93 |
lines.append("\t".join(fields))
|
| 94 |
+
lines.append("") # separator
|
| 95 |
return "\n".join(lines)
|
| 96 |
|
| 97 |
def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
|
| 98 |
cols = ['ID','FORM','LEMMA','UPOS','XPOS','FEATS','HEAD','DEPREL','DEPS','MISC']
|
| 99 |
rows = []
|
| 100 |
for line in conllu.splitlines():
|
| 101 |
+
if not line.strip():
|
|
|
|
| 102 |
rows.append({c: "" for c in cols})
|
| 103 |
continue
|
| 104 |
if line.startswith("#"):
|
|
|
|
| 115 |
return pd.DataFrame(rows, columns=cols).fillna("")
|
| 116 |
|
| 117 |
|
| 118 |
+
# -----------------------------------------------------------------------------
|
| 119 |
+
# 3. SVG BUILDER (arrows at start, crop top + pad bottom)
|
| 120 |
+
# -----------------------------------------------------------------------------
|
| 121 |
|
| 122 |
def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
|
| 123 |
try:
|
|
|
|
| 126 |
base_w, min_sp = 100, 30
|
| 127 |
spacing = max(base_w, (n*base_w + (n-1)*min_sp)/n)
|
| 128 |
width = max(800, n*spacing + 100)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
orig_h = 500
|
| 131 |
+
crop_top = 30
|
| 132 |
+
pad_bot = 30
|
| 133 |
+
height = orig_h - crop_top + pad_bot
|
| 134 |
+
|
| 135 |
+
word_y = height - 120
|
| 136 |
+
feats_y = word_y + 35
|
| 137 |
|
| 138 |
colors = {
|
| 139 |
+
'root':'#000', 'nsubj':'#2980b9', 'obj':'#27ae60', 'det':'#e67e22',
|
| 140 |
'amod':'#8e44ad','nmod':'#16a085','case':'#34495e','punct':'#7f8c8d',
|
| 141 |
'cc':'#d35400','conj':'#2c3e50','cop':'#e74c3c','mark':'#9b59b6',
|
| 142 |
'csubj':'#3498db','xcomp':'#1abc9c','ccomp':'#f39c12','advcl':'#e91e63',
|
|
|
|
| 145 |
}
|
| 146 |
|
| 147 |
svg = [
|
| 148 |
+
f'<svg width="{width}" height="{height}" '
|
| 149 |
+
f'viewBox="0 {crop_top} {width} {orig_h}" '
|
| 150 |
+
'xmlns="http://www.w3.org/2000/svg" '
|
| 151 |
+
'style="background:white;border:1px solid #eee;"><defs>'
|
| 152 |
]
|
| 153 |
for rel, c in colors.items():
|
| 154 |
svg.append(
|
|
|
|
| 156 |
'markerUnits="userSpaceOnUse" orient="auto-start-reverse" refX="3.5" refY="2">'
|
| 157 |
f'<path d="M0,0 L4,2 L0,4Z" fill="{c}"/></marker>'
|
| 158 |
)
|
| 159 |
+
svg.append("</defs><g>")
|
| 160 |
|
|
|
|
| 161 |
xpos = {
|
| 162 |
+
int(r['ID']): 50 + (int(r['ID'])-1)*spacing
|
| 163 |
+
for _,r in df.iterrows() if str(r['ID']).isdigit()
|
| 164 |
}
|
| 165 |
|
| 166 |
used_spans = []
|
| 167 |
for _, r in df.iterrows():
|
| 168 |
+
if not str(r['ID']).isdigit(): continue
|
|
|
|
| 169 |
i, h = int(r['ID']), int(r['HEAD'])
|
| 170 |
rel, c = r['DEPREL'], colors.get(r['DEPREL'], '#000')
|
| 171 |
x1 = xpos[i]
|
| 172 |
if h == 0:
|
| 173 |
+
svg.append(f'<line x1="{x1}" y1="{word_y-15}" x2="{x1}" y2="50" '
|
| 174 |
+
f'stroke="{c}" stroke-width="1.5"/>')
|
| 175 |
+
mid = (word_y-15 + 50)/2
|
| 176 |
+
svg.append(f'<rect x="{x1-15}" y="{mid-8}" width="30" height="14" '
|
| 177 |
+
f'fill="white" stroke="{c}" rx="2"/>')
|
| 178 |
+
svg.append(f'<text x="{x1}" y="{mid+2}" text-anchor="middle" '
|
| 179 |
+
f'fill="{c}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
else:
|
| 181 |
x2 = xpos.get(h, x1)
|
| 182 |
+
span = (min(i,h), max(i,h))
|
| 183 |
+
lvl = 0
|
| 184 |
conflict = True
|
| 185 |
while conflict:
|
| 186 |
conflict = False
|
| 187 |
+
for (es,el), lvl_used in used_spans:
|
| 188 |
+
if lvl_used==lvl and not (span[1]<es or span[0]>el):
|
| 189 |
lvl += 1
|
| 190 |
conflict = True
|
| 191 |
break
|
| 192 |
+
used_spans.append((span,lvl))
|
| 193 |
+
dist = abs(x2-x1)
|
| 194 |
+
arc_h = min(40+dist*0.15,100)+lvl*35
|
| 195 |
+
midx, cty = (x1+x2)/2, word_y-arc_h
|
| 196 |
+
path = f'M{x1} {word_y-15} Q{midx} {cty} {x2} {word_y-15}'
|
| 197 |
svg.append(
|
| 198 |
+
f'<path d="{path}" stroke="{c}" fill="none" stroke-width="1.5" '
|
| 199 |
f'marker-start="url(#m_{rel})"/>'
|
| 200 |
)
|
| 201 |
amx = 0.25*x1 + 0.5*midx + 0.25*x2
|
| 202 |
+
amy = 0.25*(word_y-15)+0.5*cty+0.25*(word_y-15)
|
| 203 |
+
lw = len(rel)*6 + 8
|
| 204 |
+
svg.append(f'<rect x="{amx-lw/2}" y="{amy-8}" width="{lw}" height="14" '
|
| 205 |
+
f'fill="white" stroke="{c}" rx="2"/>')
|
| 206 |
+
svg.append(f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
|
| 207 |
+
f'fill="{c}" font-family="Arial" font-size="8" font-weight="bold">{rel}</text>')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
|
|
|
| 209 |
for _, r in df.iterrows():
|
| 210 |
+
if not str(r['ID']).isdigit(): continue
|
|
|
|
| 211 |
x = xpos[int(r['ID'])]
|
| 212 |
+
svg.append(f'<text x="{x}" y="{word_y}" text-anchor="middle" '
|
| 213 |
+
f'font-family="Arial" font-size="13" font-weight="bold">{r["FORM"]}</text>')
|
|
|
|
|
|
|
| 214 |
ann = []
|
| 215 |
+
if r['UPOS'] and r['UPOS']!="_": ann.append(f"upos={r['UPOS']}")
|
| 216 |
+
if r['LEMMA'] not in ("_",r["FORM"]): ann.append(f"lemma={r['LEMMA']}")
|
| 217 |
+
if r['FEATS'] and r['FEATS']!="_":
|
| 218 |
+
for f in r['FEATS'].split("|"):
|
| 219 |
+
if "=" in f: ann.append(f)
|
| 220 |
+
for i,a in enumerate(ann):
|
| 221 |
+
svg.append(f'<text x="{x}" y="{feats_y+i*12}" text-anchor="middle" '
|
| 222 |
+
f'font-family="Arial" font-size="7" fill="#666">{a}</text>')
|
| 223 |
+
|
| 224 |
+
svg.append("</g></svg>")
|
|
|
|
|
|
|
|
|
|
| 225 |
return "".join(svg)
|
| 226 |
+
|
| 227 |
except Exception as e:
|
| 228 |
return f"<p>Error creating SVG: {e}</p>"
|
| 229 |
|
| 230 |
|
| 231 |
+
# -----------------------------------------------------------------------------
|
| 232 |
+
# 4. PROCESS & DROPDOWNS
|
| 233 |
+
# -----------------------------------------------------------------------------
|
| 234 |
|
| 235 |
def process_text(text, variant):
|
| 236 |
if not text.strip():
|
|
|
|
| 239 |
gr.Dropdown.update(choices=[], value=None),
|
| 240 |
[], "", pd.DataFrame()
|
| 241 |
)
|
| 242 |
+
pipe = MODELS.get(variant)
|
| 243 |
if pipe is None:
|
| 244 |
return (
|
| 245 |
gr.HTML.update(value="<p>Error: model not loaded</p>"),
|
| 246 |
gr.Dropdown.update(choices=[], value=None),
|
| 247 |
[], "", pd.DataFrame()
|
| 248 |
)
|
| 249 |
+
|
| 250 |
+
doc = pipe(text)
|
| 251 |
conllu = stanza_doc_to_conllu(doc)
|
| 252 |
+
df = conllu_to_dataframe(conllu)
|
| 253 |
|
| 254 |
sentences = []
|
| 255 |
for sent in doc.sentences:
|
| 256 |
payload = [{
|
| 257 |
+
'ID': w.id,
|
| 258 |
+
'FORM': w.text,
|
| 259 |
+
'LEMMA': w.lemma or "_",
|
| 260 |
+
'UPOS': w.upos or "_",
|
| 261 |
+
'XPOS': w.xpos or "_",
|
| 262 |
+
'FEATS': w.feats or "_",
|
| 263 |
+
'HEAD': w.head or 0,
|
| 264 |
+
'DEPREL':w.deprel or "_"
|
| 265 |
} for w in sent.words]
|
| 266 |
sentences.append(payload)
|
| 267 |
|
| 268 |
sent_ids = [str(i+1) for i in range(len(sentences))]
|
| 269 |
+
dd_upd = update(choices=sent_ids, value=sent_ids[0] if sent_ids else None)
|
| 270 |
init_svg = create_single_sentence_svg(sentences[0]) if sentences else "<p>No data</p>"
|
| 271 |
|
| 272 |
return init_svg, dd_upd, sentences, conllu, df
|
|
|
|
| 279 |
return "<p>Invalid selection</p>"
|
| 280 |
|
| 281 |
|
| 282 |
+
# -----------------------------------------------------------------------------
|
| 283 |
+
# 5. BUILD GRADIO APP
|
| 284 |
+
# -----------------------------------------------------------------------------
|
| 285 |
|
| 286 |
def create_app():
    """Assemble the Gradio Blocks interface for the parser and return it.

    Components are created top to bottom in this order: the page title
    (followed by a load-error notice when the module-level ``loaded`` flag
    is false), an input row (text box, model-variant radio, parse button),
    a visualization row (HTML pane, sentence dropdown, a ``gr.State``
    initialised to an empty list) and an output row (CoNLL-U text box and
    token table).  Two event handlers are then wired up: the button runs
    ``process_text`` and the dropdown runs ``update_svg``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    variant_labels = list(MODEL_VARIANTS.keys())

    with gr.Blocks(title="Lesbian Greek Parser") as app:
        gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
        if not loaded:
            # Surface the initialization failure in the UI itself.
            gr.Markdown(f"❌ Load error: {load_status}")

        # --- input controls ---
        with gr.Row(), gr.Column():
            txt = gr.Textbox(
                label="Input Text",
                lines=4,
                placeholder="Εισάγετε κείμενο…",
            )
            mdl = gr.Radio(
                choices=variant_labels,
                value=variant_labels[0],
                label="Model Variant",
            )
            btn = gr.Button("Parse", variant="primary")

        # --- dependency-tree view and sentence picker ---
        with gr.Row(), gr.Column():
            svg_out = gr.HTML("<p>No visualization</p>")
            sentence_dd = gr.Dropdown(label="Choose sentence", choices=[])
            state_sents = gr.State([])

        # --- textual outputs ---
        with gr.Row(), gr.Column():
            conllu_out = gr.Textbox(
                label="CoNLL-U", lines=10, show_copy_button=True
            )
            table_out = gr.Dataframe(label="Token Table")

        # Parsing fills every output; the output order here must match the
        # order of the values returned by process_text.
        parse_outputs = [svg_out, sentence_dd, state_sents, conllu_out, table_out]
        btn.click(fn=process_text, inputs=[txt, mdl], outputs=parse_outputs)

        # Picking another sentence only redraws the SVG pane.
        sentence_dd.change(
            fn=update_svg,
            inputs=[sentence_dd, state_sents],
            outputs=svg_out,
        )

    return app
|
| 319 |
|