Update app.py
Browse files
app.py
CHANGED
|
@@ -1,321 +1,237 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import stanza
|
| 3 |
import pandas as pd
|
| 4 |
-
import sys
|
| 5 |
import traceback
|
| 6 |
-
import os
|
| 7 |
-
import tempfile
|
| 8 |
import requests
|
| 9 |
from pathlib import Path
|
| 10 |
import json
|
|
|
|
| 11 |
|
| 12 |
# Global variables to store the pipelines
|
| 13 |
LESBIAN_MODELS = {}
|
| 14 |
MODEL_VARIANTS = {
|
| 15 |
-
"Lesbian-only":
|
| 16 |
-
"Lesbian-synthetic-data":
|
| 17 |
}
|
| 18 |
|
| 19 |
-
|
| 20 |
def download_model_file(url, filename):
|
| 21 |
"""Download a model file from Hugging Face"""
|
| 22 |
try:
|
| 23 |
-
print(f"Downloading {filename}...")
|
| 24 |
response = requests.get(url, stream=True)
|
| 25 |
response.raise_for_status()
|
| 26 |
with open(filename, 'wb') as f:
|
| 27 |
for chunk in response.iter_content(chunk_size=8192):
|
| 28 |
f.write(chunk)
|
| 29 |
-
print(f"Successfully downloaded {filename}")
|
| 30 |
return True
|
| 31 |
except Exception as e:
|
| 32 |
-
print(f"
|
| 33 |
return False
|
| 34 |
|
| 35 |
-
|
| 36 |
def initialize_lesbian_greek_model():
|
| 37 |
-
"""Download and initialize both
|
| 38 |
try:
|
| 39 |
-
print("Initializing both Lesbian-only and Lesbian-synthetic-data pipelines...")
|
| 40 |
base_dir = Path("./models")
|
| 41 |
base_dir.mkdir(exist_ok=True)
|
| 42 |
for variant_name, repo in MODEL_VARIANTS.items():
|
| 43 |
-
|
| 44 |
-
out_dir = base_dir / variant_name
|
| 45 |
out_dir.mkdir(parents=True, exist_ok=True)
|
| 46 |
|
| 47 |
-
#
|
| 48 |
-
|
| 49 |
-
"tokenizer.pt":
|
| 50 |
-
"lemmatizer.pt":
|
| 51 |
-
"pos.pt":
|
| 52 |
-
"depparse.pt":
|
| 53 |
}
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
tgt = out_dir / fname
|
| 58 |
if not tgt.exists():
|
| 59 |
if not download_model_file(url, str(tgt)):
|
| 60 |
-
return False, f"Failed
|
| 61 |
|
| 62 |
-
#
|
| 63 |
config = {
|
| 64 |
'processors': 'tokenize,pos,lemma,depparse',
|
| 65 |
'lang': 'el',
|
| 66 |
'use_gpu': False,
|
| 67 |
'verbose': False,
|
| 68 |
-
'tokenize_model_path': str(out_dir
|
| 69 |
-
'pos_model_path': str(out_dir
|
| 70 |
-
'lemma_model_path': str(out_dir
|
| 71 |
-
'depparse_model_path': str(out_dir
|
| 72 |
}
|
| 73 |
-
|
| 74 |
try:
|
| 75 |
-
|
| 76 |
-
LESBIAN_MODELS[variant_name] =
|
| 77 |
-
print(f"
|
| 78 |
except Exception as e:
|
| 79 |
-
print(f"⚠️ Could not load {variant_name}: {e}")
|
| 80 |
return False, f"Pipeline init error for {variant_name}: {e}"
|
| 81 |
|
| 82 |
-
return True, "
|
| 83 |
except Exception as e:
|
| 84 |
-
print(f"Initialization failure: {e}")
|
| 85 |
traceback.print_exc()
|
| 86 |
return False, str(e)
|
| 87 |
|
| 88 |
-
|
| 89 |
def stanza_doc_to_conllu(doc) -> str:
|
| 90 |
"""Convert Stanza Document to CoNLL-U format"""
|
| 91 |
-
|
| 92 |
-
for
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
for
|
| 96 |
fields = [
|
| 97 |
-
str(
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
str(
|
| 104 |
-
|
| 105 |
"_",
|
| 106 |
"_"
|
| 107 |
]
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
return "\n".join(
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def parse_and_conllu(text: str, variant: str) -> str:
    """Run the selected model variant over ``text`` and return CoNLL-U.

    Every failure is reported as a string beginning with ``"Error"`` rather
    than as an exception.

    Args:
        text: Raw input text.
        variant: Key into ``LESBIAN_MODELS`` naming the pipeline to use.

    Returns:
        The CoNLL-U rendering of the parse, or an ``"Error..."`` message.
    """
    nlp = LESBIAN_MODELS.get(variant)
    if not nlp:
        return f"Error: model {variant} not loaded."
    if text.strip() == "":
        return "Error: Please enter some text to parse."

    try:
        conllu = stanza_doc_to_conllu(nlp(text))
    except Exception as e:
        return f"Error processing text: {e}"
    return conllu
|
| 127 |
-
|
| 128 |
|
| 129 |
def conllu_to_dataframe(conllu_text: str) -> pd.DataFrame:
|
| 130 |
"""Convert CoNLL-U text to pandas DataFrame"""
|
| 131 |
-
if conllu_text.startswith("Error"):
|
| 132 |
return pd.DataFrame()
|
| 133 |
-
|
| 134 |
-
data = []
|
| 135 |
for line in conllu_text.splitlines():
|
| 136 |
if not line or line.startswith("#"):
|
| 137 |
continue
|
| 138 |
parts = line.split("\t")
|
| 139 |
if len(parts) >= 10:
|
| 140 |
-
|
| 141 |
'ID': parts[0], 'FORM': parts[1], 'LEMMA': parts[2],
|
| 142 |
'UPOS': parts[3], 'XPOS': parts[4], 'FEATS': parts[5],
|
| 143 |
'HEAD': parts[6], 'DEPREL': parts[7], 'DEPS': parts[8], 'MISC': parts[9]
|
| 144 |
})
|
| 145 |
-
return pd.DataFrame(
|
| 146 |
-
|
| 147 |
|
| 148 |
def create_dependency_visualization(df: pd.DataFrame) -> str:
|
| 149 |
"""Simple text-based dependency display"""
|
| 150 |
if df.empty:
|
| 151 |
return "No data to visualize"
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
if head != '0':
|
| 157 |
try:
|
| 158 |
-
|
| 159 |
-
head_word = df.iloc[head_idx]['FORM']
|
| 160 |
-
lines.append(f"{word} ({pos}) --{deprel}--> {head_word}")
|
| 161 |
except:
|
| 162 |
-
|
|
|
|
| 163 |
else:
|
| 164 |
-
|
| 165 |
-
return "\n".join(
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def create_dependency_tree_svg(df: pd.DataFrame) -> str:
    """SVG-based dependency tree with simple slider navigation.

    Splits the token table into sentences, renders each sentence with
    ``create_single_sentence_svg`` and wraps the results in an HTML fragment
    with Prev/Next buttons that show one slide at a time.

    Args:
        df: Token table with at least an ``ID`` column; each row is passed on
            to the per-sentence renderer as a dict.

    Returns:
        An HTML string (``<div>`` + inline ``<script>``), or a short ``<p>``
        message when ``df`` is empty.

    Raises:
        ValueError: If an ``ID`` value cannot be converted with ``int()``.
    """
    if df.empty:
        return "<p>No data to visualize</p>"

    # group into sentences
    # A row whose ID is 1 starts a new sentence (unless it is the very first row).
    sentences, current = [], []
    for idx, row in df.iterrows():
        wid = int(row['ID'])
        if wid == 1 and current:
            sentences.append(current)
            current = []
        current.append(row.to_dict())
    if current:
        # flush the last sentence
        sentences.append(current)

    # render each as an SVG slide
    slides = []
    for sent in sentences:
        svg = create_single_sentence_svg(sent)
        slides.append(svg)

    # wrap slides in divs with show/hide logic
    # Only the first slide starts visible.
    slide_divs = "\n".join(
        f'<div class="slide" style="display:{"block" if i==0 else "none"}">{svg}</div>'
        for i, svg in enumerate(slides)
    )

    # NOTE(review): the element ids (slider, prevBtn, nextBtn) are fixed, so two
    # fragments on the same page would collide. Whether the embedding component
    # executes this inline <script> is not visible from here -- confirm.
    # Doubled braces are literal braces inside the f-string.
    return f"""
    <div id="slider">
    {slide_divs}
    <button id="prevBtn">← Prev</button>
    <button id="nextBtn">Next →</button>
    </div>
    <script>
    (function() {{
    const slides = document.querySelectorAll('#slider .slide');
    let idx = 0;
    document.getElementById('prevBtn').onclick = () => {{
    slides[idx].style.display = 'none';
    idx = (idx - 1 + slides.length) % slides.length;
    slides[idx].style.display = 'block';
    }};
    document.getElementById('nextBtn').onclick = () => {{
    slides[idx].style.display = 'none';
    idx = (idx + 1) % slides.length;
    slides[idx].style.display = 'block';
    }};
    }})();
    </script>
    """
|
| 219 |
-
|
| 220 |
|
| 221 |
def create_single_sentence_svg(sentence_data):
|
| 222 |
-
"""
|
| 223 |
-
#
|
| 224 |
-
|
| 225 |
-
# ...
|
| 226 |
-
return "<svg><!-- your detailed SVG here --></svg>"
|
| 227 |
-
|
| 228 |
|
| 229 |
def process_text(text, variant):
|
| 230 |
-
"""
|
| 231 |
if not text.strip():
|
| 232 |
-
|
| 233 |
-
return "Please enter
|
| 234 |
|
| 235 |
-
|
| 236 |
-
if
|
| 237 |
-
return
|
| 238 |
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
|
|
|
| 242 |
|
|
|
|
|
|
|
| 243 |
text_viz = create_dependency_visualization(df)
|
| 244 |
-
svg_viz = create_dependency_tree_svg(df)
|
| 245 |
-
return conllu, df, text_viz, svg_viz
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
def create_gradio_app():
|
| 255 |
-
with gr.Blocks(title="Lesbian Greek
|
| 256 |
-
gr.Markdown(""
|
| 257 |
-
# Lesbian Greek Morphosyntactic Parser
|
| 258 |
-
|
| 259 |
-
Pick a model variant, enter your text, and see tokenization, POS, lemmas, and dependencies.
|
| 260 |
-
""")
|
| 261 |
|
| 262 |
-
# show status
|
| 263 |
if loaded:
|
| 264 |
-
gr.Markdown(f"✅
|
| 265 |
else:
|
| 266 |
-
gr.Markdown(f"❌ Loading error: {
|
| 267 |
-
|
| 268 |
-
with gr.Row():
|
| 269 |
-
with gr.Column():
|
| 270 |
-
text_input = gr.Textbox(
|
| 271 |
-
label="Lesbian Greek Text Input",
|
| 272 |
-
placeholder="Enter Lesbian Greek here...",
|
| 273 |
-
lines=4
|
| 274 |
-
)
|
| 275 |
-
parse_button = gr.Button("Parse Text", variant="primary")
|
| 276 |
-
|
| 277 |
-
with gr.Column():
|
| 278 |
-
model_selector = gr.Radio(
|
| 279 |
-
choices=list(MODEL_VARIANTS.keys()),
|
| 280 |
-
value="Lesbian-only",
|
| 281 |
-
label="Choose Variant"
|
| 282 |
-
)
|
| 283 |
-
|
| 284 |
-
with gr.Row():
|
| 285 |
-
with gr.Column():
|
| 286 |
-
gr.Markdown("### Dependency Tree")
|
| 287 |
-
dependency_tree_viz = gr.HTML("<p>Parse to see tree</p>")
|
| 288 |
-
|
| 289 |
-
with gr.Row():
|
| 290 |
-
with gr.Column():
|
| 291 |
-
gr.Markdown("### CoNLL-U Output")
|
| 292 |
-
conllu_output = gr.Textbox(lines=10, show_copy_button=True)
|
| 293 |
|
| 294 |
with gr.Row():
|
| 295 |
with gr.Column():
|
| 296 |
-
gr.
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
with gr.Row():
|
| 300 |
with gr.Column():
|
| 301 |
-
gr.
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
return app
|
| 317 |
|
| 318 |
-
|
| 319 |
if __name__ == "__main__":
|
| 320 |
app = create_gradio_app()
|
| 321 |
app.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import stanza
|
| 3 |
import pandas as pd
|
|
|
|
| 4 |
import traceback
|
|
|
|
|
|
|
| 5 |
import requests
|
| 6 |
from pathlib import Path
|
| 7 |
import json
|
| 8 |
+
import os
|
| 9 |
|
| 10 |
# Global variables to store the pipelines

# Variant name -> loaded stanza.Pipeline; filled in by
# initialize_lesbian_greek_model().
LESBIAN_MODELS = {}
# Variant name (also offered in the UI radio selector) -> Hugging Face repo id
# that the variant's model files are downloaded from.
MODEL_VARIANTS = {
    "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
    "Lesbian-synthetic-data": "sbompolas/NGUD-Lesbian-Morphosyntactic-Model"
}
|
| 16 |
|
|
|
|
| 17 |
def download_model_file(url, filename):
    """Download a model file from Hugging Face.

    The body is streamed into a sibling ``<filename>.part`` file that is
    renamed onto ``filename`` only once the transfer has finished. A failed or
    interrupted download therefore never leaves a truncated file at
    ``filename`` that a later "already exists" check would mistake for a
    complete model.

    Args:
        url: Address of the file to fetch.
        filename: Destination path on disk.

    Returns:
        True when the file was fully downloaded, False on any error (the
        error is printed).
    """
    target = Path(filename)
    partial = target.with_name(target.name + ".part")
    try:
        # (connect, read) timeouts: the read timeout applies between chunks,
        # not to the whole transfer, so large files are still fine.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the finished file in one step.
        partial.replace(target)
        return True
    except Exception as e:
        print(f"Download failed {filename}: {e}")
        try:
            partial.unlink()
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean up.
            pass
        return False
|
| 29 |
|
|
|
|
| 30 |
def initialize_lesbian_greek_model():
    """Fetch the model files of every variant and build its Stanza pipeline.

    For each entry of ``MODEL_VARIANTS`` the four ``.pt`` files are downloaded
    into ``./models/<variant>/`` unless they already exist, then a CPU
    pipeline is created and stored in ``LESBIAN_MODELS`` under the variant
    name.

    Returns:
        ``(True, "Models loaded")`` when every variant was initialised,
        otherwise ``(False, message)`` describing the first failure.
    """
    try:
        models_root = Path("./models")
        models_root.mkdir(exist_ok=True)

        for variant_name, repo in MODEL_VARIANTS.items():
            variant_dir = models_root / variant_name
            variant_dir.mkdir(parents=True, exist_ok=True)

            # Download whichever of the four model files is not on disk yet.
            for model_file in ("tokenizer.pt", "lemmatizer.pt", "pos.pt", "depparse.pt"):
                local_path = variant_dir / model_file
                if local_path.exists():
                    continue
                remote = f"https://huggingface.co/{repo}/resolve/main/{model_file}"
                if not download_model_file(remote, str(local_path)):
                    return False, f"Failed download {model_file} for {variant_name}"

            # Build the pipeline from the local files only.
            try:
                LESBIAN_MODELS[variant_name] = stanza.Pipeline(
                    processors='tokenize,pos,lemma,depparse',
                    lang='el',
                    use_gpu=False,
                    verbose=False,
                    tokenize_model_path=str(variant_dir / "tokenizer.pt"),
                    pos_model_path=str(variant_dir / "pos.pt"),
                    lemma_model_path=str(variant_dir / "lemmatizer.pt"),
                    depparse_model_path=str(variant_dir / "depparse.pt"),
                )
                print(f"Loaded {variant_name}")
            except Exception as e:
                return False, f"Pipeline init error for {variant_name}: {e}"

        return True, "Models loaded"
    except Exception as e:
        traceback.print_exc()
        return False, str(e)
|
| 75 |
|
|
|
|
| 76 |
def stanza_doc_to_conllu(doc) -> str:
    """Convert Stanza Document to CoNLL-U format.

    Each sentence becomes a ``# sent_id`` comment, a ``# text`` comment, one
    tab-separated ten-column line per word, and a terminating empty line.
    Missing annotations are written as ``_``; a missing head as ``0``. The
    DEPS and MISC columns are always ``_``.

    Args:
        doc: Object exposing ``sentences``; each sentence exposes ``text`` and
            ``words``, and each word ``id``, ``text``, ``lemma``, ``upos``,
            ``xpos``, ``feats``, ``head`` and ``deprel``.

    Returns:
        The CoNLL-U text (empty string for a document without sentences).
    """
    lines = []
    for sid, sentence in enumerate(doc.sentences, start=1):
        lines.append(f"# sent_id = {sid}")
        # A comment must stay on one physical line. A sentence that spans a
        # line break in the input would otherwise spill its tail onto a line
        # that is neither a comment nor a token row.
        flat_text = " ".join(sentence.text.splitlines())
        lines.append(f"# text = {flat_text}")
        for w in sentence.words:
            fields = [
                str(w.id),
                w.text,
                w.lemma or "_",
                w.upos or "_",
                w.xpos or "_",
                w.feats or "_",
                str(w.head) if w.head is not None else "0",
                w.deprel or "_",
                "_",
                "_"
            ]
            lines.append("\t".join(fields))
        lines.append("")
    return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
def conllu_to_dataframe(conllu_text: str) -> pd.DataFrame:
    """Convert CoNLL-U text to pandas DataFrame.

    Comment lines (starting with ``#``), empty lines and lines with fewer than
    ten tab-separated fields are ignored. All values are kept as strings.

    Args:
        conllu_text: CoNLL-U text, or an ``"Error..."`` message from an
            earlier step.

    Returns:
        A frame with the columns ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD,
        DEPREL, DEPS, MISC and one row per token line. When the input is
        empty, is an error message, or has no token lines, the frame has the
        same columns and zero rows, so the schema does not depend on the
        input.
    """
    columns = ['ID', 'FORM', 'LEMMA', 'UPOS', 'XPOS', 'FEATS',
               'HEAD', 'DEPREL', 'DEPS', 'MISC']
    if not conllu_text or conllu_text.startswith("Error"):
        return pd.DataFrame(columns=columns)

    rows = []
    for line in conllu_text.splitlines():
        if not line or line.startswith("#"):
            continue
        parts = line.split("\t")
        if len(parts) >= 10:
            # zip stops at the tenth column; extra fields are dropped
            rows.append(dict(zip(columns, parts)))
    return pd.DataFrame(rows, columns=columns)
|
|
|
|
| 115 |
|
| 116 |
def create_dependency_visualization(df: pd.DataFrame) -> str:
    """Simple text-based dependency display.

    Produces one ``FORM (UPOS) --DEPREL--> HEADFORM`` line per token, with
    ``ROOT`` for tokens whose head is 0 and ``[ERROR]`` when the head cannot
    be found.

    HEAD values are relative to the token's own sentence, so heads are looked
    up by token ID inside that sentence and not by row position in the whole
    table (which is only correct for the first sentence).

    Args:
        df: Token table with ``ID``, ``FORM``, ``UPOS``, ``HEAD`` and
            ``DEPREL`` columns, sentences stored one after another.

    Returns:
        The multi-line listing, or ``"No data to visualize"`` for an empty
        table.
    """
    if df.empty:
        return "No data to visualize"

    # One {token id -> row} mapping per sentence. IDs restart in every
    # sentence, so seeing an ID that the current sentence already holds means
    # a new sentence has begun.
    sentences = []
    for _, row in df.iterrows():
        token_id = str(row['ID'])
        if not sentences or token_id in sentences[-1]:
            sentences.append({})
        sentences[-1][token_id] = row

    viz = ["Dependency Parse Visualization:", "-"*40]
    for tokens in sentences:
        for row in tokens.values():
            w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], str(row['HEAD'])
            if h == '0':
                viz.append(f"{w} ({p}) --{d}--> ROOT")
            elif h in tokens:
                viz.append(f"{w} ({p}) --{d}--> {tokens[h]['FORM']}")
            else:
                # head id is malformed or not part of this sentence
                viz.append(f"{w} ({p}) --{d}--> [ERROR]")
    return "\n".join(viz)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
def create_single_sentence_svg(sentence_data):
    """Render one parsed sentence as an inline SVG dependency diagram.

    Replaces the earlier placeholder, which returned an empty ``<svg>`` for
    every input. Tokens are laid out left to right; each dependency is drawn
    as an arc from the head to the dependent (higher for longer spans) with
    its relation label, and a root token gets a vertical line from the top.
    All text taken from the tokens is HTML-escaped.

    Args:
        sentence_data: Sequence of per-token dicts. The keys read are ``ID``,
            ``FORM``, ``UPOS``, ``HEAD`` and ``DEPREL``; a missing ``HEAD`` is
            treated as 0 (root) and other missing keys as ``"_"``.

    Returns:
        An ``<svg>...</svg>`` string; a comment-only SVG when there are no
        tokens.
    """
    from html import escape  # local import: the file does not import html

    if not sentence_data:
        return "<svg><!-- no tokens --></svg>"

    col_w, pad = 110, 20   # horizontal room per token / outer margin
    level_h = 28           # arc height gained per token of distance
    top = 30               # y of the upper end of a root line

    # token id -> left-to-right slot
    slot = {str(tok.get('ID')): i for i, tok in enumerate(sentence_data)}

    def center_x(i):
        return pad + i * col_w + col_w // 2

    # (dependent slot, head slot or None for root, relation label)
    arcs = []
    for i, tok in enumerate(sentence_data):
        head = str(tok.get('HEAD') or 0)
        label = str(tok.get('DEPREL', '_'))
        if head == '0':
            arcs.append((i, None, label))
        elif head in slot and slot[head] != i:
            arcs.append((i, slot[head], label))
        # an unknown or self-referential head cannot be drawn and is skipped

    widest = max((abs(h - d) for d, h, _ in arcs if h is not None), default=0)
    baseline = top + level_h * (widest + 1)   # y where arcs meet the tokens
    width = 2 * pad + col_w * len(sentence_data)
    height = baseline + 50

    parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" '
        f'viewBox="0 0 {width} {height}" font-family="sans-serif">'
    ]
    for d, h, label in arcs:
        dx = center_x(d)
        if h is None:
            parts.append(
                f'<line x1="{dx}" y1="{top}" x2="{dx}" y2="{baseline}" stroke="#444"/>'
            )
            label_x, label_y = dx, top - 6
        else:
            hx = center_x(h)
            rise = level_h * abs(h - d)
            mid = (dx + hx) // 2
            # The control point sits at twice the rise so the curve peaks at `rise`.
            parts.append(
                f'<path d="M {hx} {baseline} Q {mid} {baseline - 2 * rise} {dx} {baseline}" '
                f'fill="none" stroke="#444"/>'
            )
            label_x, label_y = mid, baseline - rise - 4
        # arrow head on the dependent's end
        parts.append(
            f'<polygon points="{dx - 4},{baseline - 8} {dx + 4},{baseline - 8} '
            f'{dx},{baseline}" fill="#444"/>'
        )
        parts.append(
            f'<text x="{label_x}" y="{label_y}" text-anchor="middle" font-size="11" '
            f'fill="#0066cc">{escape(label)}</text>'
        )

    for i, tok in enumerate(sentence_data):
        x = center_x(i)
        form = escape(str(tok.get('FORM', '_')))
        upos = escape(str(tok.get('UPOS', '_')))
        parts.append(
            f'<text x="{x}" y="{baseline + 20}" text-anchor="middle" font-size="14">{form}</text>'
        )
        parts.append(
            f'<text x="{x}" y="{baseline + 38}" text-anchor="middle" font-size="11" '
            f'fill="#666666">{upos}</text>'
        )

    parts.append('</svg>')
    return "\n".join(parts)
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
def process_text(text, variant):
    """Parse the text, return all outputs including sentence list and initial SVG.

    The return value always has six slots, in the order the Gradio outputs
    are wired: CoNLL-U text (or a message), token DataFrame, text dependency
    listing, sentence-dropdown update, per-sentence token data, and the SVG
    for the first sentence (or a short HTML message).

    The dropdown slot is a ``gr.update(choices=..., value=...)``. Returning a
    bare list there is applied by Gradio as the component *value*, which
    leaves the choices empty.

    Args:
        text: Raw input text; ``None`` or blank yields the "enter text" result.
        variant: Key into ``LESBIAN_MODELS`` selecting the pipeline.

    Returns:
        The six-element tuple described above. Failures are reported in the
        first and last slots instead of raising.
    """
    def failure(message, html):
        # Same six-slot shape as the success path, with the selector emptied.
        return message, pd.DataFrame(), "", gr.update(choices=[], value=None), [], html

    if not text or not text.strip():
        return failure("Please enter text.", "<p>No data</p>")

    pipe = LESBIAN_MODELS.get(variant)
    if not pipe:
        return failure(f"Error: model {variant} not loaded.", "<p>Error</p>")

    try:
        doc = pipe(text)
    except Exception as e:
        return failure(f"Error parsing: {e}", "<p>Error</p>")

    conllu = stanza_doc_to_conllu(doc)
    df = conllu_to_dataframe(conllu)
    text_viz = create_dependency_visualization(df)

    # build per-sentence data
    sentences = [
        [
            {
                'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
                'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
                'FEATS': w.feats or "_", 'HEAD': w.head or 0, 'DEPREL': w.deprel or "_"
            }
            for w in sent.words
        ]
        for sent in doc.sentences
    ]

    # dropdown choices and initial svg
    choices = [str(i) for i in range(1, len(sentences) + 1)]
    init_svg = create_single_sentence_svg(sentences[0]) if sentences else "<p>No sentences</p>"
    selector = gr.update(choices=choices, value=choices[0] if choices else None)

    return conllu, df, text_viz, selector, sentences, init_svg
|
| 174 |
|
| 175 |
+
def update_svg(sel, sentences):
    """Return SVG for the selected sentence id.

    Args:
        sel: 1-based sentence number, as a string or number; ``None`` when
            nothing is selected.
        sentences: List of per-sentence token data to pick from.

    Returns:
        The SVG for the chosen sentence, or ``"<p>Invalid selection</p>"``
        when ``sel`` is not a number or is out of range. Errors raised while
        rendering are no longer swallowed and propagate to the caller.
    """
    try:
        idx = int(sel) - 1
    except (TypeError, ValueError):
        # None (nothing selected) or a non-numeric value
        return "<p>Invalid selection</p>"
    if not sentences or not 0 <= idx < len(sentences):
        return "<p>Invalid selection</p>"
    return create_single_sentence_svg(sentences[idx])
|
| 184 |
|
| 185 |
+
# initialize at startup
# Runs when the module is imported, before the UI is built.
# `loaded` and `status` are module globals that create_gradio_app() reads to
# show whether loading succeeded.
loaded, status = initialize_lesbian_greek_model()
print(f"Models loaded={loaded}, status={status}")
|
| 188 |
|
| 189 |
def create_gradio_app():
    """Build the Gradio Blocks interface and wire its events.

    Reads the module-level ``loaded`` / ``status`` values to show a status
    line, and ``MODEL_VARIANTS`` for the variant selector.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    with gr.Blocks(title="Lesbian Greek Parser") as app:
        gr.Markdown("# Lesbian Greek Morphosyntactic Parser")

        # Status banner reflecting the outcome of start-up initialisation.
        if loaded:
            gr.Markdown(f"✅ Loaded variants: {', '.join(MODEL_VARIANTS.keys())}")
        else:
            gr.Markdown(f"❌ Loading error: {status}")

        # Input row: text + parse button on the left, variant choice on the right.
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Text", lines=4,
                                        placeholder="Εισάγετε κείμενο...")
                parse_btn = gr.Button("Parse")
            with gr.Column():
                model_sel = gr.Radio(choices=list(MODEL_VARIANTS.keys()),
                                     value="Lesbian-only", label="Model Variant")

        # sentence selector & state
        # The state carries the per-sentence token data from process_text to
        # update_svg between events.
        sentence_dropdown = gr.Dropdown(label="Sentence", choices=[])
        sentences_state = gr.State([])

        # outputs
        conllu_out = gr.Textbox(label="CoNLL-U", lines=10, show_copy_button=True)
        table_out = gr.Dataframe(label="Tokens")
        text_viz_out = gr.Textbox(label="Dependencies", lines=8, show_copy_button=True)
        svg_out = gr.HTML("<p>No data</p>")

        # wire up parse event
        # The order of `outputs` must match the order of process_text's
        # six return values.
        parse_btn.click(
            fn=process_text,
            inputs=[text_input, model_sel],
            outputs=[
                conllu_out, table_out, text_viz_out,
                sentence_dropdown, sentences_state, svg_out
            ]
        )
        # on sentence change
        # Re-render the SVG for the newly selected sentence.
        sentence_dropdown.change(
            fn=update_svg,
            inputs=[sentence_dropdown, sentences_state],
            outputs=svg_out
        )

    return app
|
| 234 |
|
|
|
|
| 235 |
# Build and launch the UI only when the file is executed as a script.
if __name__ == "__main__":
    app = create_gradio_app()
    app.launch()
|