Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,9 @@ import numpy as np
|
|
| 7 |
import pandas as pd
|
| 8 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
MODEL_ID = "Setur/BRAGD"
|
| 11 |
TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv" # must match model labels
|
| 12 |
LABELS_FILEPATH = "tag_labels.json" # add to repo root (FO+EN labels)
|
|
@@ -28,18 +31,12 @@ GROUP_ORDER = ["subcategory","gender","number","case","article","proper","degree
|
|
| 28 |
# You said Subcategory B doesn't exist and will be deleted from the CSV:
|
| 29 |
HIDE_CODES = {"subcategory": {"B"}}
|
| 30 |
|
| 31 |
-
GROUP_TITLES = {
|
| 32 |
-
"en": {"subcategory":"Subcategory","gender":"Gender","number":"Number","case":"Case","article":"Article suffix","proper":"Proper noun",
|
| 33 |
-
"degree":"Degree","declension":"Declension","mood":"Mood","voice":"Voice","tense":"Tense","person":"Person","definiteness":"Definiteness"},
|
| 34 |
-
"fo": {"subcategory":"Undirflokkur","gender":"Kyn","number":"Tal","case":"Fall","article":"Bundið eftirlið","proper":"Sernavn",
|
| 35 |
-
"degree":"Stig","declension":"Bending","mood":"Háttur","voice":"Søgn","tense":"Tíð","person":"Persónur","definiteness":"Bundni/óbundni"},
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
UI = {
|
| 39 |
-
"fo": {"w":"Orð","t":"Mark","s":"Vís sum","m":"Merking"
|
| 40 |
-
"en": {"w":"Word","t":"Tag","s":"Show as","m":"Meaning"
|
| 41 |
}
|
| 42 |
|
|
|
|
| 43 |
CSS = """
|
| 44 |
:root{
|
| 45 |
--primary-500:#89AFA9; --primary-600:#6F9992; --primary-700:#5B7F79;
|
|
@@ -92,7 +89,9 @@ def process_tag_features(tag_to_features: dict, intervals):
|
|
| 92 |
wt_masks = {wt:[a for a in arrs if a[wt]==1] for wt in range(15)}
|
| 93 |
out = {}
|
| 94 |
for wt,labels in wt_masks.items():
|
| 95 |
-
if not labels:
|
|
|
|
|
|
|
| 96 |
sum_labels = np.sum(np.array(labels), axis=0)
|
| 97 |
out[wt] = [iv for iv in intervals if np.sum(sum_labels[iv[0]:iv[1]+1]) != 0]
|
| 98 |
return out
|
|
@@ -118,7 +117,9 @@ def predict_vectors(logits, attention_mask, begin_tokens, dict_intervals, vec_le
|
|
| 118 |
vectors.append(vec)
|
| 119 |
return vectors
|
| 120 |
|
|
|
|
| 121 |
# Load labels (extracted from your XLSX)
|
|
|
|
| 122 |
with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
|
| 123 |
LABELS = json.load(f)
|
| 124 |
|
|
@@ -130,10 +131,14 @@ def label_for(lang: str, group: str, wc: str, code: str) -> str:
|
|
| 130 |
return by_wc[wc][group][code]
|
| 131 |
return glob.get(group, {}).get(code, "")
|
| 132 |
|
|
|
|
| 133 |
# Load CSV mappings (authoritative)
|
|
|
|
| 134 |
tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)
|
| 135 |
|
|
|
|
| 136 |
# Load model
|
|
|
|
| 137 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 138 |
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 139 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -170,38 +175,65 @@ def group_code(vec: torch.Tensor, group: str) -> str:
|
|
| 170 |
return code
|
| 171 |
return ""
|
| 172 |
|
| 173 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
wc = wc_code(vec)
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
for g in GROUP_ORDER:
|
| 180 |
c = group_code(vec, g)
|
| 181 |
if not c:
|
| 182 |
continue
|
| 183 |
-
lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
|
| 184 |
-
parts.append(f"{c} – {lbl}" if lbl else c)
|
| 185 |
-
return "; ".join(parts)
|
| 186 |
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
lang = "fo" if lang=="fo" else "en"
|
| 189 |
wc = wc_code(vec)
|
| 190 |
-
|
| 191 |
-
raw = vector_to_tag(vec)
|
| 192 |
|
| 193 |
-
|
| 194 |
-
if
|
| 195 |
-
return "Fyriseting" if lang=="fo" else "Preposition"
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
|
| 204 |
-
return
|
| 205 |
|
| 206 |
def compute_codes_by_wc():
|
| 207 |
codes = defaultdict(lambda: defaultdict(set)) # wc -> group -> set(code)
|
|
@@ -229,8 +261,15 @@ def compute_codes_by_wc():
|
|
| 229 |
CODES_BY_WC = compute_codes_by_wc()
|
| 230 |
|
| 231 |
def build_legend(lang: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
lang = "fo" if lang=="fo" else "en"
|
| 233 |
-
|
|
|
|
|
|
|
| 234 |
for wc in sorted(CODES_BY_WC.keys()):
|
| 235 |
wcl = label_for(lang, "word_class", wc, wc) or ""
|
| 236 |
lines.append(f"#### {wc} — {wcl}" if wcl else f"#### {wc}")
|
|
@@ -239,12 +278,48 @@ def build_legend(lang: str) -> str:
|
|
| 239 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 240 |
if not cs:
|
| 241 |
continue
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
for c in cs:
|
| 244 |
lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
|
| 245 |
lines.append(f"- `{c}` — {lbl}" if lbl else f"- `{c}`")
|
| 246 |
lines.append("")
|
|
|
|
| 247 |
lines.append("")
|
|
|
|
| 248 |
return "\n".join(lines).strip()
|
| 249 |
|
| 250 |
def run_model(sentence: str):
|
|
@@ -255,8 +330,16 @@ def run_model(sentence: str):
|
|
| 255 |
if not tokens:
|
| 256 |
return []
|
| 257 |
|
| 258 |
-
enc = tokenizer(
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
input_ids = enc["input_ids"].to(device)
|
| 262 |
attention_mask = enc["attention_mask"].to(device)
|
|
@@ -265,9 +348,12 @@ def run_model(sentence: str):
|
|
| 265 |
begin = []
|
| 266 |
last = None
|
| 267 |
for wid in word_ids:
|
| 268 |
-
if wid is None:
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
| 271 |
last = wid
|
| 272 |
|
| 273 |
with torch.no_grad():
|
|
@@ -288,18 +374,27 @@ def run_model(sentence: str):
|
|
| 288 |
vec_i += 1
|
| 289 |
return rows
|
| 290 |
|
| 291 |
-
def
|
| 292 |
lang = "fo" if lang=="fo" else "en"
|
| 293 |
-
cols = [UI[lang]["w"], UI[lang]["t"], UI[lang]["s"]
|
| 294 |
if not rows_state:
|
| 295 |
-
return pd.DataFrame(columns=cols), build_legend(lang)
|
| 296 |
|
| 297 |
-
|
|
|
|
| 298 |
for r in rows_state:
|
| 299 |
vec = torch.tensor(r["vec"])
|
| 300 |
-
|
| 301 |
-
|
|
|
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
theme = gr.themes.Soft()
|
| 304 |
|
| 305 |
with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
@@ -309,25 +404,29 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
| 309 |
btn = gr.Button("Marka / Tag", variant="primary")
|
| 310 |
|
| 311 |
state = gr.State([])
|
|
|
|
| 312 |
out_df = gr.Dataframe(wrap=True, interactive=False, label="Úrslit / Results")
|
| 313 |
|
| 314 |
# Under results + can be changed AFTER tagging (no rerun; just re-render)
|
| 315 |
lang = gr.Dropdown(choices=[("Føroyskt","fo"), ("English","en")], value="fo", label="Mál / Language")
|
| 316 |
|
|
|
|
|
|
|
|
|
|
| 317 |
with gr.Accordion("Markingaryvirlit / Legend", open=False):
|
| 318 |
legend_md = gr.Markdown(build_legend("fo"))
|
| 319 |
|
| 320 |
def on_tag(sentence, lang_choice):
|
| 321 |
rows = run_model(sentence)
|
| 322 |
-
|
| 323 |
-
return rows,
|
| 324 |
|
| 325 |
def on_lang(rows, lang_choice):
|
| 326 |
-
|
| 327 |
-
return
|
| 328 |
|
| 329 |
-
btn.click(on_tag, inputs=[inp, lang], outputs=[state, out_df, legend_md])
|
| 330 |
-
lang.change(on_lang, inputs=[state, lang], outputs=[out_df, legend_md])
|
| 331 |
|
| 332 |
if __name__ == "__main__":
|
| 333 |
demo.launch()
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
| 9 |
|
| 10 |
+
# ----------------------------
# Config
# ----------------------------
MODEL_ID = "Setur/BRAGD"  # passed to AutoTokenizer/AutoModel .from_pretrained below
TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv"  # must match model labels
LABELS_FILEPATH = "tag_labels.json"  # add to repo root (FO+EN labels)
|
|
|
|
| 31 |
# You said Subcategory B doesn't exist and will be deleted from the CSV:
|
| 32 |
HIDE_CODES = {"subcategory": {"B"}}
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Column-header strings per UI language: w = word, t = tag, s = "show as", m = meaning.
UI = {
    "fo": {"w":"Orð", "t":"Mark", "s":"Vís sum", "m":"Merking"},
    "en": {"w":"Word","t":"Tag", "s":"Show as", "m":"Meaning"},
}
|
| 38 |
|
| 39 |
+
# Theme color: #89AFA9 (+ close shades)
|
| 40 |
CSS = """
|
| 41 |
:root{
|
| 42 |
--primary-500:#89AFA9; --primary-600:#6F9992; --primary-700:#5B7F79;
|
|
|
|
| 89 |
wt_masks = {wt:[a for a in arrs if a[wt]==1] for wt in range(15)}
|
| 90 |
out = {}
|
| 91 |
for wt,labels in wt_masks.items():
|
| 92 |
+
if not labels:
|
| 93 |
+
out[wt]=[]
|
| 94 |
+
continue
|
| 95 |
sum_labels = np.sum(np.array(labels), axis=0)
|
| 96 |
out[wt] = [iv for iv in intervals if np.sum(sum_labels[iv[0]:iv[1]+1]) != 0]
|
| 97 |
return out
|
|
|
|
| 117 |
vectors.append(vec)
|
| 118 |
return vectors
|
| 119 |
|
| 120 |
+
# ----------------------------
|
| 121 |
# Load labels (extracted from your XLSX)
|
| 122 |
+
# ----------------------------
|
| 123 |
with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
|
| 124 |
LABELS = json.load(f)
|
| 125 |
|
|
|
|
| 131 |
return by_wc[wc][group][code]
|
| 132 |
return glob.get(group, {}).get(code, "")
|
| 133 |
|
| 134 |
+
# ----------------------------
|
| 135 |
# Load CSV mappings (authoritative)
|
| 136 |
+
# ----------------------------
|
| 137 |
tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)
|
| 138 |
|
| 139 |
+
# ----------------------------
|
| 140 |
# Load model
|
| 141 |
+
# ----------------------------
|
| 142 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 143 |
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 144 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 175 |
return code
|
| 176 |
return ""
|
| 177 |
|
| 178 |
+
def clean_label(s: str) -> str:
    """Normalize a human-readable label.

    Trims the string (treating None as empty), collapses internal runs of
    whitespace to single spaces, and strips stray punctuation/hyphens from
    both ends.
    """
    text = (s or "").strip()
    # After strip(), splitting on whitespace and re-joining is equivalent to
    # re.sub(r"\s+", " ", text): both treat runs of (unicode) whitespace as one.
    text = " ".join(text.split())
    return text.strip(" -;,:")
|
| 183 |
+
|
| 184 |
+
def visible_summary(vec: torch.Tensor, lang: str) -> str:
    """
    Vís sum / Show as:
    - ONLY words/labels, no letters, no hyphens like "X –"
    - all selected features (stable order)
    """
    # Any value other than "fo" falls back to English.
    lang = "fo" if lang=="fo" else "en"
    raw_tag = vector_to_tag(vec)
    wc = wc_code(vec)

    # Word-class label; fall back to the raw code when no label exists.
    wc_lbl = label_for(lang, "word_class", wc, wc) or wc

    # Exact override requested earlier:
    if raw_tag == "DGd":
        wc_lbl = "Fyriseting" if lang=="fo" else "Preposition"

    labels = [clean_label(wc_lbl)]

    for g in GROUP_ORDER:
        c = group_code(vec, g)
        if not c:
            continue

        # Word-class-specific label first, then the global fallback for this code.
        lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c) or ""
        lbl = clean_label(lbl)
        if not lbl:
            continue

        # Deduplicate while preserving GROUP_ORDER's stable ordering.
        if lbl not in labels:
            labels.append(lbl)

    return ", ".join([l for l in labels if l])
|
| 216 |
+
|
| 217 |
+
def meaning_detail(vec: torch.Tensor, lang: str) -> str:
    """
    Merking / Meaning:
    keeps codes + labels (useful for debugging and linguists)
    """
    lang = "fo" if lang=="fo" else "en"
    wc = wc_code(vec)
    parts = []

    # Word class first, rendered as "CODE – label" (bare code when no label).
    wc_lbl = label_for(lang, "word_class", wc, wc)
    parts.append(f"{wc} – {wc_lbl}" if wc_lbl else wc)

    for g in GROUP_ORDER:
        c = group_code(vec, g)
        if not c:
            continue
        # Word-class-specific label first, then the global fallback.
        lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
        parts.append(f"{c} – {lbl}" if lbl else c)

    return "; ".join([p for p in parts if p])
|
| 237 |
|
| 238 |
def compute_codes_by_wc():
|
| 239 |
codes = defaultdict(lambda: defaultdict(set)) # wc -> group -> set(code)
|
|
|
|
| 261 |
CODES_BY_WC = compute_codes_by_wc()
|
| 262 |
|
| 263 |
def build_legend(lang: str) -> str:
|
| 264 |
+
"""
|
| 265 |
+
Elaborate overview:
|
| 266 |
+
Under each word class, show the letter codes actually used in the CURRENT CSV,
|
| 267 |
+
with labels from tag_labels.json (fallback to code if missing).
|
| 268 |
+
"""
|
| 269 |
lang = "fo" if lang=="fo" else "en"
|
| 270 |
+
title = "### Markingaryvirlit" if lang=="fo" else "### Tag legend"
|
| 271 |
+
lines = [title, ""]
|
| 272 |
+
|
| 273 |
for wc in sorted(CODES_BY_WC.keys()):
|
| 274 |
wcl = label_for(lang, "word_class", wc, wc) or ""
|
| 275 |
lines.append(f"#### {wc} — {wcl}" if wcl else f"#### {wc}")
|
|
|
|
| 278 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 279 |
if not cs:
|
| 280 |
continue
|
| 281 |
+
|
| 282 |
+
if lang=="fo":
|
| 283 |
+
group_name = {
|
| 284 |
+
"subcategory":"Undirflokkur",
|
| 285 |
+
"gender":"Kyn",
|
| 286 |
+
"number":"Tal",
|
| 287 |
+
"case":"Fall",
|
| 288 |
+
"article":"Bundni/óbundni",
|
| 289 |
+
"proper":"Sernavn",
|
| 290 |
+
"degree":"Stig",
|
| 291 |
+
"declension":"Bending",
|
| 292 |
+
"mood":"Háttur",
|
| 293 |
+
"voice":"Søgn",
|
| 294 |
+
"tense":"Tíð",
|
| 295 |
+
"person":"Persónur",
|
| 296 |
+
"definiteness":"Bundni/óbundni",
|
| 297 |
+
}.get(g, g)
|
| 298 |
+
else:
|
| 299 |
+
group_name = {
|
| 300 |
+
"subcategory":"Subcategory",
|
| 301 |
+
"gender":"Gender",
|
| 302 |
+
"number":"Number",
|
| 303 |
+
"case":"Case",
|
| 304 |
+
"article":"Definite suffix",
|
| 305 |
+
"proper":"Proper noun",
|
| 306 |
+
"degree":"Degree",
|
| 307 |
+
"declension":"Declension",
|
| 308 |
+
"mood":"Mood",
|
| 309 |
+
"voice":"Voice",
|
| 310 |
+
"tense":"Tense",
|
| 311 |
+
"person":"Person",
|
| 312 |
+
"definiteness":"Definiteness",
|
| 313 |
+
}.get(g, g)
|
| 314 |
+
|
| 315 |
+
lines.append(f"**{group_name}**")
|
| 316 |
for c in cs:
|
| 317 |
lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
|
| 318 |
lines.append(f"- `{c}` — {lbl}" if lbl else f"- `{c}`")
|
| 319 |
lines.append("")
|
| 320 |
+
|
| 321 |
lines.append("")
|
| 322 |
+
|
| 323 |
return "\n".join(lines).strip()
|
| 324 |
|
| 325 |
def run_model(sentence: str):
|
|
|
|
| 330 |
if not tokens:
|
| 331 |
return []
|
| 332 |
|
| 333 |
+
enc = tokenizer(
|
| 334 |
+
tokens,
|
| 335 |
+
is_split_into_words=True,
|
| 336 |
+
add_special_tokens=True,
|
| 337 |
+
max_length=128,
|
| 338 |
+
padding="max_length",
|
| 339 |
+
truncation=True,
|
| 340 |
+
return_attention_mask=True,
|
| 341 |
+
return_tensors="pt",
|
| 342 |
+
)
|
| 343 |
|
| 344 |
input_ids = enc["input_ids"].to(device)
|
| 345 |
attention_mask = enc["attention_mask"].to(device)
|
|
|
|
| 348 |
begin = []
|
| 349 |
last = None
|
| 350 |
for wid in word_ids:
|
| 351 |
+
if wid is None:
|
| 352 |
+
begin.append(0)
|
| 353 |
+
elif wid != last:
|
| 354 |
+
begin.append(1)
|
| 355 |
+
else:
|
| 356 |
+
begin.append(0)
|
| 357 |
last = wid
|
| 358 |
|
| 359 |
with torch.no_grad():
|
|
|
|
| 374 |
vec_i += 1
|
| 375 |
return rows
|
| 376 |
|
| 377 |
+
def render_main(rows_state, lang: str):
    """Build both result tables plus the legend for the chosen language.

    Returns (df_main, legend_markdown, df_meaning). When nothing has been
    tagged yet, returns empty DataFrames with the localized column headers.
    """
    lang = "fo" if lang=="fo" else "en"
    cols = [UI[lang]["w"], UI[lang]["t"], UI[lang]["s"]]
    if not rows_state:
        return pd.DataFrame(columns=cols), build_legend(lang), pd.DataFrame(columns=[UI[lang]["w"], UI[lang]["t"], UI[lang]["m"]])

    out_main = []
    out_mean = []
    for r in rows_state:
        # Rebuild a tensor from the stored vector (kept as a plain value in gr.State).
        vec = torch.tensor(r["vec"])
        tag = vector_to_tag(vec)
        out_main.append([r["word"], tag, visible_summary(vec, lang)])
        out_mean.append([r["word"], tag, meaning_detail(vec, lang)])

    df_main = pd.DataFrame(out_main, columns=cols)
    df_mean = pd.DataFrame(out_mean, columns=[UI[lang]["w"], UI[lang]["t"], UI[lang]["m"]])
    return df_main, build_legend(lang), df_mean
|
| 394 |
+
|
| 395 |
+
# ----------------------------
|
| 396 |
+
# Gradio UI
|
| 397 |
+
# ----------------------------
|
| 398 |
theme = gr.themes.Soft()
|
| 399 |
|
| 400 |
with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
|
|
| 404 |
btn = gr.Button("Marka / Tag", variant="primary")
|
| 405 |
|
| 406 |
state = gr.State([])
|
| 407 |
+
|
| 408 |
out_df = gr.Dataframe(wrap=True, interactive=False, label="Úrslit / Results")
|
| 409 |
|
| 410 |
# Under results + can be changed AFTER tagging (no rerun; just re-render)
|
| 411 |
lang = gr.Dropdown(choices=[("Føroyskt","fo"), ("English","en")], value="fo", label="Mál / Language")
|
| 412 |
|
| 413 |
+
with gr.Accordion("Merking / Meaning", open=False):
|
| 414 |
+
out_mean_df = gr.Dataframe(wrap=True, interactive=False, label="")
|
| 415 |
+
|
| 416 |
with gr.Accordion("Markingaryvirlit / Legend", open=False):
|
| 417 |
legend_md = gr.Markdown(build_legend("fo"))
|
| 418 |
|
| 419 |
def on_tag(sentence, lang_choice):
    # Run the tagger, then render tables + legend in the currently selected language.
    rows = run_model(sentence)
    df_main, legend, df_mean = render_main(rows, lang_choice)
    # rows is returned into gr.State so a later language switch can re-render
    # without re-running the model.
    return rows, df_main, legend, df_mean
|
| 423 |
|
| 424 |
def on_lang(rows, lang_choice):
    # Language switched: re-render the already-tagged rows (no model rerun).
    df_main, legend, df_mean = render_main(rows, lang_choice)
    return df_main, legend, df_mean
|
| 427 |
|
| 428 |
+
btn.click(on_tag, inputs=[inp, lang], outputs=[state, out_df, legend_md, out_mean_df])
|
| 429 |
+
lang.change(on_lang, inputs=[state, lang], outputs=[out_df, legend_md, out_mean_df])
|
| 430 |
|
| 431 |
if __name__ == "__main__":
|
| 432 |
demo.launch()
|