unijoh commited on
Commit
fa38fed
·
verified ·
1 Parent(s): 84c23cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -146
app.py CHANGED
@@ -23,6 +23,7 @@ try:
23
  except Exception:
24
  _HAS_FOTOKENIZER = False
25
 
 
26
  def _patch_fotokenizer_for_py313() -> None:
27
  """FO-Tokenizer currently uses importlib.resources.open_text(package=..., resource=...).
28
  In Python 3.13, open_text no longer accepts the `package=` keyword.
@@ -30,7 +31,6 @@ def _patch_fotokenizer_for_py313() -> None:
30
  if not _HAS_FOTOKENIZER:
31
  return
32
  try:
33
- # If open_text doesn't accept `package`, patch the reference inside fotokenizer.abbrev.
34
  if "package" not in inspect.signature(importlib_resources.open_text).parameters:
35
  def _open_text_compat(*args, **kwargs):
36
  if "package" in kwargs:
@@ -41,12 +41,11 @@ def _patch_fotokenizer_for_py313() -> None:
41
  return importlib_resources.open_text(pkg, res, encoding=encoding, errors=errors)
42
  return importlib_resources.open_text(*args, **kwargs)
43
 
44
- # Patch the function that fotokenizer.abbrev imported into its module namespace
45
  fo_abbrev.open_text = _open_text_compat # type: ignore[attr-defined]
46
  except Exception:
47
- # If patching fails, we'll fall back to a naive sentence split later.
48
  pass
49
 
 
50
  _patch_fotokenizer_for_py313()
51
 
52
  # ----------------------------
@@ -55,12 +54,9 @@ _patch_fotokenizer_for_py313()
55
  MODEL_ID = "Setur/BRAGD"
56
  TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv"
57
  LABELS_FILEPATH = "tag_labels.json"
58
- HF_TOKEN = os.getenv("BRAGD")
59
 
60
  TARGET_MAX_TOKENS = 256 # We will cap this to the model's max if needed.
61
 
62
- if not HF_TOKEN:
63
- raise RuntimeError("Missing BRAGD token secret (Space → Settings → Secrets → BRAGD).")
64
  if not os.path.exists(LABELS_FILEPATH):
65
  raise RuntimeError(f"Missing {LABELS_FILEPATH}. Add it to the Space repo root.")
66
 
@@ -69,12 +65,15 @@ INTERVALS = (
69
  (51, 53), (54, 60), (61, 63), (64, 66), (67, 70), (71, 72)
70
  )
71
 
72
- GROUP_ORDER = ["subcategory","gender","number","case","article","proper","degree","declension","mood","voice","tense","person","definiteness"]
 
 
 
73
  HIDE_CODES = {"subcategory": {"B"}} # Subcategory B to be removed
74
 
75
  UI = {
76
- "fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
77
- "en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
78
  }
79
 
80
  MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
@@ -84,7 +83,6 @@ MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
84
  # ----------------------------
85
  CSS = """
86
  /* Keep Gradio default styling; only override our buttons. */
87
-
88
  #btn_tag, #lang_fo_on, #lang_en_on{
89
  background:#89AFA9 !important;
90
  border-color:#6F9992 !important;
@@ -95,7 +93,6 @@ CSS = """
95
  border-color:#6F9992 !important;
96
  color:#0b1b19 !important;
97
  }
98
-
99
  #lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
100
  background:#C6DAD6 !important;
101
  border-color:#6F9992 !important;
@@ -106,8 +103,6 @@ CSS = """
106
  border-color:#6F9992 !important;
107
  color:#0b1b19 !important;
108
  }
109
-
110
- /* Dark mode: make the INACTIVE buttons match what you had before (darker, readable) */
111
  @media (prefers-color-scheme: dark){
112
  #lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
113
  background:#2a3b38 !important;
@@ -120,8 +115,6 @@ CSS = """
120
  color:#0b1b19 !important;
121
  }
122
  }
123
-
124
- /* Minimal layout so the language buttons stay hard-right like before */
125
  #results_hdr{
126
  display:flex !important;
127
  align-items:center !important;
@@ -140,8 +133,6 @@ CSS = """
140
  min-width:120px !important;
141
  flex:0 0 auto !important;
142
  }
143
-
144
- /* Expanded-tags header: keep download button hard-right */
145
  #expanded_hdr{
146
  display:flex !important;
147
  align-items:center !important;
@@ -160,7 +151,6 @@ CSS = """
160
  min-width:120px !important;
161
  flex:0 0 auto !important;
162
  }
163
- /* Remove the big Gradio panel/frame around the textbox (keep textarea normal) */
164
  #input_col,
165
  #input_col > div,
166
  #input_col .gr-block,
@@ -172,9 +162,6 @@ CSS = """
172
  border: 0 !important;
173
  box-shadow: none !important;
174
  }
175
-
176
-
177
- /* Prevent the main "Marka / Tag" button from stretching vertically */
178
  #btn_tag{
179
  align-self:flex-start !important;
180
  flex:0 0 auto !important;
@@ -183,8 +170,6 @@ CSS = """
183
  #btn_tag button{
184
  height:auto !important;
185
  }
186
-
187
- /* Results tables (rendered as HTML so we fully control wrapping/scrolling). */
188
  #out_df .df-scroll, #out_mean_df .df-scroll{
189
  overflow-x:auto !important;
190
  width:100% !important;
@@ -237,7 +222,7 @@ def split_sentences(text: str):
237
  up with merged words (e.g. `Núriggarkanska`). This function therefore:
238
  - preserves `.txt` pieces as-is
239
  - converts descriptor-only whitespace-like tokens into a single space
240
- - adds a **best-effort** inserted space between tokens in cases where whitespace
241
  is missing but clearly intended (word→word, comma/semicolon/colon→word)
242
  """
243
 
@@ -256,17 +241,12 @@ def split_sentences(text: str):
256
  buf.append(piece)
257
  return
258
 
259
- # If we already ended with whitespace, just append.
260
  last = buf[-1]
261
  last_char = last[-1] if last else ""
262
  if last_char.isspace():
263
  buf.append(piece)
264
  return
265
 
266
- # If next token begins with a letter/number and previous token ends with:
267
- # - a letter/number (word→word)
268
- # - comma/semicolon/colon (",;:" → word)
269
- # ...then insert a space (this fixes missing whitespace from some tokenizers).
270
  if piece[0].isalnum() and (last_char.isalnum() or last_char in {",", ";", ":"}):
271
  buf.append(" ")
272
 
@@ -283,11 +263,9 @@ def split_sentences(text: str):
283
  _append_piece(cur, tok.txt)
284
  continue
285
 
286
- # Descriptor-only token (e.g., sentence boundary markers)
287
  descr = FO_TOK.descr.get(tok.kind, "").replace(" ", "_")
288
 
289
  if descr == "BEGIN_SENT":
290
- # Flush anything we may have buffered (robustness for odd streams)
291
  if cur:
292
  sent = "".join(cur).strip()
293
  if sent:
@@ -302,7 +280,6 @@ def split_sentences(text: str):
302
  cur = []
303
  continue
304
 
305
- # Best-effort: keep whitespace-like descriptor-only tokens.
306
  up = descr.upper()
307
  if "WHITESPACE" in up or "SPACE" in up or "TAB" in up:
308
  _append_piece(cur, " ")
@@ -311,7 +288,6 @@ def split_sentences(text: str):
311
  elif up == "DASH":
312
  _append_piece(cur, "-")
313
  else:
314
- # Ignore other descriptor-only tokens.
315
  pass
316
 
317
  if cur:
@@ -319,13 +295,10 @@ def split_sentences(text: str):
319
  if sent:
320
  sents.append(sent)
321
 
322
- # If fotokenizer didn't yield any markers, treat as one sentence.
323
  return sents or [s.strip()]
324
  except Exception:
325
- # We'll fall back below
326
  pass
327
 
328
- # Fallback: split on end punctuation followed by whitespace.
329
  parts = re.split(r"(?<=[.!?])\s+", s.strip())
330
  return [p.strip() for p in parts if p.strip()]
331
 
@@ -337,105 +310,122 @@ def run_model_multisentence(text: str):
337
  rows_all.extend(run_model(sent))
338
  return rows_all
339
 
 
340
  # ----------------------------
341
  # CSV mapping
342
  # ----------------------------
343
  def load_tag_mappings(path: str):
344
  df = pd.read_csv(path)
345
  feature_cols = list(df.columns[1:])
346
- tag_to_features = {row["Original Tag"]: row[1:].values.astype(int) for _, row in df.iterrows()}
347
- features_to_tag = {tuple(row[1:].values.astype(int)): row["Original Tag"] for _, row in df.iterrows()}
 
 
 
 
 
 
348
  return tag_to_features, features_to_tag, len(feature_cols), feature_cols
349
 
 
350
  def group_from_col(col: str):
351
- if col == "Article": return ("article","A")
352
- if col.startswith("No-Article "): return ("article", col.split()[-1])
353
- if col == "Proper Noun": return ("proper","P")
354
- if col.startswith("Not-Proper-Noun "): return ("proper", col.split()[-1])
 
 
 
 
355
 
356
  prefixes = [
357
- ("Word Class ","word_class"),
358
- ("Subcategory ","subcategory"), ("No-Subcategory ","subcategory"),
359
- ("Gender ","gender"), ("No-Gender ","gender"),
360
- ("Number ","number"), ("No-Number ","number"),
361
- ("Case ","case"), ("No-Case ","case"),
362
- ("Degree ","degree"), ("No-Degree ","degree"),
363
- ("Declension ","declension"), ("No-Declension ","declension"),
364
- ("Mood ","mood"),
365
- ("Voice ","voice"), ("No-Voice ","voice"),
366
- ("Tense ","tense"), ("No-Tense ","tense"),
367
- ("Person ","person"), ("No-Person ","person"),
368
- ("Definite ","definiteness"), ("Indefinite ","definiteness"),
369
  ]
370
- for p,g in prefixes:
371
  if col.startswith(p):
372
  return (g, col.split()[-1])
373
- return (None,None)
 
374
 
375
  def process_tag_features(tag_to_features: dict, intervals):
376
  arrs = [np.array(tpl) for tpl in set(tuple(a) for a in tag_to_features.values())]
377
- wt_masks = {wt:[a for a in arrs if a[wt]==1] for wt in range(15)}
378
  out = {}
379
- for wt,labels in wt_masks.items():
380
  if not labels:
381
- out[wt]=[]
382
  continue
383
  sum_labels = np.sum(np.array(labels), axis=0)
384
- out[wt] = [iv for iv in intervals if np.sum(sum_labels[iv[0]:iv[1]+1]) != 0]
385
  return out
386
 
 
387
  def predict_vectors(logits, attention_mask, begin_tokens, dict_intervals, vec_len):
388
  softmax = torch.nn.Softmax(dim=0)
389
  vectors = []
390
  for idx in range(len(logits)):
391
- if attention_mask[idx].item()!=1 or begin_tokens[idx]!=1:
392
  continue
393
  pred = logits[idx]
394
  vec = torch.zeros(vec_len, device=logits.device)
395
  wt = torch.argmax(softmax(pred[0:15])).item()
396
- vec[wt]=1
397
- for (a,b) in dict_intervals.get(wt, []):
398
- seg = pred[a:b+1]
399
  k = torch.argmax(softmax(seg)).item()
400
- vec[a+k]=1
401
  vectors.append(vec)
402
  return vectors
403
 
 
404
  # ----------------------------
405
  # Load labels
406
  # ----------------------------
407
  with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
408
  LABELS = json.load(f)
409
 
 
410
  def label_for(lang: str, group: str, wc: str, code: str) -> str:
411
- lang = "fo" if lang=="fo" else "en"
412
  by_wc = LABELS.get(lang, {}).get("by_word_class", {})
413
  glob = LABELS.get(lang, {}).get("global", {})
414
  if wc and wc in by_wc and code in by_wc[wc].get(group, {}):
415
  return by_wc[wc][group][code]
416
  return glob.get(group, {}).get(code, "")
417
 
 
418
  def clean_label(s: str) -> str:
419
  s = (s or "").strip()
420
  s = re.sub(r"\s+", " ", s)
421
  return s.strip(" -;,:").strip()
422
 
 
423
  # ----------------------------
424
  # Load model + mapping
425
  # ----------------------------
426
  tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)
427
 
428
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
429
- model = AutoModelForTokenClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)
430
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
431
- model.to(device); model.eval()
 
432
 
433
- # Decide max token length (cap to model/tokenizer max if they define one)
434
  MAX_TOKENS = int(TARGET_MAX_TOKENS)
435
  _model_max = getattr(getattr(model, "config", None), "max_position_embeddings", None)
436
  _tok_max = getattr(tokenizer, "model_max_length", None)
437
 
438
- # Some tokenizers set model_max_length to a huge placeholder (e.g., 1e30). Ignore those.
439
  for _m in (_model_max, _tok_max):
440
  if isinstance(_m, int) and 0 < _m < 100000:
441
  MAX_TOKENS = min(MAX_TOKENS, _m)
@@ -446,57 +436,60 @@ if hasattr(model, "config") and hasattr(model.config, "num_labels") and model.co
446
  DICT_INTERVALS = process_tag_features(tag_to_features, INTERVALS)
447
 
448
  GROUPS = defaultdict(list)
449
- for i,col in enumerate(FEATURE_COLS):
450
- g,code = group_from_col(col)
451
  if g and code not in HIDE_CODES.get(g, set()):
452
  GROUPS[g].append((i, code, col))
453
 
 
454
  def vector_to_tag(vec: torch.Tensor) -> str:
455
  return features_to_tag.get(tuple(vec.int().tolist()), "Unknown Tag")
456
 
 
457
  def wc_code(vec: torch.Tensor) -> str:
458
- for idx,code,_ in GROUPS["word_class"]:
459
- if int(vec[idx].item())==1:
460
  return code
461
  return ""
462
 
 
463
  def group_code(vec: torch.Tensor, group: str) -> str:
464
  hidden = HIDE_CODES.get(group, set())
465
- for idx,code,_ in GROUPS.get(group, []):
466
  if code in hidden:
467
  continue
468
- if int(vec[idx].item())==1:
469
  return code
470
  return ""
471
 
472
- HIDE_IN_ANALYSIS = {("D","subcategory","G"), ("D","subcategory","N")}
 
473
  VOICE_ANALYSIS = {
474
  "fo": {"A": "gerðsøgn", "M": "miðalsøgn", "v": "orð luttøkuháttur"},
475
  "en": {"A": "active voice", "M": "middle voice", "v": "supine form"},
476
  }
477
 
 
478
  def analysis_text(vec: torch.Tensor, lang: str) -> str:
479
- lang = "fo" if lang=="fo" else "en"
480
  tag = vector_to_tag(vec)
481
  wc = wc_code(vec)
482
 
483
- # --- ADDED: compute mood_code and skip flag for infinitive/imperative verbs ---
484
  mood_code = group_code(vec, "mood") if wc == "V" else ""
485
- skip_empty_verb_feats = (wc == "V" and mood_code in {"I", "M"}) # navnháttur or boðsháttur
486
- # --- end added ---
487
 
488
  if tag == "DGd":
489
- return "fyriseting" if lang=="fo" else "preposition"
490
 
491
  mood = group_code(vec, "mood")
492
  if mood == "U":
493
- sup = label_for(lang, "mood", wc, "U") or ("luttøkuháttur" if lang=="fo" else "supine")
494
  vcode = group_code(vec, "voice") or "v"
495
  vlabel = VOICE_ANALYSIS[lang].get(vcode, VOICE_ANALYSIS[lang]["v"])
496
  return f"{clean_label(sup)}, {clean_label(vlabel)}"
497
 
498
  parts = []
499
- if wc in {"P","C"}:
500
  subc = group_code(vec, "subcategory")
501
  subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
502
  if subl:
@@ -511,12 +504,10 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
511
  if not c:
512
  continue
513
 
514
- # --- ADDED: skip only the generic "no" codes for verbs in infinitive/imperative ---
515
  if skip_empty_verb_feats and g in {"number", "tense", "person"} and c in {"n", "t", "p"}:
516
  continue
517
- # --- end added ---
518
 
519
- if wc in {"P","C"} and g == "subcategory":
520
  continue
521
  if (wc, g, c) in HIDE_IN_ANALYSIS:
522
  continue
@@ -526,8 +517,9 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
526
 
527
  return ", ".join(parts)
528
 
 
529
  def expanded_text(vec: torch.Tensor, lang: str) -> str:
530
- lang = "fo" if lang=="fo" else "en"
531
  wc = wc_code(vec)
532
  parts = []
533
  wc_lbl = label_for(lang, "word_class", wc, wc)
@@ -540,31 +532,34 @@ def expanded_text(vec: torch.Tensor, lang: str) -> str:
540
  parts.append(f"{c} – {lbl}" if lbl else c)
541
  return "; ".join([p for p in parts if p])
542
 
 
543
  def compute_codes_by_wc():
544
  codes = defaultdict(lambda: defaultdict(set))
545
  for arr in tag_to_features.values():
546
  arr = np.array(arr)
547
  wc = None
548
- for idx,code,_ in GROUPS["word_class"]:
549
- if arr[idx]==1:
550
  wc = code
551
  break
552
  if not wc:
553
  continue
554
  for g in GROUP_ORDER:
555
  hidden = HIDE_CODES.get(g, set())
556
- for idx,code,_ in GROUPS.get(g, []):
557
  if code in hidden:
558
  continue
559
- if arr[idx]==1:
560
  codes[wc][g].add(code)
561
  return codes
562
 
 
563
  CODES_BY_WC = compute_codes_by_wc()
564
 
 
565
  def build_overview(lang: str) -> str:
566
- lang = "fo" if lang=="fo" else "en"
567
- title = "### Markayvirlit" if lang=="fo" else "### Tag Overview"
568
  lines = [title, ""]
569
  for wc in sorted(CODES_BY_WC.keys()):
570
  wcl = label_for(lang, "word_class", wc, wc) or ""
@@ -574,12 +569,18 @@ def build_overview(lang: str) -> str:
574
  if not cs:
575
  continue
576
  group_name = {
577
- "fo": {"subcategory":"Undirflokkur","gender":"Kyn","number":"Tal","case":"Fall","article":"Bundni/óbundni",
578
- "proper":"Sernavn / felagsnavn","degree":"Stig","declension":"Bending","mood":"Háttur","voice":"Søgn",
579
- "tense":"Tíð","person":"Persónur","definiteness":"Bundni/óbundni"},
580
- "en": {"subcategory":"Subcategory","gender":"Gender","number":"Number","case":"Case","article":"Definiteness",
581
- "proper":"Proper/common noun","degree":"Degree","declension":"Declension","mood":"Mood","voice":"Voice",
582
- "tense":"Tense","person":"Person","definiteness":"Definiteness"},
 
 
 
 
 
 
583
  }[lang].get(g, g)
584
  lines.append(f"**{group_name}**")
585
  for c in cs:
@@ -589,6 +590,7 @@ def build_overview(lang: str) -> str:
589
  lines.append("")
590
  return "\n".join(lines).strip()
591
 
 
592
  def run_model(sentence: str):
593
  s = (sentence or "").strip()
594
  if not s:
@@ -596,8 +598,17 @@ def run_model(sentence: str):
596
  tokens = simp_tok(s)
597
  if not tokens:
598
  return []
599
- enc = tokenizer(tokens, is_split_into_words=True, add_special_tokens=True, max_length=MAX_TOKENS,
600
- padding="max_length", truncation=True, return_attention_mask=True, return_tensors="pt")
 
 
 
 
 
 
 
 
 
601
  input_ids = enc["input_ids"].to(device)
602
  attention_mask = enc["attention_mask"].to(device)
603
  word_ids = enc.word_ids(batch_index=0)
@@ -618,8 +629,8 @@ def run_model(sentence: str):
618
  vectors = predict_vectors(logits, attention_mask[0], begin, DICT_INTERVALS, VEC_LEN)
619
 
620
  rows, vec_i, seen = [], 0, set()
621
- for i,wid in enumerate(word_ids):
622
- if wid is None or begin[i]!=1 or wid in seen:
623
  continue
624
  seen.add(wid)
625
  word = tokens[wid] if wid < len(tokens) else "<UNK>"
@@ -628,10 +639,8 @@ def run_model(sentence: str):
628
  vec_i += 1
629
  return rows
630
 
 
631
  def _make_html_table(headers, rows):
632
- # We render results as plain HTML so we can force:
633
- # - no wrapping anywhere
634
- # - horizontal scrolling when content is wider than the page
635
  th = "".join(f"<th>{html.escape(str(h))}</th>" for h in headers)
636
  body_rows = []
637
  for row in rows:
@@ -644,8 +653,9 @@ def _make_html_table(headers, rows):
644
  '</div>'
645
  )
646
 
 
647
  def render(rows_state, lang: str):
648
- lang = "fo" if lang=="fo" else "en"
649
  cols_main = [UI[lang]["w"], UI[lang]["t"], UI[lang]["s"]]
650
  cols_mean = [UI[lang]["w"], UI[lang]["t"], UI[lang]["m"]]
651
  if not rows_state:
@@ -662,17 +672,14 @@ def render(rows_state, lang: str):
662
 
663
 
664
  def _write_tsv(df: pd.DataFrame, filename: str) -> str:
665
- """Write a TSV file to a unique temp folder and return the absolute path."""
666
  tmpdir = os.path.join(tempfile.gettempdir(), "marka_downloads", str(uuid.uuid4()))
667
  os.makedirs(tmpdir, exist_ok=True)
668
  path = os.path.join(tmpdir, filename)
669
  df.to_csv(path, sep="\t", index=False, encoding="utf-8")
670
  return path
671
 
 
672
  def build_download_main(rows_state) -> str:
673
- """TSV with 4 columns:
674
- Orð, Mark, Útgreining (FO), Analysis (EN)
675
- """
676
  words, tags, fo_vals, en_vals = [], [], [], []
677
  for r in (rows_state or []):
678
  vec = torch.tensor(r["vec"])
@@ -690,8 +697,8 @@ def build_download_main(rows_state) -> str:
690
  })
691
  return _write_tsv(df, "Markað.tsv")
692
 
 
693
  def build_download_expanded(rows_state, lang: str) -> str:
694
- """TSV with 3 columns: Word/Orð, Tag/Mark, and Expanded tags in the UI language."""
695
  lang = "fo" if lang == "fo" else "en"
696
  words, tags, vals = [], [], []
697
  for r in (rows_state or []):
@@ -707,10 +714,16 @@ def build_download_expanded(rows_state, lang: str) -> str:
707
  })
708
  return _write_tsv(df, "Markað_útgreinað.tsv")
709
 
 
710
  with gr.Blocks(css=CSS, title="Marka") as demo:
711
  with gr.Row(equal_height=False):
712
  with gr.Column(scale=2, elem_id="input_col"):
713
- inp = gr.Textbox(lines=6, placeholder="Skriva her ... / Type here ...", show_label=False, elem_id="input_box")
 
 
 
 
 
714
  with gr.Column(scale=1, min_width=320):
715
  gr.Markdown(
716
  "## Marka\n"
@@ -726,11 +739,11 @@ with gr.Blocks(css=CSS, title="Marka") as demo:
726
  with results_hdr:
727
  results_title = gr.Markdown("### Úrslit / Results")
728
  with gr.Row(elem_id="lang_buttons") as lang_buttons_row:
729
- btn_lang_fo_on = gr.Button("Føroyskt", variant="primary", elem_id="lang_fo_on", visible=False)
730
  btn_lang_fo_off = gr.Button("Føroyskt", variant="secondary", elem_id="lang_fo_off", visible=False)
731
- btn_lang_en_on = gr.Button("English", variant="primary", elem_id="lang_en_on", visible=False)
732
- btn_lang_en_off = gr.Button("English", variant="secondary", elem_id="lang_en_off", visible=False)
733
- btn_dl_main = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_main", visible=False)
734
  out_df = gr.HTML(value="", elem_id="out_df", visible=False)
735
 
736
  expanded_acc = gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False)
@@ -746,21 +759,15 @@ with gr.Blocks(css=CSS, title="Marka") as demo:
746
  overview_md = gr.Markdown(build_overview("fo"))
747
 
748
  def show_loading(lang_current):
749
- """Instant feedback on click.
750
-
751
- - Shows the (empty) results table immediately so Gradio can display its built-in loading overlay.
752
- - Hides/collapses the Expanded tags section while processing (avoids duplicate loading skeletons).
753
- - Disables the button + changes its label while the model runs.
754
- """
755
  lang_current = "fo" if lang_current == "fo" else "en"
756
  cols_main = [UI[lang_current]["w"], UI[lang_current]["t"], UI[lang_current]["s"]]
757
  shell = _make_html_table(cols_main, [])
758
  return (
759
  gr.update(value=shell, visible=True),
760
- gr.update(visible=False), # hide main download while processing
761
- gr.update(visible=False), # hide expanded download while processing
762
- gr.update(visible=False), # hide + collapse expanded section while processing
763
- gr.update(value=""), # clear expanded content (prevents flashing stale content)
764
  gr.update(value="Markar... / Tagging...", interactive=False),
765
  )
766
 
@@ -780,7 +787,7 @@ with gr.Blocks(css=CSS, title="Marka") as demo:
780
  gr.update(value=df_main, visible=True),
781
  gr.update(value=df_mean),
782
  gr.update(value=overview),
783
- gr.update(visible=True), # expanded_acc
784
  gr.update(visible=show_fo),
785
  gr.update(visible=not show_fo),
786
  gr.update(visible=show_en),
@@ -813,6 +820,7 @@ with gr.Blocks(css=CSS, title="Marka") as demo:
813
  gr.update(value=dl_main_path, visible=have_rows),
814
  gr.update(value=dl_exp_path, visible=have_rows),
815
  )
 
816
  def on_set_fo(rows):
817
  return on_set_lang(rows, "fo")
818
 
@@ -829,44 +837,54 @@ with gr.Blocks(css=CSS, title="Marka") as demo:
829
  _evt.then(
830
  on_tag,
831
  inputs=[inp, lang_state],
832
- outputs=[state, out_df, out_mean_df, overview_md, expanded_acc,
833
- btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
834
- btn_dl_main, btn_dl_exp, lang_state, btn],
 
 
835
  queue=False,
836
  )
837
 
838
  btn_lang_fo_on.click(
839
  on_set_fo,
840
  inputs=[state],
841
- outputs=[lang_state, out_df, out_mean_df, overview_md,
842
- btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
843
- btn_dl_main, btn_dl_exp],
 
 
844
  queue=False,
845
  )
846
  btn_lang_fo_off.click(
847
  on_set_fo,
848
  inputs=[state],
849
- outputs=[lang_state, out_df, out_mean_df, overview_md,
850
- btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
851
- btn_dl_main, btn_dl_exp],
 
 
852
  queue=False,
853
  )
854
  btn_lang_en_on.click(
855
  on_set_en,
856
  inputs=[state],
857
- outputs=[lang_state, out_df, out_mean_df, overview_md,
858
- btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
859
- btn_dl_main, btn_dl_exp],
 
 
860
  queue=False,
861
  )
862
  btn_lang_en_off.click(
863
  on_set_en,
864
  inputs=[state],
865
- outputs=[lang_state, out_df, out_mean_df, overview_md,
866
- btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
867
- btn_dl_main, btn_dl_exp],
 
 
868
  queue=False,
869
  )
870
 
871
  if __name__ == "__main__":
872
- demo.launch()
 
23
  except Exception:
24
  _HAS_FOTOKENIZER = False
25
 
26
+
27
  def _patch_fotokenizer_for_py313() -> None:
28
  """FO-Tokenizer currently uses importlib.resources.open_text(package=..., resource=...).
29
  In Python 3.13, open_text no longer accepts the `package=` keyword.
 
31
  if not _HAS_FOTOKENIZER:
32
  return
33
  try:
 
34
  if "package" not in inspect.signature(importlib_resources.open_text).parameters:
35
  def _open_text_compat(*args, **kwargs):
36
  if "package" in kwargs:
 
41
  return importlib_resources.open_text(pkg, res, encoding=encoding, errors=errors)
42
  return importlib_resources.open_text(*args, **kwargs)
43
 
 
44
  fo_abbrev.open_text = _open_text_compat # type: ignore[attr-defined]
45
  except Exception:
 
46
  pass
47
 
48
+
49
  _patch_fotokenizer_for_py313()
50
 
51
  # ----------------------------
 
54
  MODEL_ID = "Setur/BRAGD"
55
  TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv"
56
  LABELS_FILEPATH = "tag_labels.json"
 
57
 
58
  TARGET_MAX_TOKENS = 256 # We will cap this to the model's max if needed.
59
 
 
 
60
  if not os.path.exists(LABELS_FILEPATH):
61
  raise RuntimeError(f"Missing {LABELS_FILEPATH}. Add it to the Space repo root.")
62
 
 
65
  (51, 53), (54, 60), (61, 63), (64, 66), (67, 70), (71, 72)
66
  )
67
 
68
+ GROUP_ORDER = [
69
+ "subcategory", "gender", "number", "case", "article", "proper",
70
+ "degree", "declension", "mood", "voice", "tense", "person", "definiteness"
71
+ ]
72
  HIDE_CODES = {"subcategory": {"B"}} # Subcategory B to be removed
73
 
74
  UI = {
75
+ "fo": {"w": "Orð", "t": "Mark", "s": "Útgreining", "m": "Útgreinað marking"},
76
+ "en": {"w": "Word", "t": "Tag", "s": "Analysis", "m": "Expanded tags"},
77
  }
78
 
79
  MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
 
83
  # ----------------------------
84
  CSS = """
85
  /* Keep Gradio default styling; only override our buttons. */
 
86
  #btn_tag, #lang_fo_on, #lang_en_on{
87
  background:#89AFA9 !important;
88
  border-color:#6F9992 !important;
 
93
  border-color:#6F9992 !important;
94
  color:#0b1b19 !important;
95
  }
 
96
  #lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
97
  background:#C6DAD6 !important;
98
  border-color:#6F9992 !important;
 
103
  border-color:#6F9992 !important;
104
  color:#0b1b19 !important;
105
  }
 
 
106
  @media (prefers-color-scheme: dark){
107
  #lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
108
  background:#2a3b38 !important;
 
115
  color:#0b1b19 !important;
116
  }
117
  }
 
 
118
  #results_hdr{
119
  display:flex !important;
120
  align-items:center !important;
 
133
  min-width:120px !important;
134
  flex:0 0 auto !important;
135
  }
 
 
136
  #expanded_hdr{
137
  display:flex !important;
138
  align-items:center !important;
 
151
  min-width:120px !important;
152
  flex:0 0 auto !important;
153
  }
 
154
  #input_col,
155
  #input_col > div,
156
  #input_col .gr-block,
 
162
  border: 0 !important;
163
  box-shadow: none !important;
164
  }
 
 
 
165
  #btn_tag{
166
  align-self:flex-start !important;
167
  flex:0 0 auto !important;
 
170
  #btn_tag button{
171
  height:auto !important;
172
  }
 
 
173
  #out_df .df-scroll, #out_mean_df .df-scroll{
174
  overflow-x:auto !important;
175
  width:100% !important;
 
222
  up with merged words (e.g. `Núriggarkanska`). This function therefore:
223
  - preserves `.txt` pieces as-is
224
  - converts descriptor-only whitespace-like tokens into a single space
225
+ - adds a best-effort inserted space between tokens in cases where whitespace
226
  is missing but clearly intended (word→word, comma/semicolon/colon→word)
227
  """
228
 
 
241
  buf.append(piece)
242
  return
243
 
 
244
  last = buf[-1]
245
  last_char = last[-1] if last else ""
246
  if last_char.isspace():
247
  buf.append(piece)
248
  return
249
 
 
 
 
 
250
  if piece[0].isalnum() and (last_char.isalnum() or last_char in {",", ";", ":"}):
251
  buf.append(" ")
252
 
 
263
  _append_piece(cur, tok.txt)
264
  continue
265
 
 
266
  descr = FO_TOK.descr.get(tok.kind, "").replace(" ", "_")
267
 
268
  if descr == "BEGIN_SENT":
 
269
  if cur:
270
  sent = "".join(cur).strip()
271
  if sent:
 
280
  cur = []
281
  continue
282
 
 
283
  up = descr.upper()
284
  if "WHITESPACE" in up or "SPACE" in up or "TAB" in up:
285
  _append_piece(cur, " ")
 
288
  elif up == "DASH":
289
  _append_piece(cur, "-")
290
  else:
 
291
  pass
292
 
293
  if cur:
 
295
  if sent:
296
  sents.append(sent)
297
 
 
298
  return sents or [s.strip()]
299
  except Exception:
 
300
  pass
301
 
 
302
  parts = re.split(r"(?<=[.!?])\s+", s.strip())
303
  return [p.strip() for p in parts if p.strip()]
304
 
 
310
  rows_all.extend(run_model(sent))
311
  return rows_all
312
 
313
+
314
  # ----------------------------
315
  # CSV mapping
316
  # ----------------------------
317
  def load_tag_mappings(path: str):
318
  df = pd.read_csv(path)
319
  feature_cols = list(df.columns[1:])
320
+ tag_to_features = {
321
+ row["Original Tag"]: row[1:].values.astype(int)
322
+ for _, row in df.iterrows()
323
+ }
324
+ features_to_tag = {
325
+ tuple(row[1:].values.astype(int)): row["Original Tag"]
326
+ for _, row in df.iterrows()
327
+ }
328
  return tag_to_features, features_to_tag, len(feature_cols), feature_cols
329
 
330
+
331
  def group_from_col(col: str):
332
+ if col == "Article":
333
+ return ("article", "A")
334
+ if col.startswith("No-Article "):
335
+ return ("article", col.split()[-1])
336
+ if col == "Proper Noun":
337
+ return ("proper", "P")
338
+ if col.startswith("Not-Proper-Noun "):
339
+ return ("proper", col.split()[-1])
340
 
341
  prefixes = [
342
+ ("Word Class ", "word_class"),
343
+ ("Subcategory ", "subcategory"), ("No-Subcategory ", "subcategory"),
344
+ ("Gender ", "gender"), ("No-Gender ", "gender"),
345
+ ("Number ", "number"), ("No-Number ", "number"),
346
+ ("Case ", "case"), ("No-Case ", "case"),
347
+ ("Degree ", "degree"), ("No-Degree ", "degree"),
348
+ ("Declension ", "declension"), ("No-Declension ", "declension"),
349
+ ("Mood ", "mood"),
350
+ ("Voice ", "voice"), ("No-Voice ", "voice"),
351
+ ("Tense ", "tense"), ("No-Tense ", "tense"),
352
+ ("Person ", "person"), ("No-Person ", "person"),
353
+ ("Definite ", "definiteness"), ("Indefinite ", "definiteness"),
354
  ]
355
+ for p, g in prefixes:
356
  if col.startswith(p):
357
  return (g, col.split()[-1])
358
+ return (None, None)
359
+
360
 
361
  def process_tag_features(tag_to_features: dict, intervals):
362
  arrs = [np.array(tpl) for tpl in set(tuple(a) for a in tag_to_features.values())]
363
+ wt_masks = {wt: [a for a in arrs if a[wt] == 1] for wt in range(15)}
364
  out = {}
365
+ for wt, labels in wt_masks.items():
366
  if not labels:
367
+ out[wt] = []
368
  continue
369
  sum_labels = np.sum(np.array(labels), axis=0)
370
+ out[wt] = [iv for iv in intervals if np.sum(sum_labels[iv[0]:iv[1] + 1]) != 0]
371
  return out
372
 
373
+
374
  def predict_vectors(logits, attention_mask, begin_tokens, dict_intervals, vec_len):
375
  softmax = torch.nn.Softmax(dim=0)
376
  vectors = []
377
  for idx in range(len(logits)):
378
+ if attention_mask[idx].item() != 1 or begin_tokens[idx] != 1:
379
  continue
380
  pred = logits[idx]
381
  vec = torch.zeros(vec_len, device=logits.device)
382
  wt = torch.argmax(softmax(pred[0:15])).item()
383
+ vec[wt] = 1
384
+ for (a, b) in dict_intervals.get(wt, []):
385
+ seg = pred[a:b + 1]
386
  k = torch.argmax(softmax(seg)).item()
387
+ vec[a + k] = 1
388
  vectors.append(vec)
389
  return vectors
390
 
391
+
392
  # ----------------------------
393
  # Load labels
394
  # ----------------------------
395
  with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
396
  LABELS = json.load(f)
397
 
398
+
399
def label_for(lang: str, group: str, wc: str, code: str) -> str:
    """Return the human-readable label for a feature code.

    Word-class-specific labels take precedence over the language's global
    table; any language other than "fo" falls back to English. Returns ""
    when no label is found.
    """
    lang_table = LABELS.get("fo" if lang == "fo" else "en", {})
    specific = lang_table.get("by_word_class", {}).get(wc, {}) if wc else {}
    group_table = specific.get(group, {})
    if code in group_table:
        return group_table[code]
    return lang_table.get("global", {}).get(group, {}).get(code, "")
406
 
407
+
408
def clean_label(s: str) -> str:
    """Normalize a label: collapse all whitespace runs to single spaces and
    trim stray leading/trailing punctuation (space, dash, semicolon, comma,
    colon). None/empty input yields ""."""
    collapsed = " ".join((s or "").split())
    return collapsed.strip(" -;,:").strip()
412
 
413
+
414
# ----------------------------
# Load model + mapping
# ----------------------------
# Tag mapping: tag string <-> binary feature vector of length VEC_LEN;
# FEATURE_COLS names each vector position.
tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only

# Cap the requested sequence length to what the model/tokenizer support.
MAX_TOKENS = int(TARGET_MAX_TOKENS)
_model_max = getattr(getattr(model, "config", None), "max_position_embeddings", None)
_tok_max = getattr(tokenizer, "model_max_length", None)

# The < 100000 guard skips the huge sentinel value some tokenizers report
# when they have no real limit — presumably; confirm against the tokenizer.
for _m in (_model_max, _tok_max):
    if isinstance(_m, int) and 0 < _m < 100000:
        MAX_TOKENS = min(MAX_TOKENS, _m)
 
436
# Per word class: which feature intervals are actually used by known tags.
DICT_INTERVALS = process_tag_features(tag_to_features, INTERVALS)

# GROUPS: feature-group name -> list of (vector_index, code, column_name),
# excluding codes hidden via HIDE_CODES.
GROUPS = defaultdict(list)
for i, col in enumerate(FEATURE_COLS):
    g, code = group_from_col(col)
    if g and code not in HIDE_CODES.get(g, set()):
        GROUPS[g].append((i, code, col))
443
 
444
+
445
def vector_to_tag(vec: torch.Tensor) -> str:
    """Map a one-hot feature vector back to its tag string ("Unknown Tag" if absent)."""
    key = tuple(int(x) for x in vec.tolist())
    return features_to_tag.get(key, "Unknown Tag")
447
 
448
+
449
def wc_code(vec: torch.Tensor) -> str:
    """Return the word-class code whose bit is set in ``vec`` ("" if none)."""
    return next(
        (code for idx, code, _ in GROUPS["word_class"] if int(vec[idx].item()) == 1),
        "",
    )
454
 
455
+
456
def group_code(vec: torch.Tensor, group: str) -> str:
    """Return the code set in ``vec`` for ``group``, skipping hidden codes ("" if none)."""
    hidden = HIDE_CODES.get(group, set())
    matches = (
        code
        for idx, code, _ in GROUPS.get(group, [])
        if code not in hidden and int(vec[idx].item()) == 1
    )
    return next(matches, "")
464
 
465
+
466
# (word_class, group, code) triples suppressed in the analysis output.
HIDE_IN_ANALYSIS = {("D", "subcategory", "G"), ("D", "subcategory", "N")}
# Voice labels used by analysis_text(); "v" is the fallback voice code
# when no voice bit is set.
VOICE_ANALYSIS = {
    "fo": {"A": "gerðsøgn", "M": "miðalsøgn", "v": "orð luttøkuháttur"},
    "en": {"A": "active voice", "M": "middle voice", "v": "supine form"},
}
471
 
472
+
473
  def analysis_text(vec: torch.Tensor, lang: str) -> str:
474
+ lang = "fo" if lang == "fo" else "en"
475
  tag = vector_to_tag(vec)
476
  wc = wc_code(vec)
477
 
 
478
  mood_code = group_code(vec, "mood") if wc == "V" else ""
479
+ skip_empty_verb_feats = (wc == "V" and mood_code in {"I", "M"})
 
480
 
481
  if tag == "DGd":
482
+ return "fyriseting" if lang == "fo" else "preposition"
483
 
484
  mood = group_code(vec, "mood")
485
  if mood == "U":
486
+ sup = label_for(lang, "mood", wc, "U") or ("luttøkuháttur" if lang == "fo" else "supine")
487
  vcode = group_code(vec, "voice") or "v"
488
  vlabel = VOICE_ANALYSIS[lang].get(vcode, VOICE_ANALYSIS[lang]["v"])
489
  return f"{clean_label(sup)}, {clean_label(vlabel)}"
490
 
491
  parts = []
492
+ if wc in {"P", "C"}:
493
  subc = group_code(vec, "subcategory")
494
  subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
495
  if subl:
 
504
  if not c:
505
  continue
506
 
 
507
  if skip_empty_verb_feats and g in {"number", "tense", "person"} and c in {"n", "t", "p"}:
508
  continue
 
509
 
510
+ if wc in {"P", "C"} and g == "subcategory":
511
  continue
512
  if (wc, g, c) in HIDE_IN_ANALYSIS:
513
  continue
 
517
 
518
  return ", ".join(parts)
519
 
520
+
521
  def expanded_text(vec: torch.Tensor, lang: str) -> str:
522
+ lang = "fo" if lang == "fo" else "en"
523
  wc = wc_code(vec)
524
  parts = []
525
  wc_lbl = label_for(lang, "word_class", wc, wc)
 
532
  parts.append(f"{c} – {lbl}" if lbl else c)
533
  return "; ".join([p for p in parts if p])
534
 
535
+
536
def compute_codes_by_wc():
    """Collect, per word class, the feature codes each group actually uses.

    Scans every known tag feature vector and returns
    ``codes[word_class][group] -> set of codes`` observed for that pair,
    skipping codes hidden via HIDE_CODES.
    """
    codes = defaultdict(lambda: defaultdict(set))
    for features in tag_to_features.values():
        vec = np.array(features)
        # First set word-class bit determines the word class for this vector.
        wc = next(
            (code for idx, code, _ in GROUPS["word_class"] if vec[idx] == 1),
            None,
        )
        if not wc:
            continue
        for group in GROUP_ORDER:
            hidden = HIDE_CODES.get(group, set())
            for idx, code, _ in GROUPS.get(group, []):
                if code not in hidden and vec[idx] == 1:
                    codes[wc][group].add(code)
    return codes
555
 
556
+
557
# Precomputed once at import time; consumed by build_overview().
CODES_BY_WC = compute_codes_by_wc()
558
 
559
+
560
  def build_overview(lang: str) -> str:
561
+ lang = "fo" if lang == "fo" else "en"
562
+ title = "### Markayvirlit" if lang == "fo" else "### Tag Overview"
563
  lines = [title, ""]
564
  for wc in sorted(CODES_BY_WC.keys()):
565
  wcl = label_for(lang, "word_class", wc, wc) or ""
 
569
  if not cs:
570
  continue
571
  group_name = {
572
+ "fo": {
573
+ "subcategory": "Undirflokkur", "gender": "Kyn", "number": "Tal", "case": "Fall",
574
+ "article": "Bundni/óbundni", "proper": "Sernavn / felagsnavn", "degree": "Stig",
575
+ "declension": "Bending", "mood": "Háttur", "voice": "Søgn", "tense": "Tíð",
576
+ "person": "Persónur", "definiteness": "Bundni/óbundni"
577
+ },
578
+ "en": {
579
+ "subcategory": "Subcategory", "gender": "Gender", "number": "Number", "case": "Case",
580
+ "article": "Definiteness", "proper": "Proper/common noun", "degree": "Degree",
581
+ "declension": "Declension", "mood": "Mood", "voice": "Voice", "tense": "Tense",
582
+ "person": "Person", "definiteness": "Definiteness"
583
+ },
584
  }[lang].get(g, g)
585
  lines.append(f"**{group_name}**")
586
  for c in cs:
 
590
  lines.append("")
591
  return "\n".join(lines).strip()
592
 
593
+
594
  def run_model(sentence: str):
595
  s = (sentence or "").strip()
596
  if not s:
 
598
  tokens = simp_tok(s)
599
  if not tokens:
600
  return []
601
+
602
+ enc = tokenizer(
603
+ tokens,
604
+ is_split_into_words=True,
605
+ add_special_tokens=True,
606
+ max_length=MAX_TOKENS,
607
+ padding="max_length",
608
+ truncation=True,
609
+ return_attention_mask=True,
610
+ return_tensors="pt",
611
+ )
612
  input_ids = enc["input_ids"].to(device)
613
  attention_mask = enc["attention_mask"].to(device)
614
  word_ids = enc.word_ids(batch_index=0)
 
629
  vectors = predict_vectors(logits, attention_mask[0], begin, DICT_INTERVALS, VEC_LEN)
630
 
631
  rows, vec_i, seen = [], 0, set()
632
+ for i, wid in enumerate(word_ids):
633
+ if wid is None or begin[i] != 1 or wid in seen:
634
  continue
635
  seen.add(wid)
636
  word = tokens[wid] if wid < len(tokens) else "<UNK>"
 
639
  vec_i += 1
640
  return rows
641
 
642
+
643
  def _make_html_table(headers, rows):
 
 
 
644
  th = "".join(f"<th>{html.escape(str(h))}</th>" for h in headers)
645
  body_rows = []
646
  for row in rows:
 
653
  '</div>'
654
  )
655
 
656
+
657
  def render(rows_state, lang: str):
658
+ lang = "fo" if lang == "fo" else "en"
659
  cols_main = [UI[lang]["w"], UI[lang]["t"], UI[lang]["s"]]
660
  cols_mean = [UI[lang]["w"], UI[lang]["t"], UI[lang]["m"]]
661
  if not rows_state:
 
672
 
673
 
674
  def _write_tsv(df: pd.DataFrame, filename: str) -> str:
 
675
  tmpdir = os.path.join(tempfile.gettempdir(), "marka_downloads", str(uuid.uuid4()))
676
  os.makedirs(tmpdir, exist_ok=True)
677
  path = os.path.join(tmpdir, filename)
678
  df.to_csv(path, sep="\t", index=False, encoding="utf-8")
679
  return path
680
 
681
+
682
  def build_download_main(rows_state) -> str:
 
 
 
683
  words, tags, fo_vals, en_vals = [], [], [], []
684
  for r in (rows_state or []):
685
  vec = torch.tensor(r["vec"])
 
697
  })
698
  return _write_tsv(df, "Markað.tsv")
699
 
700
+
701
  def build_download_expanded(rows_state, lang: str) -> str:
 
702
  lang = "fo" if lang == "fo" else "en"
703
  words, tags, vals = [], [], []
704
  for r in (rows_state or []):
 
714
  })
715
  return _write_tsv(df, "Markað_útgreinað.tsv")
716
 
717
+
718
  with gr.Blocks(css=CSS, title="Marka") as demo:
719
  with gr.Row(equal_height=False):
720
  with gr.Column(scale=2, elem_id="input_col"):
721
+ inp = gr.Textbox(
722
+ lines=6,
723
+ placeholder="Skriva her ... / Type here ...",
724
+ show_label=False,
725
+ elem_id="input_box",
726
+ )
727
  with gr.Column(scale=1, min_width=320):
728
  gr.Markdown(
729
  "## Marka\n"
 
739
  with results_hdr:
740
  results_title = gr.Markdown("### Úrslit / Results")
741
  with gr.Row(elem_id="lang_buttons") as lang_buttons_row:
742
+ btn_lang_fo_on = gr.Button("Føroyskt", variant="primary", elem_id="lang_fo_on", visible=False)
743
  btn_lang_fo_off = gr.Button("Føroyskt", variant="secondary", elem_id="lang_fo_off", visible=False)
744
+ btn_lang_en_on = gr.Button("English", variant="primary", elem_id="lang_en_on", visible=False)
745
+ btn_lang_en_off = gr.Button("English", variant="secondary", elem_id="lang_en_off", visible=False)
746
+ btn_dl_main = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_main", visible=False)
747
  out_df = gr.HTML(value="", elem_id="out_df", visible=False)
748
 
749
  expanded_acc = gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False)
 
759
  overview_md = gr.Markdown(build_overview("fo"))
760
 
761
  def show_loading(lang_current):
 
 
 
 
 
 
762
  lang_current = "fo" if lang_current == "fo" else "en"
763
  cols_main = [UI[lang_current]["w"], UI[lang_current]["t"], UI[lang_current]["s"]]
764
  shell = _make_html_table(cols_main, [])
765
  return (
766
  gr.update(value=shell, visible=True),
767
+ gr.update(visible=False),
768
+ gr.update(visible=False),
769
+ gr.update(visible=False),
770
+ gr.update(value=""),
771
  gr.update(value="Markar... / Tagging...", interactive=False),
772
  )
773
 
 
787
  gr.update(value=df_main, visible=True),
788
  gr.update(value=df_mean),
789
  gr.update(value=overview),
790
+ gr.update(visible=True),
791
  gr.update(visible=show_fo),
792
  gr.update(visible=not show_fo),
793
  gr.update(visible=show_en),
 
820
  gr.update(value=dl_main_path, visible=have_rows),
821
  gr.update(value=dl_exp_path, visible=have_rows),
822
  )
823
+
824
  def on_set_fo(rows):
825
  return on_set_lang(rows, "fo")
826
 
 
837
  _evt.then(
838
  on_tag,
839
  inputs=[inp, lang_state],
840
+ outputs=[
841
+ state, out_df, out_mean_df, overview_md, expanded_acc,
842
+ btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
843
+ btn_dl_main, btn_dl_exp, lang_state, btn
844
+ ],
845
  queue=False,
846
  )
847
 
848
  btn_lang_fo_on.click(
849
  on_set_fo,
850
  inputs=[state],
851
+ outputs=[
852
+ lang_state, out_df, out_mean_df, overview_md,
853
+ btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
854
+ btn_dl_main, btn_dl_exp
855
+ ],
856
  queue=False,
857
  )
858
  btn_lang_fo_off.click(
859
  on_set_fo,
860
  inputs=[state],
861
+ outputs=[
862
+ lang_state, out_df, out_mean_df, overview_md,
863
+ btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
864
+ btn_dl_main, btn_dl_exp
865
+ ],
866
  queue=False,
867
  )
868
  btn_lang_en_on.click(
869
  on_set_en,
870
  inputs=[state],
871
+ outputs=[
872
+ lang_state, out_df, out_mean_df, overview_md,
873
+ btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
874
+ btn_dl_main, btn_dl_exp
875
+ ],
876
  queue=False,
877
  )
878
  btn_lang_en_off.click(
879
  on_set_en,
880
  inputs=[state],
881
+ outputs=[
882
+ lang_state, out_df, out_mean_df, overview_md,
883
+ btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
884
+ btn_dl_main, btn_dl_exp
885
+ ],
886
  queue=False,
887
  )
888
 
889
  if __name__ == "__main__":
890
+ demo.launch()