unijoh committed on
Commit
c64e9f6
·
verified ·
1 Parent(s): 22e1960

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +40 -40
  2. tag_labels.json +17 -17
app.py CHANGED
@@ -38,7 +38,6 @@ UI = {
38
  "fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
39
  "en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
40
  }
41
-
42
  MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
43
 
44
  # Theme color: #89AFA9 (+ close shades) + system font
@@ -50,11 +49,14 @@ CSS = """
50
  body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
51
  font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
52
  }
 
 
53
  .gr-button-primary, button.primary, .primary{
54
  background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
55
  }
56
  .gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
57
  a{ color:var(--primary-700)!important; }
 
58
 
59
  /* Dataframe column wrapping: keep Orð + Mark on one line */
60
  .gr-dataframe table td:nth-child(1),
@@ -65,20 +67,18 @@ a{ color:var(--primary-700)!important; }
65
  .gr-dataframe table td:nth-child(2),
66
  .gr-dataframe table th:nth-child(2){
67
  white-space: nowrap !important;
68
- width: 18% !important;
69
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
70
  }
71
  .gr-dataframe table td:nth-child(3),
72
  .gr-dataframe table th:nth-child(3){
73
  white-space: normal !important;
74
- width: 64% !important;
75
  }
76
 
77
- /* Make the language dropdown compact */
78
- #lang_dd { max-width: 170px; }
79
-
80
- /* Slightly smaller primary button */
81
- .gr-button-primary{ padding: 0.35rem 0.85rem !important; font-size: 0.95rem !important; }
82
  """
83
 
84
  # ----------------------------
@@ -238,6 +238,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
238
  Útgreining / Analysis:
239
  - plain words (no letters/hyphens)
240
  - pronouns: start at subcategory, not word class
 
241
  - DGd: show only fyriseting/preposition
242
  - supine: show only supine + voice (drop verb/number/tense/person etc.)
243
  """
@@ -258,7 +259,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
258
 
259
  parts = []
260
 
261
- # Pronouns + conjunctions: subcategory already carries the head noun (fornavn / sambindingarorð)
262
  if wc in {"P","C"}:
263
  subc = group_code(vec, "subcategory")
264
  subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
@@ -289,10 +290,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
289
  return ", ".join(parts)
290
 
291
  def expanded_text(vec: torch.Tensor, lang: str) -> str:
292
- """
293
- Útgreinað marking / Expanded tags:
294
- codes + labels (useful for debugging and linguists)
295
- """
296
  lang = "fo" if lang=="fo" else "en"
297
  wc = wc_code(vec)
298
  parts = []
@@ -335,11 +333,9 @@ def compute_codes_by_wc():
335
  CODES_BY_WC = compute_codes_by_wc()
336
 
337
  def build_overview(lang: str) -> str:
338
- """
339
- Overview under each word class with the letter codes actually used in the CURRENT CSV.
340
- """
341
  lang = "fo" if lang=="fo" else "en"
342
- title = "### Markingaryvirlit" if lang=="fo" else "### Tag Overview"
343
  lines = [title, ""]
344
 
345
  for wc in sorted(CODES_BY_WC.keys()):
@@ -350,6 +346,7 @@ def build_overview(lang: str) -> str:
350
  cs = sorted(CODES_BY_WC[wc].get(g, set()))
351
  if not cs:
352
  continue
 
353
  group_name = {
354
  "fo": {
355
  "subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
@@ -458,36 +455,38 @@ def render(rows_state, lang: str):
458
  # ----------------------------
459
  theme = gr.themes.Soft()
460
 
461
- with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
462
- # Compact header: info left, input right
463
  with gr.Row(equal_height=True):
464
- with gr.Column(scale=1, min_width=280):
465
- gr.Markdown(
466
- "### BRAGD-markarin\n"
467
- "Skriv ein setning og fá hann markaðan.\n\n"
468
- f"**Myndil / Model:** [{MODEL_ID}]({MODEL_LINK})"
469
- )
470
  with gr.Column(scale=2):
471
  inp = gr.Textbox(
472
- lines=5,
473
  placeholder="Skriva her ... / Type here ...",
474
  show_label=False,
475
  )
 
 
 
 
 
 
476
  btn = gr.Button("Marka / Tag", variant="primary")
477
 
478
  state = gr.State([])
479
 
480
  # Results header row (components hide until first run)
481
  with gr.Row():
482
- results_title = gr.Markdown("### Úrslit / Results", visible=False)
483
- lang = gr.Dropdown(
484
- choices=[("Føroyskt","fo"), ("English","en")],
485
- value="fo",
486
- show_label=False,
487
- filterable=False,
488
- elem_id="lang_dd",
489
- visible=False,
490
- )
 
 
491
 
492
  out_df = gr.Dataframe(
493
  value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
@@ -499,7 +498,8 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
499
  visible=False,
500
  )
501
 
502
- with gr.Accordion("Útgreinað marking / Expanded tags", open=False):
 
503
  out_mean_df = gr.Dataframe(
504
  value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
505
  wrap=True,
@@ -507,16 +507,15 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
507
  show_label=False,
508
  row_count=(0, "fixed"),
509
  col_count=(3, "fixed"),
510
- visible=False,
511
  )
512
 
513
- with gr.Accordion("Markingaryvirlit / Tag Overview", open=False):
514
  overview_md = gr.Markdown("", visible=False)
515
 
516
  def on_tag(sentence, lang_choice):
517
  rows = run_model(sentence)
518
  df_main, df_mean, overview = render(rows, lang_choice)
519
-
520
  return (
521
  rows,
522
  gr.update(value=df_main, visible=True),
@@ -524,6 +523,7 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
524
  gr.update(value=overview, visible=True),
525
  gr.update(visible=True), # results_title
526
  gr.update(visible=True), # lang
 
527
  )
528
 
529
  def on_lang(rows, lang_choice):
@@ -537,7 +537,7 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
537
  btn.click(
538
  on_tag,
539
  inputs=[inp, lang],
540
- outputs=[state, out_df, out_mean_df, overview_md, results_title, lang],
541
  queue=False,
542
  )
543
 
 
38
  "fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
39
  "en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
40
  }
 
41
  MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
42
 
43
  # Theme color: #89AFA9 (+ close shades) + system font
 
49
  body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
50
  font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
51
  }
52
+
53
+ /* Buttons */
54
  .gr-button-primary, button.primary, .primary{
55
  background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
56
  }
57
  .gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
58
  a{ color:var(--primary-700)!important; }
59
+ .gr-button-primary{ padding: 0.32rem 0.75rem !important; font-size: 0.95rem !important; }
60
 
61
  /* Dataframe column wrapping: keep Orð + Mark on one line */
62
  .gr-dataframe table td:nth-child(1),
 
67
  .gr-dataframe table td:nth-child(2),
68
  .gr-dataframe table th:nth-child(2){
69
  white-space: nowrap !important;
70
+ width: 20% !important;
71
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
72
  }
73
  .gr-dataframe table td:nth-child(3),
74
  .gr-dataframe table th:nth-child(3){
75
  white-space: normal !important;
76
+ width: 62% !important;
77
  }
78
 
79
+ /* Compact dropdown on the right (avoid huge grey container) */
80
+ #lang_dd { max-width: 150px; min-width: 150px; margin-left: auto; }
81
+ #lang_dd > div { width: 150px !important; }
 
 
82
  """
83
 
84
  # ----------------------------
 
238
  Útgreining / Analysis:
239
  - plain words (no letters/hyphens)
240
  - pronouns: start at subcategory, not word class
241
+ - conjunctions: subcategory already includes 'sambindingarorð' in label
242
  - DGd: show only fyriseting/preposition
243
  - supine: show only supine + voice (drop verb/number/tense/person etc.)
244
  """
 
259
 
260
  parts = []
261
 
262
+ # Pronouns + conjunctions: start at subcategory (the label already contains the head)
263
  if wc in {"P","C"}:
264
  subc = group_code(vec, "subcategory")
265
  subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
 
290
  return ", ".join(parts)
291
 
292
  def expanded_text(vec: torch.Tensor, lang: str) -> str:
293
+ """Útgreinað marking / Expanded tags: codes + labels."""
 
 
 
294
  lang = "fo" if lang=="fo" else "en"
295
  wc = wc_code(vec)
296
  parts = []
 
333
  CODES_BY_WC = compute_codes_by_wc()
334
 
335
  def build_overview(lang: str) -> str:
336
+ """Markayvirlit / Tag Overview under each word class (codes used in current CSV)."""
 
 
337
  lang = "fo" if lang=="fo" else "en"
338
+ title = "### Markayvirlit" if lang=="fo" else "### Tag Overview"
339
  lines = [title, ""]
340
 
341
  for wc in sorted(CODES_BY_WC.keys()):
 
346
  cs = sorted(CODES_BY_WC[wc].get(g, set()))
347
  if not cs:
348
  continue
349
+
350
  group_name = {
351
  "fo": {
352
  "subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
 
455
  # ----------------------------
456
  theme = gr.themes.Soft()
457
 
458
+ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
459
+ # New layout: textbox left, info right (button under info)
460
  with gr.Row(equal_height=True):
 
 
 
 
 
 
461
  with gr.Column(scale=2):
462
  inp = gr.Textbox(
463
+ lines=6,
464
  placeholder="Skriva her ... / Type here ...",
465
  show_label=False,
466
  )
467
+ with gr.Column(scale=1, min_width=280):
468
+ gr.Markdown(
469
+ "### Marka\n"
470
+ "Skriv setningin í kassan vinstrumegin og fá hann markaðan.\n\n"
471
+ f"**Myndil / Model:** [{MODEL_ID}]({MODEL_LINK})"
472
+ )
473
  btn = gr.Button("Marka / Tag", variant="primary")
474
 
475
  state = gr.State([])
476
 
477
  # Results header row (components hide until first run)
478
  with gr.Row():
479
+ with gr.Column(scale=5):
480
+ results_title = gr.Markdown("### Úrslit / Results", visible=False)
481
+ with gr.Column(scale=1, min_width=170):
482
+ lang = gr.Dropdown(
483
+ choices=[("Føroyskt","fo"), ("English","en")],
484
+ value="fo",
485
+ show_label=False,
486
+ filterable=False,
487
+ elem_id="lang_dd",
488
+ visible=False,
489
+ )
490
 
491
  out_df = gr.Dataframe(
492
  value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
 
498
  visible=False,
499
  )
500
 
501
+ # Hide Expanded tags accordion until tagged
502
+ with gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False) as expanded_acc:
503
  out_mean_df = gr.Dataframe(
504
  value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
505
  wrap=True,
 
507
  show_label=False,
508
  row_count=(0, "fixed"),
509
  col_count=(3, "fixed"),
510
+ visible=True,
511
  )
512
 
513
+ with gr.Accordion("Markayvirlit / Tag Overview", open=False):
514
  overview_md = gr.Markdown("", visible=False)
515
 
516
  def on_tag(sentence, lang_choice):
517
  rows = run_model(sentence)
518
  df_main, df_mean, overview = render(rows, lang_choice)
 
519
  return (
520
  rows,
521
  gr.update(value=df_main, visible=True),
 
523
  gr.update(value=overview, visible=True),
524
  gr.update(visible=True), # results_title
525
  gr.update(visible=True), # lang
526
+ gr.update(visible=True), # expanded_acc
527
  )
528
 
529
  def on_lang(rows, lang_choice):
 
537
  btn.click(
538
  on_tag,
539
  inputs=[inp, lang],
540
+ outputs=[state, out_df, out_mean_df, overview_md, results_title, lang, expanded_acc],
541
  queue=False,
542
  )
543
 
tag_labels.json CHANGED
@@ -84,7 +84,7 @@
84
  "G": "genitive"
85
  },
86
  "article": {
87
- "A": "with suffixed definite article"
88
  },
89
  "proper": {
90
  "P": "Proper Noun"
@@ -125,8 +125,8 @@
125
  "A": "absolute superlative"
126
  },
127
  "declension": {
128
- "S": "strong",
129
- "W": "weak",
130
  "e": "no-declension"
131
  },
132
  "gender": {
@@ -163,9 +163,9 @@
163
  "N": "neuter"
164
  },
165
  "person": {
166
- "1": "1st pers",
167
- "2": "2nd pers",
168
- "3": "3rd pers"
169
  },
170
  "number": {
171
  "S": "singular",
@@ -206,7 +206,7 @@
206
  },
207
  "V": {
208
  "word_class": {
209
- "V": "verb (except for participle)"
210
  },
211
  "mood": {
212
  "I": "infinitive",
@@ -235,15 +235,15 @@
235
  },
236
  "L": {
237
  "word_class": {
238
- "L": "participle"
239
  },
240
  "voice": {
241
  "A": "active",
242
  "M": "mediopassive"
243
  },
244
  "declension": {
245
- "S": "strong",
246
- "W": "weak",
247
  "e": "no-declension"
248
  },
249
  "gender": {
@@ -456,8 +456,8 @@
456
  "A": "absolutt hástig"
457
  },
458
  "declension": {
459
- "S": "sterk",
460
- "W": "veik",
461
  "e": "eingin sterk/veik bending"
462
  },
463
  "gender": {
@@ -494,9 +494,9 @@
494
  "N": "hvørkikyn"
495
  },
496
  "person": {
497
- "1": "fyrsti persónur",
498
- "2": "annar persónur",
499
- "3": "triði persónur"
500
  },
501
  "number": {
502
  "S": "eintal",
@@ -577,8 +577,8 @@
577
  "M": "miðalsøgn"
578
  },
579
  "declension": {
580
- "S": "sterk",
581
- "W": "veik",
582
  "e": "eingin sterk/veik bending"
583
  },
584
  "gender": {
 
84
  "G": "genitive"
85
  },
86
  "article": {
87
+ "A": "definite"
88
  },
89
  "proper": {
90
  "P": "Proper Noun"
 
125
  "A": "absolute superlative"
126
  },
127
  "declension": {
128
+ "S": "strong declension",
129
+ "W": "weak declension",
130
  "e": "no-declension"
131
  },
132
  "gender": {
 
163
  "N": "neuter"
164
  },
165
  "person": {
166
+ "1": "1st person",
167
+ "2": "2nd person",
168
+ "3": "3rd person"
169
  },
170
  "number": {
171
  "S": "singular",
 
206
  },
207
  "V": {
208
  "word_class": {
209
+ "V": "verb"
210
  },
211
  "mood": {
212
  "I": "infinitive",
 
235
  },
236
  "L": {
237
  "word_class": {
238
+ "L": "past participle"
239
  },
240
  "voice": {
241
  "A": "active",
242
  "M": "mediopassive"
243
  },
244
  "declension": {
245
+ "S": "strong declension",
246
+ "W": "weak declension",
247
  "e": "no-declension"
248
  },
249
  "gender": {
 
456
  "A": "absolutt hástig"
457
  },
458
  "declension": {
459
+ "S": "sterk bending",
460
+ "W": "veik bending",
461
  "e": "eingin sterk/veik bending"
462
  },
463
  "gender": {
 
494
  "N": "hvørkikyn"
495
  },
496
  "person": {
497
+ "1": "1. persónur",
498
+ "2": "2. persónur",
499
+ "3": "3. persónur"
500
  },
501
  "number": {
502
  "S": "eintal",
 
577
  "M": "miðalsøgn"
578
  },
579
  "declension": {
580
+ "S": "sterk bending",
581
+ "W": "veik bending",
582
  "e": "eingin sterk/veik bending"
583
  },
584
  "gender": {