Upload 2 files
Browse files- app.py +40 -40
- tag_labels.json +17 -17
app.py
CHANGED
|
@@ -38,7 +38,6 @@ UI = {
|
|
| 38 |
"fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
|
| 39 |
"en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
|
| 40 |
}
|
| 41 |
-
|
| 42 |
MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
|
| 43 |
|
| 44 |
# Theme color: #89AFA9 (+ close shades) + system font
|
|
@@ -50,11 +49,14 @@ CSS = """
|
|
| 50 |
body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
|
| 51 |
font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
|
| 52 |
}
|
|
|
|
|
|
|
| 53 |
.gr-button-primary, button.primary, .primary{
|
| 54 |
background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
|
| 55 |
}
|
| 56 |
.gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
|
| 57 |
a{ color:var(--primary-700)!important; }
|
|
|
|
| 58 |
|
| 59 |
/* Dataframe column wrapping: keep Orð + Mark on one line */
|
| 60 |
.gr-dataframe table td:nth-child(1),
|
|
@@ -65,20 +67,18 @@ a{ color:var(--primary-700)!important; }
|
|
| 65 |
.gr-dataframe table td:nth-child(2),
|
| 66 |
.gr-dataframe table th:nth-child(2){
|
| 67 |
white-space: nowrap !important;
|
| 68 |
-
width:
|
| 69 |
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
|
| 70 |
}
|
| 71 |
.gr-dataframe table td:nth-child(3),
|
| 72 |
.gr-dataframe table th:nth-child(3){
|
| 73 |
white-space: normal !important;
|
| 74 |
-
width:
|
| 75 |
}
|
| 76 |
|
| 77 |
-
/*
|
| 78 |
-
#lang_dd { max-width:
|
| 79 |
-
|
| 80 |
-
/* Slightly smaller primary button */
|
| 81 |
-
.gr-button-primary{ padding: 0.35rem 0.85rem !important; font-size: 0.95rem !important; }
|
| 82 |
"""
|
| 83 |
|
| 84 |
# ----------------------------
|
|
@@ -238,6 +238,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 238 |
Útgreining / Analysis:
|
| 239 |
- plain words (no letters/hyphens)
|
| 240 |
- pronouns: start at subcategory, not word class
|
|
|
|
| 241 |
- DGd: show only fyriseting/preposition
|
| 242 |
- supine: show only supine + voice (drop verb/number/tense/person etc.)
|
| 243 |
"""
|
|
@@ -258,7 +259,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 258 |
|
| 259 |
parts = []
|
| 260 |
|
| 261 |
-
# Pronouns + conjunctions:
|
| 262 |
if wc in {"P","C"}:
|
| 263 |
subc = group_code(vec, "subcategory")
|
| 264 |
subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
|
|
@@ -289,10 +290,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 289 |
return ", ".join(parts)
|
| 290 |
|
| 291 |
def expanded_text(vec: torch.Tensor, lang: str) -> str:
|
| 292 |
-
"""
|
| 293 |
-
Útgreinað marking / Expanded tags:
|
| 294 |
-
codes + labels (useful for debugging and linguists)
|
| 295 |
-
"""
|
| 296 |
lang = "fo" if lang=="fo" else "en"
|
| 297 |
wc = wc_code(vec)
|
| 298 |
parts = []
|
|
@@ -335,11 +333,9 @@ def compute_codes_by_wc():
|
|
| 335 |
CODES_BY_WC = compute_codes_by_wc()
|
| 336 |
|
| 337 |
def build_overview(lang: str) -> str:
|
| 338 |
-
"""
|
| 339 |
-
Overview under each word class with the letter codes actually used in the CURRENT CSV.
|
| 340 |
-
"""
|
| 341 |
lang = "fo" if lang=="fo" else "en"
|
| 342 |
-
title = "###
|
| 343 |
lines = [title, ""]
|
| 344 |
|
| 345 |
for wc in sorted(CODES_BY_WC.keys()):
|
|
@@ -350,6 +346,7 @@ def build_overview(lang: str) -> str:
|
|
| 350 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 351 |
if not cs:
|
| 352 |
continue
|
|
|
|
| 353 |
group_name = {
|
| 354 |
"fo": {
|
| 355 |
"subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
|
|
@@ -458,36 +455,38 @@ def render(rows_state, lang: str):
|
|
| 458 |
# ----------------------------
|
| 459 |
theme = gr.themes.Soft()
|
| 460 |
|
| 461 |
-
with gr.Blocks(theme=theme, css=CSS, title="
|
| 462 |
-
#
|
| 463 |
with gr.Row(equal_height=True):
|
| 464 |
-
with gr.Column(scale=1, min_width=280):
|
| 465 |
-
gr.Markdown(
|
| 466 |
-
"### BRAGD-markarin\n"
|
| 467 |
-
"Skriv ein setning og fá hann markaðan.\n\n"
|
| 468 |
-
f"**Myndil / Model:** [{MODEL_ID}]({MODEL_LINK})"
|
| 469 |
-
)
|
| 470 |
with gr.Column(scale=2):
|
| 471 |
inp = gr.Textbox(
|
| 472 |
-
lines=
|
| 473 |
placeholder="Skriva her ... / Type here ...",
|
| 474 |
show_label=False,
|
| 475 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
btn = gr.Button("Marka / Tag", variant="primary")
|
| 477 |
|
| 478 |
state = gr.State([])
|
| 479 |
|
| 480 |
# Results header row (components hide until first run)
|
| 481 |
with gr.Row():
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
|
|
|
|
|
|
| 491 |
|
| 492 |
out_df = gr.Dataframe(
|
| 493 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
|
|
@@ -499,7 +498,8 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
| 499 |
visible=False,
|
| 500 |
)
|
| 501 |
|
| 502 |
-
|
|
|
|
| 503 |
out_mean_df = gr.Dataframe(
|
| 504 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
|
| 505 |
wrap=True,
|
|
@@ -507,16 +507,15 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
| 507 |
show_label=False,
|
| 508 |
row_count=(0, "fixed"),
|
| 509 |
col_count=(3, "fixed"),
|
| 510 |
-
visible=
|
| 511 |
)
|
| 512 |
|
| 513 |
-
with gr.Accordion("
|
| 514 |
overview_md = gr.Markdown("", visible=False)
|
| 515 |
|
| 516 |
def on_tag(sentence, lang_choice):
|
| 517 |
rows = run_model(sentence)
|
| 518 |
df_main, df_mean, overview = render(rows, lang_choice)
|
| 519 |
-
|
| 520 |
return (
|
| 521 |
rows,
|
| 522 |
gr.update(value=df_main, visible=True),
|
|
@@ -524,6 +523,7 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
| 524 |
gr.update(value=overview, visible=True),
|
| 525 |
gr.update(visible=True), # results_title
|
| 526 |
gr.update(visible=True), # lang
|
|
|
|
| 527 |
)
|
| 528 |
|
| 529 |
def on_lang(rows, lang_choice):
|
|
@@ -537,7 +537,7 @@ with gr.Blocks(theme=theme, css=CSS, title="BRAGD-markarin") as demo:
|
|
| 537 |
btn.click(
|
| 538 |
on_tag,
|
| 539 |
inputs=[inp, lang],
|
| 540 |
-
outputs=[state, out_df, out_mean_df, overview_md, results_title, lang],
|
| 541 |
queue=False,
|
| 542 |
)
|
| 543 |
|
|
|
|
| 38 |
"fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
|
| 39 |
"en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
|
| 40 |
}
|
|
|
|
| 41 |
MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
|
| 42 |
|
| 43 |
# Theme color: #89AFA9 (+ close shades) + system font
|
|
|
|
| 49 |
body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
|
| 50 |
font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
|
| 51 |
}
|
| 52 |
+
|
| 53 |
+
/* Buttons */
|
| 54 |
.gr-button-primary, button.primary, .primary{
|
| 55 |
background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
|
| 56 |
}
|
| 57 |
.gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
|
| 58 |
a{ color:var(--primary-700)!important; }
|
| 59 |
+
.gr-button-primary{ padding: 0.32rem 0.75rem !important; font-size: 0.95rem !important; }
|
| 60 |
|
| 61 |
/* Dataframe column wrapping: keep Orð + Mark on one line */
|
| 62 |
.gr-dataframe table td:nth-child(1),
|
|
|
|
| 67 |
.gr-dataframe table td:nth-child(2),
|
| 68 |
.gr-dataframe table th:nth-child(2){
|
| 69 |
white-space: nowrap !important;
|
| 70 |
+
width: 20% !important;
|
| 71 |
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
|
| 72 |
}
|
| 73 |
.gr-dataframe table td:nth-child(3),
|
| 74 |
.gr-dataframe table th:nth-child(3){
|
| 75 |
white-space: normal !important;
|
| 76 |
+
width: 62% !important;
|
| 77 |
}
|
| 78 |
|
| 79 |
+
/* Compact dropdown on the right (avoid huge grey container) */
|
| 80 |
+
#lang_dd { max-width: 150px; min-width: 150px; margin-left: auto; }
|
| 81 |
+
#lang_dd > div { width: 150px !important; }
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
|
| 84 |
# ----------------------------
|
|
|
|
| 238 |
Útgreining / Analysis:
|
| 239 |
- plain words (no letters/hyphens)
|
| 240 |
- pronouns: start at subcategory, not word class
|
| 241 |
+
- conjunctions: subcategory already includes 'sambindingarorð' in label
|
| 242 |
- DGd: show only fyriseting/preposition
|
| 243 |
- supine: show only supine + voice (drop verb/number/tense/person etc.)
|
| 244 |
"""
|
|
|
|
| 259 |
|
| 260 |
parts = []
|
| 261 |
|
| 262 |
+
# Pronouns + conjunctions: start at subcategory (the label already contains the head)
|
| 263 |
if wc in {"P","C"}:
|
| 264 |
subc = group_code(vec, "subcategory")
|
| 265 |
subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
|
|
|
|
| 290 |
return ", ".join(parts)
|
| 291 |
|
| 292 |
def expanded_text(vec: torch.Tensor, lang: str) -> str:
|
| 293 |
+
"""Útgreinað marking / Expanded tags: codes + labels."""
|
|
|
|
|
|
|
|
|
|
| 294 |
lang = "fo" if lang=="fo" else "en"
|
| 295 |
wc = wc_code(vec)
|
| 296 |
parts = []
|
|
|
|
| 333 |
CODES_BY_WC = compute_codes_by_wc()
|
| 334 |
|
| 335 |
def build_overview(lang: str) -> str:
|
| 336 |
+
"""Markayvirlit / Tag Overview under each word class (codes used in current CSV)."""
|
|
|
|
|
|
|
| 337 |
lang = "fo" if lang=="fo" else "en"
|
| 338 |
+
title = "### Markayvirlit" if lang=="fo" else "### Tag Overview"
|
| 339 |
lines = [title, ""]
|
| 340 |
|
| 341 |
for wc in sorted(CODES_BY_WC.keys()):
|
|
|
|
| 346 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 347 |
if not cs:
|
| 348 |
continue
|
| 349 |
+
|
| 350 |
group_name = {
|
| 351 |
"fo": {
|
| 352 |
"subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
|
|
|
|
| 455 |
# ----------------------------
|
| 456 |
theme = gr.themes.Soft()
|
| 457 |
|
| 458 |
+
with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
| 459 |
+
# New layout: textbox left, info right (button under info)
|
| 460 |
with gr.Row(equal_height=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
with gr.Column(scale=2):
|
| 462 |
inp = gr.Textbox(
|
| 463 |
+
lines=6,
|
| 464 |
placeholder="Skriva her ... / Type here ...",
|
| 465 |
show_label=False,
|
| 466 |
)
|
| 467 |
+
with gr.Column(scale=1, min_width=280):
|
| 468 |
+
gr.Markdown(
|
| 469 |
+
"### Marka\n"
|
| 470 |
+
"Skriv setningin í kassan vinstrumegin og fá hann markaðan.\n\n"
|
| 471 |
+
f"**Myndil / Model:** [{MODEL_ID}]({MODEL_LINK})"
|
| 472 |
+
)
|
| 473 |
btn = gr.Button("Marka / Tag", variant="primary")
|
| 474 |
|
| 475 |
state = gr.State([])
|
| 476 |
|
| 477 |
# Results header row (components hide until first run)
|
| 478 |
with gr.Row():
|
| 479 |
+
with gr.Column(scale=5):
|
| 480 |
+
results_title = gr.Markdown("### Úrslit / Results", visible=False)
|
| 481 |
+
with gr.Column(scale=1, min_width=170):
|
| 482 |
+
lang = gr.Dropdown(
|
| 483 |
+
choices=[("Føroyskt","fo"), ("English","en")],
|
| 484 |
+
value="fo",
|
| 485 |
+
show_label=False,
|
| 486 |
+
filterable=False,
|
| 487 |
+
elem_id="lang_dd",
|
| 488 |
+
visible=False,
|
| 489 |
+
)
|
| 490 |
|
| 491 |
out_df = gr.Dataframe(
|
| 492 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
|
|
|
|
| 498 |
visible=False,
|
| 499 |
)
|
| 500 |
|
| 501 |
+
# Hide Expanded tags accordion until tagged
|
| 502 |
+
with gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False) as expanded_acc:
|
| 503 |
out_mean_df = gr.Dataframe(
|
| 504 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
|
| 505 |
wrap=True,
|
|
|
|
| 507 |
show_label=False,
|
| 508 |
row_count=(0, "fixed"),
|
| 509 |
col_count=(3, "fixed"),
|
| 510 |
+
visible=True,
|
| 511 |
)
|
| 512 |
|
| 513 |
+
with gr.Accordion("Markayvirlit / Tag Overview", open=False):
|
| 514 |
overview_md = gr.Markdown("", visible=False)
|
| 515 |
|
| 516 |
def on_tag(sentence, lang_choice):
|
| 517 |
rows = run_model(sentence)
|
| 518 |
df_main, df_mean, overview = render(rows, lang_choice)
|
|
|
|
| 519 |
return (
|
| 520 |
rows,
|
| 521 |
gr.update(value=df_main, visible=True),
|
|
|
|
| 523 |
gr.update(value=overview, visible=True),
|
| 524 |
gr.update(visible=True), # results_title
|
| 525 |
gr.update(visible=True), # lang
|
| 526 |
+
gr.update(visible=True), # expanded_acc
|
| 527 |
)
|
| 528 |
|
| 529 |
def on_lang(rows, lang_choice):
|
|
|
|
| 537 |
btn.click(
|
| 538 |
on_tag,
|
| 539 |
inputs=[inp, lang],
|
| 540 |
+
outputs=[state, out_df, out_mean_df, overview_md, results_title, lang, expanded_acc],
|
| 541 |
queue=False,
|
| 542 |
)
|
| 543 |
|
tag_labels.json
CHANGED
|
@@ -84,7 +84,7 @@
|
|
| 84 |
"G": "genitive"
|
| 85 |
},
|
| 86 |
"article": {
|
| 87 |
-
"A": "
|
| 88 |
},
|
| 89 |
"proper": {
|
| 90 |
"P": "Proper Noun"
|
|
@@ -125,8 +125,8 @@
|
|
| 125 |
"A": "absolute superlative"
|
| 126 |
},
|
| 127 |
"declension": {
|
| 128 |
-
"S": "strong",
|
| 129 |
-
"W": "weak",
|
| 130 |
"e": "no-declension"
|
| 131 |
},
|
| 132 |
"gender": {
|
|
@@ -163,9 +163,9 @@
|
|
| 163 |
"N": "neuter"
|
| 164 |
},
|
| 165 |
"person": {
|
| 166 |
-
"1": "1st
|
| 167 |
-
"2": "2nd
|
| 168 |
-
"3": "3rd
|
| 169 |
},
|
| 170 |
"number": {
|
| 171 |
"S": "singular",
|
|
@@ -206,7 +206,7 @@
|
|
| 206 |
},
|
| 207 |
"V": {
|
| 208 |
"word_class": {
|
| 209 |
-
"V": "verb
|
| 210 |
},
|
| 211 |
"mood": {
|
| 212 |
"I": "infinitive",
|
|
@@ -235,15 +235,15 @@
|
|
| 235 |
},
|
| 236 |
"L": {
|
| 237 |
"word_class": {
|
| 238 |
-
"L": "participle"
|
| 239 |
},
|
| 240 |
"voice": {
|
| 241 |
"A": "active",
|
| 242 |
"M": "mediopassive"
|
| 243 |
},
|
| 244 |
"declension": {
|
| 245 |
-
"S": "strong",
|
| 246 |
-
"W": "weak",
|
| 247 |
"e": "no-declension"
|
| 248 |
},
|
| 249 |
"gender": {
|
|
@@ -456,8 +456,8 @@
|
|
| 456 |
"A": "absolutt hástig"
|
| 457 |
},
|
| 458 |
"declension": {
|
| 459 |
-
"S": "sterk",
|
| 460 |
-
"W": "veik",
|
| 461 |
"e": "eingin sterk/veik bending"
|
| 462 |
},
|
| 463 |
"gender": {
|
|
@@ -494,9 +494,9 @@
|
|
| 494 |
"N": "hvørkikyn"
|
| 495 |
},
|
| 496 |
"person": {
|
| 497 |
-
"1": "
|
| 498 |
-
"2": "
|
| 499 |
-
"3": "
|
| 500 |
},
|
| 501 |
"number": {
|
| 502 |
"S": "eintal",
|
|
@@ -577,8 +577,8 @@
|
|
| 577 |
"M": "miðalsøgn"
|
| 578 |
},
|
| 579 |
"declension": {
|
| 580 |
-
"S": "sterk",
|
| 581 |
-
"W": "veik",
|
| 582 |
"e": "eingin sterk/veik bending"
|
| 583 |
},
|
| 584 |
"gender": {
|
|
|
|
| 84 |
"G": "genitive"
|
| 85 |
},
|
| 86 |
"article": {
|
| 87 |
+
"A": "definite"
|
| 88 |
},
|
| 89 |
"proper": {
|
| 90 |
"P": "Proper Noun"
|
|
|
|
| 125 |
"A": "absolute superlative"
|
| 126 |
},
|
| 127 |
"declension": {
|
| 128 |
+
"S": "strong declension",
|
| 129 |
+
"W": "weak declension",
|
| 130 |
"e": "no-declension"
|
| 131 |
},
|
| 132 |
"gender": {
|
|
|
|
| 163 |
"N": "neuter"
|
| 164 |
},
|
| 165 |
"person": {
|
| 166 |
+
"1": "1st person",
|
| 167 |
+
"2": "2nd person",
|
| 168 |
+
"3": "3rd person"
|
| 169 |
},
|
| 170 |
"number": {
|
| 171 |
"S": "singular",
|
|
|
|
| 206 |
},
|
| 207 |
"V": {
|
| 208 |
"word_class": {
|
| 209 |
+
"V": "verb"
|
| 210 |
},
|
| 211 |
"mood": {
|
| 212 |
"I": "infinitive",
|
|
|
|
| 235 |
},
|
| 236 |
"L": {
|
| 237 |
"word_class": {
|
| 238 |
+
"L": "past participle"
|
| 239 |
},
|
| 240 |
"voice": {
|
| 241 |
"A": "active",
|
| 242 |
"M": "mediopassive"
|
| 243 |
},
|
| 244 |
"declension": {
|
| 245 |
+
"S": "strong declension",
|
| 246 |
+
"W": "weak declension",
|
| 247 |
"e": "no-declension"
|
| 248 |
},
|
| 249 |
"gender": {
|
|
|
|
| 456 |
"A": "absolutt hástig"
|
| 457 |
},
|
| 458 |
"declension": {
|
| 459 |
+
"S": "sterk bending",
|
| 460 |
+
"W": "veik bending",
|
| 461 |
"e": "eingin sterk/veik bending"
|
| 462 |
},
|
| 463 |
"gender": {
|
|
|
|
| 494 |
"N": "hvørkikyn"
|
| 495 |
},
|
| 496 |
"person": {
|
| 497 |
+
"1": "1. persónur",
|
| 498 |
+
"2": "2. persónur",
|
| 499 |
+
"3": "3. persónur"
|
| 500 |
},
|
| 501 |
"number": {
|
| 502 |
"S": "eintal",
|
|
|
|
| 577 |
"M": "miðalsøgn"
|
| 578 |
},
|
| 579 |
"declension": {
|
| 580 |
+
"S": "sterk bending",
|
| 581 |
+
"W": "veik bending",
|
| 582 |
"e": "eingin sterk/veik bending"
|
| 583 |
},
|
| 584 |
"gender": {
|