Upload app.py
Browse files
app.py
CHANGED
|
@@ -38,6 +38,7 @@ UI = {
|
|
| 38 |
"fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
|
| 39 |
"en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
|
| 40 |
}
|
|
|
|
| 41 |
MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
|
| 42 |
|
| 43 |
# Theme color: #89AFA9 (+ close shades) + system font
|
|
@@ -49,14 +50,11 @@ CSS = """
|
|
| 49 |
body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
|
| 50 |
font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
|
| 51 |
}
|
| 52 |
-
|
| 53 |
-
/* Buttons */
|
| 54 |
.gr-button-primary, button.primary, .primary{
|
| 55 |
background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
|
| 56 |
}
|
| 57 |
.gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
|
| 58 |
a{ color:var(--primary-700)!important; }
|
| 59 |
-
.gr-button-primary{ padding: 0.32rem 0.75rem !important; font-size: 0.95rem !important; }
|
| 60 |
|
| 61 |
/* Dataframe column wrapping: keep Orð + Mark on one line */
|
| 62 |
.gr-dataframe table td:nth-child(1),
|
|
@@ -67,18 +65,68 @@ a{ color:var(--primary-700)!important; }
|
|
| 67 |
.gr-dataframe table td:nth-child(2),
|
| 68 |
.gr-dataframe table th:nth-child(2){
|
| 69 |
white-space: nowrap !important;
|
| 70 |
-
width:
|
| 71 |
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
|
| 72 |
}
|
| 73 |
.gr-dataframe table td:nth-child(3),
|
| 74 |
.gr-dataframe table th:nth-child(3){
|
| 75 |
white-space: normal !important;
|
| 76 |
-
width:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
}
|
| 78 |
|
| 79 |
-
/*
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
|
| 84 |
# ----------------------------
|
|
@@ -234,14 +282,6 @@ VOICE_ANALYSIS = {
|
|
| 234 |
}
|
| 235 |
|
| 236 |
def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
| 237 |
-
"""
|
| 238 |
-
Útgreining / Analysis:
|
| 239 |
-
- plain words (no letters/hyphens)
|
| 240 |
-
- pronouns: start at subcategory, not word class
|
| 241 |
-
- conjunctions: subcategory already includes 'sambindingarorð' in label
|
| 242 |
-
- DGd: show only fyriseting/preposition
|
| 243 |
-
- supine: show only supine + voice (drop verb/number/tense/person etc.)
|
| 244 |
-
"""
|
| 245 |
lang = "fo" if lang=="fo" else "en"
|
| 246 |
tag = vector_to_tag(vec)
|
| 247 |
wc = wc_code(vec)
|
|
@@ -259,7 +299,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 259 |
|
| 260 |
parts = []
|
| 261 |
|
| 262 |
-
# Pronouns + conjunctions:
|
| 263 |
if wc in {"P","C"}:
|
| 264 |
subc = group_code(vec, "subcategory")
|
| 265 |
subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
|
|
@@ -275,7 +315,7 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 275 |
if not c:
|
| 276 |
continue
|
| 277 |
if wc in {"P","C"} and g == "subcategory":
|
| 278 |
-
continue
|
| 279 |
if (wc, g, c) in HIDE_IN_ANALYSIS:
|
| 280 |
continue
|
| 281 |
|
|
@@ -290,7 +330,6 @@ def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 290 |
return ", ".join(parts)
|
| 291 |
|
| 292 |
def expanded_text(vec: torch.Tensor, lang: str) -> str:
|
| 293 |
-
"""Útgreinað marking / Expanded tags: codes + labels."""
|
| 294 |
lang = "fo" if lang=="fo" else "en"
|
| 295 |
wc = wc_code(vec)
|
| 296 |
parts = []
|
|
@@ -308,7 +347,7 @@ def expanded_text(vec: torch.Tensor, lang: str) -> str:
|
|
| 308 |
return "; ".join([p for p in parts if p])
|
| 309 |
|
| 310 |
def compute_codes_by_wc():
|
| 311 |
-
codes = defaultdict(lambda: defaultdict(set))
|
| 312 |
for arr in tag_to_features.values():
|
| 313 |
arr = np.array(arr)
|
| 314 |
|
|
@@ -333,7 +372,6 @@ def compute_codes_by_wc():
|
|
| 333 |
CODES_BY_WC = compute_codes_by_wc()
|
| 334 |
|
| 335 |
def build_overview(lang: str) -> str:
|
| 336 |
-
"""Markayvirlit / Tag Overview under each word class (codes used in current CSV)."""
|
| 337 |
lang = "fo" if lang=="fo" else "en"
|
| 338 |
title = "### Markayvirlit" if lang=="fo" else "### Tag Overview"
|
| 339 |
lines = [title, ""]
|
|
@@ -346,7 +384,6 @@ def build_overview(lang: str) -> str:
|
|
| 346 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 347 |
if not cs:
|
| 348 |
continue
|
| 349 |
-
|
| 350 |
group_name = {
|
| 351 |
"fo": {
|
| 352 |
"subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
|
|
@@ -456,7 +493,7 @@ def render(rows_state, lang: str):
|
|
| 456 |
theme = gr.themes.Soft()
|
| 457 |
|
| 458 |
with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
| 459 |
-
#
|
| 460 |
with gr.Row(equal_height=True):
|
| 461 |
with gr.Column(scale=2):
|
| 462 |
inp = gr.Textbox(
|
|
@@ -464,7 +501,7 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 464 |
placeholder="Skriva her ... / Type here ...",
|
| 465 |
show_label=False,
|
| 466 |
)
|
| 467 |
-
with gr.Column(scale=1, min_width=
|
| 468 |
gr.Markdown(
|
| 469 |
"### Marka\n"
|
| 470 |
"Skriv setningin í kassan vinstrumegin og fá hann markaðan.\n\n"
|
|
@@ -474,19 +511,16 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 474 |
|
| 475 |
state = gr.State([])
|
| 476 |
|
| 477 |
-
# Results header row (
|
| 478 |
-
with gr.Row():
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
elem_id="lang_dd",
|
| 488 |
-
visible=False,
|
| 489 |
-
)
|
| 490 |
|
| 491 |
out_df = gr.Dataframe(
|
| 492 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
|
|
@@ -498,8 +532,8 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 498 |
visible=False,
|
| 499 |
)
|
| 500 |
|
| 501 |
-
|
| 502 |
-
with
|
| 503 |
out_mean_df = gr.Dataframe(
|
| 504 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
|
| 505 |
wrap=True,
|
|
@@ -507,11 +541,11 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 507 |
show_label=False,
|
| 508 |
row_count=(0, "fixed"),
|
| 509 |
col_count=(3, "fixed"),
|
| 510 |
-
visible=True,
|
| 511 |
)
|
| 512 |
|
| 513 |
-
|
| 514 |
-
|
|
|
|
| 515 |
|
| 516 |
def on_tag(sentence, lang_choice):
|
| 517 |
rows = run_model(sentence)
|
|
@@ -519,11 +553,12 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 519 |
return (
|
| 520 |
rows,
|
| 521 |
gr.update(value=df_main, visible=True),
|
| 522 |
-
gr.update(value=df_mean
|
| 523 |
-
gr.update(value=overview
|
| 524 |
gr.update(visible=True), # results_title
|
| 525 |
gr.update(visible=True), # lang
|
| 526 |
gr.update(visible=True), # expanded_acc
|
|
|
|
| 527 |
)
|
| 528 |
|
| 529 |
def on_lang(rows, lang_choice):
|
|
@@ -537,7 +572,7 @@ with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
|
| 537 |
btn.click(
|
| 538 |
on_tag,
|
| 539 |
inputs=[inp, lang],
|
| 540 |
-
outputs=[state, out_df, out_mean_df, overview_md, results_title, lang, expanded_acc],
|
| 541 |
queue=False,
|
| 542 |
)
|
| 543 |
|
|
|
|
| 38 |
"fo": {"w":"Orð", "t":"Mark", "s":"Útgreining", "m":"Útgreinað marking"},
|
| 39 |
"en": {"w":"Word","t":"Tag", "s":"Analysis", "m":"Expanded tags"},
|
| 40 |
}
|
| 41 |
+
|
| 42 |
MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
|
| 43 |
|
| 44 |
# Theme color: #89AFA9 (+ close shades) + system font
|
|
|
|
| 50 |
body, .gradio-container, .prose, .markdown, textarea, input, select, button, table{
|
| 51 |
font-family:-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, "Noto Sans", sans-serif !important;
|
| 52 |
}
|
|
|
|
|
|
|
| 53 |
.gr-button-primary, button.primary, .primary{
|
| 54 |
background:var(--primary-500)!important; border-color:var(--primary-600)!important; color:#0b1b19!important;
|
| 55 |
}
|
| 56 |
.gr-button-primary:hover, button.primary:hover, .primary:hover{ background:var(--primary-600)!important; }
|
| 57 |
a{ color:var(--primary-700)!important; }
|
|
|
|
| 58 |
|
| 59 |
/* Dataframe column wrapping: keep Orð + Mark on one line */
|
| 60 |
.gr-dataframe table td:nth-child(1),
|
|
|
|
| 65 |
.gr-dataframe table td:nth-child(2),
|
| 66 |
.gr-dataframe table th:nth-child(2){
|
| 67 |
white-space: nowrap !important;
|
| 68 |
+
width: 18% !important;
|
| 69 |
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
|
| 70 |
}
|
| 71 |
.gr-dataframe table td:nth-child(3),
|
| 72 |
.gr-dataframe table th:nth-child(3){
|
| 73 |
white-space: normal !important;
|
| 74 |
+
width: 64% !important;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
/* Results header: make it a clean left-title / right-language bar (no grey blocks) */
|
| 78 |
+
#results_hdr{
|
| 79 |
+
display:flex;
|
| 80 |
+
align-items:center;
|
| 81 |
+
justify-content:space-between;
|
| 82 |
+
gap: 12px;
|
| 83 |
+
padding: 0;
|
| 84 |
+
background: transparent !important;
|
| 85 |
+
}
|
| 86 |
+
#results_hdr > div{
|
| 87 |
+
background: transparent !important;
|
| 88 |
+
padding: 0 !important;
|
| 89 |
+
margin: 0 !important;
|
| 90 |
+
box-shadow: none !important;
|
| 91 |
+
border: 0 !important;
|
| 92 |
}
|
| 93 |
|
| 94 |
+
/* Language toggle (radio styled as segmented buttons) */
|
| 95 |
+
.lang_toggle{
|
| 96 |
+
display:flex;
|
| 97 |
+
gap: 8px;
|
| 98 |
+
justify-content:flex-end;
|
| 99 |
+
align-items:center;
|
| 100 |
+
}
|
| 101 |
+
.lang_toggle fieldset{
|
| 102 |
+
border: 0 !important;
|
| 103 |
+
padding: 0 !important;
|
| 104 |
+
margin: 0 !important;
|
| 105 |
+
}
|
| 106 |
+
.lang_toggle .wrap{
|
| 107 |
+
display:flex !important;
|
| 108 |
+
gap: 8px !important;
|
| 109 |
+
}
|
| 110 |
+
.lang_toggle input{
|
| 111 |
+
display:none !important; /* removes cursor + text field behavior entirely */
|
| 112 |
+
}
|
| 113 |
+
.lang_toggle label{
|
| 114 |
+
cursor:pointer;
|
| 115 |
+
padding: 8px 12px;
|
| 116 |
+
border-radius: 10px;
|
| 117 |
+
border: 1px solid rgba(0,0,0,.12);
|
| 118 |
+
background: white;
|
| 119 |
+
user-select:none;
|
| 120 |
+
font-size: 0.95rem;
|
| 121 |
+
}
|
| 122 |
+
.lang_toggle input:checked + span,
|
| 123 |
+
.lang_toggle label.selected{
|
| 124 |
+
background: var(--primary-100) !important;
|
| 125 |
+
border-color: var(--primary-500) !important;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
/* Slightly smaller primary button */
|
| 129 |
+
.gr-button-primary{ padding: 0.35rem 0.85rem !important; font-size: 0.95rem !important; }
|
| 130 |
"""
|
| 131 |
|
| 132 |
# ----------------------------
|
|
|
|
| 282 |
}
|
| 283 |
|
| 284 |
def analysis_text(vec: torch.Tensor, lang: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
lang = "fo" if lang=="fo" else "en"
|
| 286 |
tag = vector_to_tag(vec)
|
| 287 |
wc = wc_code(vec)
|
|
|
|
| 299 |
|
| 300 |
parts = []
|
| 301 |
|
| 302 |
+
# Pronouns + conjunctions: subcategory already carries the head noun
|
| 303 |
if wc in {"P","C"}:
|
| 304 |
subc = group_code(vec, "subcategory")
|
| 305 |
subl = clean_label(label_for(lang, "subcategory", wc, subc) or "")
|
|
|
|
| 315 |
if not c:
|
| 316 |
continue
|
| 317 |
if wc in {"P","C"} and g == "subcategory":
|
| 318 |
+
continue
|
| 319 |
if (wc, g, c) in HIDE_IN_ANALYSIS:
|
| 320 |
continue
|
| 321 |
|
|
|
|
| 330 |
return ", ".join(parts)
|
| 331 |
|
| 332 |
def expanded_text(vec: torch.Tensor, lang: str) -> str:
|
|
|
|
| 333 |
lang = "fo" if lang=="fo" else "en"
|
| 334 |
wc = wc_code(vec)
|
| 335 |
parts = []
|
|
|
|
| 347 |
return "; ".join([p for p in parts if p])
|
| 348 |
|
| 349 |
def compute_codes_by_wc():
|
| 350 |
+
codes = defaultdict(lambda: defaultdict(set))
|
| 351 |
for arr in tag_to_features.values():
|
| 352 |
arr = np.array(arr)
|
| 353 |
|
|
|
|
| 372 |
CODES_BY_WC = compute_codes_by_wc()
|
| 373 |
|
| 374 |
def build_overview(lang: str) -> str:
|
|
|
|
| 375 |
lang = "fo" if lang=="fo" else "en"
|
| 376 |
title = "### Markayvirlit" if lang=="fo" else "### Tag Overview"
|
| 377 |
lines = [title, ""]
|
|
|
|
| 384 |
cs = sorted(CODES_BY_WC[wc].get(g, set()))
|
| 385 |
if not cs:
|
| 386 |
continue
|
|
|
|
| 387 |
group_name = {
|
| 388 |
"fo": {
|
| 389 |
"subcategory":"Undirflokkur", "gender":"Kyn", "number":"Tal", "case":"Fall",
|
|
|
|
| 493 |
theme = gr.themes.Soft()
|
| 494 |
|
| 495 |
with gr.Blocks(theme=theme, css=CSS, title="Marka") as demo:
|
| 496 |
+
# Layout: textbox left, info right, button under info
|
| 497 |
with gr.Row(equal_height=True):
|
| 498 |
with gr.Column(scale=2):
|
| 499 |
inp = gr.Textbox(
|
|
|
|
| 501 |
placeholder="Skriva her ... / Type here ...",
|
| 502 |
show_label=False,
|
| 503 |
)
|
| 504 |
+
with gr.Column(scale=1, min_width=320):
|
| 505 |
gr.Markdown(
|
| 506 |
"### Marka\n"
|
| 507 |
"Skriv setningin í kassan vinstrumegin og fá hann markaðan.\n\n"
|
|
|
|
| 511 |
|
| 512 |
state = gr.State([])
|
| 513 |
|
| 514 |
+
# Results header row (hidden until first run)
|
| 515 |
+
with gr.Row(elem_id="results_hdr"):
|
| 516 |
+
results_title = gr.Markdown("### Úrslit / Results", visible=False)
|
| 517 |
+
lang = gr.Radio(
|
| 518 |
+
choices=[("Føroyskt","fo"), ("English","en")],
|
| 519 |
+
value="fo",
|
| 520 |
+
show_label=False,
|
| 521 |
+
visible=False,
|
| 522 |
+
elem_classes=["lang_toggle"],
|
| 523 |
+
)
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
out_df = gr.Dataframe(
|
| 526 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["s"]]),
|
|
|
|
| 532 |
visible=False,
|
| 533 |
)
|
| 534 |
|
| 535 |
+
expanded_acc = gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False)
|
| 536 |
+
with expanded_acc:
|
| 537 |
out_mean_df = gr.Dataframe(
|
| 538 |
value=pd.DataFrame(columns=[UI["fo"]["w"], UI["fo"]["t"], UI["fo"]["m"]]),
|
| 539 |
wrap=True,
|
|
|
|
| 541 |
show_label=False,
|
| 542 |
row_count=(0, "fixed"),
|
| 543 |
col_count=(3, "fixed"),
|
|
|
|
| 544 |
)
|
| 545 |
|
| 546 |
+
overview_acc = gr.Accordion("Markayvirlit / Tag Overview", open=False, visible=False)
|
| 547 |
+
with overview_acc:
|
| 548 |
+
overview_md = gr.Markdown("")
|
| 549 |
|
| 550 |
def on_tag(sentence, lang_choice):
|
| 551 |
rows = run_model(sentence)
|
|
|
|
| 553 |
return (
|
| 554 |
rows,
|
| 555 |
gr.update(value=df_main, visible=True),
|
| 556 |
+
gr.update(value=df_mean),
|
| 557 |
+
gr.update(value=overview),
|
| 558 |
gr.update(visible=True), # results_title
|
| 559 |
gr.update(visible=True), # lang
|
| 560 |
gr.update(visible=True), # expanded_acc
|
| 561 |
+
gr.update(visible=True), # overview_acc
|
| 562 |
)
|
| 563 |
|
| 564 |
def on_lang(rows, lang_choice):
|
|
|
|
| 572 |
btn.click(
|
| 573 |
on_tag,
|
| 574 |
inputs=[inp, lang],
|
| 575 |
+
outputs=[state, out_df, out_mean_df, overview_md, results_title, lang, expanded_acc, overview_acc],
|
| 576 |
queue=False,
|
| 577 |
)
|
| 578 |
|