AdamTT committed on
Commit
e9c4512
·
verified ·
1 Parent(s): 487a04b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -171
app.py CHANGED
@@ -1,51 +1,130 @@
1
  import time
2
- from typing import List, Tuple, Dict
3
 
4
  import gradio as gr
5
- import numpy as np
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- import torch
8
- from transformers import AutoTokenizer, AutoModel
9
  from huggingface_hub import HfApi
10
 
11
  api = HfApi()
12
 
13
- # Twarde, stabilne rekomendacje (min. 3)
14
- RECOMMENDATIONS = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "instruction": [
16
- ("google/flan-t5-small", "Lekki text2text, dobry na CPU do poleceń i krótkich odpowiedzi."),
17
- ("google/flan-t5-base", "Lepsza jakość kosztem szybkości; nadal sensowne na CPU."),
18
- ("google-t5/t5-small", "Prosty fallback text2text, gdy chcesz klasykę i szybkość.")
19
  ],
20
  "qa": [
21
- ("distilbert/distilbert-base-cased-distilled-squad", "Szybki QA extractive na CPU; klasyczny wybór."),
22
- ("distilbert/distilbert-base-uncased-distilled-squad", "Popularny model SQuAD; dobry default."),
23
- ("deepset/bert-base-cased-squad2", "SQuAD2; częściej zwraca 'brak odpowiedzi'.")
24
  ],
25
  "embeddings": [
26
- ("sentence-transformers/all-MiniLM-L6-v2", "Popularny do similarity search; szybki na CPU."),
27
- ("intfloat/e5-small-v2", "Mocny embedding do wyszukiwania; dobry kompromis."),
28
- ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "Multilingual (lepszy przy PL/mix).")
29
  ],
30
  }
31
 
 
 
 
32
  # -----------------------
33
- # A) Hub bonus: cache + filtr językowy
34
  # -----------------------
35
  _HUB_CACHE: Dict[Tuple[str, str], Tuple[float, List[str]]] = {}
36
- CACHE_TTL_SEC = 6 * 60 * 60 # 6 godzin
37
-
38
- def _language_tag_predicate(tags: List[str], language: str) -> bool:
39
- if language == "Mieszany":
 
 
 
 
40
  return True
41
- # Tagowanie językowe na Hubie nie jest 100% spójne, więc sprawdzamy kilka wariantów.
42
- lang = language.lower()
43
- candidates = {lang, f"language:{lang}", f"lang:{lang}"}
44
- tags_lower = {t.lower() for t in (tags or [])}
45
  return any(c in tags_lower for c in candidates)
46
 
47
- def hub_bonus_models(pipeline_tag: str, language: str, limit: int = 12) -> List[str]:
48
- key = (pipeline_tag, language)
49
  now = time.time()
50
 
51
  if key in _HUB_CACHE:
@@ -54,13 +133,12 @@ def hub_bonus_models(pipeline_tag: str, language: str, limit: int = 12) -> List[
54
  return cached
55
 
56
  try:
57
- # list_models: filtrujemy po pipeline tagu i sortujemy po pobraniach (popularność).
58
  models = api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit)
59
  out = []
60
  for m in models:
61
  mid = getattr(m, "modelId", None)
62
  tags = getattr(m, "tags", []) or []
63
- if mid and _language_tag_predicate(tags, language):
64
  out.append(mid)
65
  _HUB_CACHE[key] = (now, out)
66
  return out
@@ -68,187 +146,154 @@ def hub_bonus_models(pipeline_tag: str, language: str, limit: int = 12) -> List[
68
  return []
69
 
70
  # -----------------------
71
- # B) Embeddingi lokalnie (CPU): mean pooling
72
  # -----------------------
73
- _MODEL_CACHE: Dict[str, Tuple[AutoTokenizer, AutoModel]] = {}
74
-
75
- def _load_encoder(model_id: str):
76
- if model_id in _MODEL_CACHE:
77
- return _MODEL_CACHE[model_id]
78
- tok = AutoTokenizer.from_pretrained(model_id)
79
- mdl = AutoModel.from_pretrained(model_id)
80
- mdl.eval()
81
- _MODEL_CACHE[model_id] = (tok, mdl)
82
- return tok, mdl
83
-
84
- @torch.no_grad()
85
- def embed_texts(model_id: str, texts: List[str], batch_size: int = 16) -> np.ndarray:
86
- tok, mdl = _load_encoder(model_id)
87
-
88
- all_vecs = []
89
- for i in range(0, len(texts), batch_size):
90
- batch = texts[i:i+batch_size]
91
- enc = tok(batch, padding=True, truncation=True, return_tensors="pt")
92
- out = mdl(**enc)
93
-
94
- # Mean pooling po tokenach z maską attention
95
- token_emb = out.last_hidden_state # [B, T, H]
96
- mask = enc["attention_mask"].unsqueeze(-1).expand(token_emb.size()).float()
97
- summed = torch.sum(token_emb * mask, dim=1)
98
- counts = torch.clamp(mask.sum(dim=1), min=1e-9)
99
- mean_pooled = summed / counts
100
-
101
- # Normalizacja L2 pomaga dla cosine similarity
102
- normed = torch.nn.functional.normalize(mean_pooled, p=2, dim=1)
103
- all_vecs.append(normed.cpu().numpy())
104
-
105
- return np.vstack(all_vecs)
106
-
107
- def deduplicate_notes(model_id: str, raw_notes: str, threshold: float) -> str:
108
- notes = [n.strip() for n in raw_notes.splitlines() if n.strip()]
109
- if len(notes) < 2:
110
- return "Wklej co najmniej 2 wpisy (po jednej linijce)."
111
-
112
- vecs = embed_texts(model_id, notes)
113
- sim = cosine_similarity(vecs)
114
-
115
- # Grupowanie prostym union-find (spójne składowe przy sim >= threshold)
116
- parent = list(range(len(notes)))
117
-
118
- def find(x):
119
- while parent[x] != x:
120
- parent[x] = parent[parent[x]]
121
- x = parent[x]
122
- return x
123
-
124
- def union(a, b):
125
- ra, rb = find(a), find(b)
126
- if ra != rb:
127
- parent[rb] = ra
128
-
129
- for i in range(len(notes)):
130
- for j in range(i + 1, len(notes)):
131
- if sim[i, j] >= threshold:
132
- union(i, j)
133
-
134
- groups: Dict[int, List[int]] = {}
135
- for idx in range(len(notes)):
136
- r = find(idx)
137
- groups.setdefault(r, []).append(idx)
138
-
139
- # Interesują nas grupy z duplikatami (rozmiar > 1)
140
- dup_groups = [g for g in groups.values() if len(g) > 1]
141
- dup_groups.sort(key=len, reverse=True)
142
-
143
- if not dup_groups:
144
- return f"Brak duplikatów przy progu {threshold:.2f}."
145
-
146
- lines = []
147
- lines.append(f"Znalezione grupy podobnych wpisów (próg {threshold:.2f}):")
148
- lines.append("")
149
- for gi, g in enumerate(dup_groups, start=1):
150
- lines.append(f"Grupa {gi} (rozmiar {len(g)}):")
151
- for idx in g:
152
- lines.append(f"- {notes[idx]}")
153
- lines.append("Sugestia: zostaw 1 wpis, pozostałe oznacz jako duplikaty.")
154
- lines.append("")
155
 
156
- return "\n".join(lines).strip()
 
157
 
158
  # -----------------------
159
- # Doradca modeli
160
  # -----------------------
161
- def recommend(task, has_docs, language, cpu_only, priority):
162
- if task == "Semantyczne podobieństwo / duplikaty / wyszukiwanie":
163
  model_type = "embeddings"
164
  why = (
165
- "Zadanie polega na porównaniu znaczenia wpisów i wykryciu duplikatów. "
166
- "Najlepsze modele embeddingowe + podobieństwo cosinusowe (sentence similarity)."
 
167
  )
168
  pipeline_tag = "sentence-similarity"
169
- elif task == "Odpowiedzi na pytania z dokumentu (tekst wejściowy)":
 
170
  model_type = "qa"
171
  why = (
172
- "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedź w kontekście "
173
- "i zwykle halucynuje rzadziej niż modele generatywne."
 
174
  )
175
  pipeline_tag = "question-answering"
 
176
  else:
177
  model_type = "instruction"
178
  why = (
179
- "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). "
180
- "Modele instrukcyjne dostrajane do wykonywania instrukcji."
 
181
  )
182
  pipeline_tag = "text-generation"
 
183
 
184
  recs = RECOMMENDATIONS[model_type].copy()
185
 
186
- # Bonus: dociągamy popularne modele z Hub (filtrowane po języku)
187
- bonus = hub_bonus_models(pipeline_tag, language, limit=12)
188
  existing = {mid for mid, _ in recs}
189
  bonus = [m for m in bonus if m not in existing]
190
-
191
- # Dodajemy do 2 bonusów, żeby nie zalać użytkownika
192
  for m in bonus[:2]:
193
- recs.append((m, "Popularny model z Hub (dobrany po tagu zadania, sort po pobraniach)."))
194
 
195
- lines = []
196
- lines.append(f"Rekomendowany typ modelu: {model_type}")
197
  lines.append("")
198
- lines.append("Uzasadnienie:")
199
  lines.append(f"- {why}")
200
  lines.append("")
201
- lines.append("Modele (min. 3):")
202
  for mid, note in recs[:5]:
203
  lines.append(f"- {mid} — {note}")
204
-
205
- if model_type == "embeddings":
206
- lines.append("")
207
- lines.append("Zastosowanie do duplikatów (skrót): embeddingi -> cosine similarity -> próg -> grupy.")
208
- if language in ["PL", "Mieszany"]:
209
- lines.append("Wskazówka: preferuj model multilingual przy PL/mix języków.")
210
 
211
  return "\n".join(lines)
212
 
213
  # -----------------------
214
- # UI (2 zakładki)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  # -----------------------
216
- with gr.Blocks(title="Model Fit Finder (CPU)") as demo:
217
- gr.Markdown("# Model Fit Finder (CPU)\nDobiera typ modelu i pokazuje minimum 3 propozycje. Zawiera też deduplikację embeddingami.")
218
 
219
- with gr.Tab("Doradca modeli"):
 
 
 
 
 
 
 
220
  task = gr.Dropdown(
221
- choices=[
222
- "Chat / polecenia / generowanie",
223
- "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
224
- "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
225
- ],
226
- value="Semantyczne podobieństwo / duplikaty / wyszukiwanie",
227
- label="Co chcesz zrobić?"
 
 
 
 
 
 
228
  )
229
- has_docs = gr.Radio(choices=["Tak", "Nie"], value="Tak", label="Czy masz własne dokumenty/teksty do analizy?")
230
- language = gr.Radio(choices=["EN", "PL", "Mieszany"], value="Mieszany", label="Język danych")
231
- cpu_only = gr.Checkbox(value=True, label="CPU only")
232
- priority = gr.Radio(choices=["Szybkość", "Jakość"], value="Szybkość", label="Priorytet")
233
-
234
- btn = gr.Button("Zarekomenduj")
235
- out = gr.Textbox(lines=18, label="Wynik")
236
- btn.click(fn=recommend, inputs=[task, has_docs, language, cpu_only, priority], outputs=[out])
237
-
238
- with gr.Tab("Deduplikacja wpisów (embeddingi)"):
239
- gr.Markdown(
240
- "Wklej wpisy (po jednej linijce). Space policzy embeddingi lokalnie na CPU i pogrupuje duplikaty.\n"
241
- "Uwaga: przy bardzo krótkich, technicznych wpisach warto testować próg w zakresie 0.85–0.95."
242
  )
243
- embed_model = gr.Dropdown(
244
- choices=[m for m, _ in RECOMMENDATIONS["embeddings"]],
245
- value=RECOMMENDATIONS["embeddings"][0][0],
246
- label="Model embeddingowy"
 
 
 
 
247
  )
248
- threshold = gr.Slider(0.70, 0.99, value=0.90, step=0.01, label="Próg podobieństwa (cosine)")
249
- notes = gr.Textbox(lines=12, label="Wpisy (1 linia = 1 wpis)")
250
- run = gr.Button("Wykryj duplikaty")
251
- dup_out = gr.Textbox(lines=18, label="Grupy duplikatów")
252
- run.click(fn=deduplicate_notes, inputs=[embed_model, notes, threshold], outputs=[dup_out])
 
 
253
 
254
  demo.launch()
 
1
  import time
2
+ from typing import Dict, List, Tuple, Any
3
 
4
  import gradio as gr
 
 
 
 
5
  from huggingface_hub import HfApi
6
 
7
  api = HfApi()
8
 
9
# -----------------------
# i18n
# -----------------------
# Static UI-string table keyed by UI language ("EN" / "PL"); each value is a
# flat key -> display-text map consumed via t(). Only display text lives here;
# the stable internal values (task ids, radio values) are defined separately
# so switching the UI language never changes what the backend receives.
I18N: Dict[str, Dict[str, str]] = {
    "EN": {
        "title": "Model Fit Finder (CPU)",
        "intro": (
            "Pick your NLP task and constraints. The Space will recommend an appropriate model type "
            "and list at least 3 concrete Hugging Face models, with short rationale."
        ),
        "ui_lang": "UI language",
        "tab_main": "Model advisor",
        "task": "What do you want to do?",
        "has_docs": "Do you have your own documents/text to analyze?",
        "data_lang": "Data language",
        "cpu_only": "CPU only",
        "priority": "Priority",
        "recommend_btn": "Recommend",
        "result": "Result",
        "yes": "Yes",
        "no": "No",
        "en": "EN",
        "pl": "PL",
        "mixed": "Mixed",
        "speed": "Speed",
        "quality": "Quality",
        "task_chat": "Chat / instructions / generation",
        "task_qa": "Answer questions from a document (input text)",
        "task_sim": "Semantic similarity / duplicates / search",
        "rec_type": "Recommended model type: {model_type}",
        "rationale": "Rationale:",
        "models_min3": "Models (min. 3):",
        "emb_note": "Note: embedding models do not generate text; they produce vectors for similarity/search.",
        "qa_note": "Note: extractive QA works best when you provide the relevant context text.",
        "instr_note": "Note: instruction-tuned models follow your prompts; smaller variants are CPU-friendly.",
        "bonus_note": "Popular model from Hub (selected by task tag and downloads).",
    },
    "PL": {
        "title": "Model Fit Finder (CPU)",
        "intro": (
            "Wybierz zadanie NLP i ograniczenia. Space zarekomenduje typ modelu "
            "i pokaże co najmniej 3 konkretne modele z Hugging Face wraz z uzasadnieniem."
        ),
        "ui_lang": "Język interfejsu",
        "tab_main": "Doradca modeli",
        "task": "Co chcesz zrobić?",
        "has_docs": "Czy masz własne dokumenty/teksty do analizy?",
        "data_lang": "Język danych",
        "cpu_only": "CPU only",
        "priority": "Priorytet",
        "recommend_btn": "Zarekomenduj",
        "result": "Wynik",
        "yes": "Tak",
        "no": "Nie",
        "en": "EN",
        "pl": "PL",
        "mixed": "Mieszany",
        "speed": "Szybkość",
        "quality": "Jakość",
        "task_chat": "Chat / polecenia / generowanie",
        "task_qa": "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
        "task_sim": "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
        "rec_type": "Rekomendowany typ modelu: {model_type}",
        "rationale": "Uzasadnienie:",
        "models_min3": "Modele (min. 3):",
        "emb_note": "Uwaga: modele embeddingowe nie generują tekstu; produkują wektory do podobieństwa/wyszukiwania.",
        "qa_note": "Uwaga: QA extractive działa najlepiej, gdy podasz kontekst (tekst źródłowy).",
        "instr_note": "Uwaga: modele instrukcyjne wykonują polecenia; mniejsze warianty są przyjazne dla CPU.",
        "bonus_note": "Popularny model z Hub (dobrany po tagu zadania i pobraniach).",
    },
}
80
+
81
def t(ui_lang: str, key: str) -> str:
    """Look up the UI string *key* for *ui_lang*.

    Falls back to the English table when the language is unknown, and to
    the key itself when the key is missing everywhere — so a typo in a
    key never crashes the UI, it just shows the raw key.
    """
    table = I18N.get(ui_lang, I18N["EN"])
    english_fallback = I18N["EN"].get(key, key)
    return table.get(key, english_fallback)
83
+
84
# -----------------------
# Stable baseline recommendations (min. 3 per type)
# -----------------------
# Curated fallback models so the advisor always shows at least three
# suggestions even when the live Hub lookup fails or returns nothing.
# Keys match the internal model_type values produced by recommend();
# each entry is (hub model id, short English rationale).
RECOMMENDATIONS: Dict[str, List[Tuple[str, str]]] = {
    "instruction": [
        ("google/flan-t5-small", "Light text2text, good CPU baseline for instruction following."),
        ("google/flan-t5-base", "Better quality, slower than small; still workable on CPU."),
        ("google-t5/t5-small", "Simple text2text fallback when you want a fast baseline."),
    ],
    "qa": [
        ("distilbert/distilbert-base-cased-distilled-squad", "Fast extractive QA on CPU; classic choice."),
        ("distilbert/distilbert-base-uncased-distilled-squad", "Very popular SQuAD QA default."),
        ("deepset/bert-base-cased-squad2", "SQuAD2; handles 'no answer' cases better."),
    ],
    "embeddings": [
        ("sentence-transformers/all-MiniLM-L6-v2", "Popular sentence embeddings; fast on CPU."),
        ("intfloat/e5-small-v2", "Strong retrieval embeddings; good quality/speed tradeoff."),
        ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "Multilingual; better for PL/mixed."),
    ],
}

# If you want Polish descriptions here as well, keep EN here and localize notes in output.
# (Model IDs are universal; notes can be in EN and output can add localized note lines.)
107
+
108
  # -----------------------
109
+ # Hub bonus models (cache)
110
  # -----------------------
111
  _HUB_CACHE: Dict[Tuple[str, str], Tuple[float, List[str]]] = {}
112
+ CACHE_TTL_SEC = 6 * 60 * 60 # 6h
113
+
114
+ def _language_tag_predicate(tags: List[str], data_lang_value: str) -> bool:
115
+ """
116
+ data_lang_value is one of: EN, PL, MIXED (internal values).
117
+ HF tags aren't perfectly consistent; we do best-effort filtering.
118
+ """
119
+ if data_lang_value == "MIXED":
120
  return True
121
+ target = "en" if data_lang_value == "EN" else "pl"
122
+ candidates = {target, f"language:{target}", f"lang:{target}"}
123
+ tags_lower = {str(x).lower() for x in (tags or [])}
 
124
  return any(c in tags_lower for c in candidates)
125
 
126
+ def hub_bonus_models(pipeline_tag: str, data_lang_value: str, limit: int = 12) -> List[str]:
127
+ key = (pipeline_tag, data_lang_value)
128
  now = time.time()
129
 
130
  if key in _HUB_CACHE:
 
133
  return cached
134
 
135
  try:
 
136
  models = api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit)
137
  out = []
138
  for m in models:
139
  mid = getattr(m, "modelId", None)
140
  tags = getattr(m, "tags", []) or []
141
+ if mid and _language_tag_predicate(tags, data_lang_value):
142
  out.append(mid)
143
  _HUB_CACHE[key] = (now, out)
144
  return out
 
146
  return []
147
 
148
  # -----------------------
149
+ # Internal "task ids" (do NOT depend on UI language)
150
  # -----------------------
151
# Internal task identifiers: stable values decoupled from the translated
# labels, so switching UI language never changes what the backend receives.
TASK_CHAT = "CHAT"
TASK_QA = "QA"
TASK_SIM = "SIM"

def task_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return the task dropdown choices as (localized label, stable value) pairs."""
    keyed = (("task_chat", TASK_CHAT), ("task_qa", TASK_QA), ("task_sim", TASK_SIM))
    return [(t(ui_lang, label_key), value) for label_key, value in keyed]

def yesno_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return the yes/no radio choices as (localized label, stable value) pairs."""
    return [(t(ui_lang, label_key), value) for label_key, value in (("yes", "YES"), ("no", "NO"))]

def data_lang_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return the data-language radio choices as (localized label, stable value) pairs."""
    keyed = (("en", "EN"), ("pl", "PL"), ("mixed", "MIXED"))
    return [(t(ui_lang, label_key), value) for label_key, value in keyed]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
def priority_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return the priority radio choices as (localized label, stable value) pairs."""
    keyed = (("speed", "SPEED"), ("quality", "QUALITY"))
    return [(t(ui_lang, label_key), value) for label_key, value in keyed]
171
 
172
  # -----------------------
173
+ # Recommendation logic
174
  # -----------------------
175
def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, cpu_only: bool, priority: str) -> str:
    """Build the localized recommendation report for the chosen task.

    Parameters mirror the Gradio inputs in order. ``has_docs``,
    ``cpu_only`` and ``priority`` are wired through but not yet used by
    the selection logic. Returns plain text: recommended model type, a
    one-line rationale, at least three model suggestions, and a
    task-specific note.
    """
    english = ui_lang == "EN"

    if task_id == TASK_SIM:
        model_type = "embeddings"
        pipeline_tag = "sentence-similarity"
        note_key = "emb_note"
        why = (
            "You want semantic similarity / duplicate detection / search. Use embeddings + cosine similarity."
            if english
            else "Chcesz podobieństwo semantyczne / duplikaty / wyszukiwanie. Użyj embeddingów + podobieństwa cosinusowego."
        )
    elif task_id == TASK_QA:
        model_type = "qa"
        pipeline_tag = "question-answering"
        note_key = "qa_note"
        why = (
            "You have a context (document/text) and a question. Extractive QA finds answers in the context."
            if english
            else "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedź w kontekście."
        )
    else:
        # Any unknown task id falls back to the instruction/chat branch.
        model_type = "instruction"
        pipeline_tag = "text-generation"
        note_key = "instr_note"
        why = (
            "You want instruction-following responses (chat/explain/summarize). Instruction-tuned models fit best."
            if english
            else "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). Najlepsze są modele instrukcyjne."
        )

    suggestions = list(RECOMMENDATIONS[model_type])

    # Top up with at most two popular Hub models (task tag + best-effort
    # language filter), skipping anything already in the curated baseline.
    known = {model_id for model_id, _ in suggestions}
    fresh = [m for m in hub_bonus_models(pipeline_tag, data_lang_value, limit=12) if m not in known]
    suggestions.extend((m, t(ui_lang, "bonus_note")) for m in fresh[:2])

    report: List[str] = [
        t(ui_lang, "rec_type").format(model_type=model_type),
        "",
        t(ui_lang, "rationale"),
        f"- {why}",
        "",
        t(ui_lang, "models_min3"),
    ]
    report.extend(f"- {model_id} — {note}" for model_id, note in suggestions[:5])
    report.append("")
    report.append(t(ui_lang, note_key))
    return "\n".join(report)
226
 
227
  # -----------------------
228
+ # Dynamic UI language updates
229
+ # -----------------------
230
def apply_language(ui_lang: str) -> Tuple[Any, ...]:
    """Re-localize every labeled UI element after a language switch.

    Returns one gr.update per output component wired in ui_lang.change();
    the order here must match that outputs list exactly.
    """
    updates = [
        gr.update(value=f"# {t(ui_lang, 'title')}\n{t(ui_lang, 'intro')}"),            # header_md
        gr.update(label=t(ui_lang, "ui_lang")),                                        # ui_lang radio (cosmetic)
        gr.update(label=t(ui_lang, "task"), choices=task_choices(ui_lang)),            # task dropdown
        gr.update(label=t(ui_lang, "has_docs"), choices=yesno_choices(ui_lang)),       # has_docs
        gr.update(label=t(ui_lang, "data_lang"), choices=data_lang_choices(ui_lang)),  # data_lang
        gr.update(label=t(ui_lang, "cpu_only")),                                       # cpu_only
        gr.update(label=t(ui_lang, "priority"), choices=priority_choices(ui_lang)),    # priority
        gr.update(value=t(ui_lang, "recommend_btn")),                                  # button text
        gr.update(label=t(ui_lang, "result")),                                         # output textbox
        gr.update(label=t(ui_lang, "tab_main")),                                       # tab label (may not live-update in all Gradio versions)
    ]
    return tuple(updates)
246
+
247
+ # -----------------------
248
+ # UI
249
  # -----------------------
250
# UI is built with English labels; ui_lang.change() re-localizes them live.
with gr.Blocks(title=I18N["EN"]["title"]) as demo:
    # Header markdown; its whole value is replaced by apply_language().
    header_md = gr.Markdown(f"# {t('EN', 'title')}\n{t('EN', 'intro')}")

    ui_lang = gr.Radio(
        choices=["EN", "PL"],
        value="EN",
        label=t("EN", "ui_lang"),
    )

    # Tab title live-update is not guaranteed across Gradio versions; we still keep the label update output.
    with gr.Tab(t("EN", "tab_main")) as tab_main:
        # Choices are (label, value) pairs: labels are localized display text,
        # values are the stable internal ids consumed by recommend().
        task = gr.Dropdown(
            choices=task_choices("EN"),
            value=TASK_SIM,
            label=t("EN", "task"),
        )
        has_docs = gr.Radio(
            choices=yesno_choices("EN"),
            value="YES",
            label=t("EN", "has_docs"),
        )
        data_lang = gr.Radio(
            choices=data_lang_choices("EN"),
            value="MIXED",
            label=t("EN", "data_lang"),
        )
        cpu_only = gr.Checkbox(value=True, label=t("EN", "cpu_only"))
        priority = gr.Radio(
            choices=priority_choices("EN"),
            value="SPEED",
            label=t("EN", "priority"),
        )

        recommend_btn = gr.Button(t("EN", "recommend_btn"))
        out = gr.Textbox(lines=18, label=t("EN", "result"))

        # ui_lang is the first input so the generated report is localized too.
        recommend_btn.click(
            fn=recommend,
            inputs=[ui_lang, task, has_docs, data_lang, cpu_only, priority],
            outputs=[out],
        )

    # When UI language changes, update labels + choices.
    # The outputs order must match the tuple returned by apply_language().
    ui_lang.change(
        fn=apply_language,
        inputs=[ui_lang],
        outputs=[header_md, ui_lang, task, has_docs, data_lang, cpu_only, priority, recommend_btn, out, tab_main],
    )

demo.launch()