AdamTT committed on
Commit
de3dea2
·
verified ·
1 Parent(s): a89188a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +383 -169
app.py CHANGED
@@ -7,25 +7,28 @@ from huggingface_hub import HfApi
7
 
8
  api = HfApi()
9
 
10
- # -----------------------
11
  # i18n
12
- # -----------------------
13
  I18N: Dict[str, Dict[str, str]] = {
14
  "EN": {
15
  "title": "Model Fit Finder (CPU)",
16
  "intro": (
17
- "Pick your NLP task and constraints. The Space will recommend an appropriate model type "
18
- "and list at least 3 concrete Hugging Face models. Recommendations change based on your settings."
19
  ),
20
  "ui_lang": "UI language",
21
  "tab_main": "Model advisor",
22
  "task": "What do you want to do?",
23
  "has_docs": "Do you have your own documents/text to analyze?",
24
  "data_lang": "Data language",
25
- "cpu_only": "CPU only",
26
  "priority": "Priority",
 
 
 
27
  "recommend_btn": "Recommend",
28
  "result": "Result",
 
29
  "yes": "Yes",
30
  "no": "No",
31
  "en": "EN",
@@ -33,6 +36,11 @@ I18N: Dict[str, Dict[str, str]] = {
33
  "mixed": "Mixed",
34
  "speed": "Speed",
35
  "quality": "Quality",
 
 
 
 
 
36
  "task_chat": "Chat / instructions / generation",
37
  "task_qa": "Answer questions from a document (input text)",
38
  "task_sim": "Semantic similarity / duplicates / search",
@@ -43,26 +51,31 @@ I18N: Dict[str, Dict[str, str]] = {
43
  "why_these": "Why these models:",
44
  "warning": "Warning:",
45
  "qa_need_docs": "Extractive QA needs a context document/text. With no documents, consider an instruction model or embeddings-based search.",
46
- "note_emb": "Embedding models do not generate text; they produce vectors for similarity/search.",
47
- "note_qa": "Extractive QA finds answers in the provided context.",
48
- "note_instr": "Instruction-tuned models follow prompts; smaller variants are CPU-friendly.",
49
- "bonus_note": "Popular model from Hub (selected by task tag and downloads).",
 
 
50
  },
51
  "PL": {
52
  "title": "Model Fit Finder (CPU)",
53
  "intro": (
54
- "Wybierz zadanie NLP i ograniczenia. Space zarekomenduje typ modelu "
55
- "i pokaże co najmniej 3 modele. Rekomendacje zmieniają się zależnie od ustawień."
56
  ),
57
  "ui_lang": "Język interfejsu",
58
  "tab_main": "Doradca modeli",
59
  "task": "Co chcesz zrobić?",
60
  "has_docs": "Czy masz własne dokumenty/teksty do analizy?",
61
  "data_lang": "Język danych",
62
- "cpu_only": "CPU only",
63
  "priority": "Priorytet",
 
 
 
64
  "recommend_btn": "Zarekomenduj",
65
  "result": "Wynik",
 
66
  "yes": "Tak",
67
  "no": "Nie",
68
  "en": "EN",
@@ -70,6 +83,11 @@ I18N: Dict[str, Dict[str, str]] = {
70
  "mixed": "Mieszany",
71
  "speed": "Szybkość",
72
  "quality": "Jakość",
 
 
 
 
 
73
  "task_chat": "Chat / polecenia / generowanie",
74
  "task_qa": "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
75
  "task_sim": "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
@@ -80,23 +98,39 @@ I18N: Dict[str, Dict[str, str]] = {
80
  "why_these": "Dlaczego te modele:",
81
  "warning": "Ostrzeżenie:",
82
  "qa_need_docs": "QA extractive wymaga kontekstu (dokumentu/tekstu). Bez dokumentów rozważ model instrukcyjny albo wyszukiwanie embeddingowe.",
83
- "note_emb": "Modele embeddingowe nie generują tekstu; produkują wektory do podobieństwa/wyszukiwania.",
84
- "note_qa": "QA extractive znajduje odpowiedzi w podanym kontekście.",
85
- "note_instr": "Modele instrukcyjne wykonują polecenia; mniejsze warianty są przyjazne dla CPU.",
86
- "bonus_note": "Popularny model z Hub (dobrany po tagu zadania i pobraniach).",
 
 
87
  },
88
  }
89
 
90
def t(ui_lang: str, key: str) -> str:
    """Translate *key* for *ui_lang*, falling back to English, then to the key itself."""
    table = I18N.get(ui_lang, I18N["EN"])
    fallback = I18N["EN"].get(key, key)
    return table.get(key, fallback)
92
 
93
- # -----------------------
94
- # Internal stable values
95
- # -----------------------
96
  TASK_CHAT = "CHAT"
97
  TASK_QA = "QA"
98
  TASK_SIM = "SIM"
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def task_choices(ui_lang: str) -> List[Tuple[str, str]]:
101
  return [
102
  (t(ui_lang, "task_chat"), TASK_CHAT),
@@ -108,122 +142,197 @@ def yesno_choices(ui_lang: str) -> List[Tuple[str, str]]:
108
  return [(t(ui_lang, "yes"), "YES"), (t(ui_lang, "no"), "NO")]
109
 
110
def data_lang_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the data-language selector, localized for *ui_lang*."""
    pairs = (("en", "EN"), ("pl", "PL"), ("mixed", "MIXED"))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
112
 
113
def priority_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the priority selector, localized for *ui_lang*."""
    pairs = (("speed", "SPEED"), ("quality", "QUALITY"))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
 
 
 
 
 
 
 
 
 
 
115
 
116
- # -----------------------
117
- # Candidate pool with metadata so settings can affect ranking
118
- # -----------------------
119
  @dataclass(frozen=True)
120
  class Candidate:
121
  model_id: str
122
- # heuristics / tags:
123
- size: str # "small" | "base" | "large"
124
- languages: str # "EN" | "MULTI"
125
- cpu_ok: bool
126
  note_en: str
127
  note_pl: str
 
128
 
129
- CANDIDATES: Dict[str, List[Candidate]] = {
130
  "instruction": [
131
- Candidate("google/flan-t5-small", "small", "EN", True,
132
- "Very light instruction-following text2text model.", "Bardzo lekki model text2text do poleceń."),
133
- Candidate("google/flan-t5-base", "base", "EN", True,
134
- "Better quality than small; slower on CPU.", "Lepsza jakość niż small; wolniejszy na CPU."),
135
- Candidate("google-t5/t5-small", "small", "EN", True,
136
- "Fast fallback text2text baseline.", "Szybki fallback text2text."),
137
- # multilingual-ish option (not perfect, but helps when user insists on PL/mixed for generation)
138
- Candidate("google/mt5-small", "small", "MULTI", True,
139
- "Multilingual T5 small for mixed-language tasks.", "Wielojęzyczny mT5 small dla zadań mix języków."),
140
- Candidate("google/mt5-base", "base", "MULTI", True,
141
- "Multilingual, higher quality than mt5-small; slower.", "Wielojęzyczny, lepsza jakość niż mt5-small; wolniejszy."),
 
 
 
 
142
  ],
143
  "qa": [
144
- Candidate("distilbert/distilbert-base-cased-distilled-squad", "small", "EN", True,
145
- "Fast extractive QA; classic CPU choice.", "Szybki QA extractive; klasyk na CPU."),
146
- Candidate("distilbert/distilbert-base-uncased-distilled-squad", "small", "EN", True,
147
- "Popular extractive QA default.", "Popularny domyślny QA extractive."),
148
- Candidate("deepset/bert-base-cased-squad2", "base", "EN", True,
149
- "SQuAD2 variant; better 'no answer' behavior.", "Wariant SQuAD2; lepiej obsługuje 'brak odpowiedzi'."),
150
- # multilingual QA is trickier; we provide one common multilingual baseline
151
- Candidate("deepset/xlm-roberta-base-squad2", "base", "MULTI", True,
152
- "Multilingual extractive QA baseline (XLM-R).", "Wielojęzyczny QA extractive (XLM-R)."),
 
 
 
153
  ],
154
  "embeddings": [
155
- Candidate("sentence-transformers/all-MiniLM-L6-v2", "small", "EN", True,
156
- "Very fast sentence embeddings; great for similarity on CPU.", "Bardzo szybkie embeddingi; świetne do podobieństwa na CPU."),
157
- Candidate("sentence-transformers/all-mpnet-base-v2", "base", "EN", True,
158
- "Higher quality embeddings than MiniLM; slower.", "Lepsza jakość niż MiniLM; wolniejsze."),
159
- Candidate("intfloat/e5-small-v2", "small", "EN", True,
160
- "Strong retrieval embeddings, good speed/quality balance.", "Mocne embeddingi do wyszukiwania; dobry balans."),
161
- Candidate("intfloat/e5-base-v2", "base", "EN", True,
162
- "Higher quality e5; heavier on CPU.", "Lepsza jakość e5; cięższy na CPU."),
163
- Candidate("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "base", "MULTI", True,
164
- "Multilingual embeddings; good for PL/mixed.", "Wielojęzyczne embeddingi; dobre dla PL/mix."),
 
 
 
 
 
165
  ],
166
  }
167
 
168
- # -----------------------
169
- # Hub bonus models (cache)
170
- # -----------------------
171
- _HUB_CACHE: Dict[Tuple[str, str], Tuple[float, List[str]]] = {}
172
- CACHE_TTL_SEC = 6 * 60 * 60 # 6h
 
173
 
174
  def _language_tag_predicate(tags: List[str], data_lang_value: str) -> bool:
175
- if data_lang_value == "MIXED":
176
  return True
177
- target = "en" if data_lang_value == "EN" else "pl"
178
  candidates = {target, f"language:{target}", f"lang:{target}"}
179
  tags_lower = {str(x).lower() for x in (tags or [])}
180
  return any(c in tags_lower for c in candidates)
181
 
182
- def hub_bonus_models(pipeline_tag: str, data_lang_value: str, limit: int = 20) -> List[str]:
183
- key = (pipeline_tag, data_lang_value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  now = time.time()
 
185
  if key in _HUB_CACHE:
186
  ts, cached = _HUB_CACHE[key]
187
  if now - ts < CACHE_TTL_SEC:
188
  return cached
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  try:
190
- models = api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit)
191
- out = []
192
- for m in models:
193
- mid = getattr(m, "modelId", None)
194
- tags = getattr(m, "tags", []) or []
195
- if mid and _language_tag_predicate(tags, data_lang_value):
196
- out.append(mid)
197
- _HUB_CACHE[key] = (now, out)
198
- return out
199
  except Exception:
200
- return []
201
-
202
- # -----------------------
203
- # Ranking rules (this is what makes settings matter)
204
- # -----------------------
205
- def score_candidate(c: Candidate, data_lang_value: str, cpu_only: bool, priority: str) -> Tuple[int, List[str]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  score = 0
207
  reasons: List[str] = []
208
 
209
- # CPU constraint
210
- if cpu_only:
211
- if c.cpu_ok:
212
- score += 2
213
- reasons.append("CPU-friendly" if True else "")
214
- else:
215
- score -= 100 # effectively exclude
216
- reasons.append("Not CPU-friendly")
217
-
218
  # Language preference
219
- if data_lang_value in ("PL", "MIXED"):
220
  if c.languages == "MULTI":
221
  score += 4
222
- reasons.append("Multilingual (better for PL/mixed)")
223
  else:
224
  score -= 1
225
  reasons.append("EN-focused")
226
- else: # EN
227
  if c.languages == "EN":
228
  score += 3
229
  reasons.append("EN-optimized")
@@ -231,17 +340,39 @@ def score_candidate(c: Candidate, data_lang_value: str, cpu_only: bool, priority
231
  score += 1
232
  reasons.append("Multilingual")
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  # Priority: speed vs quality
235
- if priority == "SPEED":
236
  if c.size == "small":
237
  score += 4
238
- reasons.append("Smaller/faster")
239
  elif c.size == "base":
240
  score += 1
241
- reasons.append("Medium size")
242
  else:
243
- score -= 1
244
- reasons.append("Heavier/slower")
245
  else: # QUALITY
246
  if c.size == "base":
247
  score += 4
@@ -253,13 +384,78 @@ def score_candidate(c: Candidate, data_lang_value: str, cpu_only: bool, priority
253
  score += 3
254
  reasons.append("High capacity")
255
 
 
 
 
 
 
256
  return score, reasons
257
 
258
- def pick_models(model_type: str, data_lang_value: str, cpu_only: bool, priority: str, k: int = 4) -> Tuple[List[Candidate], Dict[str, List[str]]]:
259
- candidates = CANDIDATES[model_type]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  scored: List[Tuple[int, Candidate, List[str]]] = []
261
- for c in candidates:
262
- s, reasons = score_candidate(c, data_lang_value, cpu_only, priority)
263
  scored.append((s, c, reasons))
264
 
265
  scored.sort(key=lambda x: x[0], reverse=True)
@@ -267,30 +463,36 @@ def pick_models(model_type: str, data_lang_value: str, cpu_only: bool, priority:
267
  chosen: List[Candidate] = []
268
  why: Dict[str, List[str]] = {}
269
  for s, c, reasons in scored:
270
- if s < -50:
271
  continue
272
- if c.model_id not in {x.model_id for x in chosen}:
273
- chosen.append(c)
274
- why[c.model_id] = reasons
275
  if len(chosen) >= k:
276
  break
277
 
278
  # ensure min 3
279
  if len(chosen) < 3:
280
- # fallback: take top regardless of language
281
  for s, c, reasons in scored:
282
- if c.model_id not in {x.model_id for x in chosen} and s > -50:
283
  chosen.append(c)
284
  why[c.model_id] = reasons
285
  if len(chosen) >= 3:
286
  break
287
 
288
- return chosen, why
289
-
290
- # -----------------------
291
- # Main recommend function (now settings drive different outputs)
292
- # -----------------------
293
- def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, cpu_only: bool, priority: str) -> str:
 
 
 
 
 
 
 
 
294
  warning: Optional[str] = None
295
 
296
  if task_id == TASK_SIM:
@@ -300,7 +502,6 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
300
  if ui_lang == "EN"
301
  else "Chcesz podobieństwo semantyczne / deduplikację / wyszukiwanie. Najlepsze są embeddingi + cosine similarity."
302
  )
303
- pipeline_tag = "sentence-similarity"
304
  note_key = "note_emb"
305
  elif task_id == TASK_QA:
306
  model_type = "qa"
@@ -309,7 +510,6 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
309
  if ui_lang == "EN"
310
  else "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedzi w kontekście."
311
  )
312
- pipeline_tag = "question-answering"
313
  note_key = "note_qa"
314
  if has_docs == "NO":
315
  warning = t(ui_lang, "qa_need_docs")
@@ -320,19 +520,18 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
320
  if ui_lang == "EN"
321
  else "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). Najlepsze są modele instrukcyjne."
322
  )
323
- pipeline_tag = "text-generation"
324
  note_key = "note_instr"
325
 
326
- # Pick models based on settings
327
- chosen, why_map = pick_models(model_type, data_lang_value, cpu_only, priority, k=4)
328
-
329
- # Add 1–2 hub bonus models, but only if they diversify beyond chosen
330
- bonus = hub_bonus_models(pipeline_tag, data_lang_value, limit=25)
331
- chosen_ids = {c.model_id for c in chosen}
332
- bonus = [m for m in bonus if m not in chosen_ids]
333
- bonus = bonus[:2]
 
334
 
335
- # Build output
336
  lines: List[str] = []
337
  lines.append(t(ui_lang, "rec_type").format(model_type=model_type))
338
  lines.append("")
@@ -342,7 +541,8 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
342
  lines.append(t(ui_lang, "settings"))
343
  lines.append(f"- data language: {data_lang_value}")
344
  lines.append(f"- priority: {priority}")
345
- lines.append(f"- cpu only: {cpu_only}")
 
346
  lines.append(f"- has documents: {has_docs}")
347
  lines.append("")
348
 
@@ -351,40 +551,41 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
351
  lines.append(f"- {warning}")
352
  lines.append("")
353
 
 
 
 
 
354
  lines.append(t(ui_lang, "models_min3"))
355
- for c in chosen:
356
  note = c.note_en if ui_lang == "EN" else c.note_pl
357
  lines.append(f"- {c.model_id} — {note}")
358
 
359
- for mid in bonus:
360
- lines.append(f"- {mid} — {t(ui_lang, 'bonus_note')}")
361
-
362
  lines.append("")
363
  lines.append(t(ui_lang, "why_these"))
364
- for c in chosen:
365
  reasons = why_map.get(c.model_id, [])
366
- # Localize reason snippets lightly
367
  if ui_lang == "PL":
368
  localized = []
369
  for r in reasons:
370
- if r == "CPU-friendly":
371
- localized.append("Działa na CPU")
372
- elif r == "Multilingual (better for PL/mixed)":
373
- localized.append("Wielojęzyczny (lepszy dla PL/mix)")
374
- elif r == "EN-optimized":
375
- localized.append("Optymalny dla EN")
376
- elif r == "Smaller/faster":
377
- localized.append("Mniejszy/szybszy")
378
- elif r == "Better quality baseline":
379
- localized.append("Lepsza jakość (baseline)")
380
- elif r == "Fast but may be lower quality":
381
- localized.append("Szybki, ale może gorsza jakość")
382
- elif r == "Medium size":
383
- localized.append("Średni rozmiar")
384
- elif r == "Heavier/slower":
385
- localized.append("Cięższy/wolniejszy")
386
- else:
387
- localized.append(r)
 
388
  reasons_txt = ", ".join(localized)
389
  else:
390
  reasons_txt = ", ".join(reasons)
@@ -394,26 +595,29 @@ def recommend(ui_lang: str, task_id: str, has_docs: str, data_lang_value: str, c
394
  lines.append(t(ui_lang, note_key))
395
  return "\n".join(lines)
396
 
397
- # -----------------------
398
- # Dynamic UI language updates
399
- # -----------------------
400
  def apply_language(ui_lang: str) -> Tuple[Any, ...]:
401
  return (
402
- gr.update(value=f"# {t(ui_lang, 'title')}\n{t(ui_lang, 'intro')}"), # header
403
- gr.update(label=t(ui_lang, "ui_lang")), # ui lang label
404
- gr.update(label=t(ui_lang, "task"), choices=task_choices(ui_lang)), # task choices localized
405
  gr.update(label=t(ui_lang, "has_docs"), choices=yesno_choices(ui_lang)),
406
  gr.update(label=t(ui_lang, "data_lang"), choices=data_lang_choices(ui_lang)),
407
- gr.update(label=t(ui_lang, "cpu_only")),
408
  gr.update(label=t(ui_lang, "priority"), choices=priority_choices(ui_lang)),
 
 
 
409
  gr.update(value=t(ui_lang, "recommend_btn")),
410
  gr.update(label=t(ui_lang, "result")),
 
411
  gr.update(label=t(ui_lang, "tab_main")),
412
  )
413
 
414
- # -----------------------
415
- # UI
416
- # -----------------------
417
  with gr.Blocks(title=I18N["EN"]["title"]) as demo:
418
  header_md = gr.Markdown(f"# {t('EN', 'title')}\n{t('EN', 'intro')}")
419
 
@@ -422,23 +626,33 @@ with gr.Blocks(title=I18N["EN"]["title"]) as demo:
422
  with gr.Tab(t("EN", "tab_main")) as tab_main:
423
  task = gr.Dropdown(choices=task_choices("EN"), value=TASK_SIM, label=t("EN", "task"))
424
  has_docs = gr.Radio(choices=yesno_choices("EN"), value="YES", label=t("EN", "has_docs"))
425
- data_lang = gr.Radio(choices=data_lang_choices("EN"), value="MIXED", label=t("EN", "data_lang"))
426
- cpu_only = gr.Checkbox(value=True, label=t("EN", "cpu_only"))
427
- priority = gr.Radio(choices=priority_choices("EN"), value="SPEED", label=t("EN", "priority"))
 
 
 
 
 
428
 
429
  recommend_btn = gr.Button(t("EN", "recommend_btn"))
430
- out = gr.Textbox(lines=22, label=t("EN", "result"))
 
 
431
 
432
  recommend_btn.click(
433
  fn=recommend,
434
- inputs=[ui_lang, task, has_docs, data_lang, cpu_only, priority],
435
  outputs=[out],
436
  )
437
 
438
  ui_lang.change(
439
  fn=apply_language,
440
  inputs=[ui_lang],
441
- outputs=[header_md, ui_lang, task, has_docs, data_lang, cpu_only, priority, recommend_btn, out, tab_main],
 
 
 
442
  )
443
 
444
  demo.launch()
 
7
 
8
  api = HfApi()
9
 
10
+ # =======================
11
  # i18n
12
+ # =======================
13
  I18N: Dict[str, Dict[str, str]] = {
14
  "EN": {
15
  "title": "Model Fit Finder (CPU)",
16
  "intro": (
17
+ "Pick an NLP task and constraints. The Space recommends an appropriate model type and returns "
18
+ "at least 3 concrete Hugging Face models. Recommendations change based on your settings."
19
  ),
20
  "ui_lang": "UI language",
21
  "tab_main": "Model advisor",
22
  "task": "What do you want to do?",
23
  "has_docs": "Do you have your own documents/text to analyze?",
24
  "data_lang": "Data language",
 
25
  "priority": "Priority",
26
+ "budget": "Compute budget",
27
+ "source": "Model source",
28
+ "refresh": "Refresh HF cache",
29
  "recommend_btn": "Recommend",
30
  "result": "Result",
31
+ "status": "Status",
32
  "yes": "Yes",
33
  "no": "No",
34
  "en": "EN",
 
36
  "mixed": "Mixed",
37
  "speed": "Speed",
38
  "quality": "Quality",
39
+ "budget_low": "Low (fast/small models)",
40
+ "budget_med": "Medium (allow larger models)",
41
+ "source_curated": "Curated (stable baseline)",
42
+ "source_live": "HF Live (fresh from Hub)",
43
+ "source_hybrid": "Hybrid (curated + live)",
44
  "task_chat": "Chat / instructions / generation",
45
  "task_qa": "Answer questions from a document (input text)",
46
  "task_sim": "Semantic similarity / duplicates / search",
 
51
  "why_these": "Why these models:",
52
  "warning": "Warning:",
53
  "qa_need_docs": "Extractive QA needs a context document/text. With no documents, consider an instruction model or embeddings-based search.",
54
+ "note_emb": "Note: embedding models do not generate text; they produce vectors for similarity/search.",
55
+ "note_qa": "Note: extractive QA finds answers in the provided context.",
56
+ "note_instr": "Note: instruction-tuned models follow prompts; smaller variants are CPU-friendly.",
57
+ "live_note": "Live candidates pulled from Hub using pipeline tag and downloads ranking.",
58
+ "refreshed": "HF cache refreshed at {ts}.",
59
+ "refresh_failed": "Refresh failed; using cached/curated lists.",
60
  },
61
  "PL": {
62
  "title": "Model Fit Finder (CPU)",
63
  "intro": (
64
+ "Wybierz zadanie NLP i ograniczenia. Space rekomenduje typ modelu i zwraca "
65
+ "co najmniej 3 konkretne modele z Hugging Face. Rekomendacje zmieniają się zależnie od ustawień."
66
  ),
67
  "ui_lang": "Język interfejsu",
68
  "tab_main": "Doradca modeli",
69
  "task": "Co chcesz zrobić?",
70
  "has_docs": "Czy masz własne dokumenty/teksty do analizy?",
71
  "data_lang": "Język danych",
 
72
  "priority": "Priorytet",
73
+ "budget": "Budżet obliczeniowy",
74
+ "source": "Źródło modeli",
75
+ "refresh": "Odśwież cache HF",
76
  "recommend_btn": "Zarekomenduj",
77
  "result": "Wynik",
78
+ "status": "Status",
79
  "yes": "Tak",
80
  "no": "Nie",
81
  "en": "EN",
 
83
  "mixed": "Mieszany",
84
  "speed": "Szybkość",
85
  "quality": "Jakość",
86
+ "budget_low": "Niski (szybkie/małe modele)",
87
+ "budget_med": "Średni (pozwól na większe modele)",
88
+ "source_curated": "Kuratorskie (stabilna baza)",
89
+ "source_live": "HF Live (świeże z Hub)",
90
+ "source_hybrid": "Hybryda (baza + live)",
91
  "task_chat": "Chat / polecenia / generowanie",
92
  "task_qa": "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
93
  "task_sim": "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
 
98
  "why_these": "Dlaczego te modele:",
99
  "warning": "Ostrzeżenie:",
100
  "qa_need_docs": "QA extractive wymaga kontekstu (dokumentu/tekstu). Bez dokumentów rozważ model instrukcyjny albo wyszukiwanie embeddingowe.",
101
+ "note_emb": "Uwaga: modele embeddingowe nie generują tekstu; produkują wektory do podobieństwa/wyszukiwania.",
102
+ "note_qa": "Uwaga: QA extractive znajduje odpowiedzi w podanym kontekście.",
103
+ "note_instr": "Uwaga: modele instrukcyjne wykonują polecenia; mniejsze warianty są przyjazne dla CPU.",
104
+ "live_note": "Kandydaci live pobierani z Hub po pipeline tag i rankingu pobrań.",
105
+ "refreshed": "Cache HF odświeżony: {ts}.",
106
+ "refresh_failed": "Nie udało się odświeżyć; używam cache/list kuratorskich.",
107
  },
108
  }
109
 
110
def t(ui_lang: str, key: str) -> str:
    """Translate *key* for *ui_lang*, falling back to English, then to the key itself."""
    table = I18N.get(ui_lang, I18N["EN"])
    fallback = I18N["EN"].get(key, key)
    return table.get(key, fallback)
112
 
113
+ # =======================
114
+ # Stable internal values
115
+ # =======================
116
  TASK_CHAT = "CHAT"
117
  TASK_QA = "QA"
118
  TASK_SIM = "SIM"
119
 
120
+ DATA_EN = "EN"
121
+ DATA_PL = "PL"
122
+ DATA_MIXED = "MIXED"
123
+
124
+ PRIO_SPEED = "SPEED"
125
+ PRIO_QUALITY = "QUALITY"
126
+
127
+ BUDGET_LOW = "LOW"
128
+ BUDGET_MED = "MED"
129
+
130
+ SRC_CURATED = "CURATED"
131
+ SRC_LIVE = "LIVE"
132
+ SRC_HYBRID = "HYBRID"
133
+
134
  def task_choices(ui_lang: str) -> List[Tuple[str, str]]:
135
  return [
136
  (t(ui_lang, "task_chat"), TASK_CHAT),
 
142
  return [(t(ui_lang, "yes"), "YES"), (t(ui_lang, "no"), "NO")]
143
 
144
def data_lang_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the data-language selector, localized for *ui_lang*."""
    pairs = (("en", DATA_EN), ("pl", DATA_PL), ("mixed", DATA_MIXED))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
146
 
147
def priority_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the priority selector, localized for *ui_lang*."""
    pairs = (("speed", PRIO_SPEED), ("quality", PRIO_QUALITY))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
149
+
150
def budget_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the compute-budget selector, localized for *ui_lang*."""
    pairs = (("budget_low", BUDGET_LOW), ("budget_med", BUDGET_MED))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
152
+
153
def source_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the model-source selector, localized for *ui_lang*."""
    pairs = (
        ("source_curated", SRC_CURATED),
        ("source_live", SRC_LIVE),
        ("source_hybrid", SRC_HYBRID),
    )
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
159
 
160
+ # =======================
161
+ # Curated candidates (stable baseline)
162
+ # =======================
163
  @dataclass(frozen=True)
164
  class Candidate:
165
  model_id: str
166
+ size: str # "small" | "base" | "large" (heuristic)
167
+ languages: str # "EN" | "MULTI"
 
 
168
  note_en: str
169
  note_pl: str
170
+ origin: str # "curated" | "live"
171
 
172
# Curated model pool keyed by model type ("instruction" | "qa" | "embeddings").
# Notes are shown verbatim in the result list, so they are user-facing text.
CURATED: Dict[str, List[Candidate]] = {
    "instruction": [
        Candidate("google/flan-t5-small", "small", "EN",
                  "Very light instruction-following text2text model.",
                  "Bardzo lekki model text2text do poleceń.", "curated"),
        Candidate("google/flan-t5-base", "base", "EN",
                  "Better quality than small; slower on CPU.",
                  "Lepsza jakość niż small; wolniejszy na CPU.", "curated"),
        Candidate("google-t5/t5-small", "small", "EN",
                  "Fast text2text fallback baseline.",
                  "Szybki fallback text2text.", "curated"),
        Candidate("google/mt5-small", "small", "MULTI",
                  "Multilingual text2text (useful for mixed-language prompts).",
                  "Wielojęzyczny text2text (przydatny dla mieszanych języków).", "curated"),
        Candidate("google/mt5-base", "base", "MULTI",
                  "Multilingual, higher quality than mt5-small; slower.",
                  "Wielojęzyczny, lepsza jakość niż mt5-small; wolniejszy.", "curated"),
    ],
    "qa": [
        Candidate("distilbert/distilbert-base-cased-distilled-squad", "small", "EN",
                  "Fast extractive QA; classic CPU choice.",
                  "Szybki QA extractive; klasyk na CPU.", "curated"),
        Candidate("distilbert/distilbert-base-uncased-distilled-squad", "small", "EN",
                  "Popular extractive QA default.",
                  "Popularny domyślny QA extractive.", "curated"),
        Candidate("deepset/bert-base-cased-squad2", "base", "EN",
                  "SQuAD2 variant; better 'no answer' behavior.",
                  "Wariant SQuAD2; lepiej obsługuje 'brak odpowiedzi'.", "curated"),
        Candidate("deepset/xlm-roberta-base-squad2", "base", "MULTI",
                  "Multilingual extractive QA baseline (XLM-R).",
                  "Wielojęzyczny QA extractive (XLM-R).", "curated"),
    ],
    "embeddings": [
        Candidate("sentence-transformers/all-MiniLM-L6-v2", "small", "EN",
                  "Very fast sentence embeddings; great for similarity on CPU.",
                  "Bardzo szybkie embeddingi; świetne do podobieństwa na CPU.", "curated"),
        Candidate("sentence-transformers/all-mpnet-base-v2", "base", "EN",
                  "Higher quality embeddings than MiniLM; slower.",
                  "Lepsza jakość niż MiniLM; wolniejsze.", "curated"),
        Candidate("intfloat/e5-small-v2", "small", "EN",
                  "Strong retrieval embeddings, good speed/quality balance.",
                  "Mocne embeddingi do wyszukiwania; dobry balans.", "curated"),
        Candidate("intfloat/e5-base-v2", "base", "EN",
                  "Higher quality e5; heavier on CPU.",
                  "Lepsza jakość e5; cięższy na CPU.", "curated"),
        Candidate("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "base", "MULTI",
                  "Multilingual embeddings; good for Polish/mixed.",
                  "Wielojęzyczne embeddingi; dobre dla PL/mix.", "curated"),
    ],
}
222
 
223
+ # =======================
224
+ # HF Live cache (in-memory TTL) + refresh button
225
+ # =======================
226
+ CACHE_TTL_SEC = 24 * 60 * 60 # 24h
227
+ # cache key: (pipeline_tag, data_lang_value, library_hint, budget)
228
+ _HUB_CACHE: Dict[Tuple[str, str, str, str], Tuple[float, List[str]]] = {}
229
 
230
def _language_tag_predicate(tags: List[str], data_lang_value: str) -> bool:
    """True when *tags* advertise the requested data language (MIXED accepts all).

    Tags are lowercased and matched against the bare language code plus the
    common "language:xx" / "lang:xx" tag spellings.
    """
    if data_lang_value == DATA_MIXED:
        return True
    target = "en" if data_lang_value == DATA_EN else "pl"
    wanted = {target, f"language:{target}", f"lang:{target}"}
    normalized = {str(tag).lower() for tag in (tags or [])}
    return bool(wanted & normalized)
237
 
238
+ def _library_predicate(tags: List[str], library_hint: str) -> bool:
239
+ # Best-effort: many models have tags like "library:sentence-transformers" or "library:transformers"
240
+ tags_lower = {str(x).lower() for x in (tags or [])}
241
+ if not library_hint:
242
+ return True
243
+ return (f"library:{library_hint.lower()}" in tags_lower) or (library_hint.lower() in tags_lower)
244
+
245
def _budget_predicate(model_id: str, tags: List[str], budget: str) -> bool:
    """Heuristically decide whether *model_id* fits the compute *budget*.

    Medium budget accepts everything; low budget rejects ids whose name
    suggests an obviously large model. Intentionally conservative: ids without
    a recognizable size cue pass, and the overall ranking still prefers
    small/base candidates. *tags* is accepted for interface symmetry with the
    other predicates but is not consulted here.
    """
    if budget == BUDGET_MED:
        return True

    mid = model_id.lower()
    # Match short size cues ("xl", "xxl", parameter counts) on whole tokens so
    # that "xl" does not hit "xlm" — otherwise multilingual xlm-roberta models
    # would be wrongly excluded from the low budget.
    tokens = set(mid.replace("/", " ").replace("_", " ").replace("-", " ").replace(".", " ").split())
    if tokens & {"xl", "xxl", "13b", "30b", "70b"}:
        return False
    # Longer cues are safe as plain substrings.
    if any(cue in mid for cue in ("-large", "large-", "mixtral", "llama-2-70b", "llama-3-70b")):
        return False
    return True
259
+
260
def fetch_live_model_ids(
    pipeline_tag: str,
    data_lang_value: str,
    library_hint: str,
    budget: str,
    limit: int = 30,
) -> List[str]:
    """Return Hub model ids for *pipeline_tag*, filtered and TTL-cached.

    Queries the Hub for up to *limit* models sorted by downloads (descending),
    then keeps only ids passing the language-tag, library-tag, and budget
    predicates. Results are cached in the module-level _HUB_CACHE per
    (pipeline_tag, data_lang_value, library_hint, budget) for CACHE_TTL_SEC.

    NOTE(review): there is no try/except here — network/API errors from
    list_models propagate to the caller; confirm callers handle that.
    """
    key = (pipeline_tag, data_lang_value, library_hint or "", budget)
    now = time.time()

    # Serve from cache while the entry is still fresh.
    if key in _HUB_CACHE:
        ts, cached = _HUB_CACHE[key]
        if now - ts < CACHE_TTL_SEC:
            return cached

    models = api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit)
    out: List[str] = []
    for m in models:
        mid = getattr(m, "modelId", None)
        tags = getattr(m, "tags", []) or []
        if not mid:
            continue
        if not _language_tag_predicate(tags, data_lang_value):
            continue
        if not _library_predicate(tags, library_hint):
            continue
        if not _budget_predicate(mid, tags, budget):
            continue
        out.append(mid)

    # Cache even an empty result so a failing filter combo is not re-queried
    # until the TTL expires.
    _HUB_CACHE[key] = (now, out)
    return out
292
+
293
def refresh_cache() -> None:
    """Drop all cached Hub query results so the next fetch hits the API again."""
    _HUB_CACHE.clear()
295
+
296
def refresh_button(ui_lang: str) -> str:
    """Clear the HF cache and return a localized status message for the UI."""
    try:
        refresh_cache()
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        message = t(ui_lang, "refreshed").format(ts=stamp)
    except Exception:
        # Best-effort: report failure instead of surfacing an error in the UI.
        message = t(ui_lang, "refresh_failed")
    return message
303
+
304
+ # =======================
305
+ # Ranking (settings must matter)
306
+ # =======================
307
+ def _infer_size_from_id(model_id: str) -> str:
308
+ mid = model_id.lower()
309
+ if any(x in mid for x in ["-large", "large-", "xxl", "xl"]):
310
+ return "large"
311
+ if any(x in mid for x in ["-base", "base-", "mpnet", "xlm-roberta-base", "bert-base"]):
312
+ return "base"
313
+ if any(x in mid for x in ["small", "mini", "minilm", "distil", "tiny"]):
314
+ return "small"
315
+ return "base"
316
+
317
+ def _infer_lang_from_tags_or_id(model_id: str) -> str:
318
+ mid = model_id.lower()
319
+ if "multilingual" in mid or "xlm" in mid or "mt5" in mid:
320
+ return "MULTI"
321
+ return "EN"
322
+
323
def score_candidate(c: Candidate, data_lang_value: str, priority: str, budget: str) -> Tuple[int, List[str]]:
    """Score one candidate model against the user's settings.

    Returns (score, reasons) where `reasons` are short English labels that
    the caller localizes for display.
    """
    score = 0
    reasons: List[str] = []

    # Language preference
    if data_lang_value in (DATA_PL, DATA_MIXED):
        if c.languages == "MULTI":
            score += 4
            reasons.append("Multilingual")
        else:
            score -= 1
            reasons.append("EN-focused")
    else:
        if c.languages == "EN":
            score += 3
            reasons.append("EN-optimized")
        else:
            # NOTE(review): branch header reconstructed from a collapsed diff
            # line — confirm against the original file.
            score += 1
            reasons.append("Multilingual")

    # Compute budget constraint
    if budget == BUDGET_LOW:
        if c.size == "small":
            score += 5
            reasons.append("Low budget friendly")
        elif c.size == "base":
            score += 1
            reasons.append("May be slower on low budget")
        else:
            score -= 6
            reasons.append("Too heavy for low budget")
    else:  # medium budget
        if c.size == "small":
            score += 2
            reasons.append("Fast")
        elif c.size == "base":
            score += 4
            reasons.append("Allowed by medium budget")
        else:
            score += 1
            reasons.append("Heavier option")

    # Priority: speed vs quality
    if priority == PRIO_SPEED:
        if c.size == "small":
            score += 4
            reasons.append("Faster")
        elif c.size == "base":
            score += 1
            reasons.append("Medium")
        else:
            score -= 2
            reasons.append("Slower")
    else:  # quality
        if c.size == "base":
            score += 4
            # NOTE(review): the middle of this branch was collapsed in the
            # diff view; reason labels reconstructed from the PL translation
            # map used by the caller — confirm against the original file.
            reasons.append("Better quality baseline")
        elif c.size == "small":
            score += 1
            reasons.append("Fast but may be lower quality")
        else:
            score += 3
            reasons.append("High capacity")

    # Slight stability bonus for curated entries over live Hub pulls.
    if c.origin == "curated":
        score += 1
        reasons.append("Curated/stable")

    return score, reasons
393
 
394
def select_models(
    model_type: str,
    data_lang_value: str,
    priority: str,
    budget: str,
    source_mode: str,
    ui_lang: str,
    k: int = 4,
) -> Tuple[List[Candidate], Dict[str, List[str]], bool]:
    """Assemble a candidate pool (curated and/or live Hub models), score it
    against the user's settings and return the top picks.

    Returns (chosen, reasons_by_model_id, used_live).  `ui_lang` is kept in
    the signature for callers but is not consulted here.
    """
    pool: List[Candidate] = []
    used_live = False

    if source_mode in (SRC_CURATED, SRC_HYBRID):
        pool.extend(CURATED[model_type])

    if source_mode in (SRC_LIVE, SRC_HYBRID):
        # Map our internal model types onto Hub pipeline tags / library hints.
        if model_type == "embeddings":
            pipeline_tag, library_hint = "sentence-similarity", "sentence-transformers"
        elif model_type == "qa":
            pipeline_tag, library_hint = "question-answering", "transformers"
        else:
            pipeline_tag, library_hint = "text-generation", "transformers"

        live_ids = fetch_live_model_ids(
            pipeline_tag=pipeline_tag,
            data_lang_value=data_lang_value,
            library_hint=library_hint,
            budget=budget,
            limit=35,
        )

        # Wrap raw ids in Candidates; notes stay generic because the model
        # card is not parsed here.
        for mid in live_ids:
            pool.append(
                Candidate(
                    model_id=mid,
                    size=_infer_size_from_id(mid),
                    languages=_infer_lang_from_tags_or_id(mid),
                    note_en="Live candidate from Hub (ranked by downloads).",
                    note_pl="Kandydat live z Hub (ranking po pobraniach).",
                    origin="live",
                )
            )
            # Flag flips only once at least one live id actually arrived.
            used_live = True

    # Deduplicate by model_id; a curated entry wins over its live twin so
    # the curated notes survive.
    unique: Dict[str, Candidate] = {}
    for cand in pool:
        current = unique.get(cand.model_id)
        if current is None or (current.origin == "live" and cand.origin == "curated"):
            unique[cand.model_id] = cand
    pool = list(unique.values())

    scored: List[Tuple[int, Candidate, List[str]]] = []
    for cand in pool:
        cand_score, cand_reasons = score_candidate(cand, data_lang_value, priority, budget)
        scored.append((cand_score, cand, cand_reasons))
    scored.sort(key=lambda item: item[0], reverse=True)

    chosen: List[Candidate] = []
    why: Dict[str, List[str]] = {}
    for _, cand, cand_reasons in scored:
        if cand.model_id in why:
            continue
        chosen.append(cand)
        why[cand.model_id] = cand_reasons
        if len(chosen) >= k:
            break

    # Guarantee at least 3 recommendations whenever the pool allows it.
    if len(chosen) < 3:
        for _, cand, cand_reasons in scored:
            if cand.model_id not in why:
                chosen.append(cand)
                why[cand.model_id] = cand_reasons
                if len(chosen) >= 3:
                    break

    return chosen, why, used_live
483
+
484
+ # =======================
485
+ # Main recommend function
486
+ # =======================
487
+ def recommend(
488
+ ui_lang: str,
489
+ task_id: str,
490
+ has_docs: str,
491
+ data_lang_value: str,
492
+ priority: str,
493
+ budget: str,
494
+ source_mode: str,
495
+ ) -> str:
496
  warning: Optional[str] = None
497
 
498
  if task_id == TASK_SIM:
 
502
  if ui_lang == "EN"
503
  else "Chcesz podobieństwo semantyczne / deduplikację / wyszukiwanie. Najlepsze są embeddingi + cosine similarity."
504
  )
 
505
  note_key = "note_emb"
506
  elif task_id == TASK_QA:
507
  model_type = "qa"
 
510
  if ui_lang == "EN"
511
  else "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedzi w kontekście."
512
  )
 
513
  note_key = "note_qa"
514
  if has_docs == "NO":
515
  warning = t(ui_lang, "qa_need_docs")
 
520
  if ui_lang == "EN"
521
  else "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). Najlepsze są modele instrukcyjne."
522
  )
 
523
  note_key = "note_instr"
524
 
525
+ chosen, why_map, used_live = select_models(
526
+ model_type=model_type,
527
+ data_lang_value=data_lang_value,
528
+ priority=priority,
529
+ budget=budget,
530
+ source_mode=source_mode,
531
+ ui_lang=ui_lang,
532
+ k=5,
533
+ )
534
 
 
535
  lines: List[str] = []
536
  lines.append(t(ui_lang, "rec_type").format(model_type=model_type))
537
  lines.append("")
 
541
  lines.append(t(ui_lang, "settings"))
542
  lines.append(f"- data language: {data_lang_value}")
543
  lines.append(f"- priority: {priority}")
544
+ lines.append(f"- budget: {budget}")
545
+ lines.append(f"- source: {source_mode}")
546
  lines.append(f"- has documents: {has_docs}")
547
  lines.append("")
548
 
 
551
  lines.append(f"- {warning}")
552
  lines.append("")
553
 
554
+ if used_live and source_mode in (SRC_LIVE, SRC_HYBRID):
555
+ lines.append(t(ui_lang, "live_note"))
556
+ lines.append("")
557
+
558
  lines.append(t(ui_lang, "models_min3"))
559
+ for c in chosen[:5]:
560
  note = c.note_en if ui_lang == "EN" else c.note_pl
561
  lines.append(f"- {c.model_id} — {note}")
562
 
 
 
 
563
  lines.append("")
564
  lines.append(t(ui_lang, "why_these"))
565
+ for c in chosen[:5]:
566
  reasons = why_map.get(c.model_id, [])
 
567
  if ui_lang == "PL":
568
  localized = []
569
  for r in reasons:
570
+ mapping = {
571
+ "Multilingual": "Wielojęzyczny",
572
+ "EN-focused": "Skupiony na EN",
573
+ "EN-optimized": "Optymalny dla EN",
574
+ "Low budget friendly": "Dobry dla niskiego budżetu",
575
+ "May be slower on low budget": "Może być wolniejszy przy niskim budżecie",
576
+ "Too heavy for low budget": "Za ciężki dla niskiego budżetu",
577
+ "Allowed by medium budget": "Dozwolony przy średnim budżecie",
578
+ "Heavier option": "Cięższa opcja",
579
+ "Fast": "Szybki",
580
+ "Faster": "Szybszy",
581
+ "Medium": "Średni",
582
+ "Slower": "Wolniejszy",
583
+ "Better quality baseline": "Lepsza jakość (baseline)",
584
+ "Fast but may be lower quality": "Szybki, ale może gorsza jakość",
585
+ "High capacity": "Duża pojemność",
586
+ "Curated/stable": "Kuratorski/stabilny",
587
+ }
588
+ localized.append(mapping.get(r, r))
589
  reasons_txt = ", ".join(localized)
590
  else:
591
  reasons_txt = ", ".join(reasons)
 
595
  lines.append(t(ui_lang, note_key))
596
  return "\n".join(lines)
597
 
598
+ # =======================
599
+ # UI language dynamic updates
600
+ # =======================
601
def apply_language(ui_lang: str) -> Tuple[Any, ...]:
    """Return gr.update() payloads relabeling every widget for `ui_lang`.

    The order of the returned tuple must match the `outputs=` list wired to
    the `ui_lang.change` event in the UI section.
    """
    updates = [
        gr.update(value=f"# {t(ui_lang, 'title')}\n{t(ui_lang, 'intro')}"),
        gr.update(label=t(ui_lang, "ui_lang")),
        gr.update(label=t(ui_lang, "task"), choices=task_choices(ui_lang)),
        gr.update(label=t(ui_lang, "has_docs"), choices=yesno_choices(ui_lang)),
        gr.update(label=t(ui_lang, "data_lang"), choices=data_lang_choices(ui_lang)),
        gr.update(label=t(ui_lang, "priority"), choices=priority_choices(ui_lang)),
        gr.update(label=t(ui_lang, "budget"), choices=budget_choices(ui_lang)),
        gr.update(label=t(ui_lang, "source"), choices=source_choices(ui_lang)),
        gr.update(value=t(ui_lang, "refresh")),
        gr.update(value=t(ui_lang, "recommend_btn")),
        gr.update(label=t(ui_lang, "result")),
        gr.update(label=t(ui_lang, "status")),
        gr.update(label=t(ui_lang, "tab_main")),
    ]
    return tuple(updates)
617
 
618
+ # =======================
619
+ # Build UI
620
+ # =======================
621
  with gr.Blocks(title=I18N["EN"]["title"]) as demo:
622
  header_md = gr.Markdown(f"# {t('EN', 'title')}\n{t('EN', 'intro')}")
623
 
 
626
  with gr.Tab(t("EN", "tab_main")) as tab_main:
627
  task = gr.Dropdown(choices=task_choices("EN"), value=TASK_SIM, label=t("EN", "task"))
628
  has_docs = gr.Radio(choices=yesno_choices("EN"), value="YES", label=t("EN", "has_docs"))
629
+ data_lang = gr.Radio(choices=data_lang_choices("EN"), value=DATA_MIXED, label=t("EN", "data_lang"))
630
+ priority = gr.Radio(choices=priority_choices("EN"), value=PRIO_SPEED, label=t("EN", "priority"))
631
+ budget = gr.Radio(choices=budget_choices("EN"), value=BUDGET_LOW, label=t("EN", "budget"))
632
+ source_mode = gr.Radio(choices=source_choices("EN"), value=SRC_HYBRID, label=t("EN", "source"))
633
+
634
+ with gr.Row():
635
+ refresh_btn = gr.Button(t("EN", "refresh"))
636
+ status = gr.Textbox(lines=1, label=t("EN", "status"))
637
 
638
  recommend_btn = gr.Button(t("EN", "recommend_btn"))
639
+ out = gr.Textbox(lines=24, label=t("EN", "result"))
640
+
641
+ refresh_btn.click(fn=refresh_button, inputs=[ui_lang], outputs=[status])
642
 
643
  recommend_btn.click(
644
  fn=recommend,
645
+ inputs=[ui_lang, task, has_docs, data_lang, priority, budget, source_mode],
646
  outputs=[out],
647
  )
648
 
649
  ui_lang.change(
650
  fn=apply_language,
651
  inputs=[ui_lang],
652
+ outputs=[
653
+ header_md, ui_lang, task, has_docs, data_lang, priority, budget, source_mode,
654
+ refresh_btn, recommend_btn, out, status, tab_main
655
+ ],
656
  )
657
 
658
  demo.launch()