AdamTT's picture
Update app.py
de3dea2 verified
import logging
import re
import time
from dataclasses import dataclass
from typing import Dict, List, Tuple, Any, Optional

import gradio as gr
from huggingface_hub import HfApi
# Shared Hugging Face Hub client; fetch_live_model_ids() uses it to list models.
api = HfApi()
# =======================
# i18n
# =======================
# Translation table: UI language code ("EN" / "PL") -> message key -> display string.
# "rec_type" and "refreshed" contain str.format placeholders filled in by their callers.
I18N: Dict[str, Dict[str, str]] = {
# English strings; t() also falls back to this table for unknown languages or missing keys.
"EN": {
"title": "Model Fit Finder (CPU)",
"intro": (
"Pick an NLP task and constraints. The Space recommends an appropriate model type and returns "
"at least 3 concrete Hugging Face models. Recommendations change based on your settings."
),
"ui_lang": "UI language",
"tab_main": "Model advisor",
"task": "What do you want to do?",
"has_docs": "Do you have your own documents/text to analyze?",
"data_lang": "Data language",
"priority": "Priority",
"budget": "Compute budget",
"source": "Model source",
"refresh": "Refresh HF cache",
"recommend_btn": "Recommend",
"result": "Result",
"status": "Status",
"yes": "Yes",
"no": "No",
"en": "EN",
"pl": "PL",
"mixed": "Mixed",
"speed": "Speed",
"quality": "Quality",
"budget_low": "Low (fast/small models)",
"budget_med": "Medium (allow larger models)",
"source_curated": "Curated (stable baseline)",
"source_live": "HF Live (fresh from Hub)",
"source_hybrid": "Hybrid (curated + live)",
"task_chat": "Chat / instructions / generation",
"task_qa": "Answer questions from a document (input text)",
"task_sim": "Semantic similarity / duplicates / search",
"rec_type": "Recommended model type: {model_type}",
"rationale": "Rationale:",
"settings": "Settings used:",
"models_min3": "Models (min. 3):",
"why_these": "Why these models:",
"warning": "Warning:",
"qa_need_docs": "Extractive QA needs a context document/text. With no documents, consider an instruction model or embeddings-based search.",
"note_emb": "Note: embedding models do not generate text; they produce vectors for similarity/search.",
"note_qa": "Note: extractive QA finds answers in the provided context.",
"note_instr": "Note: instruction-tuned models follow prompts; smaller variants are CPU-friendly.",
"live_note": "Live candidates pulled from Hub using pipeline tag and downloads ranking.",
"refreshed": "HF cache refreshed at {ts}.",
"refresh_failed": "Refresh failed; using cached/curated lists.",
},
# Polish strings; same key set as the English table.
"PL": {
"title": "Model Fit Finder (CPU)",
"intro": (
"Wybierz zadanie NLP i ograniczenia. Space rekomenduje typ modelu i zwraca "
"co najmniej 3 konkretne modele z Hugging Face. Rekomendacje zmieniają się zależnie od ustawień."
),
"ui_lang": "Język interfejsu",
"tab_main": "Doradca modeli",
"task": "Co chcesz zrobić?",
"has_docs": "Czy masz własne dokumenty/teksty do analizy?",
"data_lang": "Język danych",
"priority": "Priorytet",
"budget": "Budżet obliczeniowy",
"source": "Źródło modeli",
"refresh": "Odśwież cache HF",
"recommend_btn": "Zarekomenduj",
"result": "Wynik",
"status": "Status",
"yes": "Tak",
"no": "Nie",
"en": "EN",
"pl": "PL",
"mixed": "Mieszany",
"speed": "Szybkość",
"quality": "Jakość",
"budget_low": "Niski (szybkie/małe modele)",
"budget_med": "Średni (pozwól na większe modele)",
"source_curated": "Kuratorskie (stabilna baza)",
"source_live": "HF Live (świeże z Hub)",
"source_hybrid": "Hybryda (baza + live)",
"task_chat": "Chat / polecenia / generowanie",
"task_qa": "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
"task_sim": "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
"rec_type": "Rekomendowany typ modelu: {model_type}",
"rationale": "Uzasadnienie:",
"settings": "Użyte ustawienia:",
"models_min3": "Modele (min. 3):",
"why_these": "Dlaczego te modele:",
"warning": "Ostrzeżenie:",
"qa_need_docs": "QA extractive wymaga kontekstu (dokumentu/tekstu). Bez dokumentów rozważ model instrukcyjny albo wyszukiwanie embeddingowe.",
"note_emb": "Uwaga: modele embeddingowe nie generują tekstu; produkują wektory do podobieństwa/wyszukiwania.",
"note_qa": "Uwaga: QA extractive znajduje odpowiedzi w podanym kontekście.",
"note_instr": "Uwaga: modele instrukcyjne wykonują polecenia; mniejsze warianty są przyjazne dla CPU.",
"live_note": "Kandydaci live pobierani z Hub po pipeline tag i rankingu pobrań.",
"refreshed": "Cache HF odświeżony: {ts}.",
"refresh_failed": "Nie udało się odświeżyć; używam cache/list kuratorskich.",
},
}
def t(ui_lang: str, key: str) -> str:
    """Look up the UI string for ``key`` in the ``ui_lang`` translation table.

    Falls back to the English table when the language is unknown or the key
    is missing from it, and finally to ``key`` itself.
    """
    english = I18N["EN"]
    table = I18N.get(ui_lang, english)
    if key in table:
        return table[key]
    return english.get(key, key)
# =======================
# Stable internal values
# =======================
# Task identifiers (the values behind the task dropdown choices).
TASK_CHAT = "CHAT"
TASK_QA = "QA"
TASK_SIM = "SIM"
# Language of the user's data.
DATA_EN = "EN"
DATA_PL = "PL"
DATA_MIXED = "MIXED"
# What to optimise for when ranking candidates.
PRIO_SPEED = "SPEED"
PRIO_QUALITY = "QUALITY"
# Compute budget levels.
BUDGET_LOW = "LOW"
BUDGET_MED = "MED"
# Where candidate models come from.
SRC_CURATED = "CURATED"
SRC_LIVE = "LIVE"
SRC_HYBRID = "HYBRID"
def task_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for the task selector."""
    pairs = (
        ("task_chat", TASK_CHAT),
        ("task_qa", TASK_QA),
        ("task_sim", TASK_SIM),
    )
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
def yesno_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for a yes/no selector."""
    pairs = (("yes", "YES"), ("no", "NO"))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
def data_lang_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for the data-language selector."""
    pairs = (
        ("en", DATA_EN),
        ("pl", DATA_PL),
        ("mixed", DATA_MIXED),
    )
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
def priority_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for the priority selector."""
    pairs = (("speed", PRIO_SPEED), ("quality", PRIO_QUALITY))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
def budget_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for the compute-budget selector."""
    pairs = (("budget_low", BUDGET_LOW), ("budget_med", BUDGET_MED))
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
def source_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """Return ``(localized label, internal value)`` pairs for the model-source selector."""
    pairs = (
        ("source_curated", SRC_CURATED),
        ("source_live", SRC_LIVE),
        ("source_hybrid", SRC_HYBRID),
    )
    return [(t(ui_lang, label_key), value) for label_key, value in pairs]
# =======================
# Curated candidates (stable baseline)
# =======================
@dataclass(frozen=True)
class Candidate:
    """Immutable description of one recommendable model.

    Instances are built positionally elsewhere in this file, so the field
    order below is part of the interface.
    """

    # Hub repository id, e.g. "google/flan-t5-small".
    model_id: str
    # Heuristic size bucket: "small" | "base" | "large".
    size: str
    # Language coverage: "EN" | "MULTI".
    languages: str
    # Short human-readable note in English.
    note_en: str
    # The same note in Polish.
    note_pl: str
    # Where the entry came from: "curated" | "live".
    origin: str
# Hand-picked baseline models, keyed by internal model type
# ("instruction", "qa", "embeddings").
# Candidate positional fields: model_id, size, languages, note_en, note_pl, origin.
CURATED: Dict[str, List[Candidate]] = {
# Text2text models used for the chat/instruction task.
"instruction": [
Candidate("google/flan-t5-small", "small", "EN",
"Very light instruction-following text2text model.",
"Bardzo lekki model text2text do poleceń.", "curated"),
Candidate("google/flan-t5-base", "base", "EN",
"Better quality than small; slower on CPU.",
"Lepsza jakość niż small; wolniejszy na CPU.", "curated"),
Candidate("google-t5/t5-small", "small", "EN",
"Fast text2text fallback baseline.",
"Szybki fallback text2text.", "curated"),
Candidate("google/mt5-small", "small", "MULTI",
"Multilingual text2text (useful for mixed-language prompts).",
"Wielojęzyczny text2text (przydatny dla mieszanych języków).", "curated"),
Candidate("google/mt5-base", "base", "MULTI",
"Multilingual, higher quality than mt5-small; slower.",
"Wielojęzyczny, lepsza jakość niż mt5-small; wolniejszy.", "curated"),
],
# Extractive question-answering models.
"qa": [
Candidate("distilbert/distilbert-base-cased-distilled-squad", "small", "EN",
"Fast extractive QA; classic CPU choice.",
"Szybki QA extractive; klasyk na CPU.", "curated"),
Candidate("distilbert/distilbert-base-uncased-distilled-squad", "small", "EN",
"Popular extractive QA default.",
"Popularny domyślny QA extractive.", "curated"),
Candidate("deepset/bert-base-cased-squad2", "base", "EN",
"SQuAD2 variant; better 'no answer' behavior.",
"Wariant SQuAD2; lepiej obsługuje 'brak odpowiedzi'.", "curated"),
Candidate("deepset/xlm-roberta-base-squad2", "base", "MULTI",
"Multilingual extractive QA baseline (XLM-R).",
"Wielojęzyczny QA extractive (XLM-R).", "curated"),
],
# Sentence-embedding models for similarity and search.
"embeddings": [
Candidate("sentence-transformers/all-MiniLM-L6-v2", "small", "EN",
"Very fast sentence embeddings; great for similarity on CPU.",
"Bardzo szybkie embeddingi; świetne do podobieństwa na CPU.", "curated"),
Candidate("sentence-transformers/all-mpnet-base-v2", "base", "EN",
"Higher quality embeddings than MiniLM; slower.",
"Lepsza jakość niż MiniLM; wolniejsze.", "curated"),
Candidate("intfloat/e5-small-v2", "small", "EN",
"Strong retrieval embeddings, good speed/quality balance.",
"Mocne embeddingi do wyszukiwania; dobry balans.", "curated"),
Candidate("intfloat/e5-base-v2", "base", "EN",
"Higher quality e5; heavier on CPU.",
"Lepsza jakość e5; cięższy na CPU.", "curated"),
Candidate("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "base", "MULTI",
"Multilingual embeddings; good for Polish/mixed.",
"Wielojęzyczne embeddingi; dobre dla PL/mix.", "curated"),
],
}
# =======================
# HF Live cache (in-memory TTL) + refresh button
# =======================
# How long a cached Hub listing stays fresh, in seconds.
CACHE_TTL_SEC = 24 * 60 * 60 # 24h
# cache key: (pipeline_tag, data_lang_value, library_hint, budget)
# cache value: (time.time() at fetch, filtered list of model ids)
_HUB_CACHE: Dict[Tuple[str, str, str, str], Tuple[float, List[str]]] = {}
def _language_tag_predicate(tags: List[str], data_lang_value: str) -> bool:
    """Return True when a model's tags are compatible with the data language.

    Mixed-language data accepts every model. Otherwise the (lower-cased) tags
    must contain the language code either bare or prefixed with ``language:``
    or ``lang:``; any value other than ``DATA_EN`` is treated as Polish.
    """
    if data_lang_value == DATA_MIXED:
        return True
    code = "pl" if data_lang_value != DATA_EN else "en"
    accepted = (code, f"language:{code}", f"lang:{code}")
    normalized = {str(tag).lower() for tag in (tags or [])}
    return not normalized.isdisjoint(accepted)
def _library_predicate(tags: List[str], library_hint: str) -> bool:
# Best-effort: many models have tags like "library:sentence-transformers" or "library:transformers"
tags_lower = {str(x).lower() for x in (tags or [])}
if not library_hint:
return True
return (f"library:{library_hint.lower()}" in tags_lower) or (library_hint.lower() in tags_lower)
def _budget_predicate(model_id: str, tags: List[str], budget: str) -> bool:
    """Return True when ``model_id`` looks acceptable for the given budget.

    Name-based heuristic, intentionally conservative: the medium budget
    accepts everything, while the low budget rejects ids carrying an obvious
    "large model" marker. Ids without any size cue are still allowed.

    Args:
        model_id: Hub repository id to inspect.
        tags: Model tags; currently unused, kept for interface compatibility.
        budget: ``BUDGET_LOW`` or ``BUDGET_MED``.
    """
    mid = model_id.lower()
    if budget == BUDGET_MED:
        return True
    heavy_markers = ("-large", "large-", "13b", "30b", "70b", "mixtral")
    if any(marker in mid for marker in heavy_markers):
        return False
    # "xl" / "xxl" / "xlarge" / "xxlarge" only count as a size marker when they
    # are not embedded in a longer word. A bare substring test for "xl" would
    # also reject "xlm-roberta-*" and "xlnet-*", which are not large models.
    return re.search(r"(?<![a-z])x{1,2}l(?:arge)?(?![a-z])", mid) is None
def fetch_live_model_ids(
    pipeline_tag: str,
    data_lang_value: str,
    library_hint: str,
    budget: str,
    limit: int = 30,
) -> List[str]:
    """Return Hub model ids for ``pipeline_tag`` that pass the local filters.

    Results are cached in ``_HUB_CACHE`` for ``CACHE_TTL_SEC`` seconds per
    (pipeline_tag, data_lang_value, library_hint, budget) key. When the Hub
    request fails, the last cached list for the key is served (even if
    expired), or an empty list when nothing is cached; failures are never
    written to the cache, so the next call retries.

    Args:
        pipeline_tag: Hub filter value, e.g. "question-answering".
        data_lang_value: Data language used by the language-tag filter.
        library_hint: Library name the model tags must mention ("" disables).
        budget: Budget level used by the name-based size filter.
        limit: Maximum number of models requested from the Hub *before*
            local filtering, so fewer ids may be returned.

    Returns:
        A new list of model ids in the order the Hub returned them
        (requested sorted by downloads, descending).
    """
    key = (pipeline_tag, data_lang_value, library_hint or "", budget)
    now = time.time()
    entry = _HUB_CACHE.get(key)
    if entry is not None and now - entry[0] < CACHE_TTL_SEC:
        # Hand out a copy so callers cannot mutate the cached list.
        return list(entry[1])

    try:
        # The listing may be a lazy iterator, so materialize it inside the
        # ``try`` to catch errors raised while paging through results.
        models = list(
            api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit)
        )
    except Exception:
        # Boundary with a remote service: degrade to stale/empty data rather
        # than failing the whole recommendation.
        logging.getLogger(__name__).warning(
            "Hub listing failed for pipeline tag %r; using cached results if any.",
            pipeline_tag,
            exc_info=True,
        )
        return list(entry[1]) if entry is not None else []

    out: List[str] = []
    for m in models:
        # Prefer the attribute the original code read; fall back to ``id``
        # (assumed to be the name used by other huggingface_hub versions).
        mid = getattr(m, "modelId", None) or getattr(m, "id", None)
        tags = getattr(m, "tags", []) or []
        if not mid:
            continue
        if not _language_tag_predicate(tags, data_lang_value):
            continue
        if not _library_predicate(tags, library_hint):
            continue
        if not _budget_predicate(mid, tags, budget):
            continue
        out.append(mid)

    _HUB_CACHE[key] = (now, out)
    return list(out)
def refresh_cache() -> None:
    """Drop every cached Hub listing so the next fetch goes back to the Hub."""
    _HUB_CACHE.clear()
def refresh_button(ui_lang: str) -> str:
    """Clear the Hub cache and return a localized status message.

    The success message carries the local timestamp of the refresh; any
    exception raised along the way yields the localized failure message.
    """
    try:
        refresh_cache()
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        message = t(ui_lang, "refreshed").format(ts=stamp)
    except Exception:
        message = t(ui_lang, "refresh_failed")
    return message
# =======================
# Ranking (settings must matter)
# =======================
def _infer_size_from_id(model_id: str) -> str:
mid = model_id.lower()
if any(x in mid for x in ["-large", "large-", "xxl", "xl"]):
return "large"
if any(x in mid for x in ["-base", "base-", "mpnet", "xlm-roberta-base", "bert-base"]):
return "base"
if any(x in mid for x in ["small", "mini", "minilm", "distil", "tiny"]):
return "small"
return "base"
def _infer_lang_from_tags_or_id(model_id: str) -> str:
mid = model_id.lower()
if "multilingual" in mid or "xlm" in mid or "mt5" in mid:
return "MULTI"
return "EN"
def score_candidate(c: Candidate, data_lang_value: str, priority: str, budget: str) -> Tuple[int, List[str]]:
    """Score a candidate against the user's settings.

    Four independent adjustments are applied in order (language fit, compute
    budget, speed/quality priority, curated bonus). Each contributes points
    and one English reason label.

    Returns:
        ``(total score, reason labels in the order they were applied)``.
    """
    # Language fit.
    if data_lang_value in (DATA_PL, DATA_MIXED):
        language_step = (4, "Multilingual") if c.languages == "MULTI" else (-1, "EN-focused")
    else:
        language_step = (3, "EN-optimized") if c.languages == "EN" else (1, "Multilingual")

    # Compute budget: per-size table plus a fallback for any other size.
    if budget == BUDGET_LOW:
        budget_by_size = {
            "small": (5, "Low budget friendly"),
            "base": (1, "May be slower on low budget"),
        }
        budget_other = (-6, "Too heavy for low budget")
    else:  # MED
        budget_by_size = {
            "small": (2, "Fast"),
            "base": (4, "Allowed by medium budget"),
        }
        budget_other = (1, "Heavier option")

    # Priority: speed vs quality.
    if priority == PRIO_SPEED:
        priority_by_size = {
            "small": (4, "Faster"),
            "base": (1, "Medium"),
        }
        priority_other = (-2, "Slower")
    else:  # QUALITY
        priority_by_size = {
            "base": (4, "Better quality baseline"),
            "small": (2, "Fast but may be lower quality"),
        }
        priority_other = (3, "High capacity")

    steps = [
        language_step,
        budget_by_size.get(c.size, budget_other),
        priority_by_size.get(c.size, priority_other),
    ]
    # Curated entries get a small stability bonus.
    if c.origin == "curated":
        steps.append((1, "Curated/stable"))

    total = sum(points for points, _ in steps)
    labels = [label for _, label in steps]
    return total, labels
def select_models(
    model_type: str,
    data_lang_value: str,
    priority: str,
    budget: str,
    source_mode: str,
    ui_lang: str,
    k: int = 4,
) -> Tuple[List[Candidate], Dict[str, List[str]], bool]:
    """Pick the best-scoring models for the given settings.

    The candidate pool is built from the curated list and/or live Hub
    results depending on ``source_mode``, de-duplicated by model id (a
    curated entry wins over a live one), scored with ``score_candidate`` and
    sorted by score, highest first (ties keep pool order).

    If the pool ends up with fewer than three models, which can happen in
    live-only mode when the Hub filters match little or nothing, it is topped
    up from the curated list so the "at least 3 models" promise holds.

    Args:
        model_type: Key into ``CURATED`` ("instruction", "qa", "embeddings").
        data_lang_value: Data language setting.
        priority: Speed/quality setting.
        budget: Compute budget setting.
        source_mode: ``SRC_CURATED``, ``SRC_LIVE`` or ``SRC_HYBRID``.
        ui_lang: Currently unused; kept for interface compatibility.
        k: Desired number of models; at least three are returned when available.

    Returns:
        ``(chosen candidates, model id -> reason labels, used_live)`` where
        ``used_live`` is True when the Hub returned at least one candidate.
    """
    min_models = 3
    pool: List[Candidate] = []
    used_live = False

    if source_mode in (SRC_CURATED, SRC_HYBRID):
        pool.extend(CURATED[model_type])

    if source_mode in (SRC_LIVE, SRC_HYBRID):
        # Map our model types to Hub pipeline tags and library hints.
        pipeline_tag, library_hint = {
            "embeddings": ("sentence-similarity", "sentence-transformers"),
            "qa": ("question-answering", "transformers"),
        }.get(model_type, ("text-generation", "transformers"))
        live_ids = fetch_live_model_ids(
            pipeline_tag=pipeline_tag,
            data_lang_value=data_lang_value,
            library_hint=library_hint,
            budget=budget,
            limit=35,
        )
        # Notes are generic because model cards are not parsed here.
        pool.extend(
            Candidate(
                model_id=mid,
                size=_infer_size_from_id(mid),
                languages=_infer_lang_from_tags_or_id(mid),
                note_en="Live candidate from Hub (ranked by downloads).",
                note_pl="Kandydat live z Hub (ranking po pobraniach).",
                origin="live",
            )
            for mid in live_ids
        )
        used_live = bool(live_ids)

    # De-duplicate by model id, preferring the curated entry (better notes).
    dedup: Dict[str, Candidate] = {}
    for c in pool:
        existing = dedup.get(c.model_id)
        if existing is None or (existing.origin == "live" and c.origin == "curated"):
            dedup[c.model_id] = c

    # Too few candidates: fall back to curated entries. This is a no-op when
    # they are already in the pool.
    if len(dedup) < min_models:
        for c in CURATED.get(model_type, []):
            dedup.setdefault(c.model_id, c)

    scored: List[Tuple[int, Candidate, List[str]]] = []
    for c in dedup.values():
        s, reasons = score_candidate(c, data_lang_value, priority, budget)
        scored.append((s, c, reasons))
    # Stable sort: equal scores keep pool order (curated before live).
    scored.sort(key=lambda item: item[0], reverse=True)

    top = scored[: max(k, min_models)]
    chosen = [c for _, c, _ in top]
    why = {c.model_id: reasons for _, c, reasons in top}
    return chosen, why, used_live
# =======================
# Main recommend function
# =======================
def recommend(
    ui_lang: str,
    task_id: str,
    has_docs: str,
    data_lang_value: str,
    priority: str,
    budget: str,
    source_mode: str,
) -> str:
    """Build the plain-text recommendation report shown in the result box.

    The task determines the model type (embeddings, extractive QA or
    instruction). Up to five models are then selected with ``select_models``
    and rendered together with the rationale, the settings used, an optional
    warning, a per-model explanation and a closing note.

    Args:
        ui_lang: UI language code; "EN" selects English texts, anything else
            the Polish rationale and notes.
        task_id: One of the ``TASK_*`` values; unknown values are treated as chat.
        has_docs: "YES" or "NO"; "NO" adds a warning for the QA task.
        data_lang_value: Data language setting.
        priority: Speed/quality setting.
        budget: Compute budget setting.
        source_mode: Model source setting.

    Returns:
        The report as a single newline-joined string.
    """
    english_ui = ui_lang == "EN"
    warning: Optional[str] = None

    if task_id == TASK_SIM:
        model_type = "embeddings"
        why_task = (
            "You want semantic similarity / deduplication / search. Embeddings + cosine similarity fit best."
            if english_ui
            else "Chcesz podobieństwo semantyczne / deduplikację / wyszukiwanie. Najlepsze są embeddingi + cosine similarity."
        )
        note_key = "note_emb"
    elif task_id == TASK_QA:
        model_type = "qa"
        why_task = (
            "You have a context (document/text) and a question. Extractive QA finds answers in the context."
            if english_ui
            else "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedzi w kontekście."
        )
        note_key = "note_qa"
        if has_docs == "NO":
            warning = t(ui_lang, "qa_need_docs")
    else:
        model_type = "instruction"
        why_task = (
            "You want instruction-following responses (chat/explain/summarize). Instruction-tuned models fit best."
            if english_ui
            else "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). Najlepsze są modele instrukcyjne."
        )
        note_key = "note_instr"

    chosen, why_map, used_live = select_models(
        model_type=model_type,
        data_lang_value=data_lang_value,
        priority=priority,
        budget=budget,
        source_mode=source_mode,
        ui_lang=ui_lang,
        k=5,
    )
    shown = chosen[:5]

    # Polish labels for the English reason strings produced by score_candidate.
    # Built once per call; unknown reasons are shown untranslated.
    reason_pl = {
        "Multilingual": "Wielojęzyczny",
        "EN-focused": "Skupiony na EN",
        "EN-optimized": "Optymalny dla EN",
        "Low budget friendly": "Dobry dla niskiego budżetu",
        "May be slower on low budget": "Może być wolniejszy przy niskim budżecie",
        "Too heavy for low budget": "Za ciężki dla niskiego budżetu",
        "Allowed by medium budget": "Dozwolony przy średnim budżecie",
        "Heavier option": "Cięższa opcja",
        "Fast": "Szybki",
        "Faster": "Szybszy",
        "Medium": "Średni",
        "Slower": "Wolniejszy",
        "Better quality baseline": "Lepsza jakość (baseline)",
        "Fast but may be lower quality": "Szybki, ale może gorsza jakość",
        "High capacity": "Duża pojemność",
        "Curated/stable": "Kuratorski/stabilny",
    }

    lines: List[str] = [
        t(ui_lang, "rec_type").format(model_type=model_type),
        "",
        t(ui_lang, "rationale"),
        f"- {why_task}",
        "",
        t(ui_lang, "settings"),
        f"- data language: {data_lang_value}",
        f"- priority: {priority}",
        f"- budget: {budget}",
        f"- source: {source_mode}",
        f"- has documents: {has_docs}",
        "",
    ]

    if warning:
        lines.extend([t(ui_lang, "warning"), f"- {warning}", ""])

    if used_live and source_mode in (SRC_LIVE, SRC_HYBRID):
        lines.extend([t(ui_lang, "live_note"), ""])

    lines.append(t(ui_lang, "models_min3"))
    for c in shown:
        note = c.note_en if english_ui else c.note_pl
        # Separate id and note; they were previously glued together.
        lines.append(f"- {c.model_id}: {note}")
    lines.append("")

    lines.append(t(ui_lang, "why_these"))
    for c in shown:
        reasons = why_map.get(c.model_id, [])
        if ui_lang == "PL":
            reasons = [reason_pl.get(r, r) for r in reasons]
        lines.append(f"- {c.model_id}: {', '.join(reasons)}")
    lines.append("")

    lines.append(t(ui_lang, note_key))
    return "\n".join(lines)
# =======================
# UI language dynamic updates
# =======================
def apply_language(ui_lang: str) -> Tuple[Any, ...]:
    """Return the component updates that switch the whole UI to ``ui_lang``.

    The tuple order must match the ``outputs`` list of the ``ui_lang.change``
    event: header, language radio, the six selectors (task, has_docs,
    data_lang, priority, budget, source), the two buttons, the result and
    status boxes, and the tab.
    """

    def tr(key: str) -> str:
        return t(ui_lang, key)

    # Selectors get both a new label and re-labelled choices.
    selectors = (
        ("task", task_choices),
        ("has_docs", yesno_choices),
        ("data_lang", data_lang_choices),
        ("priority", priority_choices),
        ("budget", budget_choices),
        ("source", source_choices),
    )

    updates: List[Any] = [
        gr.update(value=f"# {tr('title')}\n{tr('intro')}"),
        gr.update(label=tr("ui_lang")),
    ]
    for label_key, choices_for in selectors:
        updates.append(gr.update(label=tr(label_key), choices=choices_for(ui_lang)))
    # Buttons take their caption through ``value``.
    for caption_key in ("refresh", "recommend_btn"):
        updates.append(gr.update(value=tr(caption_key)))
    for label_key in ("result", "status", "tab_main"):
        updates.append(gr.update(label=tr(label_key)))
    return tuple(updates)
# =======================
# Build UI
# =======================
# NOTE(review): the nesting of the layout below was reconstructed from a copy of
# this file whose indentation had been lost; confirm which components belong
# inside the tab and the row.
with gr.Blocks(title=I18N["EN"]["title"]) as demo:
    header_md = gr.Markdown(f"# {t('EN', 'title')}\n{t('EN', 'intro')}")
    ui_lang = gr.Radio(choices=["EN", "PL"], value="EN", label=t("EN", "ui_lang"))

    with gr.Tab(t("EN", "tab_main")) as tab_main:
        task = gr.Dropdown(choices=task_choices("EN"), value=TASK_SIM, label=t("EN", "task"))
        has_docs = gr.Radio(choices=yesno_choices("EN"), value="YES", label=t("EN", "has_docs"))
        data_lang = gr.Radio(choices=data_lang_choices("EN"), value=DATA_MIXED, label=t("EN", "data_lang"))
        priority = gr.Radio(choices=priority_choices("EN"), value=PRIO_SPEED, label=t("EN", "priority"))
        budget = gr.Radio(choices=budget_choices("EN"), value=BUDGET_LOW, label=t("EN", "budget"))
        source_mode = gr.Radio(choices=source_choices("EN"), value=SRC_HYBRID, label=t("EN", "source"))

        with gr.Row():
            refresh_btn = gr.Button(t("EN", "refresh"))
            status = gr.Textbox(lines=1, label=t("EN", "status"))

        recommend_btn = gr.Button(t("EN", "recommend_btn"))
        out = gr.Textbox(lines=24, label=t("EN", "result"))

    # Event wiring.
    refresh_btn.click(fn=refresh_button, inputs=[ui_lang], outputs=[status])
    recommend_btn.click(
        fn=recommend,
        inputs=[ui_lang, task, has_docs, data_lang, priority, budget, source_mode],
        outputs=[out],
    )
    # The outputs order must match the tuple returned by apply_language().
    ui_lang.change(
        fn=apply_language,
        inputs=[ui_lang],
        outputs=[
            header_md, ui_lang, task, has_docs, data_lang, priority, budget, source_mode,
            refresh_btn, recommend_btn, out, status, tab_main
        ],
    )

# Start the server only when run as a script, so importing this module
# (e.g. for tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()