import time
from dataclasses import dataclass
from typing import Dict, List, Tuple, Any, Optional

import gradio as gr
from huggingface_hub import HfApi

# Shared Hub client used for the "live" model listings (anonymous access).
api = HfApi()

# =======================
# i18n
# =======================
# UI-string table: outer key is the UI language ("EN" / "PL"), inner key is the
# message id. Some values carry `str.format` placeholders ({model_type}, {ts}).
I18N: Dict[str, Dict[str, str]] = {
    "EN": {
        "title": "Model Fit Finder (CPU)",
        "intro": (
            "Pick an NLP task and constraints. The Space recommends an appropriate model type and returns "
            "at least 3 concrete Hugging Face models. Recommendations change based on your settings."
        ),
        "ui_lang": "UI language",
        "tab_main": "Model advisor",
        "task": "What do you want to do?",
        "has_docs": "Do you have your own documents/text to analyze?",
        "data_lang": "Data language",
        "priority": "Priority",
        "budget": "Compute budget",
        "source": "Model source",
        "refresh": "Refresh HF cache",
        "recommend_btn": "Recommend",
        "result": "Result",
        "status": "Status",
        "yes": "Yes",
        "no": "No",
        "en": "EN",
        "pl": "PL",
        "mixed": "Mixed",
        "speed": "Speed",
        "quality": "Quality",
        "budget_low": "Low (fast/small models)",
        "budget_med": "Medium (allow larger models)",
        "source_curated": "Curated (stable baseline)",
        "source_live": "HF Live (fresh from Hub)",
        "source_hybrid": "Hybrid (curated + live)",
        "task_chat": "Chat / instructions / generation",
        "task_qa": "Answer questions from a document (input text)",
        "task_sim": "Semantic similarity / duplicates / search",
        "rec_type": "Recommended model type: {model_type}",
        "rationale": "Rationale:",
        "settings": "Settings used:",
        "models_min3": "Models (min. 3):",
        "why_these": "Why these models:",
        "warning": "Warning:",
        "qa_need_docs": "Extractive QA needs a context document/text. With no documents, consider an instruction model or embeddings-based search.",
        "note_emb": "Note: embedding models do not generate text; they produce vectors for similarity/search.",
        "note_qa": "Note: extractive QA finds answers in the provided context.",
        "note_instr": "Note: instruction-tuned models follow prompts; smaller variants are CPU-friendly.",
        "live_note": "Live candidates pulled from Hub using pipeline tag and downloads ranking.",
        "refreshed": "HF cache refreshed at {ts}.",
        "refresh_failed": "Refresh failed; using cached/curated lists.",
    },
    "PL": {
        "title": "Model Fit Finder (CPU)",
        "intro": (
            "Wybierz zadanie NLP i ograniczenia. Space rekomenduje typ modelu i zwraca "
            "co najmniej 3 konkretne modele z Hugging Face. Rekomendacje zmieniają się zależnie od ustawień."
        ),
        "ui_lang": "Język interfejsu",
        "tab_main": "Doradca modeli",
        "task": "Co chcesz zrobić?",
        "has_docs": "Czy masz własne dokumenty/teksty do analizy?",
        "data_lang": "Język danych",
        "priority": "Priorytet",
        "budget": "Budżet obliczeniowy",
        "source": "Źródło modeli",
        "refresh": "Odśwież cache HF",
        "recommend_btn": "Zarekomenduj",
        "result": "Wynik",
        "status": "Status",
        "yes": "Tak",
        "no": "Nie",
        "en": "EN",
        "pl": "PL",
        "mixed": "Mieszany",
        "speed": "Szybkość",
        "quality": "Jakość",
        "budget_low": "Niski (szybkie/małe modele)",
        "budget_med": "Średni (pozwól na większe modele)",
        "source_curated": "Kuratorskie (stabilna baza)",
        "source_live": "HF Live (świeże z Hub)",
        "source_hybrid": "Hybryda (baza + live)",
        "task_chat": "Chat / polecenia / generowanie",
        "task_qa": "Odpowiedzi na pytania z dokumentu (tekst wejściowy)",
        "task_sim": "Semantyczne podobieństwo / duplikaty / wyszukiwanie",
        "rec_type": "Rekomendowany typ modelu: {model_type}",
        "rationale": "Uzasadnienie:",
        "settings": "Użyte ustawienia:",
        "models_min3": "Modele (min. 3):",
        "why_these": "Dlaczego te modele:",
        "warning": "Ostrzeżenie:",
        "qa_need_docs": "QA extractive wymaga kontekstu (dokumentu/tekstu). Bez dokumentów rozważ model instrukcyjny albo wyszukiwanie embeddingowe.",
        "note_emb": "Uwaga: modele embeddingowe nie generują tekstu; produkują wektory do podobieństwa/wyszukiwania.",
        "note_qa": "Uwaga: QA extractive znajduje odpowiedzi w podanym kontekście.",
        "note_instr": "Uwaga: modele instrukcyjne wykonują polecenia; mniejsze warianty są przyjazne dla CPU.",
        "live_note": "Kandydaci live pobierani z Hub po pipeline tag i rankingu pobrań.",
        "refreshed": "Cache HF odświeżony: {ts}.",
        "refresh_failed": "Nie udało się odświeżyć; używam cache/list kuratorskich.",
    },
}


def t(ui_lang: str, key: str) -> str:
    """Look up a UI string for `ui_lang`, falling back to English, then the key itself."""
    return I18N.get(ui_lang, I18N["EN"]).get(key, I18N["EN"].get(key, key))


# =======================
# Stable internal values
# =======================
# Enum-like internal values used as Gradio component *values*; the visible
# labels come from I18N and change with the UI language, these never do.
TASK_CHAT = "CHAT"
TASK_QA = "QA"
TASK_SIM = "SIM"

DATA_EN = "EN"
DATA_PL = "PL"
DATA_MIXED = "MIXED"

PRIO_SPEED = "SPEED"
PRIO_QUALITY = "QUALITY"

BUDGET_LOW = "LOW"
BUDGET_MED = "MED"

SRC_CURATED = "CURATED"
SRC_LIVE = "LIVE"
SRC_HYBRID = "HYBRID"


def task_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the task dropdown, labeled in `ui_lang`."""
    return [
        (t(ui_lang, "task_chat"), TASK_CHAT),
        (t(ui_lang, "task_qa"), TASK_QA),
        (t(ui_lang, "task_sim"), TASK_SIM),
    ]


def yesno_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the yes/no radio, labeled in `ui_lang`."""
    return [(t(ui_lang, "yes"), "YES"), (t(ui_lang, "no"), "NO")]


def data_lang_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the data-language radio, labeled in `ui_lang`."""
    return [(t(ui_lang, "en"), DATA_EN), (t(ui_lang, "pl"), DATA_PL), (t(ui_lang, "mixed"), DATA_MIXED)]


def priority_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the speed/quality radio, labeled in `ui_lang`."""
    return [(t(ui_lang, "speed"), PRIO_SPEED), (t(ui_lang, "quality"), PRIO_QUALITY)]


def budget_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the compute-budget radio, labeled in `ui_lang`."""
    return [(t(ui_lang, "budget_low"), BUDGET_LOW), (t(ui_lang, "budget_med"), BUDGET_MED)]


def source_choices(ui_lang: str) -> List[Tuple[str, str]]:
    """(label, value) pairs for the model-source radio, labeled in `ui_lang`."""
    return [
        (t(ui_lang, "source_curated"), SRC_CURATED),
        (t(ui_lang, "source_live"), SRC_LIVE),
        (t(ui_lang, "source_hybrid"), SRC_HYBRID),
    ]


# =======================
# Curated
candidates (stable baseline) # ======================= @dataclass(frozen=True) class Candidate: model_id: str size: str # "small" | "base" | "large" (heuristic) languages: str # "EN" | "MULTI" note_en: str note_pl: str origin: str # "curated" | "live" CURATED: Dict[str, List[Candidate]] = { "instruction": [ Candidate("google/flan-t5-small", "small", "EN", "Very light instruction-following text2text model.", "Bardzo lekki model text2text do poleceń.", "curated"), Candidate("google/flan-t5-base", "base", "EN", "Better quality than small; slower on CPU.", "Lepsza jakość niż small; wolniejszy na CPU.", "curated"), Candidate("google-t5/t5-small", "small", "EN", "Fast text2text fallback baseline.", "Szybki fallback text2text.", "curated"), Candidate("google/mt5-small", "small", "MULTI", "Multilingual text2text (useful for mixed-language prompts).", "Wielojęzyczny text2text (przydatny dla mieszanych języków).", "curated"), Candidate("google/mt5-base", "base", "MULTI", "Multilingual, higher quality than mt5-small; slower.", "Wielojęzyczny, lepsza jakość niż mt5-small; wolniejszy.", "curated"), ], "qa": [ Candidate("distilbert/distilbert-base-cased-distilled-squad", "small", "EN", "Fast extractive QA; classic CPU choice.", "Szybki QA extractive; klasyk na CPU.", "curated"), Candidate("distilbert/distilbert-base-uncased-distilled-squad", "small", "EN", "Popular extractive QA default.", "Popularny domyślny QA extractive.", "curated"), Candidate("deepset/bert-base-cased-squad2", "base", "EN", "SQuAD2 variant; better 'no answer' behavior.", "Wariant SQuAD2; lepiej obsługuje 'brak odpowiedzi'.", "curated"), Candidate("deepset/xlm-roberta-base-squad2", "base", "MULTI", "Multilingual extractive QA baseline (XLM-R).", "Wielojęzyczny QA extractive (XLM-R).", "curated"), ], "embeddings": [ Candidate("sentence-transformers/all-MiniLM-L6-v2", "small", "EN", "Very fast sentence embeddings; great for similarity on CPU.", "Bardzo szybkie embeddingi; świetne do podobieństwa na CPU.", 
"curated"), Candidate("sentence-transformers/all-mpnet-base-v2", "base", "EN", "Higher quality embeddings than MiniLM; slower.", "Lepsza jakość niż MiniLM; wolniejsze.", "curated"), Candidate("intfloat/e5-small-v2", "small", "EN", "Strong retrieval embeddings, good speed/quality balance.", "Mocne embeddingi do wyszukiwania; dobry balans.", "curated"), Candidate("intfloat/e5-base-v2", "base", "EN", "Higher quality e5; heavier on CPU.", "Lepsza jakość e5; cięższy na CPU.", "curated"), Candidate("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "base", "MULTI", "Multilingual embeddings; good for Polish/mixed.", "Wielojęzyczne embeddingi; dobre dla PL/mix.", "curated"), ], } # ======================= # HF Live cache (in-memory TTL) + refresh button # ======================= CACHE_TTL_SEC = 24 * 60 * 60 # 24h # cache key: (pipeline_tag, data_lang_value, library_hint, budget) _HUB_CACHE: Dict[Tuple[str, str, str, str], Tuple[float, List[str]]] = {} def _language_tag_predicate(tags: List[str], data_lang_value: str) -> bool: if data_lang_value == DATA_MIXED: return True target = "en" if data_lang_value == DATA_EN else "pl" candidates = {target, f"language:{target}", f"lang:{target}"} tags_lower = {str(x).lower() for x in (tags or [])} return any(c in tags_lower for c in candidates) def _library_predicate(tags: List[str], library_hint: str) -> bool: # Best-effort: many models have tags like "library:sentence-transformers" or "library:transformers" tags_lower = {str(x).lower() for x in (tags or [])} if not library_hint: return True return (f"library:{library_hint.lower()}" in tags_lower) or (library_hint.lower() in tags_lower) def _budget_predicate(model_id: str, tags: List[str], budget: str) -> bool: # Heuristic to keep "Low" budget models lightweight. # We avoid explicit "large" and certain common huge families by name heuristics. # This is intentionally conservative. 
mid = model_id.lower() if budget == BUDGET_MED: return True # Low budget: prefer smaller-ish names and avoid obvious large ones. if any(x in mid for x in ["-large", "large-", "xxl", "xl", "13b", "30b", "70b", "mixtral", "llama-2-70b", "llama-3-70b"]): return False # Keep common small cues # If it doesn't contain small cues, we still allow it, but overall ranking will prefer small/base from curated anyway. return True def fetch_live_model_ids( pipeline_tag: str, data_lang_value: str, library_hint: str, budget: str, limit: int = 30, ) -> List[str]: key = (pipeline_tag, data_lang_value, library_hint or "", budget) now = time.time() if key in _HUB_CACHE: ts, cached = _HUB_CACHE[key] if now - ts < CACHE_TTL_SEC: return cached models = api.list_models(filter=pipeline_tag, sort="downloads", direction=-1, limit=limit) out: List[str] = [] for m in models: mid = getattr(m, "modelId", None) tags = getattr(m, "tags", []) or [] if not mid: continue if not _language_tag_predicate(tags, data_lang_value): continue if not _library_predicate(tags, library_hint): continue if not _budget_predicate(mid, tags, budget): continue out.append(mid) _HUB_CACHE[key] = (now, out) return out def refresh_cache() -> None: _HUB_CACHE.clear() def refresh_button(ui_lang: str) -> str: try: refresh_cache() ts = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) return t(ui_lang, "refreshed").format(ts=ts) except Exception: return t(ui_lang, "refresh_failed") # ======================= # Ranking (settings must matter) # ======================= def _infer_size_from_id(model_id: str) -> str: mid = model_id.lower() if any(x in mid for x in ["-large", "large-", "xxl", "xl"]): return "large" if any(x in mid for x in ["-base", "base-", "mpnet", "xlm-roberta-base", "bert-base"]): return "base" if any(x in mid for x in ["small", "mini", "minilm", "distil", "tiny"]): return "small" return "base" def _infer_lang_from_tags_or_id(model_id: str) -> str: mid = model_id.lower() if "multilingual" in mid or "xlm" in 
mid or "mt5" in mid: return "MULTI" return "EN" def score_candidate(c: Candidate, data_lang_value: str, priority: str, budget: str) -> Tuple[int, List[str]]: score = 0 reasons: List[str] = [] # Language preference if data_lang_value in (DATA_PL, DATA_MIXED): if c.languages == "MULTI": score += 4 reasons.append("Multilingual") else: score -= 1 reasons.append("EN-focused") else: if c.languages == "EN": score += 3 reasons.append("EN-optimized") else: score += 1 reasons.append("Multilingual") # Compute budget constraint if budget == BUDGET_LOW: if c.size == "small": score += 5 reasons.append("Low budget friendly") elif c.size == "base": score += 1 reasons.append("May be slower on low budget") else: score -= 6 reasons.append("Too heavy for low budget") else: # MED if c.size == "small": score += 2 reasons.append("Fast") elif c.size == "base": score += 4 reasons.append("Allowed by medium budget") else: score += 1 reasons.append("Heavier option") # Priority: speed vs quality if priority == PRIO_SPEED: if c.size == "small": score += 4 reasons.append("Faster") elif c.size == "base": score += 1 reasons.append("Medium") else: score -= 2 reasons.append("Slower") else: # QUALITY if c.size == "base": score += 4 reasons.append("Better quality baseline") elif c.size == "small": score += 2 reasons.append("Fast but may be lower quality") else: score += 3 reasons.append("High capacity") # Prefer curated slightly for stability (unless source is live-only) if c.origin == "curated": score += 1 reasons.append("Curated/stable") return score, reasons def select_models( model_type: str, data_lang_value: str, priority: str, budget: str, source_mode: str, ui_lang: str, k: int = 4, ) -> Tuple[List[Candidate], Dict[str, List[str]], bool]: """ Returns chosen candidates, reasons map, and whether live candidates were used. 
""" pool: List[Candidate] = [] used_live = False if source_mode in (SRC_CURATED, SRC_HYBRID): pool.extend(CURATED[model_type]) if source_mode in (SRC_LIVE, SRC_HYBRID): # Map our types to pipeline tags and library hints if model_type == "embeddings": pipeline_tag = "sentence-similarity" library_hint = "sentence-transformers" elif model_type == "qa": pipeline_tag = "question-answering" library_hint = "transformers" else: pipeline_tag = "text-generation" library_hint = "transformers" live_ids = fetch_live_model_ids( pipeline_tag=pipeline_tag, data_lang_value=data_lang_value, library_hint=library_hint, budget=budget, limit=35, ) # Convert to Candidates (notes are generic because we don't parse model card here) for mid in live_ids: c = Candidate( model_id=mid, size=_infer_size_from_id(mid), languages=_infer_lang_from_tags_or_id(mid), note_en="Live candidate from Hub (ranked by downloads).", note_pl="Kandydat live z Hub (ranking po pobraniach).", origin="live", ) pool.append(c) used_live = True # Deduplicate pool by model_id, keeping curated version if present dedup: Dict[str, Candidate] = {} for c in pool: if c.model_id not in dedup: dedup[c.model_id] = c else: # prefer curated notes if dedup[c.model_id].origin == "live" and c.origin == "curated": dedup[c.model_id] = c pool = list(dedup.values()) scored: List[Tuple[int, Candidate, List[str]]] = [] for c in pool: s, reasons = score_candidate(c, data_lang_value, priority, budget) scored.append((s, c, reasons)) scored.sort(key=lambda x: x[0], reverse=True) chosen: List[Candidate] = [] why: Dict[str, List[str]] = {} for s, c, reasons in scored: if c.model_id in why: continue chosen.append(c) why[c.model_id] = reasons if len(chosen) >= k: break # ensure min 3 if len(chosen) < 3: for s, c, reasons in scored: if c.model_id not in why: chosen.append(c) why[c.model_id] = reasons if len(chosen) >= 3: break return chosen, why, used_live # ======================= # Main recommend function # ======================= def recommend( 
# Polish translations for the short ranking-reason strings emitted by
# score_candidate(). Hoisted to module level so the table is built once
# instead of once per recommended model on every recommend() call.
_REASON_PL: Dict[str, str] = {
    "Multilingual": "Wielojęzyczny",
    "EN-focused": "Skupiony na EN",
    "EN-optimized": "Optymalny dla EN",
    "Low budget friendly": "Dobry dla niskiego budżetu",
    "May be slower on low budget": "Może być wolniejszy przy niskim budżecie",
    "Too heavy for low budget": "Za ciężki dla niskiego budżetu",
    "Allowed by medium budget": "Dozwolony przy średnim budżecie",
    "Heavier option": "Cięższa opcja",
    "Fast": "Szybki",
    "Faster": "Szybszy",
    "Medium": "Średni",
    "Slower": "Wolniejszy",
    "Better quality baseline": "Lepsza jakość (baseline)",
    "Fast but may be lower quality": "Szybki, ale może gorsza jakość",
    "High capacity": "Duża pojemność",
    "Curated/stable": "Kuratorski/stabilny",
}


def recommend(
    ui_lang: str,
    task_id: str,
    has_docs: str,
    data_lang_value: str,
    priority: str,
    budget: str,
    source_mode: str,
) -> str:
    """Build the localized recommendation report for the chosen settings.

    Maps the task to a model type, runs select_models, and formats the result
    (type, rationale, settings echo, optional warning, >=3 models with notes,
    per-model reasons, and a closing note) as a plain-text block for the UI.
    """
    warning: Optional[str] = None

    # Map the task to a model type plus a localized one-line rationale.
    if task_id == TASK_SIM:
        model_type = "embeddings"
        why_task = (
            "You want semantic similarity / deduplication / search. Embeddings + cosine similarity fit best."
            if ui_lang == "EN"
            else "Chcesz podobieństwo semantyczne / deduplikację / wyszukiwanie. Najlepsze są embeddingi + cosine similarity."
        )
        note_key = "note_emb"
    elif task_id == TASK_QA:
        model_type = "qa"
        why_task = (
            "You have a context (document/text) and a question. Extractive QA finds answers in the context."
            if ui_lang == "EN"
            else "Masz kontekst (dokument/tekst) i pytanie. QA extractive znajduje odpowiedzi w kontekście."
        )
        note_key = "note_qa"
        if has_docs == "NO":
            # Extractive QA is pointless without a context document.
            warning = t(ui_lang, "qa_need_docs")
    else:
        model_type = "instruction"
        why_task = (
            "You want instruction-following responses (chat/explain/summarize). Instruction-tuned models fit best."
            if ui_lang == "EN"
            else "Chcesz odpowiedzi sterowane poleceniem (chat/wyjaśnianie/streszczanie). Najlepsze są modele instrukcyjne."
        )
        note_key = "note_instr"

    chosen, why_map, used_live = select_models(
        model_type=model_type,
        data_lang_value=data_lang_value,
        priority=priority,
        budget=budget,
        source_mode=source_mode,
        ui_lang=ui_lang,
        k=5,
    )

    lines: List[str] = []
    lines.append(t(ui_lang, "rec_type").format(model_type=model_type))
    lines.append("")
    lines.append(t(ui_lang, "rationale"))
    lines.append(f"- {why_task}")
    lines.append("")
    lines.append(t(ui_lang, "settings"))
    lines.append(f"- data language: {data_lang_value}")
    lines.append(f"- priority: {priority}")
    lines.append(f"- budget: {budget}")
    lines.append(f"- source: {source_mode}")
    lines.append(f"- has documents: {has_docs}")
    lines.append("")
    if warning:
        lines.append(t(ui_lang, "warning"))
        lines.append(f"- {warning}")
        lines.append("")
    if used_live and source_mode in (SRC_LIVE, SRC_HYBRID):
        lines.append(t(ui_lang, "live_note"))
        lines.append("")

    lines.append(t(ui_lang, "models_min3"))
    for c in chosen[:5]:
        note = c.note_en if ui_lang == "EN" else c.note_pl
        lines.append(f"- {c.model_id} — {note}")
    lines.append("")

    lines.append(t(ui_lang, "why_these"))
    for c in chosen[:5]:
        reasons = why_map.get(c.model_id, [])
        if ui_lang == "PL":
            # Reasons are produced in English; translate known ones, pass unknown through.
            reasons_txt = ", ".join(_REASON_PL.get(r, r) for r in reasons)
        else:
            reasons_txt = ", ".join(reasons)
        lines.append(f"- {c.model_id}: {reasons_txt}")
    lines.append("")
    lines.append(t(ui_lang, note_key))
    return "\n".join(lines)


# =======================
# UI language dynamic updates
# =======================
def apply_language(ui_lang: str) -> Tuple[Any, ...]:
    """Return gr.update payloads relocalizing every labeled component.

    Order must match the `outputs` list of the ui_lang.change wiring below:
    header, ui_lang, task, has_docs, data_lang, priority, budget, source,
    refresh button, recommend button, result box, status box, main tab.
    """
    return (
        gr.update(value=f"# {t(ui_lang, 'title')}\n{t(ui_lang, 'intro')}"),
        gr.update(label=t(ui_lang, "ui_lang")),
        gr.update(label=t(ui_lang, "task"), choices=task_choices(ui_lang)),
        gr.update(label=t(ui_lang, "has_docs"), choices=yesno_choices(ui_lang)),
        gr.update(label=t(ui_lang, "data_lang"), choices=data_lang_choices(ui_lang)),
        gr.update(label=t(ui_lang, "priority"), choices=priority_choices(ui_lang)),
        gr.update(label=t(ui_lang, "budget"), choices=budget_choices(ui_lang)),
        gr.update(label=t(ui_lang, "source"), choices=source_choices(ui_lang)),
        gr.update(value=t(ui_lang, "refresh")),
        gr.update(value=t(ui_lang, "recommend_btn")),
        gr.update(label=t(ui_lang, "result")),
        gr.update(label=t(ui_lang, "status")),
        gr.update(label=t(ui_lang, "tab_main")),
    )


# =======================
# Build UI
# =======================
with gr.Blocks(title=I18N["EN"]["title"]) as demo:
    header_md = gr.Markdown(f"# {t('EN', 'title')}\n{t('EN', 'intro')}")
    ui_lang = gr.Radio(choices=["EN", "PL"], value="EN", label=t("EN", "ui_lang"))

    with gr.Tab(t("EN", "tab_main")) as tab_main:
        task = gr.Dropdown(choices=task_choices("EN"), value=TASK_SIM, label=t("EN", "task"))
        has_docs = gr.Radio(choices=yesno_choices("EN"), value="YES", label=t("EN", "has_docs"))
        data_lang = gr.Radio(choices=data_lang_choices("EN"), value=DATA_MIXED, label=t("EN", "data_lang"))
        priority = gr.Radio(choices=priority_choices("EN"), value=PRIO_SPEED, label=t("EN", "priority"))
        budget = gr.Radio(choices=budget_choices("EN"), value=BUDGET_LOW, label=t("EN", "budget"))
        source_mode = gr.Radio(choices=source_choices("EN"), value=SRC_HYBRID, label=t("EN", "source"))
        with gr.Row():
            refresh_btn = gr.Button(t("EN", "refresh"))
            status = gr.Textbox(lines=1, label=t("EN", "status"))
        recommend_btn = gr.Button(t("EN", "recommend_btn"))
        out = gr.Textbox(lines=24, label=t("EN", "result"))

    refresh_btn.click(fn=refresh_button, inputs=[ui_lang], outputs=[status])
    recommend_btn.click(
        fn=recommend,
        inputs=[ui_lang, task, has_docs, data_lang, priority, budget, source_mode],
        outputs=[out],
    )
    # Output order here must match the tuple returned by apply_language().
    ui_lang.change(
        fn=apply_language,
        inputs=[ui_lang],
        outputs=[
            header_md, ui_lang, task, has_docs, data_lang, priority, budget,
            source_mode, refresh_btn, recommend_btn, out, status, tab_main,
        ],
    )

if __name__ == "__main__":
    # Guarded so importing this module (e.g. in tests) doesn't start a server;
    # running it as a script (HF Spaces / local) behaves as before.
    demo.launch()