Spaces:
Running
Running
| """ | |
| app.py – Streamlit PoC: AI Matching Assistant pro ČSOB | |
| Diplomová práce – Filip Husein | |
| Perspektiva ZAMĚSTNANCE (3-polní formulář): | |
| 1. Životopis (PDF/DOCX/TXT) – volitelné | |
| 2. Současná pozice + důvod pro změnu | |
| 3. Cokoliv dodat (koníčky, preference, silné stránky…) – volitelné | |
| Systém: | |
| - Vyloučí pozice odpovídající AKTUÁLNÍ roli zaměstnance | |
| (multilinguálně: CS/SK/EN – díky fine-tuned JobBERT-v3) | |
| - Doporučí top 5 pozic v ČSOB | |
| - Ollama vygeneruje přátelskou HR odpověď | |
| Spuštění: | |
| streamlit run app.py | |
| Prerekvizity: | |
| - Fine-tuned model v models/jobbert-v3-czsk-final/ | |
| - FAISS index v index/ | |
| - Ollama běží (ollama serve) s modelem llama3.2 | |
| """ | |
| import json | |
| import os | |
| import re | |
| import sys | |
| import time | |
| import unicodedata | |
| import numpy as np | |
| import streamlit as st | |
# =============================================================================
# Configuration – BASE_DIR must be defined before anything below uses it
# =============================================================================
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Put the application directory at the front of sys.path
sys.path.insert(0, BASE_DIR)
# Model fallback chain, tried in order by load_model():
# HF snapshot (author's note: downloaded by build_index_startup.py) → local
# fine-tuned checkpoints → base model identifier.
MODEL_PATHS = [
    os.path.join(BASE_DIR, "model"),  # HF snapshot
    os.path.join(BASE_DIR, "models", "jobbert-v3-czsk-hn-final"),  # local (optional)
    os.path.join(BASE_DIR, "models", "jobbert-v3-czsk-final"),  # fallback
    "TechWolf/JobBERT-v3",  # base
]
INDEX_DIR = os.path.join(BASE_DIR, "index")
FAISS_PATH = os.path.join(INDEX_DIR, "positions.faiss")
META_PATH = os.path.join(INDEX_DIR, "positions_metadata.json")
# How many candidates to pull from FAISS (generous reserve after filtering)
TOP_K_SEARCH = 40
# How many positions are finally shown to the user
TOP_K_SHOW = 5
# ── Exclusion filter ──
# Author's note: 0.88 = only genuinely similar roles (e.g. "Junior/Senior X"
# variants), not merely related fields.
EXCLUSION_SIM_THRESHOLD = 0.88
# Number of nearest neighbours fetched per encoded role text in
# compute_exclusion_ids(); the title-substring pass there is not limited by it.
EXCLUSION_MAX_COUNT = 30
# ── Rescale raw cosine similarity for user-friendly display ──
# Author's note: the fine-tuned model shows a gap of roughly pos=0.83, neg=0.08,
# and relevant "neighbouring" roles fall into the 0.25–0.70 range.
# Mapping used by display_score(): 0.15 → 50 %, 0.85 → 99 % (linear),
# followed by a clamp to [20 %, 99 %].
DISPLAY_MIN_RAW = 0.15
DISPLAY_MAX_RAW = 0.85
DISPLAY_MIN_PCT = 50.0
DISPLAY_MAX_PCT = 99.0
def display_score(raw: float) -> float:
    """Map a raw cosine similarity onto the percentage shown to the user.

    The raw value is positioned linearly between ``DISPLAY_MIN_RAW`` and
    ``DISPLAY_MAX_RAW`` (limited to that interval), projected onto
    ``DISPLAY_MIN_PCT``..``DISPLAY_MAX_PCT`` and finally clamped to
    [20.0, 99.0].

    Args:
        raw: Raw similarity score, or ``None``.

    Returns:
        ``0.0`` when ``raw`` is ``None``; otherwise the rescaled percentage.
    """
    if raw is None:
        return 0.0

    raw_span = DISPLAY_MAX_RAW - DISPLAY_MIN_RAW
    pct_span = DISPLAY_MAX_PCT - DISPLAY_MIN_PCT

    # Fraction of the way through the raw interval, limited to [0, 1].
    fraction = min(1.0, (raw - DISPLAY_MIN_RAW) / raw_span)
    fraction = max(0.0, fraction)

    scaled = DISPLAY_MIN_PCT + fraction * pct_span
    # Absolute safety clamp, independent of the configured percentage range.
    return max(20.0, min(99.0, scaled))
| # ============================================================================= | |
| # Auto-build index + model při prvním startu (HF Spaces) | |
| # ============================================================================= | |
# Build the index on first start when the FAISS file is missing.
if not os.path.exists(FAISS_PATH):
    import subprocess

    # Use the interpreter that is running this app (bare "python" may resolve
    # to a different environment or not exist at all) and anchor both the
    # script path and the working directory at BASE_DIR so the build does not
    # depend on the directory the app was launched from.
    subprocess.run(
        [sys.executable or "python", os.path.join(BASE_DIR, "build_index_startup.py")],
        check=True,
        cwd=BASE_DIR,
    )
| from cv_parser import extract_cv_text, summarize_cv | |
| from rag_engine import ( | |
| synthesize_profile, | |
| generate_response, | |
| check_llm_available as check_ollama_available, # alias kvůli rest kódu | |
| llm_generate as ollama_generate, | |
| _fallback_response, | |
| ) | |
| # ============================================================================= | |
| # Lazy loading (cache pro Streamlit) | |
| # ============================================================================= | |
def load_model():
    """Load the first usable SentenceTransformer from ``MODEL_PATHS``.

    Candidates are tried in list order. An absolute filesystem path is only
    attempted when it is an existing directory; a non-absolute entry (such as
    the model identifier at the end of ``MODEL_PATHS``) is always handed to
    ``SentenceTransformer``. A candidate that fails to load produces a
    Streamlit warning and the next one is tried.

    NOTE(review): the surrounding section is titled as cached loading, but no
    caching decorator is applied to this function in the visible code, so it
    does its work on every call — confirm whether a decorator was intended.

    Returns:
        Tuple ``(model, path)`` with the loaded model and the entry of
        ``MODEL_PATHS`` it was loaded from. If nothing loads, an error is shown
        and ``st.stop()`` is called.
    """
    from sentence_transformers import SentenceTransformer

    for path in MODEL_PATHS:
        # os.path.isabs is the portable test for "local absolute path"; the
        # previous startswith("/") check treated Windows absolute paths as
        # non-local identifiers and tried to load directories that do not exist.
        if os.path.isabs(path) and not os.path.isdir(path):
            continue
        try:
            return SentenceTransformer(path), path
        except Exception as e:
            # Best-effort chain: report the failure and fall through to the
            # next candidate.
            st.warning(f"Model {path} nelze načíst: {e}")
    st.error("Žádný model nebyl nalezen! Spusť nejdříve trénink (12_train_jobbert.py).")
    st.stop()
def load_index():
    """Load the FAISS index and its position metadata from disk.

    Both ``FAISS_PATH`` and ``META_PATH`` must exist; if either is missing an
    error is shown in the UI and ``st.stop()`` is called instead of letting a
    raw ``FileNotFoundError`` surface.

    NOTE(review): the surrounding section is titled as cached loading, but no
    caching decorator is applied to this function in the visible code —
    confirm whether one was intended.

    Returns:
        Tuple ``(index, metadata)``: the object returned by
        ``faiss.read_index`` and the parsed JSON content of ``META_PATH``.
    """
    import faiss

    if not os.path.exists(FAISS_PATH):
        st.error(f"FAISS index nenalezen: {FAISS_PATH}\nSpusť: python 14_build_index.py")
        st.stop()
    # The metadata file is required as well: results are looked up in it by
    # index id, so an index without metadata is unusable.
    if not os.path.exists(META_PATH):
        st.error(f"Metadata indexu nenalezena: {META_PATH}\nSpusť: python 14_build_index.py")
        st.stop()

    index = faiss.read_index(FAISS_PATH)
    with open(META_PATH, "r", encoding="utf-8") as f:
        metadata = json.load(f)
    return index, metadata
| # ============================================================================= | |
| # Text normalizace (case + diakritika insensitive) | |
| # ============================================================================= | |
| def _normalize(s: str) -> str: | |
| """Lower + NFKD + odstraň diakritiku + squeeze whitespace.""" | |
| if not s: | |
| return "" | |
| s = s.lower() | |
| s = unicodedata.normalize("NFKD", s) | |
| s = "".join(c for c in s if not unicodedata.combining(c)) | |
| s = re.sub(r"\s+", " ", s).strip() | |
| return s | |
def _title_contains_any(title: str, phrases: list[str]) -> bool:
    """Tell whether the normalized title contains any normalized phrase.

    Both sides go through ``_normalize`` first, so the comparison ignores
    case, diacritics and whitespace differences. Phrases that normalize to an
    empty string never match.
    """
    haystack = _normalize(title)
    return any(
        needle and needle in haystack
        for needle in map(_normalize, phrases)
    )
# =============================================================================
# Multilingual role variants (LLM translation)
# =============================================================================
# Prompt template filled via str.format(role=...) in get_role_variants(); the
# doubled braces are literal braces of the JSON example in the prompt.
ROLE_TRANSLATE_PROMPT = """Přelož následující název pracovní pozice do češtiny, slovenštiny a angličtiny.
Vrať POUZE validní JSON ve formátu {{"cs": "...", "sk": "...", "en": "..."}}.
Žádný komentář, žádný markdown, pouze JSON.
Pozice: {role}
JSON:"""
def get_role_variants(role_text: str, use_ollama: bool = True) -> list[str]:
    """Return the role text plus its CS/SK/EN translations, without duplicates.

    The stripped original text always comes first, followed by the translations
    in ``cs``, ``sk``, ``en`` order, so the result is stable between runs (the
    previous set-based implementation returned them in arbitrary order).

    Args:
        role_text: Role title as entered by the user.
        use_ollama: When false, the LLM is not contacted at all.

    Returns:
        List of distinct, non-empty, stripped variants. Only the original text
        is returned when the LLM is disabled, unavailable, or its answer
        cannot be parsed.
    """
    variants: list[str] = []

    def _add(candidate: str) -> None:
        # Order-preserving de-duplication; the list holds at most four items.
        if candidate and candidate not in variants:
            variants.append(candidate)

    _add(role_text.strip())

    if not use_ollama or not check_ollama_available():
        return variants

    try:
        prompt = ROLE_TRANSLATE_PROMPT.format(role=role_text)
        raw = ollama_generate(prompt, temperature=0.0, max_tokens=200)
        # Pick the first flat {...} object out of the answer; the model may
        # wrap the JSON in extra text.
        match = re.search(r"\{[^}]*\}", raw, re.DOTALL)
        if match:
            data = json.loads(match.group(0))
            for key in ("cs", "sk", "en"):
                val = data.get(key)
                if isinstance(val, str):
                    _add(val.strip())
    except Exception:
        # Deliberate best effort: translation is optional, so any failure of
        # the LLM call or of parsing leaves us with what was collected so far.
        pass
    return variants
| # ============================================================================= | |
| # Exclusion filter (multilinguální, embedding-based cluster) | |
| # ============================================================================= | |
def compute_exclusion_ids(
    current_role_text: str,
    model,
    index,
    metadata: list[dict],
    role_variants: list[str] | None = None,
    similarity_threshold: float = EXCLUSION_SIM_THRESHOLD,
    max_exclusions: int = EXCLUSION_MAX_COUNT,
) -> tuple[set[int], list[dict]]:
    """Compute the index ids that must be left out of the recommendations.

    Two passes are combined:

    1. Semantic: the current role and its distinct variants are encoded with
       ``model``; for each of them the ``k`` nearest index entries are fetched
       and those scoring at least ``similarity_threshold`` are excluded.
    2. Substring: every metadata entry whose normalized title contains one of
       the normalized role texts is excluded as well.

    Args:
        current_role_text: Role entered by the user; blank input excludes
            nothing.
        model: Encoder exposing ``encode(texts, normalize_embeddings=...,
            show_progress_bar=...)``.
        index: Search index exposing ``ntotal`` and ``search(vectors, k)``.
        metadata: Per-position dicts, addressed by index id.
        role_variants: Optional extra spellings/translations of the role.
        similarity_threshold: Minimum score for a semantic exclusion.
        max_exclusions: Number of neighbours fetched per encoded text. The
            substring pass is not limited by this value.

    Returns:
        ``(ids, rows)`` where ``ids`` is the set of excluded index ids and
        ``rows`` holds one dict per exclusion with keys ``index_id``,
        ``title``, ``score`` (``None`` for substring hits) and ``reason``.
    """
    if not current_role_text or not current_role_text.strip():
        return set(), []

    # Texts to encode: the role itself first, then each variant that differs
    # from everything already collected (ignoring case and outer whitespace),
    # so the same text is never encoded and searched twice.
    texts = [current_role_text]
    seen_keys = {current_role_text.strip().lower()}
    for variant in role_variants or ():
        key = variant.strip().lower() if variant else ""
        if key and key not in seen_keys:
            seen_keys.add(key)
            texts.append(variant)

    exclusion_ids: set[int] = set()
    debug_rows: list[dict] = []

    # The semantic pass is skipped when there is nothing to search (empty
    # index or non-positive cap), because k must be positive for the search.
    k = min(max_exclusions, index.ntotal)
    if k > 0:
        embeddings = np.array(
            model.encode(
                texts,
                normalize_embeddings=True,
                show_progress_bar=False,
            ),
            dtype=np.float32,
        )
        # Fetch top-k per text and take the union of the hits.
        for emb in embeddings:
            scores, indices = index.search(emb.reshape(1, -1), k)
            for idx, score in zip(indices[0], scores[0]):
                if idx < 0:
                    # Negative ids carry no result — presumably padding for
                    # missing neighbours; verify against the index type.
                    continue
                if not (score >= similarity_threshold):
                    continue
                idx_int = int(idx)
                if idx_int in exclusion_ids:
                    continue
                exclusion_ids.add(idx_int)
                debug_rows.append({
                    "index_id": idx_int,
                    "title": metadata[idx_int].get("title", ""),
                    "score": float(score),
                    "reason": "semantic",
                })

    # Substring guard over ALL metadata: catches exact title matches the
    # embedding pass may have missed. The role texts are normalized once here
    # instead of once per metadata row.
    phrases = {p for p in map(_normalize, texts) if p}
    if phrases:
        for i, meta in enumerate(metadata):
            if i in exclusion_ids:
                continue
            title = meta.get("title", "")
            title_norm = _normalize(title)
            if any(p in title_norm for p in phrases):
                exclusion_ids.add(i)
                debug_rows.append({
                    "index_id": i,
                    "title": title,
                    "score": None,
                    "reason": "substring",
                })

    return exclusion_ids, debug_rows
| # ============================================================================= | |
| # Matching logika | |
| # ============================================================================= | |
def search_positions(
    query_text: str,
    model,
    index,
    metadata,
    top_k: int = TOP_K_SEARCH,
    exclude_ids: set[int] | None = None,
):
    """Encode the query and return the best non-excluded positions.

    Args:
        query_text: Text to encode with ``model``.
        model: Encoder exposing ``encode(texts, normalize_embeddings=...,
            show_progress_bar=...)``.
        index: Search index exposing ``ntotal`` and ``search(vectors, k)``.
        metadata: Per-position dicts, addressed by index id.
        top_k: Maximum number of results to return.
        exclude_ids: Index ids that must not appear in the result.

    Returns:
        Up to ``top_k`` copies of metadata dicts in search order, each extended
        with ``rank`` (1-based, counted after filtering), ``score`` (raw search
        score as ``float``) and ``index_id``. An empty list is returned when
        ``top_k`` is not positive or the index is empty.
    """
    if top_k <= 0:
        return []

    excluded = exclude_ids or set()
    # Fetch top_k plus one extra neighbour per excluded id, so that top_k
    # results remain even if every excluded id ranks at the top.
    k = min(top_k + len(excluded), index.ntotal)
    if k <= 0:
        # Nothing to search (empty index); k must be positive for the search.
        return []

    embedding = np.array(
        model.encode([query_text], normalize_embeddings=True, show_progress_bar=False),
        dtype=np.float32,
    )
    scores, indices = index.search(embedding, k)

    results = []
    for idx, score in zip(indices[0], scores[0]):
        idx_int = int(idx)
        # Negative ids carry no result — presumably padding for missing
        # neighbours; verify against the index type.
        if idx_int < 0 or idx_int in excluded:
            continue
        meta = metadata[idx_int].copy()  # copy so the shared metadata stays untouched
        meta["rank"] = len(results) + 1
        meta["score"] = float(score)
        meta["index_id"] = idx_int
        results.append(meta)
        if len(results) >= top_k:
            break
    return results
| # ============================================================================= | |
| # Build vyhledávací text | |
| # ============================================================================= | |
def build_search_query(
    current_role: str,
    reason: str,
    extras: str,
    cv_text: str,
) -> str:
    """Assemble the text used for embedding / profile synthesis.

    The current role is deliberately kept out of the query (it would pull
    similar positions back in); only the reason for change, the extra notes
    and the CV excerpt (first 1500 characters) are used. The role serves
    purely as a fallback when all three are blank.

    Returns:
        The non-blank sections joined by a blank line, or the role text (or an
        empty string) when there are no sections.
    """
    # (prefix, raw value, optional length limit) for every candidate section.
    labelled = (
        ("Hledám novou pozici, protože: ", reason, None),
        ("Další informace o mně: ", extras, None),
        ("Z životopisu: ", cv_text, 1500),
    )
    sections = [
        prefix + value.strip()[:limit]
        for prefix, value, limit in labelled
        if value and value.strip()
    ]
    # With nothing else to go on, fall back to the role so that a search can
    # still return something.
    if not sections and current_role:
        sections = [current_role]
    return "\n\n".join(sections)
| # ============================================================================= | |
| # Streamlit UI | |
| # ============================================================================= | |
def main():
    """Render the Streamlit page and run the matching pipeline on submit.

    The page consists of the page config and custom CSS, a header banner, a
    sidebar showing model / index / LLM status, and a three-field form. When
    the form is submitted the function validates the inputs, optionally
    extracts text from the uploaded CV, computes the index ids to exclude for
    the current role, builds and synthesizes the profile text, searches the
    index, rescales the scores for display, generates the textual
    recommendation and renders the result cards (plus an optional debug
    table).
    """
    # Page config
    st.set_page_config(
        page_title="ČSOB – AI Matching Assistant",
        page_icon="🏦",
        layout="wide",
        initial_sidebar_state="collapsed",
    )
    # Custom CSS
    st.markdown("""
<style>
.main-header {
background: linear-gradient(135deg, #003366 0%, #0066cc 100%);
padding: 1.5rem 2rem;
border-radius: 12px;
color: white;
margin-bottom: 1.5rem;
}
.main-header h1 { color: white; margin: 0; font-size: 1.8rem; }
.main-header p { color: #cde; margin: 0.3rem 0 0; font-size: 0.95rem; }
.position-card {
border: 1px solid #ddd;
border-radius: 10px;
padding: 1rem 1.2rem;
margin-bottom: 0.8rem;
background: #fafbfc;
}
.position-card:hover { border-color: #0066cc; box-shadow: 0 2px 8px rgba(0,102,204,0.1); }
.score-badge {
display: inline-block;
padding: 2px 10px;
border-radius: 12px;
font-weight: bold;
font-size: 0.85rem;
}
.score-high { background: #d4edda; color: #155724; }
.score-mid { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
.info-box {
background: #eef4fb;
border-left: 4px solid #0066cc;
padding: 0.8rem 1rem;
border-radius: 6px;
margin: 0.5rem 0;
font-size: 0.9rem;
}
</style>
""", unsafe_allow_html=True)
    # Header
    st.markdown("""
<div class="main-header">
<h1>🏦 AI Matching Assistant</h1>
<p>Najdi svou další kariérní příležitost v ČSOB · Diplomová práce</p>
</div>
""", unsafe_allow_html=True)
    # ─── Sidebar: Status ───
    with st.sidebar:
        st.header("⚙️ Status systému")
        model, model_path = load_model()
        model_name = os.path.basename(model_path) if "/" in model_path else model_path
        # Heuristic on the path text: checkpoints whose path contains "final"
        # or "czsk" are labelled as fine-tuned.
        is_finetuned = "final" in model_path or "czsk" in model_path
        if is_finetuned:
            st.success(f"Model: {model_name}\n(fine-tuned)")
        else:
            st.info(f"Model: {model_name}\n(base)")
        index, metadata = load_index()
        st.info(f"Pozice v indexu: {index.ntotal}")
        # Checked once per run; the result is reused for every LLM step below.
        ollama_ok = check_ollama_available()
        if ollama_ok:
            st.success("LLM: Groq (llama-3.3-70b)")
        else:
            st.warning("LLM: nedostupný\n(fallback režim)")
        st.divider()
        st.markdown("**Parametry**")
        st.caption(f"Vyhledání: top-{TOP_K_SEARCH}")
        st.caption(f"Zobrazení: top-{TOP_K_SHOW}")
        st.caption(f"Exclusion práh: {EXCLUSION_SIM_THRESHOLD:.2f}")
        st.caption(f"Exclusion cap: max {EXCLUSION_MAX_COUNT}")
        st.divider()
        show_debug = st.checkbox("🔧 Debug info", value=False,
                                 help="Zobrazí raw cosine similarity a interní metriky (pro autora/vedoucího)")
        st.divider()
        st.caption("Diplomová práce – Filip Husein")
        st.caption("VŠE Praha · 2026")
    # ─── Main form ───
    st.markdown("### 📝 Vstupní údaje")
    st.markdown(
        "Vyplň pole níže. Systém pak doporučí top 5 ČSOB pozic šitých na míru – "
        "**s vyloučením tvé aktuální pozice** (a podobných)."
    )
    with st.form("matching_form", clear_on_submit=False):
        # 1. CV upload
        uploaded_cv = st.file_uploader(
            "**1. Životopis** (volitelné – PDF / DOCX / TXT)",
            type=["pdf", "docx", "txt"],
            help="CV pomůže lépe porozumět tvým dovednostem. PII (email, telefon) se automaticky odstraní.",
        )
        col1, col2 = st.columns([1, 2])
        with col1:
            # 2a. Current position
            current_role = st.text_input(
                "**2. Tvoje současná pozice** *",
                placeholder="např. Finanční analytik",
                help="Pozice, kterou aktuálně zastáváš. Vyloučíme ji (a její varianty CS/SK/EN) z doporučení.",
            )
        with col2:
            # 2b. Reason for the change
            reason = st.text_area(
                "**Proč hledáš změnu?** *",
                placeholder=(
                    "např. Chtěl bych se posunout víc k analytice dat, "
                    "baví mě Python a chci práci s většími datasety…"
                ),
                height=100,
            )
        # 3. Extras
        extras = st.text_area(
            "**3. Cokoliv dodat?** (volitelné)",
            placeholder=(
                "Koníčky, silné stránky, preference (remote/hybrid), "
                "zájmové oblasti (AI, ESG, fintech…), jazykové schopnosti…"
            ),
            height=80,
        )
        submit = st.form_submit_button("🔍 Najít pozice", type="primary", use_container_width=True)
    # ─── Processing ───
    if submit:
        # Validate required fields: the current role is mandatory, and at least
        # one of reason / CV / extras must be supplied.
        if not current_role.strip():
            st.error("Vyplň prosím svoji současnou pozici.")
            st.stop()
        if not reason.strip() and not uploaded_cv and not extras.strip():
            st.error(
                "Vyplň alespoň jedno z: důvod pro změnu, životopis, nebo dodatečné informace."
            )
            st.stop()
        # Extract CV text (stays empty when no file was uploaded)
        cv_text = ""
        if uploaded_cv:
            with st.spinner("Zpracovávám životopis..."):
                raw = extract_cv_text(
                    uploaded_cv.getvalue(),
                    uploaded_cv.name,
                    remove_personal=True,
                )
                cv_text = summarize_cv(raw, max_chars=1500)
            if cv_text:
                st.success(f"✅ CV načteno ({len(cv_text)} znaků)")
            else:
                st.warning("⚠️ Nepodařilo se extrahovat text z CV.")
        # Timer covers everything from variant lookup to response generation.
        start = time.time()
        # 1) Get multilingual variants of current_role
        with st.spinner("Zjišťuji varianty aktuální pozice (CS/SK/EN)..."):
            role_variants = get_role_variants(current_role, use_ollama=ollama_ok)
        # 2) Compute the exclusion ids
        with st.spinner("Filtruju aktuální pozici z kandidátů..."):
            exclusion_ids, exclusion_debug = compute_exclusion_ids(
                current_role_text=current_role,
                model=model,
                index=index,
                metadata=metadata,
                role_variants=role_variants,
                similarity_threshold=EXCLUSION_SIM_THRESHOLD,
            )
        # 3) Build the query text; build_search_query() uses current_role only
        #    as a fallback when reason, extras and CV text are all blank.
        search_query = build_search_query(
            current_role=current_role,
            reason=reason,
            extras=extras,
            cv_text=cv_text,
        )
        # 4) Profile synthesis (LLM when available)
        with st.spinner("Syntetizuji tvůj profil..."):
            profile_text = synthesize_profile(
                query=search_query,
                cv_text=cv_text,
                use_ollama=ollama_ok,
            )
        # 5) Index search with the exclusion filter; the synthesized profile,
        #    not the raw query, is what gets encoded.
        with st.spinner(f"Hledám pozice (vyloučeno {len(exclusion_ids)})..."):
            results = search_positions(
                profile_text,
                model, index, metadata,
                top_k=TOP_K_SEARCH,
                exclude_ids=exclusion_ids,
            )
        # 5b) Rescale scores for display (raw → user-friendly %). This happens
        #     before response generation, so the functions called in step 6
        #     receive the rescaled score, not the raw one.
        for r in results:
            r["raw_score"] = r["score"]  # keep the raw value for debug output
            r["score"] = display_score(r["raw_score"]) / 100.0  # replace with display value (0–1)
        # 6) Generate the textual recommendation
        with st.spinner("Generuji doporučení..."):
            if ollama_ok:
                response = generate_response(
                    profile=profile_text,
                    positions=results[:TOP_K_SHOW],
                    use_ollama=True,
                )
            else:
                response = _fallback_response(profile_text, results[:TOP_K_SHOW])
        elapsed = time.time() - start
        # ─── Show results ───
        st.divider()
        # Info: what was excluded
        with st.expander(f"🚫 Vyloučeno z doporučení ({len(exclusion_ids)} pozic)", expanded=False):
            st.markdown(
                f"Aktuální role **„{current_role}“** – detekovány varianty:"
            )
            st.code(" · ".join(role_variants), language=None)
            st.markdown(
                f"Níže je seznam pozic, které byly vyloučeny, "
                f"protože sémanticky odpovídají tvé aktuální pozici "
                f"(podobnost ≥ {EXCLUSION_SIM_THRESHOLD:.2f}) nebo titul obsahuje variantu role."
            )
            if exclusion_debug:
                # Sort by score descending; rows without a score (substring
                # hits) go last.
                exclusion_debug.sort(
                    key=lambda r: (r["score"] is None, -(r["score"] or 0))
                )
                # Only the first 50 rows are listed; the rest is summarized.
                for row in exclusion_debug[:50]:
                    if row["score"] is not None:
                        st.text(f" [{row['reason']:9s}] {row['score']:.3f} – {row['title']}")
                    else:
                        st.text(f" [{row['reason']:9s}] — – {row['title']}")
                if len(exclusion_debug) > 50:
                    st.caption(f"… a dalších {len(exclusion_debug) - 50}")
            else:
                st.caption("Nic nebylo vyloučeno (aktuální role se v indexu nevyskytuje).")
        # CV preview (first 1000 characters)
        if cv_text:
            with st.expander("📄 Extrakt z CV"):
                st.text(cv_text[:1000] + ("..." if len(cv_text) > 1000 else ""))
        # Profile (the text that was encoded for matching)
        with st.expander("🧠 Syntetizovaný profil (vstup do matching)", expanded=False):
            st.info(profile_text)
        # Main HR-style answer
        st.markdown("### 💬 Doporučení od AI")
        st.markdown(response)
        # Position cards
        if results:
            st.divider()
            st.markdown(f"### 📊 Top {min(TOP_K_SHOW, len(results))} pozic ({elapsed:.1f}s)")
            for pos in results[:TOP_K_SHOW]:
                score = pos["score"]  # already rescaled (0–1)
                raw = pos.get("raw_score")  # original search score
                # Badge by rescaled score (≥80 % excellent, 60–80 % good,
                # <60 % inspiration)
                if score >= 0.80:
                    badge_emoji = "🟢"
                    badge_label = "Výborná shoda"
                elif score >= 0.60:
                    badge_emoji = "🟡"
                    badge_label = "Dobrá shoda"
                else:
                    badge_emoji = "🔵"
                    badge_label = "Zajímavá inspirace"
                header = f"{badge_emoji} #{pos['rank']} – {pos['title']} ({score:.0%})"
                if show_debug and raw is not None:
                    header += f" [raw={raw:.3f}]"
                # The two best-ranked cards start expanded.
                with st.expander(header, expanded=(pos['rank'] <= 2)):
                    cols = st.columns([3, 1])
                    with cols[0]:
                        st.markdown(f"**{pos['title']}** · {badge_label}")
                        desc = pos.get("description", "")
                        if desc:
                            st.write(desc[:400] + ("..." if len(desc) > 400 else ""))
                    with cols[1]:
                        st.metric("Shoda", f"{score:.0%}")
                        if show_debug and raw is not None:
                            st.caption(f"raw cos: {raw:.3f}")
                    # Metadata line: only the fields present on the position
                    meta_parts = []
                    if pos.get("professions"):
                        meta_parts.append(f"**Profese:** {', '.join(pos['professions'])}")
                    if pos.get("fields"):
                        meta_parts.append(f"**Obor:** {', '.join(pos['fields'])}")
                    if pos.get("location"):
                        meta_parts.append(f"**Lokace:** {pos['location']}")
                    if pos.get("employment_types"):
                        meta_parts.append(f"**Úvazek:** {', '.join(pos['employment_types'])}")
                    if pos.get("suitable_for_graduate"):
                        meta_parts.append("✅ Vhodné pro absolventy")
                    if pos.get("url"):
                        meta_parts.append(f"[🔗 Odkaz na pozici]({pos['url']})")
                    if meta_parts:
                        st.markdown(" · ".join(meta_parts))
            # Debug table with raw scores
            if show_debug:
                st.divider()
                st.markdown("#### 🔧 Debug – raw cosine similarity")
                debug_rows = [
                    {
                        "rank": r["rank"],
                        "title": r["title"][:60],
                        "raw_cos": round(r.get("raw_score", 0), 4),
                        "display_%": f"{r['score']*100:.1f}%",
                    }
                    for r in results[:TOP_K_SHOW]
                ]
                st.table(debug_rows)
                st.caption(
                    f"Mapování: raw {DISPLAY_MIN_RAW:.2f} → {DISPLAY_MIN_PCT:.0f} % | "
                    f"raw {DISPLAY_MAX_RAW:.2f} → {DISPLAY_MAX_PCT:.0f} % (lineárně, clamp 20–99 %)"
                )
        else:
            # Nothing left after filtering out the excluded ids.
            st.warning(
                "Po odfiltrování aktuální pozice nezbyly žádné kandidátní pozice. "
                "Zkus upřesnit dotaz, rozšířit popis, nebo snížit práh exclusion filtru."
            )
# Script entry point: render the app when this file is executed directly.
if __name__ == "__main__":
    main()