Spaces:

Rogersurf
/

hrhub-final

Sleeping

App Files Files Community

Roger Surf committed on Jan 13

Commit

3055aad

1 Parent(s): 9431e4e

Company View refactored

Browse files

Files changed (1) hide show

pages/5_🏢_Company_View.py +139 -18

pages/5_🏢_Company_View.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import streamlit as st
 import numpy as np
 import pandas as pd
 import os
 from sklearn.metrics.pairwise import cosine_similarity
-from dotenv import load_dotenv
-from utils.embeddings import load_production_artifacts
-load_dotenv()
 # =========================================================
 # PAGE CONFIG
@@ -19,16 +18,53 @@ st.set_page_config(
 )
 # =========================================================
-# LOAD DATA (DATASET – PRODUCTION)
 # =========================================================
-@st.cache_resource(show_spinner=False)
 def load_core():
-    return load_production_artifacts()
 candidate_embeddings, company_embeddings, candidates_meta, companies_meta = load_core()
 # =========================================================
-# FAIRNESS
 # =========================================================
 def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=10, sample_size=100):
     n_cand = min(sample_size, len(candidate_embeddings))
@@ -44,17 +80,74 @@ def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=1
         sims = cosine_similarity(company_embeddings[j].reshape(1, -1), candidate_embeddings[:n_cand])[0]
         comp_scores.extend(np.sort(sims)[-top_k:])
-    c_mean = float(np.mean(cand_scores))
-    co_mean = float(np.mean(comp_scores))
-    fairness = min(c_mean, co_mean) / max(c_mean, co_mean)
-    return c_mean, co_mean, fairness
 @st.cache_data(show_spinner=False)
 def cached_fairness(candidate_embeddings, company_embeddings, top_k):
     return compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k)
 # =========================================================
 # HEADER
@@ -92,10 +185,10 @@ with left:
     st.markdown(f"**Name:** {company.get('name','Unknown')}")
     with st.expander("🏭 Industry", expanded=True):
-        st.write(company.get("industries_list", "N/A"))
     with st.expander("🧠 Required Skills", expanded=True):
-        st.write(company.get("required_skills", "N/A"))
 # =========================================================
 # MATCHING
@@ -111,7 +204,7 @@ for rank, (idx, score) in enumerate(zip(top_idx, top_scores), start=1):
     cand = candidates_meta.iloc[idx]
     rows.append({
         "Rank": rank,
-        "Category": cand.get("Category", "N/A"),
         "Score": score
     })
@@ -128,8 +221,6 @@ with right:
     m2.metric("Average Score", f"{df.Score.mean():.3f}")
     m3.metric("Strong Matches", int((df.Score > threshold).sum()))
-    st.subheader("👤 Top Candidate Matches")
     def style_score(v):
         return "color: green; font-weight: bold;" if v > threshold else ""
@@ -139,7 +230,7 @@ with right:
     )
 # =========================================================
-# FAIRNESS PANEL
 # =========================================================
 st.markdown("---")
 st.subheader("⚖️ Bilateral Fairness (Top-K)")
@@ -155,6 +246,36 @@ c1.metric("Candidate → Company", f"{cand_mean:.3f}")
 c2.metric("Company → Candidate", f"{comp_mean:.3f}")
 c3.metric("Fairness Ratio", f"{fairness:.3f}")
 # =========================================================
 # FOOTER
 # =========================================================

 import streamlit as st
 import numpy as np
 import pandas as pd
+import pickle
 import os
+import json
 from sklearn.metrics.pairwise import cosine_similarity
+from huggingface_hub import hf_hub_download, InferenceClient
 # =========================================================
 # PAGE CONFIG
 )
 # =========================================================
+# HF ARTIFACT CONFIG (SAME AS CANDIDATE VIEW)
 # =========================================================
+DATASET_REPO = "Rogersurf/hrhub-artifacts"
+PROCESSED_DIR = "processed"
+# =========================================================
+# LOAD DATA (HF ARTIFACTS – SAME STANDARD)
+# =========================================================
+@st.cache_resource(show_spinner=True)
 def load_core():
+    cand_emb_path = hf_hub_download(
+        repo_id=DATASET_REPO,
+        filename=f"{PROCESSED_DIR}/candidate_embeddings.npy",
+        repo_type="dataset"
+    )
+    comp_emb_path = hf_hub_download(
+        repo_id=DATASET_REPO,
+        filename=f"{PROCESSED_DIR}/company_embeddings.npy",
+        repo_type="dataset"
+    )
+    cand_meta_path = hf_hub_download(
+        repo_id=DATASET_REPO,
+        filename=f"{PROCESSED_DIR}/candidates_metadata.pkl",
+        repo_type="dataset"
+    )
+    comp_meta_path = hf_hub_download(
+        repo_id=DATASET_REPO,
+        filename=f"{PROCESSED_DIR}/companies_metadata.pkl",
+        repo_type="dataset"
+    )
+    candidate_embeddings = np.load(cand_emb_path)
+    company_embeddings = np.load(comp_emb_path)
+    candidates_meta = pickle.load(open(cand_meta_path, "rb"))
+    companies_meta = pickle.load(open(comp_meta_path, "rb"))
+    return (
+        candidate_embeddings,
+        company_embeddings,
+        candidates_meta,
+        companies_meta
+    )
 candidate_embeddings, company_embeddings, candidates_meta, companies_meta = load_core()
 # =========================================================
+# FAIRNESS (UNCHANGED)
 # =========================================================
 def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=10, sample_size=100):
     n_cand = min(sample_size, len(candidate_embeddings))
         sims = cosine_similarity(company_embeddings[j].reshape(1, -1), candidate_embeddings[:n_cand])[0]
         comp_scores.extend(np.sort(sims)[-top_k:])
+    cand_mean = float(np.mean(cand_scores))
+    comp_mean = float(np.mean(comp_scores))
+    fairness = min(cand_mean, comp_mean) / max(cand_mean, comp_mean)
+    return cand_mean, comp_mean, fairness
 @st.cache_data(show_spinner=False)
 def cached_fairness(candidate_embeddings, company_embeddings, top_k):
     """Cached wrapper around ``compute_bilateral_fairness``.

     Forwards both embedding collections and ``top_k``; ``sample_size`` is
     left at the wrapped function's default. Returns whatever
     ``compute_bilateral_fairness`` returns.
     """
     return compute_bilateral_fairness(
         candidate_embeddings,
         company_embeddings,
         top_k,
     )
+# =========================================================
+# LLM CLIENT (SAME AS CANDIDATE VIEW)
+# =========================================================
+@st.cache_resource(show_spinner=False)
+def get_llm_client():
+    token = os.getenv("HF_TOKEN")
+    if not token:
+        return None
+    return InferenceClient(token=token)
+# =========================================================
+# LLM EXPLANATION (COMPANY → CANDIDATE)
+# =========================================================
+def explain_match_llm(company_row, candidate_row, score):
+    client = get_llm_client()
+    if client is None:
+        return {
+            "summary": "LLM not enabled (HF_TOKEN not set).",
+            "strengths": [],
+            "gaps": [],
+            "recommendation": "Add HF_TOKEN to enable AI explanations."
+        }
+    prompt = f"""
+You are an HR analyst.
+Explain why the following candidate is a good match for the company.
+COMPANY:
+Name: {company_row.get('name','')}
+Industry: {company_row.get('industries_list','')}
+Required Skills: {company_row.get('required_skills','')}
+CANDIDATE:
+Category: {candidate_row.get('Category','')}
+Skills: {candidate_row.get('skills','')}
+Career Objective: {candidate_row.get('career_objective','')}
+MATCH SCORE: {score:.3f}
+Return JSON with:
+- summary
+- strengths
+- gaps
+- recommendation
+"""
+    response = client.chat_completion(
+        model="meta-llama/Llama-3.2-3B-Instruct",
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=400
+    )
+    content = response.choices[0].message.content
+    start, end = content.find("{"), content.rfind("}") + 1
+    return json.loads(content[start:end])
 # =========================================================
 # HEADER
     st.markdown(f"**Name:** {company.get('name','Unknown')}")
     with st.expander("🏭 Industry", expanded=True):
+        st.write(company.get("industries_list","N/A"))
     with st.expander("🧠 Required Skills", expanded=True):
+        st.write(company.get("required_skills","N/A"))
 # =========================================================
 # MATCHING
     cand = candidates_meta.iloc[idx]
     rows.append({
         "Rank": rank,
+        "Category": cand.get("Category","N/A"),
         "Score": score
     })
     m2.metric("Average Score", f"{df.Score.mean():.3f}")
     m3.metric("Strong Matches", int((df.Score > threshold).sum()))
     def style_score(v):
         return "color: green; font-weight: bold;" if v > threshold else ""
     )
 # =========================================================
+# FAIRNESS
 # =========================================================
 st.markdown("---")
 st.subheader("⚖️ Bilateral Fairness (Top-K)")
 c2.metric("Company → Candidate", f"{comp_mean:.3f}")
 c3.metric("Fairness Ratio", f"{fairness:.3f}")
+# =========================================================
+# LLM EXPLANATION
+# =========================================================
+st.markdown("---")
+st.subheader("🤖 Match Explanation (LLM)")
+with st.expander("Why is this candidate a good match?", expanded=True):
+    if st.button("Generate AI Explanation"):
+        explanation = explain_match_llm(
+            company,
+            candidates_meta.iloc[top_idx[0]],
+            top_scores[0]
+        )
+        st.markdown(f"**Summary:** {explanation.get('summary','')}")
+        c1, c2 = st.columns(2)
+        with c1:
+            st.markdown("### ✅ Strengths")
+            for s in explanation.get("strengths", []):
+                st.write(f"- {s}")
+        with c2:
+            st.markdown("### ⚠️ Gaps")
+            for g in explanation.get("gaps", []):
+                st.write(f"- {g}")
+        st.markdown(
+            f"### 🧭 Recommendation\n**{explanation.get('recommendation','')}**"
+        )
 # =========================================================
 # FOOTER
 # =========================================================