Roger Surf committed on
Commit
3055aad
·
1 Parent(s): 9431e4e

Company View refactored

Browse files
Files changed (1) hide show
  1. pages/5_🏢_Company_View.py +139 -18
pages/5_🏢_Company_View.py CHANGED
@@ -1,13 +1,12 @@
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
 
4
  import os
 
5
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
- from dotenv import load_dotenv
8
- from utils.embeddings import load_production_artifacts
9
-
10
- load_dotenv()
11
 
12
  # =========================================================
13
  # PAGE CONFIG
@@ -19,16 +18,53 @@ st.set_page_config(
19
  )
20
 
21
  # =========================================================
22
- # LOAD DATA (DATASET PRODUCTION)
23
  # =========================================================
24
- @st.cache_resource(show_spinner=False)
 
 
 
 
 
 
25
  def load_core():
26
- return load_production_artifacts()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  candidate_embeddings, company_embeddings, candidates_meta, companies_meta = load_core()
29
 
30
  # =========================================================
31
- # FAIRNESS
32
  # =========================================================
33
  def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=10, sample_size=100):
34
  n_cand = min(sample_size, len(candidate_embeddings))
@@ -44,17 +80,74 @@ def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=1
44
  sims = cosine_similarity(company_embeddings[j].reshape(1, -1), candidate_embeddings[:n_cand])[0]
45
  comp_scores.extend(np.sort(sims)[-top_k:])
46
 
47
- c_mean = float(np.mean(cand_scores))
48
- co_mean = float(np.mean(comp_scores))
49
- fairness = min(c_mean, co_mean) / max(c_mean, co_mean)
50
 
51
- return c_mean, co_mean, fairness
52
 
53
 
54
  @st.cache_data(show_spinner=False)
55
  def cached_fairness(candidate_embeddings, company_embeddings, top_k):
56
  return compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k)
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  # =========================================================
60
  # HEADER
@@ -92,10 +185,10 @@ with left:
92
  st.markdown(f"**Name:** {company.get('name','Unknown')}")
93
 
94
  with st.expander("🏭 Industry", expanded=True):
95
- st.write(company.get("industries_list", "N/A"))
96
 
97
  with st.expander("🧠 Required Skills", expanded=True):
98
- st.write(company.get("required_skills", "N/A"))
99
 
100
  # =========================================================
101
  # MATCHING
@@ -111,7 +204,7 @@ for rank, (idx, score) in enumerate(zip(top_idx, top_scores), start=1):
111
  cand = candidates_meta.iloc[idx]
112
  rows.append({
113
  "Rank": rank,
114
- "Category": cand.get("Category", "N/A"),
115
  "Score": score
116
  })
117
 
@@ -128,8 +221,6 @@ with right:
128
  m2.metric("Average Score", f"{df.Score.mean():.3f}")
129
  m3.metric("Strong Matches", int((df.Score > threshold).sum()))
130
 
131
- st.subheader("👤 Top Candidate Matches")
132
-
133
  def style_score(v):
134
  return "color: green; font-weight: bold;" if v > threshold else ""
135
 
@@ -139,7 +230,7 @@ with right:
139
  )
140
 
141
  # =========================================================
142
- # FAIRNESS PANEL
143
  # =========================================================
144
  st.markdown("---")
145
  st.subheader("⚖️ Bilateral Fairness (Top-K)")
@@ -155,6 +246,36 @@ c1.metric("Candidate → Company", f"{cand_mean:.3f}")
155
  c2.metric("Company → Candidate", f"{comp_mean:.3f}")
156
  c3.metric("Fairness Ratio", f"{fairness:.3f}")
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  # =========================================================
159
  # FOOTER
160
  # =========================================================
 
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
4
+ import pickle
5
  import os
6
+ import json
7
 
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ from huggingface_hub import hf_hub_download, InferenceClient
 
 
 
10
 
11
  # =========================================================
12
  # PAGE CONFIG
 
18
  )
19
 
20
  # =========================================================
21
+ # HF ARTIFACT CONFIG (SAME AS CANDIDATE VIEW)
22
  # =========================================================
23
+ DATASET_REPO = "Rogersurf/hrhub-artifacts"
24
+ PROCESSED_DIR = "processed"
25
+
26
+ # =========================================================
27
+ # LOAD DATA (HF ARTIFACTS – SAME STANDARD)
28
+ # =========================================================
29
@st.cache_resource(show_spinner=True)
def load_core():
    """Download and load the matching artifacts from the Hugging Face dataset repo.

    Four files are fetched from ``DATASET_REPO`` under ``PROCESSED_DIR``:
    the candidate and company embedding arrays (``.npy``) and the candidate
    and company metadata objects (``.pkl``).

    Returns:
        A 4-tuple ``(candidate_embeddings, company_embeddings,
        candidates_meta, companies_meta)``. The embeddings are whatever
        ``np.load`` yields for the ``.npy`` files; the metadata objects are
        whatever was pickled (presumably pandas DataFrames, since the page
        indexes them with ``.iloc`` -- confirm against the artifact producer).
    """

    def _fetch(name):
        # Resolve one artifact inside the processed folder to a local path.
        return hf_hub_download(
            repo_id=DATASET_REPO,
            filename=f"{PROCESSED_DIR}/{name}",
            repo_type="dataset",
        )

    def _unpickle(path):
        # NOTE(security): pickle executes arbitrary code on load. This is only
        # acceptable while DATASET_REPO is a repository the app owner controls.
        # The context manager guarantees the handle is closed (the previous
        # `pickle.load(open(...))` form leaked it).
        with open(path, "rb") as handle:
            return pickle.load(handle)

    # Download everything first, in the same order as before, then load.
    cand_emb_path = _fetch("candidate_embeddings.npy")
    comp_emb_path = _fetch("company_embeddings.npy")
    cand_meta_path = _fetch("candidates_metadata.pkl")
    comp_meta_path = _fetch("companies_metadata.pkl")

    candidate_embeddings = np.load(cand_emb_path)
    company_embeddings = np.load(comp_emb_path)
    candidates_meta = _unpickle(cand_meta_path)
    companies_meta = _unpickle(comp_meta_path)

    return (
        candidate_embeddings,
        company_embeddings,
        candidates_meta,
        companies_meta,
    )
63
 
64
  candidate_embeddings, company_embeddings, candidates_meta, companies_meta = load_core()
65
 
66
  # =========================================================
67
+ # FAIRNESS (UNCHANGED)
68
  # =========================================================
69
  def compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k=10, sample_size=100):
70
  n_cand = min(sample_size, len(candidate_embeddings))
 
80
  sims = cosine_similarity(company_embeddings[j].reshape(1, -1), candidate_embeddings[:n_cand])[0]
81
  comp_scores.extend(np.sort(sims)[-top_k:])
82
 
83
+ cand_mean = float(np.mean(cand_scores))
84
+ comp_mean = float(np.mean(comp_scores))
85
+ fairness = min(cand_mean, comp_mean) / max(cand_mean, comp_mean)
86
 
87
+ return cand_mean, comp_mean, fairness
88
 
89
 
90
  @st.cache_data(show_spinner=False)
91
  def cached_fairness(candidate_embeddings, company_embeddings, top_k):
92
  return compute_bilateral_fairness(candidate_embeddings, company_embeddings, top_k)
93
 
94
+ # =========================================================
95
+ # LLM CLIENT (SAME AS CANDIDATE VIEW)
96
+ # =========================================================
97
@st.cache_resource(show_spinner=False)
def get_llm_client():
    """Build the Hugging Face inference client from the ``HF_TOKEN`` env var.

    Returns:
        An ``InferenceClient`` authenticated with the token, or ``None`` when
        ``HF_TOKEN`` is unset or empty.
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(token=hf_token) if hf_token else None
103
+
104
+ # =========================================================
105
+ # LLM EXPLANATION (COMPANY → CANDIDATE)
106
+ # =========================================================
107
def explain_match_llm(company_row, candidate_row, score):
    """Ask the LLM to explain why a candidate matches a company.

    Args:
        company_row: Mapping-like company record; read via ``.get`` for
            ``name``, ``industries_list`` and ``required_skills``.
        candidate_row: Mapping-like candidate record; read via ``.get`` for
            ``Category``, ``skills`` and ``career_objective``.
        score: Numeric match score, formatted with three decimals in the prompt.

    Returns:
        A dict with the keys the UI reads: ``summary``, ``strengths``,
        ``gaps`` and ``recommendation``. ``strengths`` and ``gaps`` are always
        lists. If no client is configured, or the model reply contains no
        parseable JSON object, a fallback dict is returned instead of raising.

    Errors raised by the inference call itself are not caught here.
    """
    client = get_llm_client()

    if client is None:
        return {
            "summary": "LLM not enabled (HF_TOKEN not set).",
            "strengths": [],
            "gaps": [],
            "recommendation": "Add HF_TOKEN to enable AI explanations."
        }

    prompt = f"""
You are an HR analyst.

Explain why the following candidate is a good match for the company.

COMPANY:
Name: {company_row.get('name','')}
Industry: {company_row.get('industries_list','')}
Required Skills: {company_row.get('required_skills','')}

CANDIDATE:
Category: {candidate_row.get('Category','')}
Skills: {candidate_row.get('skills','')}
Career Objective: {candidate_row.get('career_objective','')}

MATCH SCORE: {score:.3f}

Return JSON with:
- summary
- strengths
- gaps
- recommendation
"""

    response = client.chat_completion(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=400
    )

    content = response.choices[0].message.content or ""

    # The model may wrap the JSON in prose; keep the outermost {...} span.
    start, end = content.find("{"), content.rfind("}") + 1

    explanation = None
    if 0 <= start < end:
        try:
            explanation = json.loads(content[start:end])
        except json.JSONDecodeError:
            # Truncated or malformed JSON (e.g. cut off by max_tokens).
            explanation = None

    if not isinstance(explanation, dict):
        # Degrade gracefully: show the raw reply rather than crash the page.
        return {
            "summary": content.strip() or "The model returned an empty response.",
            "strengths": [],
            "gaps": [],
            "recommendation": "",
        }

    # The UI iterates these two fields; a bare string would be rendered one
    # character at a time and a null would raise, so coerce both to lists.
    for key in ("strengths", "gaps"):
        value = explanation.get(key)
        if value is None:
            explanation[key] = []
        elif not isinstance(value, list):
            explanation[key] = [value]

    return explanation
151
 
152
  # =========================================================
153
  # HEADER
 
185
  st.markdown(f"**Name:** {company.get('name','Unknown')}")
186
 
187
  with st.expander("🏭 Industry", expanded=True):
188
+ st.write(company.get("industries_list","N/A"))
189
 
190
  with st.expander("🧠 Required Skills", expanded=True):
191
+ st.write(company.get("required_skills","N/A"))
192
 
193
  # =========================================================
194
  # MATCHING
 
204
  cand = candidates_meta.iloc[idx]
205
  rows.append({
206
  "Rank": rank,
207
+ "Category": cand.get("Category","N/A"),
208
  "Score": score
209
  })
210
 
 
221
  m2.metric("Average Score", f"{df.Score.mean():.3f}")
222
  m3.metric("Strong Matches", int((df.Score > threshold).sum()))
223
 
 
 
224
  def style_score(v):
225
  return "color: green; font-weight: bold;" if v > threshold else ""
226
 
 
230
  )
231
 
232
  # =========================================================
233
+ # FAIRNESS
234
  # =========================================================
235
  st.markdown("---")
236
  st.subheader("⚖️ Bilateral Fairness (Top-K)")
 
246
  c2.metric("Company → Candidate", f"{comp_mean:.3f}")
247
  c3.metric("Fairness Ratio", f"{fairness:.3f}")
248
 
249
+ # =========================================================
250
+ # LLM EXPLANATION
251
+ # =========================================================
252
+ st.markdown("---")
253
+ st.subheader("🤖 Match Explanation (LLM)")
254
+
255
+ with st.expander("Why is this candidate a good match?", expanded=True):
256
+ if st.button("Generate AI Explanation"):
257
+ explanation = explain_match_llm(
258
+ company,
259
+ candidates_meta.iloc[top_idx[0]],
260
+ top_scores[0]
261
+ )
262
+
263
+ st.markdown(f"**Summary:** {explanation.get('summary','')}")
264
+
265
+ c1, c2 = st.columns(2)
266
+ with c1:
267
+ st.markdown("### ✅ Strengths")
268
+ for s in explanation.get("strengths", []):
269
+ st.write(f"- {s}")
270
+ with c2:
271
+ st.markdown("### ⚠️ Gaps")
272
+ for g in explanation.get("gaps", []):
273
+ st.write(f"- {g}")
274
+
275
+ st.markdown(
276
+ f"### 🧭 Recommendation\n**{explanation.get('recommendation','')}**"
277
+ )
278
+
279
  # =========================================================
280
  # FOOTER
281
  # =========================================================