Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,43 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
-
import traceback
|
| 5 |
from pathlib import Path
|
| 6 |
-
from typing import Dict,
|
| 7 |
|
| 8 |
import pandas as pd
|
| 9 |
import gradio as gr
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
import plotly.express as px
|
| 12 |
|
| 13 |
-
# Optional LLM (HuggingFace Inference API)
|
| 14 |
-
try:
|
| 15 |
-
from huggingface_hub import InferenceClient
|
| 16 |
-
except Exception:
|
| 17 |
-
InferenceClient = None
|
| 18 |
-
|
| 19 |
# =========================================================
|
| 20 |
# CONFIG
|
| 21 |
# =========================================================
|
| 22 |
|
| 23 |
BASE_DIR = Path(__file__).resolve().parent
|
| 24 |
DATA_FILE = BASE_DIR / "job_description_data.xlsx"
|
| 25 |
-
|
| 26 |
-
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
|
| 27 |
-
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
|
| 28 |
-
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
|
| 29 |
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
|
| 30 |
|
| 31 |
-
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
|
| 32 |
-
llm_client = (
|
| 33 |
-
InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
|
| 34 |
-
if LLM_ENABLED
|
| 35 |
-
else None
|
| 36 |
-
)
|
| 37 |
-
|
| 38 |
# =========================================================
|
| 39 |
-
# RED FLAG TAXONOMY
|
| 40 |
-
# Positive weights = red flags; negative weights = positive signals
|
| 41 |
# =========================================================
|
| 42 |
|
| 43 |
RED_FLAGS = [
|
|
@@ -53,19 +38,16 @@ RED_FLAGS = [
|
|
| 53 |
("broad / unclear scope", +5, ["other duties", "as needed", "various tasks", "wide range of responsibilities"]),
|
| 54 |
("multitasking / many hats", +5, ["multitask", "juggle", "multiple roles"]),
|
| 55 |
("training / support provided", -8, ["training provided", "mentorship", "onboarding", "support and training", "we will train"]),
|
| 56 |
-
("salary clearly specified", -6, ["salary:", "
|
| 57 |
("clear role structure", -5, ["responsibilities include", "your missions", "main tasks", "key responsibilities"]),
|
| 58 |
("benefits clearly mentioned", -4, ["health insurance", "paid leave", "meal vouchers", "transport", "benefits include", "profit-sharing"]),
|
| 59 |
]
|
| 60 |
|
| 61 |
-
CHART_PALETTE = ["#34d399", "#60a5fa", "#f472b6", "#fbbf24", "#a78bfa",
|
| 62 |
-
"#22d3ee", "#fb7185", "#84cc16", "#f97316", "#e879f9"]
|
| 63 |
-
|
| 64 |
# =========================================================
|
| 65 |
# DATA LOADING
|
| 66 |
# =========================================================
|
| 67 |
|
| 68 |
-
def load_dataset()
|
| 69 |
if not DATA_FILE.exists():
|
| 70 |
return pd.DataFrame()
|
| 71 |
try:
|
|
@@ -73,12 +55,10 @@ def load_dataset() -> pd.DataFrame:
|
|
| 73 |
except Exception:
|
| 74 |
return pd.DataFrame()
|
| 75 |
|
| 76 |
-
|
| 77 |
DF = load_dataset()
|
| 78 |
|
| 79 |
|
| 80 |
-
def extract_flag_labels(red_flags_cell
|
| 81 |
-
"""Parse 'label (+10), label2 (-5)' into [(label, weight)]."""
|
| 82 |
if not isinstance(red_flags_cell, str):
|
| 83 |
return []
|
| 84 |
out = []
|
|
@@ -90,10 +70,10 @@ def extract_flag_labels(red_flags_cell: str) -> List[Tuple[str, int]]:
|
|
| 90 |
|
| 91 |
|
| 92 |
# =========================================================
|
| 93 |
-
# CORE: ANALYZE
|
| 94 |
# =========================================================
|
| 95 |
|
| 96 |
-
def classify_risk(score
|
| 97 |
if score < 12:
|
| 98 |
return "Low", "π’"
|
| 99 |
if score < 25:
|
|
@@ -101,10 +81,9 @@ def classify_risk(score: float) -> Tuple[str, str]:
|
|
| 101 |
return "High", "π΄"
|
| 102 |
|
| 103 |
|
| 104 |
-
def analyze_job(text
|
| 105 |
if not text or len(text.strip()) < 30:
|
| 106 |
-
return
|
| 107 |
-
0, "β", _empty_chart("Paste a job description above"))
|
| 108 |
|
| 109 |
lower = text.lower()
|
| 110 |
detected = []
|
|
@@ -115,21 +94,20 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
|
|
| 115 |
score += weight
|
| 116 |
|
| 117 |
risk, emoji = classify_risk(score)
|
| 118 |
-
|
| 119 |
-
md = f"## {emoji} Risk: **{risk}** | Score: **{score}**\n\n"
|
| 120 |
if not detected:
|
| 121 |
-
md += "_No clear red or positive signals detected.
|
| 122 |
else:
|
| 123 |
bad = [(l, w) for l, w in detected if w > 0]
|
| 124 |
good = [(l, w) for l, w in detected if w < 0]
|
| 125 |
if bad:
|
| 126 |
md += "### π© Red flags detected\n"
|
| 127 |
for l, w in bad:
|
| 128 |
-
md +=
|
| 129 |
if good:
|
| 130 |
md += "\n### β
Positive signals detected\n"
|
| 131 |
for l, w in good:
|
| 132 |
-
md +=
|
| 133 |
|
| 134 |
if detected:
|
| 135 |
cdf = pd.DataFrame(detected, columns=["Signal", "Weight"])
|
|
@@ -137,7 +115,7 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
|
|
| 137 |
fig = px.bar(cdf, x="Weight", y="Signal", color="Type", orientation="h",
|
| 138 |
color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"},
|
| 139 |
title="Signal breakdown")
|
| 140 |
-
fig.update_layout(**_styled_layout(height=420
|
| 141 |
else:
|
| 142 |
fig = _empty_chart("No signals to chart")
|
| 143 |
|
|
@@ -145,50 +123,33 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
|
|
| 145 |
|
| 146 |
|
| 147 |
# =========================================================
|
| 148 |
-
#
|
| 149 |
# =========================================================
|
| 150 |
|
| 151 |
-
def _styled_layout(**kwargs)
|
| 152 |
defaults = dict(
|
| 153 |
template="plotly_white",
|
| 154 |
paper_bgcolor="#fdfaf3",
|
| 155 |
plot_bgcolor="#fdfaf3",
|
| 156 |
-
font=dict(family="
|
| 157 |
margin=dict(l=60, r=20, t=70, b=70),
|
| 158 |
-
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
|
| 159 |
-
bgcolor="rgba(253,250,243,0.9)",
|
| 160 |
-
bordercolor="#d9cfb9", borderwidth=1,
|
| 161 |
-
font=dict(color="#4a5475", size=11)),
|
| 162 |
-
title=dict(font=dict(size=14, color="#1a2238", family="Geist, system-ui, sans-serif")),
|
| 163 |
-
xaxis=dict(gridcolor="#e6dcc7", zerolinecolor="#d9cfb9",
|
| 164 |
-
tickfont=dict(color="#4a5475", size=11),
|
| 165 |
-
title=dict(font=dict(color="#4a5475", size=12))),
|
| 166 |
-
yaxis=dict(gridcolor="#e6dcc7", zerolinecolor="#d9cfb9",
|
| 167 |
-
tickfont=dict(color="#4a5475", size=11),
|
| 168 |
-
title=dict(font=dict(color="#4a5475", size=12))),
|
| 169 |
)
|
| 170 |
defaults.update(kwargs)
|
| 171 |
return defaults
|
| 172 |
|
| 173 |
|
| 174 |
-
def _empty_chart(title
|
| 175 |
fig = go.Figure()
|
| 176 |
fig.update_layout(
|
| 177 |
title=title, height=420, template="plotly_white",
|
| 178 |
-
paper_bgcolor="#fdfaf3",
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
annotations=[dict(text="(no data available)", x=0.5, y=0.5, xref="paper", yref="paper",
|
| 182 |
-
showarrow=False, font=dict(size=13, color="#8a9099"))],
|
| 183 |
)
|
| 184 |
return fig
|
| 185 |
|
| 186 |
|
| 187 |
-
|
| 188 |
-
# DATASET INSIGHTS (charts from labeled XLSX)
|
| 189 |
-
# =========================================================
|
| 190 |
-
|
| 191 |
-
def build_flag_frequency_chart() -> go.Figure:
|
| 192 |
if DF.empty or "Red Flags" not in DF.columns:
|
| 193 |
return _empty_chart("Dataset not loaded")
|
| 194 |
all_flags = []
|
|
@@ -197,46 +158,32 @@ def build_flag_frequency_chart() -> go.Figure:
|
|
| 197 |
counts = pd.Series(all_flags).value_counts().head(12)
|
| 198 |
fig = go.Figure(go.Bar(
|
| 199 |
y=counts.index[::-1], x=counts.values[::-1], orientation="h",
|
| 200 |
-
marker=dict(color=
|
| 201 |
-
colorscale=[[0, "#f4b8b1"], [1, "#e85a4f"]]),
|
| 202 |
-
hovertemplate="<b>%{y}</b><br>Detected in %{x} jobs<extra></extra>",
|
| 203 |
))
|
| 204 |
-
fig.update_layout(**_styled_layout(
|
| 205 |
-
height=460, title=dict(text="Most Common Signals Across 47 Analyzed Jobs"),
|
| 206 |
-
showlegend=False))
|
| 207 |
-
fig.update_xaxes(title="Number of postings")
|
| 208 |
return fig
|
| 209 |
|
| 210 |
|
| 211 |
-
def build_risk_distribution_chart()
|
| 212 |
if DF.empty or "Risk Level" not in DF.columns:
|
| 213 |
return _empty_chart("Dataset not loaded")
|
| 214 |
counts = DF["Risk Level"].value_counts()
|
| 215 |
-
|
| 216 |
fig = go.Figure(go.Pie(
|
| 217 |
labels=counts.index, values=counts.values,
|
| 218 |
-
marker=dict(colors=[
|
| 219 |
-
hole=0.4,
|
| 220 |
))
|
| 221 |
-
fig.update_layout(**_styled_layout(
|
| 222 |
-
height=400, title=dict(text="Risk Level Distribution in Dataset")))
|
| 223 |
return fig
|
| 224 |
|
| 225 |
|
| 226 |
-
def build_score_distribution_chart()
|
| 227 |
if DF.empty or "Score" not in DF.columns:
|
| 228 |
return _empty_chart("Dataset not loaded")
|
| 229 |
scores = DF["Score"].dropna()
|
| 230 |
-
fig = go.Figure(go.Histogram(
|
| 231 |
-
|
| 232 |
-
marker_line_color="#c53030", marker_line_width=1,
|
| 233 |
-
hovertemplate="Score range: %{x}<br>Jobs: %{y}<extra></extra>",
|
| 234 |
-
))
|
| 235 |
-
fig.update_layout(**_styled_layout(
|
| 236 |
-
height=380, title=dict(text="Risk Score Distribution"),
|
| 237 |
-
bargap=0.05))
|
| 238 |
-
fig.update_xaxes(title="Risk score")
|
| 239 |
-
fig.update_yaxes(title="Number of jobs")
|
| 240 |
return fig
|
| 241 |
|
| 242 |
|
|
@@ -244,17 +191,9 @@ def build_score_distribution_chart() -> go.Figure:
|
|
| 244 |
# KPI CARDS
|
| 245 |
# =========================================================
|
| 246 |
|
| 247 |
-
def render_kpi_cards()
|
| 248 |
if DF.empty:
|
| 249 |
-
return
|
| 250 |
-
'border-radius:12px;border:1px solid #d9cfb9;">'
|
| 251 |
-
'<div style="font-family:\'Geist Mono\',monospace;font-size:11px;'
|
| 252 |
-
'color:#e85a4f;letter-spacing:0.08em;text-transform:uppercase;margin-bottom:12px;font-weight:600;">No Data</div>'
|
| 253 |
-
'<div style="color:#4a5475;font-size:14px;">'
|
| 254 |
-
'Upload <code style="background:#f1ebe0;color:#7d4e8a;padding:2px 6px;border-radius:4px;'
|
| 255 |
-
'font-family:\'Geist Mono\',monospace;font-size:0.85em;border:1px solid #e6dcc7;">'
|
| 256 |
-
'job_description_data.xlsx</code> to populate metrics.'
|
| 257 |
-
'</div></div>')
|
| 258 |
|
| 259 |
total_jobs = len(DF)
|
| 260 |
avg_score = DF["Score"].dropna().mean() if "Score" in DF.columns else 0
|
|
@@ -267,167 +206,99 @@ def render_kpi_cards() -> str:
|
|
| 267 |
all_flags.extend(label for label, _ in extract_flag_labels(str(cell)))
|
| 268 |
top_flag = pd.Series(all_flags).value_counts().index[0] if all_flags else "β"
|
| 269 |
|
| 270 |
-
def card(label, value,
|
| 271 |
-
return
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
<div style="font-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
<div style="color:#1a2238;font-size:34px;font-weight:700;line-height:1;
|
| 282 |
-
letter-spacing:-0.03em;margin-bottom:10px;
|
| 283 |
-
font-family:'Geist',-apple-system,system-ui,sans-serif;">
|
| 284 |
-
{value}
|
| 285 |
-
</div>
|
| 286 |
-
<div style="display:flex;align-items:center;gap:6px;
|
| 287 |
-
font-family:'Geist Mono',monospace;font-size:11px;color:#4a5475;">
|
| 288 |
-
<span style="display:inline-block;width:6px;height:6px;border-radius:50%;
|
| 289 |
-
background:{accent_color};box-shadow:0 0 8px {accent_color}80;"></span>
|
| 290 |
-
<span>{delta_text}</span>
|
| 291 |
-
</div>
|
| 292 |
-
</div>"""
|
| 293 |
|
| 294 |
cards = [
|
| 295 |
-
card("Total.Jobs",
|
| 296 |
-
card("Avg.Score",
|
| 297 |
-
card("High.Risk %",
|
| 298 |
-
card("Top.Signal",
|
| 299 |
top_flag if top_flag != "β" else "no data", "#7d4e8a"),
|
| 300 |
]
|
| 301 |
return ('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));'
|
| 302 |
-
'gap:12px;margin-bottom:
|
| 303 |
|
| 304 |
|
| 305 |
# =========================================================
|
| 306 |
-
#
|
| 307 |
# =========================================================
|
| 308 |
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
def _parse_directive(text: str) -> Dict[str, str]:
|
| 330 |
-
m = JSON_BLOCK_RE.search(text)
|
| 331 |
-
if m:
|
| 332 |
-
try:
|
| 333 |
-
return json.loads(m.group(1))
|
| 334 |
-
except json.JSONDecodeError:
|
| 335 |
-
pass
|
| 336 |
-
return {"show": "none"}
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
def _clean_response(text: str) -> str:
|
| 340 |
-
return JSON_BLOCK_RE.sub("", text).strip()
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
def _n8n_call(msg: str) -> Tuple[str, Dict]:
|
| 344 |
-
import requests as req
|
| 345 |
try:
|
| 346 |
-
|
| 347 |
-
data =
|
| 348 |
-
|
| 349 |
-
chart = data.get("chart", "none")
|
| 350 |
-
return answer, {"show": chart}
|
| 351 |
except Exception as e:
|
| 352 |
-
|
|
|
|
| 353 |
|
| 354 |
|
| 355 |
-
def
|
| 356 |
-
m = msg.lower()
|
| 357 |
-
if any(w in m for w in ["common", "frequent", "most", "top flag", "patterns"]):
|
| 358 |
-
return ("The most common signals across our 47 analyzed postings are below. "
|
| 359 |
-
"Notice how 'high responsibility early', 'technical complexity', and "
|
| 360 |
-
"'clear role structure' dominate β they appear in nearly every posting.",
|
| 361 |
-
{"show": "flag_frequency"})
|
| 362 |
-
if any(w in m for w in ["risk", "distribution", "level", "low", "medium", "high"]):
|
| 363 |
-
return ("Here is the risk-level breakdown across our dataset. "
|
| 364 |
-
"Most jobs land in the Medium tier; a smaller share are flagged High.",
|
| 365 |
-
{"show": "risk_distribution"})
|
| 366 |
-
if any(w in m for w in ["score", "histogram", "spread", "average"]):
|
| 367 |
-
return ("Risk scores cluster mostly between 10 and 30. "
|
| 368 |
-
"Anything above 25 is classified as High-risk.",
|
| 369 |
-
{"show": "score_distribution"})
|
| 370 |
-
if any(w in m for w in ["how", "work", "method", "explain"]):
|
| 371 |
-
return ("The app detects 15 weighted signals in any pasted job description. "
|
| 372 |
-
"Red flags add to the score (e.g. +10 for 'high responsibility early'), "
|
| 373 |
-
"positive signals subtract (e.g. -8 for 'training provided'). "
|
| 374 |
-
"The total maps to Low / Medium / High risk.",
|
| 375 |
-
{"show": "none"})
|
| 376 |
-
return ("Try asking about: **most common red flags**, **risk distribution**, "
|
| 377 |
-
"**score spread**, or **how the analyzer works**.",
|
| 378 |
-
{"show": "none"})
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
def ai_chat(user_msg: str, history: list):
|
| 382 |
if not user_msg or not user_msg.strip():
|
| 383 |
-
return history, "", None
|
| 384 |
|
| 385 |
if N8N_WEBHOOK_URL:
|
| 386 |
-
reply,
|
| 387 |
-
if directive is None:
|
| 388 |
-
reply_fb, directive = _keyword_fallback(user_msg)
|
| 389 |
-
reply += "\n\n" + reply_fb
|
| 390 |
-
elif LLM_ENABLED:
|
| 391 |
-
msgs = [{"role": "system", "content": DASHBOARD_SYSTEM}]
|
| 392 |
-
for user_turn, bot_turn in (history or [])[-3:]:
|
| 393 |
-
msgs.append({"role": "user", "content": user_turn})
|
| 394 |
-
msgs.append({"role": "assistant", "content": bot_turn})
|
| 395 |
-
msgs.append({"role": "user", "content": user_msg})
|
| 396 |
-
try:
|
| 397 |
-
r = llm_client.chat_completion(model=MODEL_NAME, messages=msgs,
|
| 398 |
-
temperature=0.3, max_tokens=500, stream=False)
|
| 399 |
-
raw = (r["choices"][0]["message"]["content"]
|
| 400 |
-
if isinstance(r, dict) else r.choices[0].message.content)
|
| 401 |
-
directive = _parse_directive(raw)
|
| 402 |
-
reply = _clean_response(raw)
|
| 403 |
-
except Exception as e:
|
| 404 |
-
reply = f"LLM error: {e}"
|
| 405 |
-
reply_fb, directive = _keyword_fallback(user_msg)
|
| 406 |
-
reply += "\n\n" + reply_fb
|
| 407 |
else:
|
| 408 |
-
reply,
|
| 409 |
|
| 410 |
-
|
| 411 |
"flag_frequency": build_flag_frequency_chart,
|
| 412 |
"risk_distribution": build_risk_distribution_chart,
|
| 413 |
"score_distribution": build_score_distribution_chart,
|
| 414 |
}
|
| 415 |
-
chart_out =
|
| 416 |
|
| 417 |
new_history = (history or []) + [(user_msg, reply)]
|
| 418 |
return new_history, "", chart_out
|
| 419 |
|
| 420 |
|
| 421 |
# =========================================================
|
| 422 |
-
#
|
| 423 |
# =========================================================
|
| 424 |
|
| 425 |
-
def load_css()
|
| 426 |
css_path = BASE_DIR / "style.css"
|
| 427 |
-
|
|
|
|
|
|
|
| 428 |
|
| 429 |
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
gr.Markdown(
|
| 433 |
"# Job Risk Analyzer\n"
|
|
@@ -436,17 +307,14 @@ with gr.Blocks(title="Job Risk Analyzer β CS1 Group 14", css=load_css()) as de
|
|
| 436 |
elem_id="escp_title",
|
| 437 |
)
|
| 438 |
|
| 439 |
-
# ===========================================================
|
| 440 |
-
# TAB 1 -- Live analyzer (the main feature)
|
| 441 |
-
# ===========================================================
|
| 442 |
with gr.Tab("π Analyze a Job"):
|
| 443 |
-
gr.Markdown("Paste any job description below to detect red flags and estimate
|
| 444 |
with gr.Row():
|
| 445 |
-
with gr.Column(
|
| 446 |
-
inp = gr.Textbox(label="Job description", lines=
|
| 447 |
placeholder="Paste the full job posting here...")
|
| 448 |
-
btn = gr.Button("Analyze", variant="primary"
|
| 449 |
-
with gr.Column(
|
| 450 |
out_md = gr.Markdown()
|
| 451 |
with gr.Row():
|
| 452 |
out_score = gr.Number(label="Score", precision=0)
|
|
@@ -454,46 +322,29 @@ with gr.Blocks(title="Job Risk Analyzer β CS1 Group 14", css=load_css()) as de
|
|
| 454 |
out_chart = gr.Plot(label="Signal breakdown")
|
| 455 |
btn.click(analyze_job, inputs=[inp], outputs=[out_md, out_score, out_risk, out_chart])
|
| 456 |
|
| 457 |
-
# ===========================================================
|
| 458 |
-
# TAB 2 -- Dataset Dashboard
|
| 459 |
-
# ===========================================================
|
| 460 |
with gr.Tab("π Dataset Dashboard"):
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
gr.Markdown("#### Insights from 47 labeled job postings")
|
| 465 |
-
chart_freq = gr.Plot(label="Most common signals", value=build_flag_frequency_chart)
|
| 466 |
with gr.Row():
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
gr.Markdown("#### Raw labeled dataset")
|
| 471 |
if not DF.empty:
|
| 472 |
display_cols = [c for c in ["Job title", "company", "Score", "Risk Level"] if c in DF.columns]
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
return (render_kpi_cards(), build_flag_frequency_chart(),
|
| 477 |
-
build_risk_distribution_chart(), build_score_distribution_chart())
|
| 478 |
|
| 479 |
-
refresh_btn.click(_on_refresh,
|
| 480 |
-
outputs=[kpi_html, chart_freq, chart_risk, chart_score])
|
| 481 |
-
|
| 482 |
-
# ===========================================================
|
| 483 |
-
# TAB 3 -- AI Dashboard
|
| 484 |
-
# ===========================================================
|
| 485 |
with gr.Tab('"AI" Dashboard'):
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
chatbot = gr.Chatbot(label="Conversation", height=380, type="messages")
|
| 494 |
user_input = gr.Textbox(label="Ask about the dataset",
|
| 495 |
-
placeholder="e.g. What are the most common red flags?"
|
| 496 |
-
lines=1)
|
| 497 |
gr.Examples(
|
| 498 |
examples=[
|
| 499 |
"What are the most common red flags?",
|
|
@@ -503,44 +354,35 @@ with gr.Blocks(title="Job Risk Analyzer β CS1 Group 14", css=load_css()) as de
|
|
| 503 |
],
|
| 504 |
inputs=user_input,
|
| 505 |
)
|
| 506 |
-
with gr.Column(
|
| 507 |
ai_chart = gr.Plot(label="Visualization")
|
| 508 |
|
| 509 |
user_input.submit(ai_chat, inputs=[user_input, chatbot],
|
| 510 |
outputs=[chatbot, user_input, ai_chart])
|
| 511 |
|
| 512 |
-
# ===========================================================
|
| 513 |
-
# TAB 4 -- About / Iterations
|
| 514 |
-
# ===========================================================
|
| 515 |
with gr.Tab("βΉοΈ About"):
|
| 516 |
gr.Markdown("""
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
- π’ **Low** (< 12): Healthy posting with clear structure and benefits
|
| 522 |
-
- π‘ **Medium** (12β24): Some warning signs worth investigating
|
| 523 |
-
- π΄ **High** (β₯ 25): Multiple concerning patterns β proceed with caution
|
| 524 |
-
|
| 525 |
-
### Team β CS1 Group 14
|
| 526 |
-
- **Gaspard** β Technical Lead (Hugging Face Space + Gradio app)
|
| 527 |
-
- **Person 3** β Data Analysis & Insights
|
| 528 |
-
- **Person 4** β Testing & Iterations
|
| 529 |
-
- **Person 5** β Report & Coordination
|
| 530 |
-
|
| 531 |
-
### Iterations
|
| 532 |
-
- **v1** β Keyword matching with hard-coded weights from labeled dataset
|
| 533 |
-
- **v2** β _(to be filled by Person 4 after testing)_
|
| 534 |
-
- **v3** β _(future: integrate LLM for semantic detection beyond keywords)_
|
| 535 |
-
|
| 536 |
-
### Data source
|
| 537 |
-
47 real job postings (mostly French market) manually labeled by the team
|
| 538 |
-
with 15 weighted signal categories.
|
| 539 |
-
""")
|
| 540 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
-
|
| 543 |
-
server_name="0.0.0.0",
|
| 544 |
-
server_port=7860,
|
| 545 |
-
allowed_paths=[str(BASE_DIR)]
|
| 546 |
-
)
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CS1 Group 14 — Job Description Risk Analyzer
|
| 3 |
+
Built for Gradio 4.44 / Hugging Face Spaces
|
| 4 |
+
"""
|
| 5 |
import os
|
| 6 |
import re
|
| 7 |
import json
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Tuple
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
import gradio as gr
|
| 13 |
import plotly.graph_objects as go
|
| 14 |
import plotly.express as px
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# =========================================================
|
| 17 |
# CONFIG
|
| 18 |
# =========================================================
|
| 19 |
|
| 20 |
BASE_DIR = Path(__file__).resolve().parent
|
| 21 |
DATA_FILE = BASE_DIR / "job_description_data.xlsx"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# =========================================================
|
| 25 |
+
# RED FLAG TAXONOMY
|
|
|
|
| 26 |
# =========================================================
|
| 27 |
|
| 28 |
RED_FLAGS = [
|
|
|
|
| 38 |
("broad / unclear scope", +5, ["other duties", "as needed", "various tasks", "wide range of responsibilities"]),
|
| 39 |
("multitasking / many hats", +5, ["multitask", "juggle", "multiple roles"]),
|
| 40 |
("training / support provided", -8, ["training provided", "mentorship", "onboarding", "support and training", "we will train"]),
|
| 41 |
+
("salary clearly specified", -6, ["salary:", "compensation:", "annual salary", "monthly salary"]),
|
| 42 |
("clear role structure", -5, ["responsibilities include", "your missions", "main tasks", "key responsibilities"]),
|
| 43 |
("benefits clearly mentioned", -4, ["health insurance", "paid leave", "meal vouchers", "transport", "benefits include", "profit-sharing"]),
|
| 44 |
]
|
| 45 |
|
|
|
|
|
|
|
|
|
|
| 46 |
# =========================================================
|
| 47 |
# DATA LOADING
|
| 48 |
# =========================================================
|
| 49 |
|
| 50 |
+
def load_dataset():
|
| 51 |
if not DATA_FILE.exists():
|
| 52 |
return pd.DataFrame()
|
| 53 |
try:
|
|
|
|
| 55 |
except Exception:
|
| 56 |
return pd.DataFrame()
|
| 57 |
|
|
|
|
| 58 |
DF = load_dataset()
|
| 59 |
|
| 60 |
|
| 61 |
+
def extract_flag_labels(red_flags_cell):
|
|
|
|
| 62 |
if not isinstance(red_flags_cell, str):
|
| 63 |
return []
|
| 64 |
out = []
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
# =========================================================
|
| 73 |
+
# CORE: ANALYZE
|
| 74 |
# =========================================================
|
| 75 |
|
| 76 |
+
def classify_risk(score):
|
| 77 |
if score < 12:
|
| 78 |
return "Low", "π’"
|
| 79 |
if score < 25:
|
|
|
|
| 81 |
return "High", "π΄"
|
| 82 |
|
| 83 |
|
| 84 |
+
def analyze_job(text):
|
| 85 |
if not text or len(text.strip()) < 30:
|
| 86 |
+
return "β οΈ Please paste a real job description (at least 30 characters).", 0, "β", _empty_chart("Paste a job description above")
|
|
|
|
| 87 |
|
| 88 |
lower = text.lower()
|
| 89 |
detected = []
|
|
|
|
| 94 |
score += weight
|
| 95 |
|
| 96 |
risk, emoji = classify_risk(score)
|
| 97 |
+
md = "## " + emoji + " Risk: **" + risk + "** | Score: **" + str(score) + "**\n\n"
|
|
|
|
| 98 |
if not detected:
|
| 99 |
+
md += "_No clear red or positive signals detected._"
|
| 100 |
else:
|
| 101 |
bad = [(l, w) for l, w in detected if w > 0]
|
| 102 |
good = [(l, w) for l, w in detected if w < 0]
|
| 103 |
if bad:
|
| 104 |
md += "### π© Red flags detected\n"
|
| 105 |
for l, w in bad:
|
| 106 |
+
md += "- **" + l + "** `(+" + str(w) + ")`\n"
|
| 107 |
if good:
|
| 108 |
md += "\n### β
Positive signals detected\n"
|
| 109 |
for l, w in good:
|
| 110 |
+
md += "- **" + l + "** `(" + str(w) + ")`\n"
|
| 111 |
|
| 112 |
if detected:
|
| 113 |
cdf = pd.DataFrame(detected, columns=["Signal", "Weight"])
|
|
|
|
| 115 |
fig = px.bar(cdf, x="Weight", y="Signal", color="Type", orientation="h",
|
| 116 |
color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"},
|
| 117 |
title="Signal breakdown")
|
| 118 |
+
fig.update_layout(**_styled_layout(height=420))
|
| 119 |
else:
|
| 120 |
fig = _empty_chart("No signals to chart")
|
| 121 |
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
# =========================================================
|
| 126 |
+
# CHARTS
|
| 127 |
# =========================================================
|
| 128 |
|
| 129 |
+
def _styled_layout(**kwargs):
|
| 130 |
defaults = dict(
|
| 131 |
template="plotly_white",
|
| 132 |
paper_bgcolor="#fdfaf3",
|
| 133 |
plot_bgcolor="#fdfaf3",
|
| 134 |
+
font=dict(family="system-ui, sans-serif", color="#1a2238", size=12),
|
| 135 |
margin=dict(l=60, r=20, t=70, b=70),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
defaults.update(kwargs)
|
| 138 |
return defaults
|
| 139 |
|
| 140 |
|
| 141 |
+
def _empty_chart(title):
    """Build a blank placeholder figure that shows a "(no data)" note.

    Args:
        title: Text used as the figure title.

    Returns:
        A ``go.Figure`` without traces, 420 high, using the app's background
        colour and one annotation positioned at the middle of the paper area.
    """
    placeholder_note = dict(
        text="(no data)",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=14, color="#8a9099"),
    )
    layout_options = dict(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="#fdfaf3",
        plot_bgcolor="#fdfaf3",
        annotations=[placeholder_note],
    )
    placeholder = go.Figure()
    placeholder.update_layout(**layout_options)
    return placeholder
|
| 150 |
|
| 151 |
|
| 152 |
+
def build_flag_frequency_chart():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
if DF.empty or "Red Flags" not in DF.columns:
|
| 154 |
return _empty_chart("Dataset not loaded")
|
| 155 |
all_flags = []
|
|
|
|
| 158 |
counts = pd.Series(all_flags).value_counts().head(12)
|
| 159 |
fig = go.Figure(go.Bar(
|
| 160 |
y=counts.index[::-1], x=counts.values[::-1], orientation="h",
|
| 161 |
+
marker=dict(color="#e85a4f"),
|
|
|
|
|
|
|
| 162 |
))
|
| 163 |
+
fig.update_layout(**_styled_layout(height=460, title="Most Common Signals Across Analyzed Jobs"))
|
|
|
|
|
|
|
|
|
|
| 164 |
return fig
|
| 165 |
|
| 166 |
|
| 167 |
+
def build_risk_distribution_chart():
    """Build a donut chart of how many dataset rows fall in each risk level.

    Reads the module-level ``DF`` frame. When the frame is empty or has no
    "Risk Level" column, a placeholder chart is returned instead.

    Returns:
        A ``go.Figure`` holding a single pie trace with a 0.4 hole.
    """
    dataset_missing = DF.empty or "Risk Level" not in DF.columns
    if dataset_missing:
        return _empty_chart("Dataset not loaded")

    level_counts = DF["Risk Level"].value_counts()

    # Fixed colour per known level; any other label falls back to grey.
    palette = {"Low": "#2a9d8f", "Medium": "#e9a23b", "High": "#c53030"}
    slice_colors = [palette.get(level, "#888") for level in level_counts.index]

    donut = go.Pie(
        labels=level_counts.index,
        values=level_counts.values,
        marker=dict(colors=slice_colors),
        hole=0.4,
    )
    chart = go.Figure(donut)
    layout = _styled_layout(height=400, title="Risk Level Distribution")
    chart.update_layout(**layout)
    return chart
|
| 179 |
|
| 180 |
|
| 181 |
+
def build_score_distribution_chart():
    """Build a histogram of the dataset's risk scores.

    Reads the module-level ``DF`` frame. When the frame is empty or has no
    "Score" column, a placeholder chart is returned instead. Missing scores
    are dropped before plotting.

    Returns:
        A ``go.Figure`` holding a single histogram trace (up to 15 bins).
    """
    dataset_missing = DF.empty or "Score" not in DF.columns
    if dataset_missing:
        return _empty_chart("Dataset not loaded")

    valid_scores = DF["Score"].dropna()
    histogram = go.Histogram(x=valid_scores, nbinsx=15, marker_color="#e85a4f")

    chart = go.Figure(histogram)
    layout = _styled_layout(height=380, title="Risk Score Distribution")
    chart.update_layout(**layout)
    return chart
|
| 188 |
|
| 189 |
|
|
|
|
| 191 |
# KPI CARDS
|
| 192 |
# =========================================================
|
| 193 |
|
| 194 |
+
def render_kpi_cards():
|
| 195 |
if DF.empty:
|
| 196 |
+
return '<div style="background:#fdfaf3;padding:32px;text-align:center;border-radius:12px;border:1px solid #d9cfb9;color:#4a5475;">No dataset loaded.</div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
total_jobs = len(DF)
|
| 199 |
avg_score = DF["Score"].dropna().mean() if "Score" in DF.columns else 0
|
|
|
|
| 206 |
all_flags.extend(label for label, _ in extract_flag_labels(str(cell)))
|
| 207 |
top_flag = pd.Series(all_flags).value_counts().index[0] if all_flags else "β"
|
| 208 |
|
| 209 |
+
def card(label, value, sub, color):
|
| 210 |
+
return (
|
| 211 |
+
'<div style="background:#fdfaf3;border:1px solid #d9cfb9;border-radius:12px;'
|
| 212 |
+
'padding:20px 22px;box-shadow:0 2px 8px rgba(26,34,56,0.04);">'
|
| 213 |
+
'<div style="font-family:monospace;color:' + color + ';font-size:11px;font-weight:600;'
|
| 214 |
+
'text-transform:uppercase;letter-spacing:0.08em;margin-bottom:14px;">' + label + '</div>'
|
| 215 |
+
'<div style="color:#1a2238;font-size:34px;font-weight:700;line-height:1;'
|
| 216 |
+
'letter-spacing:-0.03em;margin-bottom:10px;">' + str(value) + '</div>'
|
| 217 |
+
'<div style="font-family:monospace;font-size:11px;color:#4a5475;">' + sub + '</div>'
|
| 218 |
+
'</div>'
|
| 219 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
cards = [
|
| 222 |
+
card("Total.Jobs", total_jobs, "real labeled postings", "#e85a4f"),
|
| 223 |
+
card("Avg.Score", str(round(avg_score, 1)), "weighted across dataset", "#2a9d8f"),
|
| 224 |
+
card("High.Risk %", str(round(high_pct)) + "%", str(risk_counts.get("High", 0)) + " postings flagged", "#c53030"),
|
| 225 |
+
card("Top.Signal", top_flag.split(' ')[0].title() if top_flag != "β" else "β",
|
| 226 |
top_flag if top_flag != "β" else "no data", "#7d4e8a"),
|
| 227 |
]
|
| 228 |
return ('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));'
|
| 229 |
+
'gap:12px;margin-bottom:24px;">' + "".join(cards) + '</div>')
|
| 230 |
|
| 231 |
|
| 232 |
# =========================================================
|
| 233 |
+
# CHAT (n8n -> keyword fallback)
|
| 234 |
# =========================================================
|
| 235 |
|
| 236 |
+
def _keyword_pattern(words):
    """Compile a case-insensitive pattern matching any word that STARTS with one of *words*."""
    return re.compile(r"\b(?:" + "|".join(re.escape(w) for w in words) + r")", re.IGNORECASE)


# Ordered (pattern, reply, chart_key) rules; the first rule whose pattern hits wins.
_KEYWORD_RULES = tuple(
    (_keyword_pattern(words), reply, chart_key)
    for words, reply, chart_key in (
        (
            ("common", "frequent", "most", "top"),
            "The most common signals in our dataset are 'high responsibility early', "
            "'technical complexity', and 'clear role structure'. These appear in over 60% of postings.",
            "flag_frequency",
        ),
        (
            ("risk", "distribution", "level"),
            "Most jobs land in the Medium risk tier (scores 12-24). High-risk postings combine "
            "multiple red flags like vague scope, on-site-only, and missing salary information.",
            "risk_distribution",
        ),
        (
            ("score", "histogram", "spread"),
            "Risk scores cluster between 10-25 in our dataset. Anything above 25 signals "
            "a problematic posting.",
            "score_distribution",
        ),
        (
            ("how", "work", "explain", "method"),
            "The analyzer scans for 15 weighted signal categories. Red flags add to the score, "
            "positive signals subtract. The total maps to Low/Medium/High risk.",
            "none",
        ),
    )
)

_KEYWORD_DEFAULT_REPLY = (
    "Try asking: most common red flags, risk distribution, score spread, or how it works."
)


def keyword_fallback(msg):
    """Answer a dashboard question with canned text chosen by keyword.

    Keywords are matched at the start of a word, so inflected forms such as
    "levels" or "histograms" still count, while a keyword buried inside
    another word ("show" -> "how", "stop" -> "top", "almost" -> "most") does
    not trigger a rule.

    Args:
        msg: The user's question. ``None`` or an empty string is treated as
            a question with no keywords.

    Returns:
        A ``(reply, chart_key)`` tuple of strings. ``chart_key`` is one of
        "flag_frequency", "risk_distribution", "score_distribution" or "none".
    """
    question = msg or ""
    for pattern, reply, chart_key in _KEYWORD_RULES:
        if pattern.search(question):
            return reply, chart_key
    return _KEYWORD_DEFAULT_REPLY, "none"
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def call_n8n(msg):
    """Ask the n8n workflow to answer ``msg``, falling back to local logic.

    Posts ``{"question": msg}`` as JSON to ``N8N_WEBHOOK_URL`` with a
    15 second timeout and reads the ``"answer"`` and ``"chart"`` keys from
    the JSON object in the response.

    Returns:
        A ``(reply_text, chart_key)`` tuple. When the workflow cannot be
        reached or does not return a usable JSON object, the reply comes
        from ``keyword_fallback`` prefixed with a notice that local logic
        was used.
    """
    try:
        # Imported lazily and inside the guard so that a missing `requests`
        # install degrades to the keyword fallback instead of crashing.
        import requests

        r = requests.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=15)
        # Without this, a 4xx/5xx error page carrying a JSON body would be
        # presented to the user as if it were the workflow's answer.
        r.raise_for_status()
        data = r.json()
        if not isinstance(data, dict):
            raise ValueError("n8n response is not a JSON object")
        return data.get("answer", "n8n returned no answer."), data.get("chart", "none")
    except Exception:
        # Deliberately broad: the chat must keep working whatever goes wrong
        # with the remote call (network, HTTP status, malformed payload).
        fb_text, fb_chart = keyword_fallback(msg)
        return "(n8n unavailable, using local logic)\n\n" + fb_text, fb_chart
|
| 262 |
|
| 263 |
|
| 264 |
+
def ai_chat(user_msg, history):
    """Process one chat turn and optionally pick a chart to display.

    A blank message leaves the conversation untouched. Otherwise the message
    is answered by ``call_n8n`` when ``N8N_WEBHOOK_URL`` is set, or by
    ``keyword_fallback`` when it is not, and the chart key returned with the
    answer selects which chart builder (if any) to run.

    Returns:
        ``(history, "", chart)``: the conversation as a list of
        ``(user_msg, reply)`` tuples, an empty string for the input textbox,
        and the built chart or ``None``.
    """
    turns = history or []

    # Nothing to answer: hand back the conversation unchanged.
    if not user_msg or not user_msg.strip():
        return turns, "", None

    answer = call_n8n if N8N_WEBHOOK_URL else keyword_fallback
    reply, chart_key = answer(user_msg)

    chart_builders = {
        "flag_frequency": build_flag_frequency_chart,
        "risk_distribution": build_risk_distribution_chart,
        "score_distribution": build_score_distribution_chart,
    }
    # Any key without a builder (including "none") means no chart.
    build_chart = chart_builders.get(chart_key)
    chart_out = None if build_chart is None else build_chart()

    return turns + [(user_msg, reply)], "", chart_out
|
| 282 |
|
| 283 |
|
| 284 |
# =========================================================
|
| 285 |
+
# CSS LOADER
|
| 286 |
# =========================================================
|
| 287 |
|
| 288 |
+
def load_css():
    """Return the text of ``style.css`` located in ``BASE_DIR``.

    The stylesheet is optional: when it is missing or cannot be read, an
    empty string is returned instead.
    """
    css_path = BASE_DIR / "style.css"
    try:
        return css_path.read_text(encoding="utf-8")
    except OSError:
        # Covers a missing file, a directory named style.css, and permission
        # problems, without the check-then-read race of exists().
        return ""
|
| 293 |
|
| 294 |
|
| 295 |
+
# =========================================================
|
| 296 |
+
# UI
|
| 297 |
+
# =========================================================
|
| 298 |
+
|
| 299 |
+
CSS = load_css()
|
| 300 |
+
|
| 301 |
+
with gr.Blocks(title="Job Risk Analyzer", css=CSS) as demo:
|
| 302 |
|
| 303 |
gr.Markdown(
|
| 304 |
"# Job Risk Analyzer\n"
|
|
|
|
| 307 |
elem_id="escp_title",
|
| 308 |
)
|
| 309 |
|
|
|
|
|
|
|
|
|
|
| 310 |
with gr.Tab("π Analyze a Job"):
|
| 311 |
+
gr.Markdown("Paste any job description below to detect red flags and estimate risk.")
|
| 312 |
with gr.Row():
|
| 313 |
+
with gr.Column():
|
| 314 |
+
inp = gr.Textbox(label="Job description", lines=15,
|
| 315 |
placeholder="Paste the full job posting here...")
|
| 316 |
+
btn = gr.Button("Analyze", variant="primary")
|
| 317 |
+
with gr.Column():
|
| 318 |
out_md = gr.Markdown()
|
| 319 |
with gr.Row():
|
| 320 |
out_score = gr.Number(label="Score", precision=0)
|
|
|
|
| 322 |
out_chart = gr.Plot(label="Signal breakdown")
|
| 323 |
btn.click(analyze_job, inputs=[inp], outputs=[out_md, out_score, out_risk, out_chart])
|
| 324 |
|
|
|
|
|
|
|
|
|
|
| 325 |
with gr.Tab("π Dataset Dashboard"):
|
| 326 |
+
gr.HTML(value=render_kpi_cards())
|
| 327 |
+
gr.Markdown("### Insights from labeled job postings")
|
| 328 |
+
gr.Plot(value=build_flag_frequency_chart(), label="Most common signals")
|
|
|
|
|
|
|
| 329 |
with gr.Row():
|
| 330 |
+
gr.Plot(value=build_risk_distribution_chart(), label="Risk distribution")
|
| 331 |
+
gr.Plot(value=build_score_distribution_chart(), label="Score distribution")
|
|
|
|
|
|
|
| 332 |
if not DF.empty:
|
| 333 |
display_cols = [c for c in ["Job title", "company", "Score", "Risk Level"] if c in DF.columns]
|
| 334 |
+
if display_cols:
|
| 335 |
+
gr.Markdown("### Raw labeled dataset")
|
| 336 |
+
gr.Dataframe(DF[display_cols], wrap=True, interactive=False)
|
|
|
|
|
|
|
| 337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
with gr.Tab('"AI" Dashboard'):
|
| 339 |
+
status = ("Connected to **n8n workflow**." if N8N_WEBHOOK_URL
|
| 340 |
+
else "Using **keyword matching** (set `N8N_WEBHOOK_URL` to upgrade).")
|
| 341 |
+
gr.Markdown("### Ask questions, get visualizations\n\n" + status)
|
| 342 |
+
|
| 343 |
+
with gr.Row():
|
| 344 |
+
with gr.Column():
|
| 345 |
+
chatbot = gr.Chatbot(label="Conversation", height=380)
|
|
|
|
| 346 |
user_input = gr.Textbox(label="Ask about the dataset",
|
| 347 |
+
placeholder="e.g. What are the most common red flags?")
|
|
|
|
| 348 |
gr.Examples(
|
| 349 |
examples=[
|
| 350 |
"What are the most common red flags?",
|
|
|
|
| 354 |
],
|
| 355 |
inputs=user_input,
|
| 356 |
)
|
| 357 |
+
with gr.Column():
|
| 358 |
ai_chart = gr.Plot(label="Visualization")
|
| 359 |
|
| 360 |
user_input.submit(ai_chat, inputs=[user_input, chatbot],
|
| 361 |
outputs=[chatbot, user_input, ai_chart])
|
| 362 |
|
|
|
|
|
|
|
|
|
|
| 363 |
with gr.Tab("βΉοΈ About"):
|
| 364 |
gr.Markdown("""
|
| 365 |
+
### How it works
|
| 366 |
+
|
| 367 |
+
This app uses a **weighted red-flag taxonomy** built from 47 real labeled job postings.
|
| 368 |
+
Each detected signal contributes to a total score that maps to Low / Medium / High risk.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
+
- 🟢 **Low** (< 12): Healthy posting with clear structure and benefits
|
| 371 |
+
- 🟡 **Medium** (12–24): Some warning signs worth investigating
|
| 372 |
+
- 🔴 **High** (≥ 25): Multiple concerning patterns
|
| 373 |
+
|
| 374 |
+
### Team — CS1 Group 14
|
| 375 |
+
|
| 376 |
+
- **Gaspard** — UX Designer + Content Specialist (HF Space, Gradio app, n8n workflow, testing)
|
| 377 |
+
- **Person 3** — Data Analyst (extraction, analysis, charts)
|
| 378 |
+
- **Person 4** — Project Manager (final report, coordination)
|
| 379 |
+
|
| 380 |
+
### Iterations
|
| 381 |
+
|
| 382 |
+
- **v1** — Keyword matching with hard-coded weights from labeled dataset
|
| 383 |
+
- **v2** — Refined keyword patterns after user testing
|
| 384 |
+
- **v3** — Integrated n8n workflow for smarter conversational responses
|
| 385 |
+
""")
|
| 386 |
|
| 387 |
+
if __name__ == "__main__":
|
| 388 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|