Spaces:
Sleeping
Sleeping
"""
CS1 Group 14 - Job Description Risk Analyzer
Built for Hugging Face Spaces with Gradio SDK
"""
import html
import logging
import os
import re
from pathlib import Path

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Directory containing this file; the dataset and stylesheet are looked up beside it.
BASE_DIR = Path(__file__).resolve().parent
# Excel workbook holding the labeled job postings.
DATA_FILE = BASE_DIR / "job_description_data.xlsx"
# n8n webhook endpoint taken from the environment; "" when the variable is unset.
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
# ===== RED FLAG TAXONOMY =====
# Each entry is (label, weight, patterns):
#   label    - signal name
#   weight   - points added to the score; negative weights are positive signals
#   patterns - lowercase phrases; the signal fires when any one of them occurs
RED_FLAGS = [
    ("high responsibility early", 10, ["full ownership", "lead the", "responsible for", "drive the", "own the", "manage the team"]),
    ("high autonomy / ownership", 10, ["autonomous", "self-starter", "work independently", "minimal supervision"]),
    ("adaptability / flexibility demand", 8, ["flexible", "adaptable", "fast-paced", "changing priorities", "wear many hats"]),
    ("cross-functional / many stakeholders", 8, ["cross-functional", "multiple stakeholders", "various teams", "coordinate with"]),
    ("customer-facing emotional labor", 6, ["customer-facing", "client-facing", "handle complaints", "difficult customers"]),
    ("technical complexity", 6, ["python", "sql", "machine learning", "api", "data pipeline", "advanced"]),
    ("on-site only / no remote", 5, ["on-site only", "no remote", "in-office", "fully on-site"]),
    ("travel / mobility", 5, ["travel required", "frequent travel", "willing to travel"]),
    ("pressure / deadlines", 5, ["tight deadlines", "high pressure", "demanding schedule"]),
    ("broad / unclear scope", 5, ["other duties", "as needed", "various tasks"]),
    ("multitasking / many hats", 5, ["multitask", "juggle", "multiple roles"]),
    ("training / support provided", -8, ["training provided", "mentorship", "onboarding", "we will train"]),
    ("salary clearly specified", -6, ["salary:", "compensation:", "annual salary"]),
    ("clear role structure", -5, ["responsibilities include", "your missions", "main tasks"]),
    ("benefits clearly mentioned", -4, ["health insurance", "paid leave", "meal vouchers", "benefits include"]),
]
def load_dataset():
    """Load the labeled job-posting workbook at ``DATA_FILE``.

    Returns:
        The sheet read by ``pandas.read_excel`` as a ``DataFrame``, or an
        empty ``DataFrame`` when the file does not exist or cannot be read.
    """
    if not DATA_FILE.exists():
        return pd.DataFrame()
    try:
        return pd.read_excel(DATA_FILE)
    except Exception:
        # Best effort: the app still starts without a dataset, but the
        # failure is logged instead of being swallowed silently.
        logging.getLogger(__name__).exception("Could not read dataset %s", DATA_FILE)
        return pd.DataFrame()


# Dataset loaded once at import time; empty when unavailable.
DF = load_dataset()
# Splits a cell on commas that are followed by a letter (the start of the next label).
_FLAG_SPLIT_RE = re.compile(r",\s*(?=[a-zA-Z])")
# One entry: a label followed by an explicitly signed integer weight in parentheses.
_FLAG_ITEM_RE = re.compile(r"(.+?)\s*\(([+-]\d+)\)")


def extract_flag_labels(cell):
    """Parse a flags cell such as ``"label a (+10), label b (-8)"``.

    Args:
        cell: Cell value to parse. Anything that is not a ``str`` yields
            an empty result.

    Returns:
        A list of ``(label, weight)`` tuples in order of appearance, with
        ``weight`` as an ``int``. Parts that do not match the
        ``label (+N)`` / ``label (-N)`` shape are skipped.
    """
    if not isinstance(cell, str):
        return []
    parsed = []
    for part in _FLAG_SPLIT_RE.split(cell):
        match = _FLAG_ITEM_RE.match(part.strip())
        if match:
            parsed.append((match.group(1).strip(), int(match.group(2))))
    return parsed
def classify_risk(score):
    """Map a numeric risk score to a ``(level, emoji)`` pair.

    Scores below 12 are "Low", scores from 12 up to (but not including) 25
    are "Medium", and everything else is "High".

    Args:
        score: Total signal score.

    Returns:
        A tuple of the level name and a coloured-circle emoji.
    """
    # Emoji are written as escapes so the source survives re-encoding.
    if score < 12:
        return "Low", "\U0001F7E2"  # green circle
    if score < 25:
        return "Medium", "\U0001F7E1"  # yellow circle
    return "High", "\U0001F534"  # red circle
def _empty_chart(title):
    """Return a blank Plotly figure that shows only ``title``.

    Used as a placeholder wherever there is nothing to plot.
    """
    fig = go.Figure()
    fig.update_layout(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="#fdfaf3",
        plot_bgcolor="#fdfaf3",
    )
    return fig
def _styled(**kwargs):
    """Build layout keyword arguments for ``fig.update_layout``.

    Starts from the shared theme (template, background colours, font colour,
    margins) and applies ``kwargs`` on top, so callers can add keys such as
    ``height``/``title`` or override a theme default.

    Returns:
        A new ``dict`` of layout options.
    """
    return {
        "template": "plotly_white",
        "paper_bgcolor": "#fdfaf3",
        "plot_bgcolor": "#fdfaf3",
        "font": {"color": "#1a2238"},
        "margin": {"l": 60, "r": 20, "t": 70, "b": 70},
        **kwargs,
    }
def analyze_job(text):
    """Score a pasted job description against ``RED_FLAGS``.

    Args:
        text: Raw job-description text.

    Returns:
        A 4-tuple ``(markdown_report, score, risk_level, figure)``. When the
        input is empty or shorter than 30 characters after stripping, the
        report is a prompt to paste a real description, the score is 0, the
        level is an em dash and the figure is an empty placeholder chart.
    """
    if not text or len(text.strip()) < 30:
        return (
            "Please paste a real job description (at least 30 characters).",
            0,
            "\u2014",  # em dash placeholder for "no risk level"
            _empty_chart("Awaiting input"),
        )

    lowered = text.lower()
    # Matching is plain substring containment, so a short pattern such as
    # "api" also fires inside longer words.
    detected = [
        (label, weight)
        for label, weight, patterns in RED_FLAGS
        if any(pattern in lowered for pattern in patterns)
    ]
    score = sum(weight for _, weight in detected)
    risk, emoji = classify_risk(score)

    md_lines = [f"## {emoji} Risk: **{risk}** | Score: **{score}**", ""]
    red_flags = [(label, weight) for label, weight in detected if weight > 0]
    positives = [(label, weight) for label, weight in detected if weight < 0]
    if not detected:
        md_lines.append("_No clear signals detected._")
    if red_flags:
        md_lines.append("### \U0001F6A9 Red flags")
        md_lines.extend(f"- **{label}** (+{weight})" for label, weight in red_flags)
    if positives:
        md_lines.append("")
        md_lines.append("### \u2705 Positive signals")
        # Negative weights already carry their own minus sign.
        md_lines.extend(f"- **{label}** ({weight})" for label, weight in positives)

    if detected:
        chart_df = pd.DataFrame(detected, columns=["Signal", "Weight"])
        chart_df["Type"] = chart_df["Weight"].apply(
            lambda weight: "Red flag" if weight > 0 else "Positive"
        )
        fig = px.bar(
            chart_df,
            x="Weight",
            y="Signal",
            color="Type",
            orientation="h",
            color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"},
            title="Signal breakdown",
        )
        fig.update_layout(**_styled(height=420))
    else:
        fig = _empty_chart("No signals detected")

    return "\n".join(md_lines), score, risk, fig
def chart_flag_frequency():
    """Horizontal bar chart of the 12 most frequent signals in ``DF``.

    Returns:
        A Plotly figure. A titled placeholder is returned when the dataset is
        empty, has no "Red Flags" column, or contains no parseable flags.
    """
    if DF.empty or "Red Flags" not in DF.columns:
        return _empty_chart("Dataset not loaded")

    labels = [
        label
        for cell in DF["Red Flags"].dropna()
        for label, _ in extract_flag_labels(str(cell))
    ]
    if not labels:
        # Nothing parseable: skip building a Series from an empty list.
        return _empty_chart("No signals found in dataset")

    counts = pd.Series(labels).value_counts().head(12)
    # Reversed so the most frequent signal ends up at the top of the chart.
    fig = go.Figure(
        go.Bar(
            y=counts.index[::-1],
            x=counts.values[::-1],
            orientation="h",
            marker=dict(color="#e85a4f"),
        )
    )
    fig.update_layout(**_styled(height=460, title="Most Common Signals"))
    return fig
def chart_risk_distribution():
    """Donut chart of how many postings in ``DF`` fall into each risk level.

    Returns:
        A Plotly figure, or a titled placeholder when the dataset is empty or
        has no "Risk Level" column.
    """
    if DF.empty or "Risk Level" not in DF.columns:
        return _empty_chart("Dataset not loaded")

    counts = DF["Risk Level"].value_counts()
    palette = {"Low": "#2a9d8f", "Medium": "#e9a23b", "High": "#c53030"}
    # Levels outside the palette fall back to grey.
    slice_colors = [palette.get(level, "#888") for level in counts.index]
    fig = go.Figure(
        go.Pie(
            labels=counts.index,
            values=counts.values,
            marker=dict(colors=slice_colors),
            hole=0.4,
        )
    )
    fig.update_layout(**_styled(height=400, title="Risk Level Distribution"))
    return fig
def chart_score_distribution():
    """Histogram of the "Score" column of ``DF``.

    Returns:
        A Plotly figure, or a titled placeholder when the dataset is empty or
        has no "Score" column.
    """
    if DF.empty or "Score" not in DF.columns:
        return _empty_chart("Dataset not loaded")

    scores = DF["Score"].dropna()
    fig = go.Figure(go.Histogram(x=scores, nbinsx=15, marker_color="#e85a4f"))
    fig.update_layout(**_styled(height=380, title="Risk Score Distribution"))
    return fig
def render_kpis():
    """Render the KPI card strip for the dataset dashboard as an HTML string.

    Cards: total postings, average score, share of "High" risk postings and
    the most frequent signal. Columns missing from ``DF`` degrade to zero or
    placeholder values instead of raising.

    Returns:
        An HTML fragment; a single "No dataset loaded." box when ``DF`` is empty.
    """
    if DF.empty:
        return '<div style="padding:32px;text-align:center;background:#fdfaf3;border-radius:12px;border:1px solid #d9cfb9;">No dataset loaded.</div>'

    dash = "\u2014"  # em dash placeholder for "no value"
    total = len(DF)

    avg = 0
    if "Score" in DF.columns:
        avg = pd.to_numeric(DF["Score"], errors="coerce").mean()
        if pd.isna(avg):
            # No usable scores: show 0.0 rather than "nan".
            avg = 0

    if "Risk Level" in DF.columns:
        risk_counts = DF["Risk Level"].value_counts()
    else:
        # Explicit dtype: a dtype-less empty Series warns on older pandas.
        risk_counts = pd.Series(dtype="int64")
    high_count = risk_counts.get("High", 0)
    high_pct = (high_count / total * 100) if total else 0

    labels = []
    if "Red Flags" in DF.columns:
        for cell in DF["Red Flags"].dropna():
            labels.extend(label for label, _ in extract_flag_labels(str(cell)))
    # None (not a display string) marks "no flag found".
    top_flag = pd.Series(labels).value_counts().index[0] if labels else None

    def card(label, value, sub, color):
        """Return one KPI card; text fields are HTML-escaped."""
        label, value, sub = (html.escape(str(part)) for part in (label, value, sub))
        return (
            f'<div style="background:#fdfaf3;border:1px solid #d9cfb9;border-radius:12px;'
            f'padding:20px 22px;box-shadow:0 2px 8px rgba(26,34,56,0.04);">'
            f'<div style="font-family:monospace;color:{color};font-size:11px;font-weight:600;'
            f'text-transform:uppercase;letter-spacing:0.08em;margin-bottom:14px;">{label}</div>'
            f'<div style="color:#1a2238;font-size:34px;font-weight:700;line-height:1;'
            f'letter-spacing:-0.03em;margin-bottom:10px;">{value}</div>'
            f'<div style="font-family:monospace;font-size:11px;color:#4a5475;">{sub}</div>'
            f'</div>'
        )

    cards = [
        card("Total.Jobs", total, "real labeled postings", "#e85a4f"),
        card("Avg.Score", f"{avg:.1f}", "across the dataset", "#2a9d8f"),
        card("High.Risk %", f"{high_pct:.0f}%", f"{high_count} flagged", "#c53030"),
        card(
            "Top.Signal",
            # Headline is the first word of the label; the full label is the subtitle.
            top_flag.split(" ")[0].title() if top_flag is not None else dash,
            top_flag if top_flag is not None else "no data",
            "#7d4e8a",
        ),
    ]
    return (
        '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));gap:12px;margin-bottom:24px;">'
        + "".join(cards)
        + '</div>'
    )
def keyword_answer(msg):
    """Answer a dataset question with canned text chosen by keyword.

    Rules are tried in order and the first one with any keyword contained in
    the lowercased message wins. Matching is substring-based, so "show"
    also triggers the "how" keyword if no earlier rule matched.

    Args:
        msg: The user's question; ``None`` is treated as an empty string.

    Returns:
        A ``(reply, chart_key)`` tuple where ``chart_key`` is one of
        ``"freq"``, ``"risk"``, ``"score"`` or ``"none"``.
    """
    lowered = (msg or "").lower()
    rules = (
        (
            ("common", "frequent", "most", "top"),
            "The most common signals in our dataset are 'high responsibility early', "
            "'technical complexity', and 'clear role structure'. They appear in over 60% of postings.",
            "freq",
        ),
        (
            ("risk", "distribution", "level"),
            "Most jobs land in the Medium risk tier (scores 12-24). High-risk postings combine "
            "multiple red flags like vague scope and missing salary information.",
            "risk",
        ),
        (
            ("score", "histogram", "spread"),
            "Risk scores cluster between 10-25 in our dataset. Above 25 strongly indicates "
            "a problematic posting.",
            "score",
        ),
        (
            ("how", "work", "explain", "method"),
            "The analyzer scans for 15 weighted signal categories. Red flags add to the score, "
            "positive signals subtract. The total maps to Low/Medium/High risk.",
            "none",
        ),
    )
    for keywords, reply, chart_key in rules:
        if any(keyword in lowered for keyword in keywords):
            return reply, chart_key
    return "Try asking about: most common red flags, risk distribution, score spread, or how the analyzer works.", "none"
def call_n8n(msg):
    """Ask the n8n webhook at ``N8N_WEBHOOK_URL`` and return its answer.

    The question is POSTed as ``{"question": msg}`` with a 15 second timeout.
    On any failure (missing ``requests`` package, network error, HTTP error
    status, non-JSON or unexpectedly shaped body) the local keyword logic is
    used instead and the reply is prefixed with a notice.

    Args:
        msg: The user's question.

    Returns:
        A ``(reply, chart_key)`` tuple.
    """
    try:
        # Function-local as in the original; inside the try so a missing
        # package also falls back to local logic.
        import requests

        response = requests.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=15)
        # Do not present the body of a 4xx/5xx response as an answer.
        response.raise_for_status()
        data = response.json()
        # NOTE(review): presumably n8n can wrap the payload in a list of
        # items depending on the webhook response mode; confirm against the
        # workflow. The first item is used when that happens.
        if isinstance(data, list) and data:
            data = data[0]
        return data.get("answer", "n8n returned no answer."), data.get("chart", "none")
    except Exception:
        logging.getLogger(__name__).warning(
            "n8n request failed; falling back to local logic", exc_info=True
        )
        text, key = keyword_answer(msg)
        return "(n8n unavailable, using local logic)\n\n" + text, key
def ask_question(question):
    """Answer a dashboard question and pick a matching chart.

    Uses the n8n webhook when ``N8N_WEBHOOK_URL`` is set, otherwise the local
    keyword logic.

    Args:
        question: Text typed by the user.

    Returns:
        A ``(markdown, figure_or_None)`` tuple. The figure is ``None`` when
        the question is blank or the chart key is not recognised.
    """
    if not question or not question.strip():
        return "_Type a question above and press Submit._", None

    if N8N_WEBHOOK_URL:
        reply, key = call_n8n(question)
    else:
        reply, key = keyword_answer(question)

    # Accept multiple naming conventions for chart keys
    chart_fns = {
        "freq": chart_flag_frequency,
        "flag_frequency": chart_flag_frequency,
        "frequency": chart_flag_frequency,
        "risk": chart_risk_distribution,
        "risk_distribution": chart_risk_distribution,
        "distribution": chart_risk_distribution,
        "score": chart_score_distribution,
        "score_distribution": chart_score_distribution,
        "histogram": chart_score_distribution,
    }
    # The key may come from an external service: ignore non-strings (an
    # unhashable value would raise on lookup) and tolerate case/whitespace.
    chart_fn = chart_fns.get(key.strip().lower()) if isinstance(key, str) else None
    chart = chart_fn() if chart_fn is not None else None
    return f"**Q:** {question}\n\n**A:** {reply}", chart
def load_css():
    """Return the text of ``style.css`` beside this file, or ``""`` if absent."""
    css_path = BASE_DIR / "style.css"
    # is_file() rather than exists(): a directory with that name is not readable CSS.
    if not css_path.is_file():
        return ""
    return css_path.read_text(encoding="utf-8")
# ===== UI =====
# NOTE(review): the Row/Column/Tab nesting below was reconstructed from a
# flattened listing without indentation; confirm the layout against the
# deployed Space.
CSS = load_css()

with gr.Blocks(title="Job Risk Analyzer", css=CSS) as demo:
    gr.Markdown(
        "# Job Risk Analyzer\n"
        "Detect hidden risk patterns in job postings using a weighted signal taxonomy.",
        elem_id="escp_title",
    )

    # --- Tab 1: score a pasted job description ---
    with gr.Tab("Analyze a Job"):
        gr.Markdown("Paste any job description below to detect red flags and estimate risk.")
        with gr.Row():
            with gr.Column():
                job_input = gr.Textbox(label="Job description", lines=15,
                                       placeholder="Paste the full job posting here...")
                analyze_btn = gr.Button("Analyze", variant="primary")
            with gr.Column():
                result_md = gr.Markdown()
                with gr.Row():
                    score_box = gr.Number(label="Score", precision=0)
                    risk_box = gr.Textbox(label="Risk Level")
                breakdown_chart = gr.Plot(label="Signal breakdown", min_width=400)
        # Output order matches the 4-tuple returned by analyze_job.
        analyze_btn.click(analyze_job, inputs=[job_input],
                          outputs=[result_md, score_box, risk_box, breakdown_chart])

    # --- Tab 2: static charts built once from the loaded dataset ---
    with gr.Tab("Dataset Dashboard"):
        gr.HTML(value=render_kpis())
        gr.Markdown("### Insights from labeled job postings")
        gr.Plot(value=chart_flag_frequency(), label="Most common signals")
        with gr.Row():
            gr.Plot(value=chart_risk_distribution(), label="Risk distribution")
            gr.Plot(value=chart_score_distribution(), label="Score distribution")
        if not DF.empty:
            # Show only the expected columns that are actually present.
            cols = [c for c in ["Job title", "company", "Score", "Risk Level"] if c in DF.columns]
            if cols:
                gr.Markdown("### Raw labeled dataset")
                gr.Dataframe(DF[cols], wrap=True, interactive=False)

    # --- Tab 3: question answering with an optional chart ---
    with gr.Tab("AI Dashboard"):
        status = "Connected to n8n workflow." if N8N_WEBHOOK_URL else "Using local logic (set N8N_WEBHOOK_URL to enable n8n)."
        gr.Markdown(f"### Ask questions, get visualizations\n\n{status}")
        with gr.Row():
            with gr.Column():
                q_input = gr.Textbox(label="Ask about the dataset",
                                     placeholder="e.g. What are the most common red flags?", lines=2)
                ask_btn = gr.Button("Ask", variant="primary")
                gr.Markdown(
                    "**Try these examples:**\n\n"
                    "- What are the most common red flags?\n"
                    "- Show me the risk level distribution\n"
                    "- How is the score spread across jobs?\n"
                    "- How does the analyzer work?"
                )
                answer_md = gr.Markdown()
            with gr.Column():
                ai_chart = gr.Plot(label="Visualization", min_width=400)
        # Both the button and pressing Enter in the textbox submit the question.
        ask_btn.click(ask_question, inputs=[q_input], outputs=[answer_md, ai_chart])
        q_input.submit(ask_question, inputs=[q_input], outputs=[answer_md, ai_chart])

    # --- Tab 4: static project description ---
    # Non-ASCII characters are written as escapes so the text survives re-encoding.
    with gr.Tab("About"):
        gr.Markdown("""
### How it works
This app uses a weighted red-flag taxonomy built from 47 real labeled job postings.
Each detected signal contributes to a total score that maps to Low / Medium / High risk.
- \U0001F7E2 **Low** (< 12): Healthy posting with clear structure and benefits
- \U0001F7E1 **Medium** (12-24): Some warning signs worth investigating
- \U0001F534 **High** (>= 25): Multiple concerning patterns
### Team \u2014 CS1 Group 14
- **Gaspard + Thomas** \u2014 UX Designers(HF Space, Gradio app, n8n workflow)
- **Adam** \u2014 Data Analyst (extraction, analysis, charts)
- **Sarah** \u2014 Project Manager (final report, coordination)
- **Adrien** \u2014 Content specialist (testing)
### Iterations
- **v1** \u2014 Keyword matching with hard-coded weights from labeled dataset
- **v2** \u2014 Refined keyword patterns after user testing
- **v3** \u2014 Integrated n8n workflow for smarter conversational responses
""")

# Launch only when run as a script, so importing the module (e.g. to reuse
# `demo`) does not start a server.
if __name__ == "__main__":
    demo.launch()