""" CS1 Group 14 â Job Description Risk Analyzer Built for Gradio 4.44 / Hugging Face Spaces """ import os import re import json from pathlib import Path from typing import Dict, List, Tuple import pandas as pd import gradio as gr import plotly.graph_objects as go import plotly.express as px # ========================================================= # CONFIG # ========================================================= BASE_DIR = Path(__file__).resolve().parent DATA_FILE = BASE_DIR / "job_description_data.xlsx" N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip() # ========================================================= # RED FLAG TAXONOMY # ========================================================= RED_FLAGS = [ ("high responsibility early", +10, ["full ownership", "lead the", "responsible for", "drive the", "own the", "manage the team", "take charge"]), ("high autonomy / ownership", +10, ["autonomous", "self-starter", "work independently", "minimal supervision", "own initiative"]), ("adaptability / flexibility demand", +8, ["flexible", "adaptable", "fast-paced", "changing priorities", "wear many hats"]), ("cross-functional / many stakeholders",+8, ["cross-functional", "multiple stakeholders", "various teams", "coordinate with", "liaise"]), ("customer-facing emotional labor", +6, ["customer-facing", "client-facing", "handle complaints", "difficult customers"]), ("technical complexity", +6, ["python", "sql", "machine learning", "api", "data pipeline", "advanced", "complex systems"]), ("on-site only / no remote", +5, ["on-site only", "no remote", "in-office", "fully on-site", "presence required"]), ("travel / mobility", +5, ["travel required", "mobility", "frequent travel", "willing to travel"]), ("pressure / deadlines", +5, ["tight deadlines", "high pressure", "fast deadlines", "demanding schedule"]), ("broad / unclear scope", +5, ["other duties", "as needed", "various tasks", "wide range of responsibilities"]), ("multitasking / many hats", +5, ["multitask", "juggle", "multiple roles"]), ("training / support provided", -8, ["training provided", "mentorship", "onboarding", "support and training", "we will train"]), ("salary clearly specified", -6, ["salary:", "compensation:", "annual salary", "monthly salary"]), ("clear role structure", -5, ["responsibilities include", "your missions", "main tasks", "key responsibilities"]), ("benefits clearly mentioned", -4, ["health insurance", "paid leave", "meal vouchers", "transport", "benefits include", "profit-sharing"]), ] # ========================================================= # DATA LOADING # ========================================================= def load_dataset(): if not DATA_FILE.exists(): return pd.DataFrame() try: return pd.read_excel(DATA_FILE) except Exception: return pd.DataFrame() DF = load_dataset() def extract_flag_labels(red_flags_cell): if not isinstance(red_flags_cell, str): return [] out = [] for part in re.split(r",\s*(?=[a-zA-Z])", red_flags_cell): m = re.match(r"(.+?)\s*\(([+-]\d+)\)", part.strip()) if m: out.append((m.group(1).strip(), int(m.group(2)))) return out # ========================================================= # CORE: ANALYZE # ========================================================= def classify_risk(score): if score < 12: return "Low", "đĸ" if score < 25: return "Medium", "đĄ" return "High", "đ´" def analyze_job(text): if not text or len(text.strip()) < 30: return "â ī¸ Please paste a real job description (at least 30 characters).", 0, "â", _empty_chart("Paste a job description above") lower = text.lower() detected = [] score = 0 for label, weight, patterns in RED_FLAGS: if any(p in lower for p in patterns): detected.append((label, weight)) score += weight risk, emoji = classify_risk(score) md = "## " + emoji + " Risk: **" + risk + "** | Score: **" + str(score) + "**\n\n" if not detected: md += "_No clear red or positive signals detected._" else: bad = [(l, w) for l, w in detected if w > 0] good = [(l, w) for l, w in detected if w < 0] if bad: md += "### đŠ Red flags detected\n" for l, w in bad: md += "- **" + l + "** `(+" + str(w) + ")`\n" if good: md += "\n### â Positive signals detected\n" for l, w in good: md += "- **" + l + "** `(" + str(w) + ")`\n" if detected: cdf = pd.DataFrame(detected, columns=["Signal", "Weight"]) cdf["Type"] = cdf["Weight"].apply(lambda w: "Red flag" if w > 0 else "Positive") fig = px.bar(cdf, x="Weight", y="Signal", color="Type", orientation="h", color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"}, title="Signal breakdown") fig.update_layout(**_styled_layout(height=420)) else: fig = _empty_chart("No signals to chart") return md, score, risk, fig # ========================================================= # CHARTS # ========================================================= def _styled_layout(**kwargs): defaults = dict( template="plotly_white", paper_bgcolor="#fdfaf3", plot_bgcolor="#fdfaf3", font=dict(family="system-ui, sans-serif", color="#1a2238", size=12), margin=dict(l=60, r=20, t=70, b=70), ) defaults.update(kwargs) return defaults def _empty_chart(title): fig = go.Figure() fig.update_layout( title=title, height=420, template="plotly_white", paper_bgcolor="#fdfaf3", plot_bgcolor="#fdfaf3", annotations=[dict(text="(no data)", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=14, color="#8a9099"))], ) return fig def build_flag_frequency_chart(): if DF.empty or "Red Flags" not in DF.columns: return _empty_chart("Dataset not loaded") all_flags = [] for cell in DF["Red Flags"].dropna(): all_flags.extend(label for label, _ in extract_flag_labels(str(cell))) counts = pd.Series(all_flags).value_counts().head(12) fig = go.Figure(go.Bar( y=counts.index[::-1], x=counts.values[::-1], orientation="h", marker=dict(color="#e85a4f"), )) fig.update_layout(**_styled_layout(height=460, title="Most Common Signals Across Analyzed Jobs")) return fig def build_risk_distribution_chart(): if DF.empty or "Risk Level" not in DF.columns: return _empty_chart("Dataset not loaded") counts = DF["Risk Level"].value_counts() colors_map = {"Low": "#2a9d8f", "Medium": "#e9a23b", "High": "#c53030"} fig = go.Figure(go.Pie( labels=counts.index, values=counts.values, marker=dict(colors=[colors_map.get(l, "#888") for l in counts.index]), hole=0.4, )) fig.update_layout(**_styled_layout(height=400, title="Risk Level Distribution")) return fig def build_score_distribution_chart(): if DF.empty or "Score" not in DF.columns: return _empty_chart("Dataset not loaded") scores = DF["Score"].dropna() fig = go.Figure(go.Histogram(x=scores, nbinsx=15, marker_color="#e85a4f")) fig.update_layout(**_styled_layout(height=380, title="Risk Score Distribution")) return fig # ========================================================= # KPI CARDS # ========================================================= def render_kpi_cards(): if DF.empty: return '