Spaces:

ESCP
/

CS1_Group_14

Paused

App Files Files Community

grasepard2 committed on May 1

Commit

30f1124

verified ·

1 Parent(s): 272e88d

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -299

app.py CHANGED Viewed

@@ -1,43 +1,28 @@
 import os
 import re
 import json
-import traceback
 from pathlib import Path
-from typing import Dict, Any, List, Tuple
 import pandas as pd
 import gradio as gr
 import plotly.graph_objects as go
 import plotly.express as px
-# Optional LLM (HuggingFace Inference API)
-try:
-    from huggingface_hub import InferenceClient
-except Exception:
-    InferenceClient = None
 # =========================================================
 # CONFIG
 # =========================================================
 BASE_DIR = Path(__file__).resolve().parent
 DATA_FILE = BASE_DIR / "job_description_data.xlsx"
-HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
-MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
-HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
 N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
-LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
-llm_client = (
-    InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
-    if LLM_ENABLED
-    else None
-)
 # =========================================================
-# RED FLAG TAXONOMY (extracted from labeled dataset)
-# Positive weights = red flags; negative weights = positive signals
 # =========================================================
 RED_FLAGS = [
@@ -53,19 +38,16 @@ RED_FLAGS = [
     ("broad / unclear scope",               +5, ["other duties", "as needed", "various tasks", "wide range of responsibilities"]),
     ("multitasking / many hats",            +5, ["multitask", "juggle", "multiple roles"]),
     ("training / support provided",         -8, ["training provided", "mentorship", "onboarding", "support and training", "we will train"]),
-    ("salary clearly specified",            -6, ["salary:", "€", "$", "compensation:", "annual salary", "monthly salary"]),
     ("clear role structure",                -5, ["responsibilities include", "your missions", "main tasks", "key responsibilities"]),
     ("benefits clearly mentioned",          -4, ["health insurance", "paid leave", "meal vouchers", "transport", "benefits include", "profit-sharing"]),
 ]
-CHART_PALETTE = ["#34d399", "#60a5fa", "#f472b6", "#fbbf24", "#a78bfa",
-                 "#22d3ee", "#fb7185", "#84cc16", "#f97316", "#e879f9"]
 # =========================================================
 # DATA LOADING
 # =========================================================
-def load_dataset() -> pd.DataFrame:
     if not DATA_FILE.exists():
         return pd.DataFrame()
     try:
@@ -73,12 +55,10 @@ def load_dataset() -> pd.DataFrame:
     except Exception:
         return pd.DataFrame()
 DF = load_dataset()
-def extract_flag_labels(red_flags_cell: str) -> List[Tuple[str, int]]:
-    """Parse 'label (+10), label2 (-5)' into [(label, weight)]."""
     if not isinstance(red_flags_cell, str):
         return []
     out = []
@@ -90,10 +70,10 @@ def extract_flag_labels(red_flags_cell: str) -> List[Tuple[str, int]]:
 # =========================================================
-# CORE: ANALYZE A JOB DESCRIPTION
 # =========================================================
-def classify_risk(score: float) -> Tuple[str, str]:
     if score < 12:
         return "Low", "🟢"
     if score < 25:
@@ -101,10 +81,9 @@ def classify_risk(score: float) -> Tuple[str, str]:
     return "High", "🔴"
-def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
     if not text or len(text.strip()) < 30:
-        return ("⚠️ Please paste a real job description (at least 30 characters).",
-                0, "—", _empty_chart("Paste a job description above"))
     lower = text.lower()
     detected = []
@@ -115,21 +94,20 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
             score += weight
     risk, emoji = classify_risk(score)
-    md = f"## {emoji} Risk: **{risk}** &nbsp;|&nbsp; Score: **{score}**\n\n"
     if not detected:
-        md += "_No clear red or positive signals detected. Description may be too short or vague._"
     else:
         bad = [(l, w) for l, w in detected if w > 0]
         good = [(l, w) for l, w in detected if w < 0]
         if bad:
             md += "### 🚩 Red flags detected\n"
             for l, w in bad:
-                md += f"- **{l}** `(+{w})`\n"
         if good:
             md += "\n### ✅ Positive signals detected\n"
             for l, w in good:
-                md += f"- **{l}** `({w})`\n"
     if detected:
         cdf = pd.DataFrame(detected, columns=["Signal", "Weight"])
@@ -137,7 +115,7 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
         fig = px.bar(cdf, x="Weight", y="Signal", color="Type", orientation="h",
                      color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"},
                      title="Signal breakdown")
-        fig.update_layout(**_styled_layout(height=420, showlegend=True))
     else:
         fig = _empty_chart("No signals to chart")
@@ -145,50 +123,33 @@ def analyze_job(text: str) -> Tuple[str, int, str, go.Figure]:
 # =========================================================
-# CHART STYLE HELPERS (preserved from template)
 # =========================================================
-def _styled_layout(**kwargs) -> dict:
     defaults = dict(
         template="plotly_white",
         paper_bgcolor="#fdfaf3",
         plot_bgcolor="#fdfaf3",
-        font=dict(family="Geist, system-ui, -apple-system, sans-serif", color="#1a2238", size=12),
         margin=dict(l=60, r=20, t=70, b=70),
-        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
-                    bgcolor="rgba(253,250,243,0.9)",
-                    bordercolor="#d9cfb9", borderwidth=1,
-                    font=dict(color="#4a5475", size=11)),
-        title=dict(font=dict(size=14, color="#1a2238", family="Geist, system-ui, sans-serif")),
-        xaxis=dict(gridcolor="#e6dcc7", zerolinecolor="#d9cfb9",
-                   tickfont=dict(color="#4a5475", size=11),
-                   title=dict(font=dict(color="#4a5475", size=12))),
-        yaxis=dict(gridcolor="#e6dcc7", zerolinecolor="#d9cfb9",
-                   tickfont=dict(color="#4a5475", size=11),
-                   title=dict(font=dict(color="#4a5475", size=12))),
     )
     defaults.update(kwargs)
     return defaults
def _empty_chart(title: str) -> go.Figure:
    """Return a trace-less, 420px-high figure titled *title* with a centred "(no data available)" note."""
    placeholder_note = dict(
        text="(no data available)",
        x=0.5, y=0.5, xref="paper", yref="paper",
        showarrow=False,
        font=dict(size=13, color="#8a9099"),
    )
    layout = dict(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="#fdfaf3",
        plot_bgcolor="#fdfaf3",
        font=dict(color="#1a2238", family="Geist, system-ui, sans-serif"),
        annotations=[placeholder_note],
    )
    figure = go.Figure()
    figure.update_layout(**layout)
    return figure
-# =========================================================
-# DATASET INSIGHTS (charts from labeled XLSX)
-# =========================================================
-def build_flag_frequency_chart() -> go.Figure:
     if DF.empty or "Red Flags" not in DF.columns:
         return _empty_chart("Dataset not loaded")
     all_flags = []
@@ -197,46 +158,32 @@ def build_flag_frequency_chart() -> go.Figure:
     counts = pd.Series(all_flags).value_counts().head(12)
     fig = go.Figure(go.Bar(
         y=counts.index[::-1], x=counts.values[::-1], orientation="h",
-        marker=dict(color=counts.values[::-1],
-                    colorscale=[[0, "#f4b8b1"], [1, "#e85a4f"]]),
-        hovertemplate="<b>%{y}</b><br>Detected in %{x} jobs<extra></extra>",
     ))
-    fig.update_layout(**_styled_layout(
-        height=460, title=dict(text="Most Common Signals Across 47 Analyzed Jobs"),
-        showlegend=False))
-    fig.update_xaxes(title="Number of postings")
     return fig
def build_risk_distribution_chart() -> go.Figure:
    """Draw a donut chart of how many dataset rows fall in each ``Risk Level``.

    Returns the "Dataset not loaded" placeholder when ``DF`` is empty or has
    no ``Risk Level`` column.
    """
    has_risk_data = not DF.empty and "Risk Level" in DF.columns
    if not has_risk_data:
        return _empty_chart("Dataset not loaded")

    level_counts = DF["Risk Level"].value_counts()
    palette = {"Low": "#2a9d8f", "Medium": "#e9a23b", "High": "#c53030"}
    # Levels missing from the palette fall back to neutral grey.
    slice_colors = [palette.get(level, "#888") for level in level_counts.index]
    donut = go.Pie(
        labels=level_counts.index,
        values=level_counts.values,
        marker=dict(colors=slice_colors),
        hole=0.4,
        textinfo="label+percent",
    )
    layout = _styled_layout(height=400, title=dict(text="Risk Level Distribution in Dataset"))
    figure = go.Figure(donut)
    figure.update_layout(**layout)
    return figure
def build_score_distribution_chart() -> go.Figure:
    """Draw a histogram of the non-null values in the dataset's ``Score`` column.

    Returns the "Dataset not loaded" placeholder when ``DF`` is empty or has
    no ``Score`` column.
    """
    has_scores = not DF.empty and "Score" in DF.columns
    if not has_scores:
        return _empty_chart("Dataset not loaded")

    valid_scores = DF["Score"].dropna()
    histogram = go.Histogram(
        x=valid_scores,
        nbinsx=15,
        marker_color="#e85a4f",
        marker_line_color="#c53030",
        marker_line_width=1,
        hovertemplate="Score range: %{x}<br>Jobs: %{y}<extra></extra>",
    )
    layout = _styled_layout(height=380, title=dict(text="Risk Score Distribution"), bargap=0.05)
    figure = go.Figure(histogram)
    figure.update_layout(**layout)
    figure.update_xaxes(title="Risk score")
    figure.update_yaxes(title="Number of jobs")
    return figure
@@ -244,17 +191,9 @@ def build_score_distribution_chart() -> go.Figure:
 # KPI CARDS
 # =========================================================
-def render_kpi_cards() -> str:
     if DF.empty:
-        return ('<div style="background:#fdfaf3;padding:32px;text-align:center;'
-                'border-radius:12px;border:1px solid #d9cfb9;">'
-                '<div style="font-family:\'Geist Mono\',monospace;font-size:11px;'
-                'color:#e85a4f;letter-spacing:0.08em;text-transform:uppercase;margin-bottom:12px;font-weight:600;">No Data</div>'
-                '<div style="color:#4a5475;font-size:14px;">'
-                'Upload <code style="background:#f1ebe0;color:#7d4e8a;padding:2px 6px;border-radius:4px;'
-                'font-family:\'Geist Mono\',monospace;font-size:0.85em;border:1px solid #e6dcc7;">'
-                'job_description_data.xlsx</code> to populate metrics.'
-                '</div></div>')
     total_jobs = len(DF)
     avg_score = DF["Score"].dropna().mean() if "Score" in DF.columns else 0
@@ -267,167 +206,99 @@ def render_kpi_cards() -> str:
             all_flags.extend(label for label, _ in extract_flag_labels(str(cell)))
     top_flag = pd.Series(all_flags).value_counts().index[0] if all_flags else "—"
-    def card(label, value, delta_text, accent_color="#e85a4f"):
-        return f"""
-        <div style="background:#fdfaf3;border:1px solid #d9cfb9;border-radius:12px;
-                    padding:20px 22px;position:relative;overflow:hidden;
-                    box-shadow:0 1px 0 rgba(255,255,255,0.7) inset, 0 2px 8px rgba(26, 34, 56, 0.04);
-                    transition:border-color 0.15s, transform 0.15s;">
-            <div style="font-family:'Geist Mono','SF Mono',monospace;
-                        color:{accent_color};font-size:11px;font-weight:600;
-                        text-transform:uppercase;letter-spacing:0.08em;margin-bottom:14px;">
-                {label}
-            </div>
-            <div style="color:#1a2238;font-size:34px;font-weight:700;line-height:1;
-                        letter-spacing:-0.03em;margin-bottom:10px;
-                        font-family:'Geist',-apple-system,system-ui,sans-serif;">
-                {value}
-            </div>
-            <div style="display:flex;align-items:center;gap:6px;
-                        font-family:'Geist Mono',monospace;font-size:11px;color:#4a5475;">
-                <span style="display:inline-block;width:6px;height:6px;border-radius:50%;
-                             background:{accent_color};box-shadow:0 0 8px {accent_color}80;"></span>
-                <span>{delta_text}</span>
-            </div>
-        </div>"""
     cards = [
-        card("Total.Jobs",      f"{total_jobs}",        "real labeled postings",   "#e85a4f"),
-        card("Avg.Score",       f"{avg_score:.1f}",     "weighted across dataset", "#2a9d8f"),
-        card("High.Risk %",     f"{high_pct:.0f}%",     f"{risk_counts.get('High', 0)} postings flagged", "#c53030"),
-        card("Top.Signal",      top_flag.split(' ')[0].title() if top_flag != "—" else "—",
              top_flag if top_flag != "—" else "no data", "#7d4e8a"),
     ]
     return ('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));'
-            'gap:12px;margin-bottom:32px;">' + "".join(cards) + "</div>")
 # =========================================================
-# AI CHAT (n8n > LLM > keyword fallback)
 # =========================================================
-DASHBOARD_SYSTEM = """You are an AI assistant for a job description risk analyzer app.
-You help users understand patterns in job postings — red flags, risk levels, common warning signs.
-DATASET CONTEXT:
-- 47 real job postings labeled by humans
-- Each scored on 15 weighted signals (positive = red flag, negative = good signal)
-- Risk levels: Low (<12), Medium (12-24), High (>=25)
-- Top categories: high responsibility early, technical complexity, autonomy demands
-YOUR JOB:
-Answer the user's question conversationally in 2-4 sentences. At the END, output a JSON block:
-```json
-{"show": "flag_frequency"|"risk_distribution"|"score_distribution"|"none"}
-```
-Pick the chart most relevant to their question, or "none" if no chart fits.
-"""
-JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
-def _parse_directive(text: str) -> Dict[str, str]:
-    m = JSON_BLOCK_RE.search(text)
-    if m:
-        try:
-            return json.loads(m.group(1))
-        except json.JSONDecodeError:
-            pass
-    return {"show": "none"}
def _clean_response(text: str) -> str:
    """Return *text* with every fenced JSON directive block removed and outer whitespace trimmed."""
    without_directive = JSON_BLOCK_RE.sub("", text)
    return without_directive.strip()
def _n8n_call(msg: str) -> Tuple[str, Dict]:
    """Send *msg* to the n8n webhook and return its answer and chart directive.

    Args:
        msg: The user's question, posted as ``{"question": msg}``.

    Returns:
        ``(answer, {"show": chart})`` on success. On any failure (transport
        error, HTTP error status, non-JSON or non-object body, ``requests``
        not installed) returns ``("n8n error: ...", None)``; the ``None``
        directive tells the caller to use its fallback.
    """
    try:
        # Imported inside the try so a missing dependency is reported like any
        # other n8n failure instead of crashing the chat handler.
        import requests as req
        resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
        # Without this, a 4xx/5xx response with a JSON error body would be
        # treated as a normal answer and the fallback would never run.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError("response body is not a JSON object")
        answer = data.get("answer", "No response from n8n workflow.")
        chart = data.get("chart", "none")
        # The directive value is later used as a dict key; keep it a string.
        return answer, {"show": chart if isinstance(chart, str) else "none"}
    except Exception as e:
        return f"n8n error: {e}", None
-def _keyword_fallback(msg: str) -> Tuple[str, Dict]:
-    m = msg.lower()
-    if any(w in m for w in ["common", "frequent", "most", "top flag", "patterns"]):
-        return ("The most common signals across our 47 analyzed postings are below. "
-                "Notice how 'high responsibility early', 'technical complexity', and "
-                "'clear role structure' dominate — they appear in nearly every posting.",
-                {"show": "flag_frequency"})
-    if any(w in m for w in ["risk", "distribution", "level", "low", "medium", "high"]):
-        return ("Here is the risk-level breakdown across our dataset. "
-                "Most jobs land in the Medium tier; a smaller share are flagged High.",
-                {"show": "risk_distribution"})
-    if any(w in m for w in ["score", "histogram", "spread", "average"]):
-        return ("Risk scores cluster mostly between 10 and 30. "
-                "Anything above 25 is classified as High-risk.",
-                {"show": "score_distribution"})
-    if any(w in m for w in ["how", "work", "method", "explain"]):
-        return ("The app detects 15 weighted signals in any pasted job description. "
-                "Red flags add to the score (e.g. +10 for 'high responsibility early'), "
-                "positive signals subtract (e.g. -8 for 'training provided'). "
-                "The total maps to Low / Medium / High risk.",
-                {"show": "none"})
-    return ("Try asking about: **most common red flags**, **risk distribution**, "
-            "**score spread**, or **how the analyzer works**.",
-            {"show": "none"})
def ai_chat(user_msg: str, history: list):
    """Answer a dashboard question and pick an accompanying chart.

    Backend priority: the n8n webhook when ``N8N_WEBHOOK_URL`` is set, else
    the hosted LLM when ``LLM_ENABLED``, else keyword matching. When the n8n
    or LLM call fails, the keyword answer is appended to the error text.

    Args:
        user_msg: Text submitted from the chat textbox.
        history: Current chatbot value. The Chatbot wired to this handler is
            created with ``type="messages"``, so turns are ``role``/``content``
            dicts; legacy ``(user, bot)`` pairs are still accepted.

    Returns:
        ``(history, "", chart)``: the updated conversation, an empty string
        that clears the textbox, and a Plotly figure or ``None``.
    """
    def past_messages(turns):
        """Flatten stored turns into chat-completion role/content dicts."""
        flat = []
        for turn in turns:
            if isinstance(turn, dict):
                # Skip non-text content and roles the chat API does not expect.
                if turn.get("role") in ("user", "assistant") and isinstance(turn.get("content"), str):
                    flat.append({"role": turn["role"], "content": turn["content"]})
            elif isinstance(turn, (list, tuple)) and len(turn) == 2:
                flat.append({"role": "user", "content": turn[0]})
                flat.append({"role": "assistant", "content": turn[1]})
        return flat

    # Copy so the caller's list is never mutated.
    history = list(history or [])
    if not user_msg or not user_msg.strip():
        return history, "", None

    # Reply in the format the history already uses; an empty history gets
    # role/content dicts because that is what a type="messages" Chatbot needs.
    uses_pairs = bool(history) and isinstance(history[0], (list, tuple))

    if N8N_WEBHOOK_URL:
        reply, directive = _n8n_call(user_msg)
        if directive is None:
            reply_fb, directive = _keyword_fallback(user_msg)
            reply += "\n\n" + reply_fb
    elif LLM_ENABLED:
        msgs = [{"role": "system", "content": DASHBOARD_SYSTEM}]
        # Last three exchanges = up to six role/content messages.
        msgs.extend(past_messages(history)[-6:])
        msgs.append({"role": "user", "content": user_msg})
        try:
            r = llm_client.chat_completion(model=MODEL_NAME, messages=msgs,
                                           temperature=0.3, max_tokens=500, stream=False)
            raw = (r["choices"][0]["message"]["content"]
                   if isinstance(r, dict) else r.choices[0].message.content)
            directive = _parse_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply = f"LLM error: {e}"
            reply_fb, directive = _keyword_fallback(user_msg)
            reply += "\n\n" + reply_fb
    else:
        reply, directive = _keyword_fallback(user_msg)

    chart_builders = {
        "flag_frequency": build_flag_frequency_chart,
        "risk_distribution": build_risk_distribution_chart,
        "score_distribution": build_score_distribution_chart,
    }
    # Backends control this value; a non-string must not reach the dict lookup.
    show = directive.get("show")
    build_chart = chart_builders.get(show) if isinstance(show, str) else None
    chart_out = build_chart() if build_chart is not None else None

    if uses_pairs:
        history.append((user_msg, reply))
    else:
        history.append({"role": "user", "content": user_msg})
        history.append({"role": "assistant", "content": reply})
    return history, "", chart_out
 # =========================================================
-# UI
 # =========================================================
def load_css() -> str:
    """Return the text of ``style.css`` located in ``BASE_DIR``, or ``""`` when that file does not exist."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
-with gr.Blocks(title="Job Risk Analyzer — CS1 Group 14", css=load_css()) as demo:
     gr.Markdown(
         "# Job Risk Analyzer\n"
@@ -436,17 +307,14 @@ with gr.Blocks(title="Job Risk Analyzer — CS1 Group 14", css=load_css()) as de
         elem_id="escp_title",
     )
-    # ===========================================================
-    # TAB 1 -- Live analyzer (the main feature)
-    # ===========================================================
     with gr.Tab("🔍 Analyze a Job"):
-        gr.Markdown("Paste any job description below to detect red flags and estimate hidden risk.")
         with gr.Row():
-            with gr.Column(scale=1):
-                inp = gr.Textbox(label="Job description", lines=18,
                                  placeholder="Paste the full job posting here...")
-                btn = gr.Button("Analyze", variant="primary", size="lg")
-            with gr.Column(scale=1):
                 out_md = gr.Markdown()
                 with gr.Row():
                     out_score = gr.Number(label="Score", precision=0)
@@ -454,46 +322,29 @@ with gr.Blocks(title="Job Risk Analyzer — CS1 Group 14", css=load_css()) as de
                 out_chart = gr.Plot(label="Signal breakdown")
         btn.click(analyze_job, inputs=[inp], outputs=[out_md, out_score, out_risk, out_chart])
-    # ===========================================================
-    # TAB 2 -- Dataset Dashboard
-    # ===========================================================
     with gr.Tab("📊 Dataset Dashboard"):
-        kpi_html = gr.HTML(value=render_kpi_cards)
-        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
-        gr.Markdown("#### Insights from 47 labeled job postings")
-        chart_freq = gr.Plot(label="Most common signals", value=build_flag_frequency_chart)
         with gr.Row():
-            chart_risk = gr.Plot(label="Risk distribution", value=build_risk_distribution_chart)
-            chart_score = gr.Plot(label="Score distribution", value=build_score_distribution_chart)
-        gr.Markdown("#### Raw labeled dataset")
         if not DF.empty:
             display_cols = [c for c in ["Job title", "company", "Score", "Risk Level"] if c in DF.columns]
-            gr.Dataframe(DF[display_cols], wrap=True, interactive=False)
-        def _on_refresh():
-            return (render_kpi_cards(), build_flag_frequency_chart(),
-                    build_risk_distribution_chart(), build_score_distribution_chart())
-        refresh_btn.click(_on_refresh,
-                          outputs=[kpi_html, chart_freq, chart_risk, chart_score])
-    # ===========================================================
-    # TAB 3 -- AI Dashboard
-    # ===========================================================
     with gr.Tab('"AI" Dashboard'):
-        _status = ("Connected to **n8n workflow**." if N8N_WEBHOOK_URL
-                   else "**LLM active.**" if LLM_ENABLED
-                   else "Using **keyword matching**. Set `N8N_WEBHOOK_URL` or `HF_API_KEY` in Space settings to upgrade.")
-        gr.Markdown(f"### Ask questions, get visualizations\n\n{_status}")
-        with gr.Row(equal_height=True):
-            with gr.Column(scale=1):
-                chatbot = gr.Chatbot(label="Conversation", height=380, type="messages")
                 user_input = gr.Textbox(label="Ask about the dataset",
-                                        placeholder="e.g. What are the most common red flags?",
-                                        lines=1)
                 gr.Examples(
                     examples=[
                         "What are the most common red flags?",
@@ -503,44 +354,35 @@ with gr.Blocks(title="Job Risk Analyzer — CS1 Group 14", css=load_css()) as de
                     ],
                     inputs=user_input,
                 )
-            with gr.Column(scale=1):
                 ai_chart = gr.Plot(label="Visualization")
         user_input.submit(ai_chat, inputs=[user_input, chatbot],
                           outputs=[chatbot, user_input, ai_chart])
-    # ===========================================================
-    # TAB 4 -- About / Iterations
-    # ===========================================================
     with gr.Tab("ℹ️ About"):
         gr.Markdown("""
-        ### How it works
-        This app uses a **weighted red-flag taxonomy** built from analyzing 47 real job postings.
-        Each detected signal contributes to a total score; the score determines risk level.
-        - 🟢 **Low** (< 12): Healthy posting with clear structure and benefits
-        - 🟡 **Medium** (12–24): Some warning signs worth investigating
-        - 🔴 **High** (≥ 25): Multiple concerning patterns — proceed with caution
-        ### Team — CS1 Group 14
-        - **Gaspard** — Technical Lead (Hugging Face Space + Gradio app)
-        - **Person 3** — Data Analysis & Insights
-        - **Person 4** — Testing & Iterations
-        - **Person 5** — Report & Coordination
-        ### Iterations
-        - **v1** — Keyword matching with hard-coded weights from labeled dataset
-        - **v2** — _(to be filled by Person 4 after testing)_
-        - **v3** — _(future: integrate LLM for semantic detection beyond keywords)_
-        ### Data source
-        47 real job postings (mostly French market) manually labeled by the team
-        with 15 weighted signal categories.
-        """)
-demo.queue().launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    allowed_paths=[str(BASE_DIR)]
-)

+"""
+CS1 Group 14 — Job Description Risk Analyzer
+Built for Gradio 4.44 / Hugging Face Spaces
+"""
 import os
 import re
 import json
 from pathlib import Path
+from typing import Dict, List, Tuple
 import pandas as pd
 import gradio as gr
 import plotly.graph_objects as go
 import plotly.express as px
 # =========================================================
 # CONFIG
 # =========================================================
 BASE_DIR = Path(__file__).resolve().parent
 DATA_FILE = BASE_DIR / "job_description_data.xlsx"
 N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
 # =========================================================
+# RED FLAG TAXONOMY
 # =========================================================
 RED_FLAGS = [
     ("broad / unclear scope",               +5, ["other duties", "as needed", "various tasks", "wide range of responsibilities"]),
     ("multitasking / many hats",            +5, ["multitask", "juggle", "multiple roles"]),
     ("training / support provided",         -8, ["training provided", "mentorship", "onboarding", "support and training", "we will train"]),
+    ("salary clearly specified",            -6, ["salary:", "compensation:", "annual salary", "monthly salary"]),
     ("clear role structure",                -5, ["responsibilities include", "your missions", "main tasks", "key responsibilities"]),
     ("benefits clearly mentioned",          -4, ["health insurance", "paid leave", "meal vouchers", "transport", "benefits include", "profit-sharing"]),
 ]
 # =========================================================
 # DATA LOADING
 # =========================================================
+def load_dataset():
     if not DATA_FILE.exists():
         return pd.DataFrame()
     try:
     except Exception:
         return pd.DataFrame()
 DF = load_dataset()
+def extract_flag_labels(red_flags_cell):
     if not isinstance(red_flags_cell, str):
         return []
     out = []
 # =========================================================
+# CORE: ANALYZE
 # =========================================================
+def classify_risk(score):
     if score < 12:
         return "Low", "🟢"
     if score < 25:
     return "High", "🔴"
+def analyze_job(text):
     if not text or len(text.strip()) < 30:
+        return "⚠️ Please paste a real job description (at least 30 characters).", 0, "—", _empty_chart("Paste a job description above")
     lower = text.lower()
     detected = []
             score += weight
     risk, emoji = classify_risk(score)
+    md = "## " + emoji + " Risk: **" + risk + "** | Score: **" + str(score) + "**\n\n"
     if not detected:
+        md += "_No clear red or positive signals detected._"
     else:
         bad = [(l, w) for l, w in detected if w > 0]
         good = [(l, w) for l, w in detected if w < 0]
         if bad:
             md += "### 🚩 Red flags detected\n"
             for l, w in bad:
+                md += "- **" + l + "** `(+" + str(w) + ")`\n"
         if good:
             md += "\n### ✅ Positive signals detected\n"
             for l, w in good:
+                md += "- **" + l + "** `(" + str(w) + ")`\n"
     if detected:
         cdf = pd.DataFrame(detected, columns=["Signal", "Weight"])
         fig = px.bar(cdf, x="Weight", y="Signal", color="Type", orientation="h",
                      color_discrete_map={"Red flag": "#c53030", "Positive": "#2f855a"},
                      title="Signal breakdown")
+        fig.update_layout(**_styled_layout(height=420))
     else:
         fig = _empty_chart("No signals to chart")
 # =========================================================
+# CHARTS
 # =========================================================
+def _styled_layout(**kwargs):
     defaults = dict(
         template="plotly_white",
         paper_bgcolor="#fdfaf3",
         plot_bgcolor="#fdfaf3",
+        font=dict(family="system-ui, sans-serif", color="#1a2238", size=12),
         margin=dict(l=60, r=20, t=70, b=70),
     )
     defaults.update(kwargs)
     return defaults
def _empty_chart(title):
    """Return a trace-less, 420px-high figure titled *title* with a centred "(no data)" note."""
    placeholder_note = dict(
        text="(no data)",
        x=0.5, y=0.5, xref="paper", yref="paper",
        showarrow=False,
        font=dict(size=14, color="#8a9099"),
    )
    layout = dict(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="#fdfaf3",
        plot_bgcolor="#fdfaf3",
        annotations=[placeholder_note],
    )
    figure = go.Figure()
    figure.update_layout(**layout)
    return figure
+def build_flag_frequency_chart():
     if DF.empty or "Red Flags" not in DF.columns:
         return _empty_chart("Dataset not loaded")
     all_flags = []
     counts = pd.Series(all_flags).value_counts().head(12)
     fig = go.Figure(go.Bar(
         y=counts.index[::-1], x=counts.values[::-1], orientation="h",
+        marker=dict(color="#e85a4f"),
     ))
+    fig.update_layout(**_styled_layout(height=460, title="Most Common Signals Across Analyzed Jobs"))
     return fig
def build_risk_distribution_chart():
    """Draw a donut chart of how many dataset rows fall in each ``Risk Level``.

    Returns the "Dataset not loaded" placeholder when ``DF`` is empty or has
    no ``Risk Level`` column.
    """
    has_risk_data = not DF.empty and "Risk Level" in DF.columns
    if not has_risk_data:
        return _empty_chart("Dataset not loaded")

    level_counts = DF["Risk Level"].value_counts()
    palette = {"Low": "#2a9d8f", "Medium": "#e9a23b", "High": "#c53030"}
    # Levels missing from the palette fall back to neutral grey.
    slice_colors = [palette.get(level, "#888") for level in level_counts.index]
    donut = go.Pie(
        labels=level_counts.index,
        values=level_counts.values,
        marker=dict(colors=slice_colors),
        hole=0.4,
    )
    figure = go.Figure(donut)
    figure.update_layout(**_styled_layout(height=400, title="Risk Level Distribution"))
    return figure
def build_score_distribution_chart():
    """Draw a histogram of the non-null values in the dataset's ``Score`` column.

    Returns the "Dataset not loaded" placeholder when ``DF`` is empty or has
    no ``Score`` column.
    """
    has_scores = not DF.empty and "Score" in DF.columns
    if not has_scores:
        return _empty_chart("Dataset not loaded")

    valid_scores = DF["Score"].dropna()
    histogram = go.Histogram(x=valid_scores, nbinsx=15, marker_color="#e85a4f")
    layout = _styled_layout(height=380, title="Risk Score Distribution")
    figure = go.Figure(histogram)
    figure.update_layout(**layout)
    return figure
 # KPI CARDS
 # =========================================================
+def render_kpi_cards():
     if DF.empty:
+        return '<div style="background:#fdfaf3;padding:32px;text-align:center;border-radius:12px;border:1px solid #d9cfb9;color:#4a5475;">No dataset loaded.</div>'
     total_jobs = len(DF)
     avg_score = DF["Score"].dropna().mean() if "Score" in DF.columns else 0
             all_flags.extend(label for label, _ in extract_flag_labels(str(cell)))
     top_flag = pd.Series(all_flags).value_counts().index[0] if all_flags else "—"
+    def card(label, value, sub, color):
+        return (
+            '<div style="background:#fdfaf3;border:1px solid #d9cfb9;border-radius:12px;'
+            'padding:20px 22px;box-shadow:0 2px 8px rgba(26,34,56,0.04);">'
+            '<div style="font-family:monospace;color:' + color + ';font-size:11px;font-weight:600;'
+            'text-transform:uppercase;letter-spacing:0.08em;margin-bottom:14px;">' + label + '</div>'
+            '<div style="color:#1a2238;font-size:34px;font-weight:700;line-height:1;'
+            'letter-spacing:-0.03em;margin-bottom:10px;">' + str(value) + '</div>'
+            '<div style="font-family:monospace;font-size:11px;color:#4a5475;">' + sub + '</div>'
+            '</div>'
+        )
     cards = [
+        card("Total.Jobs", total_jobs, "real labeled postings", "#e85a4f"),
+        card("Avg.Score", str(round(avg_score, 1)), "weighted across dataset", "#2a9d8f"),
+        card("High.Risk %", str(round(high_pct)) + "%", str(risk_counts.get("High", 0)) + " postings flagged", "#c53030"),
+        card("Top.Signal", top_flag.split(' ')[0].title() if top_flag != "—" else "—",
              top_flag if top_flag != "—" else "no data", "#7d4e8a"),
     ]
     return ('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(220px,1fr));'
+            'gap:12px;margin-bottom:24px;">' + "".join(cards) + '</div>')
 # =========================================================
+# CHAT (n8n -> keyword fallback)
 # =========================================================
def keyword_fallback(msg):
    """Answer a dashboard question by case-insensitive substring matching.

    Rules are tried in order and the first rule with any keyword contained in
    the lowercased message wins.

    Returns:
        ``(reply, chart_key)``; ``chart_key`` is ``"none"`` when no chart
        accompanies the reply.
    """
    rules = (
        (
            ("common", "frequent", "most", "top"),
            "The most common signals in our dataset are 'high responsibility early', "
            "'technical complexity', and 'clear role structure'. These appear in over 60% of postings.",
            "flag_frequency",
        ),
        (
            ("risk", "distribution", "level"),
            "Most jobs land in the Medium risk tier (scores 12-24). High-risk postings combine "
            "multiple red flags like vague scope, on-site-only, and missing salary information.",
            "risk_distribution",
        ),
        (
            ("score", "histogram", "spread"),
            "Risk scores cluster between 10-25 in our dataset. Anything above 25 signals "
            "a problematic posting.",
            "score_distribution",
        ),
        (
            ("how", "work", "explain", "method"),
            "The analyzer scans for 15 weighted signal categories. Red flags add to the score, "
            "positive signals subtract. The total maps to Low/Medium/High risk.",
            "none",
        ),
    )
    lowered = msg.lower()
    for keywords, reply, chart_key in rules:
        for keyword in keywords:
            if keyword in lowered:
                return reply, chart_key
    return "Try asking: most common red flags, risk distribution, score spread, or how it works.", "none"
def call_n8n(msg):
    """Forward a question to the n8n webhook and return its answer.

    Posts ``{"question": msg}`` as JSON to ``N8N_WEBHOOK_URL`` with a
    15-second timeout and reads the ``answer`` and ``chart`` fields from the
    JSON reply. A transport error, a non-2xx status, a body that is not JSON,
    or a JSON payload that is not an object all fall back to
    ``keyword_fallback`` so the chat keeps working.

    Args:
        msg: The user's question.

    Returns:
        A ``(reply, chart_key)`` tuple. ``reply`` is always a string and
        ``chart_key`` is always a string (``"none"`` when the webhook sent
        no usable chart name).
    """
    import requests  # kept function-local, as in the original block

    try:
        response = requests.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=15)
        # Without this, an error page carrying JSON would be treated as an answer.
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, dict):
            raise ValueError("n8n response is not a JSON object")
    except (requests.RequestException, ValueError):
        # ValueError also covers JSON decoding failures on older requests versions.
        fb_text, fb_chart = keyword_fallback(msg)
        return "(n8n unavailable, using local logic)\n\n" + fb_text, fb_chart

    # `or` also replaces an explicit JSON null / empty string, not just a missing key.
    answer = payload.get("answer") or "n8n returned no answer."
    chart = payload.get("chart")
    if not isinstance(chart, str):
        # A list/dict/null here would otherwise break the chart lookup downstream.
        chart = "none"
    return str(answer), chart
def ai_chat(user_msg, history):
    """Handle one chat turn: produce a reply and, when relevant, a chart.

    The question goes to the n8n webhook when ``N8N_WEBHOOK_URL`` is set and
    to the local keyword matcher otherwise. The chart key returned by either
    backend selects one of the dashboard chart builders.

    Args:
        user_msg: Text typed by the user.
        history: Existing conversation as a list of ``(user, bot)`` pairs,
            or a falsy value when the conversation is empty.

    Returns:
        A 3-tuple ``(history, textbox_value, chart)``: the conversation with
        the new turn appended, an empty string for the input box, and the
        built chart or ``None``. A blank message returns the history
        unchanged with no chart.
    """
    past_turns = history or []
    if not (user_msg and user_msg.strip()):
        return past_turns, "", None

    answer_with = call_n8n if N8N_WEBHOOK_URL else keyword_fallback
    reply, chart_key = answer_with(user_msg)

    builders = {
        "flag_frequency": build_flag_frequency_chart,
        "risk_distribution": build_risk_distribution_chart,
        "score_distribution": build_score_distribution_chart,
    }
    if chart_key in builders:
        chart_out = builders[chart_key]()
    else:
        chart_out = None

    return past_turns + [(user_msg, reply)], "", chart_out
 # =========================================================
+# CSS LOADER
 # =========================================================
def load_css():
    """Return the text of ``style.css`` in ``BASE_DIR``, or "" if it cannot be read.

    The file is read directly instead of being checked with ``exists()``
    first, so a file that disappears between check and read, a directory
    named ``style.css``, or a permissions problem all result in an unstyled
    app rather than a crash at import time.

    Returns:
        The stylesheet contents decoded as UTF-8, or an empty string.
    """
    css_path = BASE_DIR / "style.css"
    try:
        return css_path.read_text(encoding="utf-8")
    except OSError:
        # Missing, not a regular file, or unreadable: fall back to no custom CSS.
        return ""
+# =========================================================
+# UI
+# =========================================================
 # Build the Gradio UI at import time: four tabs (analyze, dashboard, chat, about).
 # The stylesheet text is loaded once here and passed to gr.Blocks below.
+CSS = load_css()
+with gr.Blocks(title="Job Risk Analyzer", css=CSS) as demo:
     # NOTE(review): this listing looks like a diff view with unchanged lines collapsed.
     # The gr.Markdown(...) call below seems to be missing lines between the title string
     # and elem_id, and out_risk (used in btn.click) is not created in the visible lines.
     # Confirm against the full app.py before editing this section.
     gr.Markdown(
         "# Job Risk Analyzer\n"
         elem_id="escp_title",
     )
     # Tab 1: one row with an input column (textbox + button) and a results column.
     with gr.Tab("🔍 Analyze a Job"):
+        gr.Markdown("Paste any job description below to detect red flags and estimate risk.")
         with gr.Row():
+            with gr.Column():
+                inp = gr.Textbox(label="Job description", lines=15,
                                  placeholder="Paste the full job posting here...")
+                btn = gr.Button("Analyze", variant="primary")
+            with gr.Column():
                 out_md = gr.Markdown()
                 with gr.Row():
                     out_score = gr.Number(label="Score", precision=0)
                 out_chart = gr.Plot(label="Signal breakdown")
         # analyze_job receives the textbox text; its return values fill the four
         # output components in the order listed.
         btn.click(analyze_job, inputs=[inp], outputs=[out_md, out_score, out_risk, out_chart])
     # Tab 2: KPI cards and charts are built once, while the UI is being constructed.
     with gr.Tab("📊 Dataset Dashboard"):
+        gr.HTML(value=render_kpi_cards())
+        gr.Markdown("### Insights from labeled job postings")
+        gr.Plot(value=build_flag_frequency_chart(), label="Most common signals")
         with gr.Row():
+            gr.Plot(value=build_risk_distribution_chart(), label="Risk distribution")
+            gr.Plot(value=build_score_distribution_chart(), label="Score distribution")
         # Show the raw table only when DF has rows, restricted to the columns present.
         if not DF.empty:
             display_cols = [c for c in ["Job title", "company", "Score", "Risk Level"] if c in DF.columns]
+            if display_cols:
+                gr.Markdown("### Raw labeled dataset")
+                gr.Dataframe(DF[display_cols], wrap=True, interactive=False)
     # Tab 3: chat about the dataset; the status line says which backend answers.
     with gr.Tab('"AI" Dashboard'):
+        status = ("Connected to **n8n workflow**." if N8N_WEBHOOK_URL
+                  else "Using **keyword matching** (set `N8N_WEBHOOK_URL` to upgrade).")
+        gr.Markdown("### Ask questions, get visualizations\n\n" + status)
+        with gr.Row():
+            with gr.Column():
+                chatbot = gr.Chatbot(label="Conversation", height=380)
                 user_input = gr.Textbox(label="Ask about the dataset",
+                                        placeholder="e.g. What are the most common red flags?")
                 gr.Examples(
                     examples=[
                         "What are the most common red flags?",
                     ],
                     inputs=user_input,
                 )
+            with gr.Column():
                 ai_chart = gr.Plot(label="Visualization")
         # Submitting the textbox runs ai_chat, which returns the updated history,
         # an empty string for the textbox, and a chart (or None).
         user_input.submit(ai_chat, inputs=[user_input, chatbot],
                           outputs=[chatbot, user_input, ai_chart])
     # Tab 4: static description of the method, team and iterations.
     with gr.Tab("ℹ️ About"):
         gr.Markdown("""
+### How it works
+This app uses a **weighted red-flag taxonomy** built from 47 real labeled job postings.
+Each detected signal contributes to a total score that maps to Low / Medium / High risk.
+- 🟢 **Low** (< 12): Healthy posting with clear structure and benefits
+- 🟡 **Medium** (12–24): Some warning signs worth investigating
+- 🔴 **High** (≥ 25): Multiple concerning patterns
+### Team — CS1 Group 14
+- **Gaspard** — UX Designer + Content Specialist (HF Space, Gradio app, n8n workflow, testing)
+- **Person 3** — Data Analyst (extraction, analysis, charts)
+- **Person 4** — Project Manager (final report, coordination)
+### Iterations
+- **v1** — Keyword matching with hard-coded weights from labeled dataset
+- **v2** — Refined keyword patterns after user testing
+- **v3** — Integrated n8n workflow for smarter conversational responses
+        """)
 # When run as a script, serve on all network interfaces at port 7860.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)