| import os |
| import re |
| import json |
| import time |
| import traceback |
| from pathlib import Path |
| from typing import Dict, Any, List, Tuple |
|
|
| import pandas as pd |
| import gradio as gr |
| import papermill as pm |
| import plotly.graph_objects as go |
|
|
| |
# Optional dependency: when the Hugging Face client cannot be imported the
# name is bound to None and the LLM path below is disabled.
try:
    from huggingface_hub import InferenceClient
except Exception:
    InferenceClient = None

# Directory that contains this file; every other path is derived from it.
BASE_DIR = Path(__file__).resolve().parent

# Notebook file names, overridable through the environment.
NB1 = os.getenv("NB1", "datacreation.ipynb").strip()
NB2 = os.getenv("NB2", "pythonanalysis.ipynb").strip()

# Output locations for executed notebooks and the artifacts they produce.
RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
PY_FIG_DIR = ART_DIR / "py" / "figures"
PY_TAB_DIR = ART_DIR / "py" / "tables"

# Numeric limits read from the environment (int() raises on malformed values).
PAPERMILL_TIMEOUT = int(os.getenv("PAPERMILL_TIMEOUT", "1800"))
MAX_PREVIEW_ROWS = int(os.getenv("MAX_FILE_PREVIEW_ROWS", "50"))
MAX_LOG_CHARS = int(os.getenv("MAX_LOG_CHARS", "8000"))

# Chat back-end settings.
HF_API_KEY = os.getenv("HF_API_KEY", "").strip()
MODEL_NAME = os.getenv("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.getenv("HF_PROVIDER", "novita").strip()
N8N_WEBHOOK_URL = os.getenv("N8N_WEBHOOK_URL", "").strip()

# The LLM is usable only with an API key AND an importable client class.
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
llm_client = InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY) if LLM_ENABLED else None
|
|
| |
| |
| |
|
|
def ensure_dirs():
    """Create the run and artifact directories (and parents) if missing."""
    required = (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR)
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
|
|
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
|
|
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last *n* characters of *text*.

    ``None`` or empty *text* yields ``""``. A non-positive *n* also yields
    ``""``: a bare ``text[-n:]`` would return the whole string for ``n == 0``
    (because ``-0 == 0``) and would drop a prefix for negative *n*.
    """
    if not text or n <= 0:
        return ""
    return text[-n:]
|
|
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
    """Return the sorted names of files directly inside *dir_path*.

    Only regular files whose lower-cased suffix is contained in *exts* are
    listed. A missing or non-directory *dir_path* yields an empty list.
    """
    names: List[str] = []
    if dir_path.is_dir():
        for entry in dir_path.iterdir():
            if entry.is_file() and entry.suffix.lower() in exts:
                names.append(entry.name)
        names.sort()
    return names
|
|
def _read_csv(path: Path) -> pd.DataFrame:
    """Read the CSV at *path*, limited to the first ``MAX_PREVIEW_ROWS`` rows."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
|
|
def _read_json(path: Path):
    """Decode the UTF-8 JSON file at *path* and return the parsed value."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
|
|
def artifacts_index() -> Dict[str, Any]:
    """Return the names of generated figures and tables, keyed under ``"python"``."""
    figure_names = _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg"))
    table_names = _ls(PY_TAB_DIR, (".csv", ".json"))
    return {"python": {"figures": figure_names, "tables": table_names}}
|
|
| |
| |
| |
|
|
def run_notebook(nb_name: str) -> str:
    """Execute the notebook *nb_name* (relative to ``BASE_DIR``) with papermill.

    The executed copy is written to ``RUNS_DIR`` under a timestamped name.

    Returns:
        ``"Executed <nb_name>"`` on success, or ``"ERROR: <nb_name> not found."``
        when the notebook file does not exist.

    Raises:
        Whatever ``papermill.execute_notebook`` raises when execution fails;
        callers are expected to catch it.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    # is_file() also rejects a directory that happens to carry the notebook name.
    if not nb_in.is_file():
        return f"ERROR: {nb_name} not found."
    # Use only the final path component: a name such as "sub/nb.ipynb" would
    # otherwise point the output into a sub-directory of RUNS_DIR that does
    # not exist.
    nb_out = RUNS_DIR / f"run_{stamp()}_{Path(nb_name).name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
|
|
|
|
def run_datacreation() -> str:
    """Run the data-creation notebook and return a human-readable log.

    The log starts with ``OK`` on success and with ``FAILED`` when the
    notebook is missing or its execution raised. On success the CSV files
    present in ``BASE_DIR`` are listed.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook signals a missing notebook through its return value,
        # not an exception; do not report that as "OK".
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        listing = "\n".join(f" - {c}" for c in csvs)
        return f"OK {log}\n\nCSVs now in /app:\n" + listing
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
|
|
|
|
def run_pythonanalysis() -> str:
    """Run the analysis notebook and return a human-readable log.

    The log starts with ``OK`` on success and with ``FAILED`` when the
    notebook is missing or its execution raised. On success the generated
    figure and table file names are listed.
    """
    try:
        log = run_notebook(NB2)
        # run_notebook signals a missing notebook through its return value,
        # not an exception; do not report that as "OK".
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        idx = artifacts_index()
        figs = idx["python"]["figures"]
        tabs = idx["python"]["tables"]
        return (
            f"OK {log}\n\n"
            f"Figures: {', '.join(figs) or '(none)'}\n"
            f"Tables: {', '.join(tabs) or '(none)'}"
        )
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
|
|
|
|
def run_full_pipeline() -> str:
    """Run both pipeline steps in order and return their combined log text."""
    rule = "=" * 50
    # List elements are evaluated left to right, so step 1 runs before step 2.
    sections = [
        rule,
        "STEP 1/2: Data Creation (web scraping + synthetic data)",
        rule,
        run_datacreation(),
        "",
        rule,
        "STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)",
        rule,
        run_pythonanalysis(),
    ]
    return "\n".join(sections)
|
|
|
|
| |
| |
| |
|
|
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return a list of ``(filepath, caption)`` pairs for the gallery.

    All image types that ``artifacts_index`` reports as figures (png, jpg,
    jpeg; suffix compared case-insensitively) are included, sorted by path.
    The caption is the file stem with underscores replaced by spaces, in
    title case. A missing figure directory yields an empty list.
    """
    if not PY_FIG_DIR.is_dir():
        return []
    image_exts = (".png", ".jpg", ".jpeg")
    paths = sorted(
        p for p in PY_FIG_DIR.iterdir()
        if p.is_file() and p.suffix.lower() in image_exts
    )
    return [(str(p), p.stem.replace("_", " ").title()) for p in paths]
|
|
|
|
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a table file as a DataFrame without ever raising.

    JSON files are parsed and wrapped in a DataFrame (a top-level object
    becomes a single row); anything else is read as CSV. Any failure is
    reported as a one-row DataFrame with an ``error`` column.
    """
    try:
        # Compare case-insensitively: the table listing lower-cases suffixes,
        # so a file such as "KPIS.JSON" is offered and must be parsed as JSON.
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            if isinstance(obj, dict):
                return pd.DataFrame([obj])
            return pd.DataFrame(obj)
        return _read_csv(path)
    except Exception as e:
        return pd.DataFrame([{"error": str(e)}])
|
|
|
|
def refresh_gallery():
    """Collect gallery figures, the table dropdown update and a default preview.

    NOTE: this module defines ``refresh_gallery`` a second time further down;
    that later definition rebinds the name, so this version is shadowed once
    the module has finished importing.
    """
    figures = _load_all_figures()
    tables = list(artifacts_index()["python"]["tables"])

    first = tables[0] if tables else None
    # Preview the first table when there is one, otherwise an empty frame.
    preview = _load_table_safe(PY_TAB_DIR / first) if tables else pd.DataFrame()

    dropdown_update = gr.update(choices=tables, value=first)
    return (figures or [], dropdown_update, preview)
|
|
|
|
def on_table_select(choice: str):
    """Return a preview DataFrame for the table file chosen in the dropdown.

    NOTE: this module defines ``on_table_select`` a second time further down;
    that later definition rebinds the name, so this version is shadowed once
    the module has finished importing.
    """
    if choice:
        table_path = PY_TAB_DIR / choice
        if table_path.exists():
            return _load_table_safe(table_path)
        return pd.DataFrame([{"error": f"File not found: {choice}"}])
    return pd.DataFrame([{"hint": "Select a table above."}])
|
|
|
|
| |
| |
| |
|
|
def load_kpis() -> Dict[str, Any]:
    """Return the first readable ``kpis.json`` from the table or figure folder.

    An empty dict is returned when no candidate exists or none can be parsed.

    NOTE: this module defines ``load_kpis`` a second time further down; that
    later definition rebinds the name, so this version is shadowed once the
    module has finished importing.
    """
    candidates = (PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json")
    for kpi_file in candidates:
        if not kpi_file.exists():
            continue
        try:
            return _read_json(kpi_file)
        except Exception:
            # Unreadable candidate: fall through to the next one.
            continue
    return {}
|
|
|
|
| |
| |
| |
|
|
def load_main_dataset() -> pd.DataFrame:
    """Load the main dataset from ``BASE_DIR``.

    ``final_dataset.csv`` is tried first (default separator), then
    ``datareal.csv`` (semicolon-separated). A file that is missing or fails
    to parse is skipped; an empty DataFrame is returned when nothing loads.
    """
    sources = (
        ("final_dataset.csv", {}),
        ("datareal.csv", {"sep": ";"}),
    )
    for file_name, read_options in sources:
        csv_path = BASE_DIR / file_name
        if not csv_path.exists():
            continue
        try:
            return pd.read_csv(csv_path, **read_options)
        except Exception:
            # Best effort: move on to the next candidate file.
            continue
    return pd.DataFrame()
|
|
|
|
def load_kpis() -> Dict[str, Any]:
    """Compute headline KPIs from the main dataset.

    Returns:
        A dict with ``n_rows`` and, for every column that is present,
        ``n_countries`` and the column means rounded to two decimals. Keys
        whose source column is missing, or whose mean is undefined, are
        omitted. An empty dict is returned when no dataset could be loaded.
    """
    df = load_main_dataset()
    if df.empty:
        return {}

    kpis: Dict[str, Any] = {"n_rows": len(df)}
    if "COUNTRY" in df.columns:
        kpis["n_countries"] = int(df["COUNTRY"].nunique())

    # (KPI key, source column) pairs, in display order.
    mean_columns = (
        ("avg_job_satisfaction", "AVG_JOB_SATISFACTION"),
        ("avg_income", "MEAN_NET_INCOME"),
        ("avg_work_life_balance", "WORK_LIFE_BALANCE"),
        ("avg_stress_level", "STRESS_LEVEL"),
        ("avg_weekly_hours", "AVG_WEEKLY_WORKING_HOURS"),
    )
    for key, column in mean_columns:
        if column not in df.columns:
            continue
        # Coerce first: a column that was parsed as text would make .mean()
        # raise and take the whole dashboard down with it.
        mean = pd.to_numeric(df[column], errors="coerce").mean()
        if pd.notna(mean):
            kpis[key] = round(float(mean), 2)
    return kpis
|
|
|
|
| |
| |
| |
|
|
# System prompt for the chat LLM. It asks for a short answer followed by a
# fenced JSON directive naming the figure or table to display.
DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a country-level job satisfaction analytics app.
The dataset contains variables such as COUNTRY, AVG_JOB_SATISFACTION, WORK_LIFE_BALANCE,
STRESS_LEVEL, MEAN_NET_INCOME, and AVG_WEEKLY_WORKING_HOURS.

Your job:
1. Answer the user's question briefly and clearly.
2. At the end, output a JSON block inside ```json ... ``` with:
{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}

Use these filenames:
- "job_satisfaction_by_country" for country ranking chart
- "correlation_heatmap" for correlation chart
- "income_vs_satisfaction" for scatter chart
- "top_countries_table" for top countries table
- "bottom_countries_table" for bottom countries table
- "full_dataset_table" for full dataset preview
"""
|
|
# A fenced ```json {...} ``` block (the format requested in the system prompt).
JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
# A bare, un-nested JSON object that contains a "show" key.
FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)


def _parse_display_directive(text: str) -> Dict[str, str]:
    """Extract the display directive from an LLM reply.

    The fenced ``json`` block is tried first, then a bare JSON object that
    contains a ``"show"`` key. ``{"show": "none"}`` is returned when *text*
    is empty/``None`` or when no candidate decodes to a JSON object.
    """
    text = text or ""
    for pattern, group in ((JSON_BLOCK_RE, 1), (FALLBACK_JSON_RE, 0)):
        match = pattern.search(text)
        if match is None:
            continue
        try:
            parsed = json.loads(match.group(group))
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {"show": "none"}


def _clean_response(text: str) -> str:
    """Return *text* with the display directive removed and whitespace trimmed.

    Fenced ``json`` blocks are always removed. When the reply has no fenced
    block, a bare directive object is removed instead, so that whatever
    ``_parse_display_directive`` consumed is not shown to the user.
    """
    text = text or ""
    cleaned = JSON_BLOCK_RE.sub("", text)
    if cleaned == text:
        # Nothing fenced was found; the directive may have been emitted bare.
        cleaned = FALLBACK_JSON_RE.sub("", cleaned)
    return cleaned.strip()
|
|
|
|
def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
    """Answer a chat message by keyword matching (used when no LLM answers).

    Args:
        msg: The user's message; matching is case-insensitive on substrings.
        idx: Artifact index (accepted for interface parity; not consulted).
        kpis: KPI dict; when empty the user is told to run the pipeline.

    Returns:
        ``(reply_text, directive)`` where *directive* tells the caller which
        figure or table to display, or ``{"show": "none"}``.
    """
    if not kpis:
        return (
            "No dataset found yet. Please run the pipeline first.",
            {"show": "none"},
        )

    summary = (
        f"The dataset contains **{kpis.get('n_rows', '?')}** rows and "
        f"**{kpis.get('n_countries', '?')}** countries. "
        f"Average job satisfaction is **{kpis.get('avg_job_satisfaction', '?')}**."
    )

    # Rules are checked in order; the first match wins. The bottom/lowest
    # rule comes before the generic country rule: a question such as
    # "Which countries have the lowest job satisfaction?" would otherwise
    # match "countries" and be answered with the top-ranking chart.
    rules = (
        (
            ("bottom", "lowest", "worst"),
            "Here are the bottom countries by job satisfaction.",
            {"show": "table", "scope": "python", "filename": "bottom_countries_table"},
        ),
        (
            ("country", "countries", "ranking"),
            "Here is the country-level job satisfaction ranking.",
            {"show": "figure", "filename": "job_satisfaction_by_country"},
        ),
        (
            ("correlation", "heatmap", "relationship"),
            "Here is the correlation overview for the numeric variables.",
            {"show": "figure", "filename": "correlation_heatmap"},
        ),
        (
            ("income", "salary", "net income"),
            "Here is the relationship between income and job satisfaction.",
            {"show": "figure", "filename": "income_vs_satisfaction"},
        ),
        (
            ("top", "best", "highest"),
            "Here are the top countries by job satisfaction.",
            {"show": "table", "scope": "python", "filename": "top_countries_table"},
        ),
        (
            ("overview", "summary", "dataset", "data", "kpi"),
            "Here is an overview of the dataset.",
            {"show": "table", "scope": "python", "filename": "full_dataset_table"},
        ),
    )

    msg_lower = (msg or "").lower()
    for keywords, lead, directive in rules:
        if any(word in msg_lower for word in keywords):
            return f"{lead} {summary}", dict(directive)

    return (
        f"{summary} Ask about country rankings, correlations, income effects, or top/bottom countries.",
        {"show": "none"},
    )
|
|
|
|
def ai_chat(user_msg: str, history: list):
    """Answer a chat message and choose a chart or table to display.

    Back-ends are tried in this order: the n8n webhook (only when
    ``N8N_WEBHOOK_URL`` is set and a ``_n8n_call`` function exists in this
    module), the Hugging Face LLM (when ``LLM_ENABLED``), and finally plain
    keyword matching.

    Args:
        user_msg: The text typed by the user.
        history: Prior conversation as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(new_history, "", figure_or_None, dataframe_or_None)``; the empty
        string clears the input box.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None

    history = history or []
    idx = artifacts_index()
    kpis = load_kpis()

    def _with_fallback(prefix):
        # Append the keyword-matching answer to whatever the back-end said.
        fb_reply, fb_directive = _keyword_fallback(user_msg, idx, kpis)
        return "\n\n".join(p for p in (prefix, fb_reply) if p), fb_directive

    # `_n8n_call` is not defined in this module as it stands; look it up
    # defensively so that setting N8N_WEBHOOK_URL cannot raise NameError.
    n8n_call = globals().get("_n8n_call")

    if N8N_WEBHOOK_URL and callable(n8n_call):
        reply, directive = n8n_call(user_msg)
        if directive is None:
            reply, directive = _with_fallback(reply)
    elif LLM_ENABLED:
        msgs = [{"role": "system", "content": DASHBOARD_SYSTEM}]
        # Forward only role/content from the last six entries; any other
        # keys the chat component attaches are not part of the chat API.
        msgs.extend(
            {"role": entry["role"], "content": entry["content"]}
            for entry in history[-6:]
            if isinstance(entry, dict) and "role" in entry and "content" in entry
        )
        msgs.append({"role": "user", "content": user_msg})
        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME,
                messages=msgs,
                temperature=0.3,
                max_tokens=600,
                stream=False,
            )
            raw = (
                r["choices"][0]["message"]["content"]
                if isinstance(r, dict)
                else r.choices[0].message.content
            ) or ""
            directive = _parse_display_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply, directive = _with_fallback(
                f"LLM error: {e}. Falling back to keyword matching."
            )
    else:
        reply, directive = _keyword_fallback(user_msg, idx, kpis)

    # A back-end may hand back something that is not a dict.
    if not isinstance(directive, dict):
        directive = {"show": "none"}
    show = directive.get("show", "none")
    fname = directive.get("filename", "")
    if not isinstance(fname, str):
        fname = ""

    figure_builders = {
        "job_satisfaction_by_country": build_job_satisfaction_chart,
        "correlation_heatmap": build_correlation_chart,
        "income_vs_satisfaction": build_income_chart,
    }
    table_builders = {
        "top_countries_table": get_top_countries_table,
        "bottom_countries_table": get_bottom_countries_table,
        "full_dataset_table": get_dataset_preview,
    }

    chart_out = None
    tab_out = None
    if show == "figure" and fname in figure_builders:
        chart_out = figure_builders[fname]()
    elif show == "table" and fname in table_builders:
        tab_out = table_builders[fname]()

    new_history = history + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ]
    return new_history, "", chart_out, tab_out
|
|
|
|
| |
| |
| |
|
|
def render_kpi_cards() -> str:
    """Render the KPI dict as an HTML grid of cards.

    Returns a "No data yet" placeholder card when no KPIs are available.
    Whole-number KPIs (row and country counts) are shown without decimals;
    other numbers use two decimals below 1000 and none above.
    """
    from numbers import Integral, Real

    kpis = load_kpis()
    if not kpis:
        return (
            '<div style="background:rgba(255,255,255,.65);backdrop-filter:blur(16px);'
            'border-radius:20px;padding:28px;text-align:center;'
            'border:1.5px solid rgba(255,255,255,.7);'
            'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
            '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
            '<div style="color:#a48de8;font-size:14px;'
            'font-weight:800;margin-bottom:6px;">No data yet</div>'
            '<div style="color:#9d8fc4;font-size:12px;">'
            'Run the pipeline to populate these cards.</div>'
            '</div>'
        )

    def card(icon, label, value, colour):
        return f"""
        <div style="background:rgba(255,255,255,.72);backdrop-filter:blur(16px);
        border-radius:20px;padding:18px 14px 16px;text-align:center;
        border:1.5px solid rgba(255,255,255,.8);
        box-shadow:0 4px 16px rgba(124,92,191,.08);
        border-top:3px solid {colour};">
        <div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>
        <div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;
        letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{label}</div>
        <div style="color:#2d1f4e;font-size:16px;font-weight:800;">{value}</div>
        </div>"""

    def fmt(val):
        # Counts must not be rendered as "27.00".
        if isinstance(val, bool) or not isinstance(val, Real):
            return str(val)
        if isinstance(val, Integral):
            return f"{int(val):,}"
        return f"{val:,.2f}" if abs(val) < 1000 else f"{val:,.0f}"

    kpi_config = [
        ("n_rows", "📄", "Rows", "#a48de8"),
        ("n_countries", "🌍", "Countries", "#7aa6f8"),
        ("avg_job_satisfaction", "😊", "Avg Job Satisfaction", "#6ee7c7"),
        ("avg_income", "💰", "Avg Net Income", "#3dcba8"),
        ("avg_work_life_balance", "⚖️", "Work-Life Balance", "#f4b942"),
        ("avg_stress_level", "🔥", "Stress Level", "#ff6b6b"),
        ("avg_weekly_hours", "⏱️", "Weekly Hours", "#8fa8f8"),
    ]

    parts = [
        '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
        'gap:12px;margin-bottom:24px;">'
    ]
    for key, icon, label, colour in kpi_config:
        val = kpis.get(key)
        if val is None:
            continue
        parts.append(card(icon, label, fmt(val), colour))
    parts.append("</div>")
    return "".join(parts)
|
|
|
|
| |
| |
| |
|
|
# Ten hex colours available to the chart builders.
CHART_PALETTE = [
    "#7c5cbf",
    "#2ec4a0",
    "#e8537a",
    "#e8a230",
    "#5e8fef",
    "#c45ea8",
    "#3dbacc",
    "#a0522d",
    "#6aaa3a",
    "#d46060",
]
|
|
|
|
def _styled_layout(**kwargs) -> dict:
    """Return Plotly layout keyword arguments: house defaults plus overrides.

    Overrides in *kwargs* win over the defaults. When both the default and
    the override for a key are dicts (``title``, ``font``, ``margin``) they
    are merged one level deep, so ``title=dict(text=...)`` keeps the default
    title font instead of replacing the whole ``title`` entry.
    """
    layout = dict(
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        plot_bgcolor="rgba(255,255,255,0.98)",
        font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
        margin=dict(l=60, r=20, t=70, b=70),
        title=dict(font=dict(size=15, color="#4b2d8a")),
    )
    for key, value in kwargs.items():
        current = layout.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            layout[key] = {**current, **value}
        else:
            layout[key] = value
    return layout
|
|
|
|
def _empty_chart(title: str) -> go.Figure:
    """Return an empty figure titled *title* with a centred placeholder note."""
    placeholder_note = dict(
        text="Run the pipeline to generate data",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=14, color="rgba(124,92,191,0.5)"),
    )
    layout_options = dict(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        annotations=[placeholder_note],
    )
    fig = go.Figure()
    fig.update_layout(**layout_options)
    return fig
|
|
|
|
def build_job_satisfaction_chart() -> go.Figure:
    """Build a horizontal bar chart of the 20 rows with the highest job satisfaction.

    Falls back to a placeholder figure when the dataset is empty or lacks
    the ``COUNTRY`` / ``AVG_JOB_SATISFACTION`` columns.
    """
    df = load_main_dataset()
    required = {"COUNTRY", "AVG_JOB_SATISFACTION"}
    if df.empty or not required.issubset(df.columns):
        return _empty_chart("Job Satisfaction by Country — run the pipeline first")

    ranked = df[["COUNTRY", "AVG_JOB_SATISFACTION"]].sort_values(
        "AVG_JOB_SATISFACTION", ascending=False
    )
    plot_df = ranked.head(20)
    scores = plot_df["AVG_JOB_SATISFACTION"]

    bars = go.Bar(
        x=scores,
        y=plot_df["COUNTRY"],
        orientation="h",
        marker=dict(color=scores, colorscale="Viridis"),
        hovertemplate="<b>%{y}</b><br>Job Satisfaction: %{x:.2f}<extra></extra>",
    )
    fig = go.Figure(bars)

    layout = _styled_layout(
        height=max(450, len(plot_df) * 28),
        title=dict(text="Top Countries by Job Satisfaction"),
        showlegend=False,
    )
    fig.update_layout(**layout)
    # Reverse the axis so the first (highest) row is drawn at the top.
    fig.update_yaxes(autorange="reversed")
    fig.update_xaxes(title="Average Job Satisfaction")
    return fig
|
|
|
|
def build_income_chart() -> go.Figure:
    """Build a labelled scatter plot of mean net income against job satisfaction.

    Falls back to a placeholder figure when the dataset is empty or any of
    the three required columns is missing.
    """
    df = load_main_dataset()
    required = {"MEAN_NET_INCOME", "AVG_JOB_SATISFACTION", "COUNTRY"}
    if df.empty or not required.issubset(df.columns):
        return _empty_chart("Income vs Job Satisfaction — run the pipeline first")

    satisfaction = df["AVG_JOB_SATISFACTION"]
    marker_style = dict(
        size=10,
        color=satisfaction,
        colorscale="Viridis",
        showscale=True,
    )
    points = go.Scatter(
        x=df["MEAN_NET_INCOME"],
        y=satisfaction,
        mode="markers+text",
        text=df["COUNTRY"],
        textposition="top center",
        marker=marker_style,
        hovertemplate="<b>%{text}</b><br>Income: %{x:,.0f}<br>Job Satisfaction: %{y:.2f}<extra></extra>",
    )
    fig = go.Figure(points)

    layout = _styled_layout(
        height=500,
        title=dict(text="Income vs Job Satisfaction"),
    )
    fig.update_layout(**layout)
    fig.update_xaxes(title="Mean Net Income")
    fig.update_yaxes(title="Average Job Satisfaction")
    return fig
|
|
|
|
def build_correlation_chart() -> go.Figure:
    """Build a correlation heatmap over the dataset's numeric columns.

    Falls back to a placeholder figure when the dataset is empty or has no
    numeric columns.
    """
    df = load_main_dataset()
    if df.empty:
        return _empty_chart("Correlation Heatmap — run the pipeline first")

    num_df = df.select_dtypes(include=["number"])
    if num_df.empty:
        return _empty_chart("No numeric columns found")

    corr = num_df.corr(numeric_only=True)
    labels = corr.columns

    heatmap = go.Heatmap(
        z=corr.values,
        x=labels,
        y=labels,
        colorscale="RdBu",
        zmin=-1,
        zmax=1,
        hovertemplate="X: %{x}<br>Y: %{y}<br>Corr: %{z:.2f}<extra></extra>",
    )
    fig = go.Figure(data=heatmap)

    layout = _styled_layout(
        height=600,
        title=dict(text="Correlation Heatmap"),
    )
    fig.update_layout(**layout)
    return fig
|
|
|
|
def get_top_countries_table() -> pd.DataFrame:
    """Return the 10 rows with the highest ``AVG_JOB_SATISFACTION``.

    Only ``COUNTRY`` and ``AVG_JOB_SATISFACTION`` are kept. A one-row info
    frame is returned when the data or either column is unavailable.
    """
    df = load_main_dataset()
    required = {"COUNTRY", "AVG_JOB_SATISFACTION"}
    if df.empty or not required.issubset(df.columns):
        return pd.DataFrame([{"info": "No data available"}])

    subset = df[["COUNTRY", "AVG_JOB_SATISFACTION"]]
    ranked = subset.sort_values("AVG_JOB_SATISFACTION", ascending=False)
    return ranked.head(10)
|
|
|
|
def get_bottom_countries_table() -> pd.DataFrame:
    """Return the 10 rows with the lowest ``AVG_JOB_SATISFACTION``.

    Only ``COUNTRY`` and ``AVG_JOB_SATISFACTION`` are kept. A one-row info
    frame is returned when the data or either column is unavailable.
    """
    df = load_main_dataset()
    required = {"COUNTRY", "AVG_JOB_SATISFACTION"}
    if df.empty or not required.issubset(df.columns):
        return pd.DataFrame([{"info": "No data available"}])

    subset = df[["COUNTRY", "AVG_JOB_SATISFACTION"]]
    ranked = subset.sort_values("AVG_JOB_SATISFACTION", ascending=True)
    return ranked.head(10)
|
|
|
|
def get_dataset_preview() -> pd.DataFrame:
    """Return the first 20 rows of the main dataset, or an info frame if empty."""
    df = load_main_dataset()
    if not df.empty:
        return df.head(20)
    return pd.DataFrame([{"info": "No data available"}])
|
|
|
|
def refresh_dashboard():
    """Rebuild the KPI cards HTML and the three dashboard figures.

    Returns a 4-tuple: (KPI HTML, job-satisfaction chart, correlation chart,
    income chart).
    """
    kpi_cards = render_kpi_cards()
    satisfaction_fig = build_job_satisfaction_chart()
    correlation_fig = build_correlation_chart()
    income_fig = build_income_chart()
    return kpi_cards, satisfaction_fig, correlation_fig, income_fig
|
|
|
|
| |
| |
| |
|
|
def refresh_gallery():
    """Collect gallery figures, the table dropdown update and a default preview.

    The dropdown offers whichever of ``final_dataset.csv`` and
    ``datareal.csv`` exist in ``BASE_DIR``; the first one is previewed
    (limited to ``MAX_PREVIEW_ROWS`` rows). A file that cannot be read is
    reported as a one-row ``error`` frame instead of raising, matching how
    ``on_table_select`` reports read failures.

    Returns:
        ``(figures, dropdown_update, preview_dataframe)``.
    """
    figures = _load_all_figures()

    table_choices = [
        name
        for name in ("final_dataset.csv", "datareal.csv")
        if (BASE_DIR / name).exists()
    ]

    default_df = pd.DataFrame()
    if table_choices:
        first_path = BASE_DIR / table_choices[0]
        # datareal.csv is semicolon-separated; the other file uses commas.
        read_kwargs = {"sep": ";"} if first_path.name == "datareal.csv" else {}
        try:
            default_df = pd.read_csv(first_path, nrows=MAX_PREVIEW_ROWS, **read_kwargs)
        except Exception as e:
            default_df = pd.DataFrame([{"error": str(e)}])

    return (
        figures if figures else [],
        gr.update(choices=table_choices, value=table_choices[0] if table_choices else None),
        default_df,
    )
|
|
|
|
def on_table_select(choice: str):
    """Return a preview of the CSV file *choice* located in ``BASE_DIR``.

    The preview is limited to ``MAX_PREVIEW_ROWS`` rows. An empty choice
    yields a hint frame; a missing file, a name containing directory
    components, or a read failure yields a one-row ``error`` frame.
    """
    if not choice:
        return pd.DataFrame([{"hint": "Select a table above."}])

    # The value arrives from the browser: accept plain file names only, so
    # that "../x" or an absolute path cannot reach outside BASE_DIR.
    if not isinstance(choice, str) or Path(choice).name != choice:
        return pd.DataFrame([{"error": f"File not found: {choice}"}])

    path = BASE_DIR / choice
    if not path.is_file():
        return pd.DataFrame([{"error": f"File not found: {choice}"}])

    # datareal.csv is semicolon-separated; everything else uses commas.
    read_kwargs = {"sep": ";"} if path.name == "datareal.csv" else {}
    try:
        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS, **read_kwargs)
    except Exception as e:
        return pd.DataFrame([{"error": str(e)}])
|
|
| |
| |
| |
|
|
# Create the run/artifact directories at import time, before the UI is built.
ensure_dirs()
|
|
def load_css() -> str:
    """Return the contents of ``style.css`` next to this file, or ``""`` if absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: three tabs (pipeline runner, dashboard, chat-driven dashboard).
# ---------------------------------------------------------------------------
with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:

    gr.Markdown(
        "# Job Satisfaction Dashboard\n"
        "*Analysis of work-life balance, income, and stress across countries*",
        elem_id="escp_title",
    )

    # --- Tab 1: run the notebooks and show their log -----------------------
    with gr.Tab("Pipeline Runner"):
        gr.Markdown()

        with gr.Row():
            with gr.Column(scale=1):
                btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
            with gr.Column(scale=1):
                btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")

        with gr.Row():
            btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")

        run_log = gr.Textbox(
            label="Execution Log",
            lines=18,
            max_lines=30,
            interactive=False,
        )

        btn_nb1.click(run_datacreation, outputs=[run_log])
        btn_nb2.click(run_pythonanalysis, outputs=[run_log])
        btn_all.click(run_full_pipeline, outputs=[run_log])

    # --- Tab 2: KPI cards, charts and table previews -----------------------
    with gr.Tab("Dashboard"):
        # A callable value is evaluated by Gradio when the page loads.
        kpi_html = gr.HTML(value=render_kpi_cards)

        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")

        gr.Markdown("#### Interactive Charts")
        chart_satisfaction = gr.Plot(label="Job Satisfaction by Country")
        chart_correlation = gr.Plot(label="Correlation Heatmap")
        chart_income = gr.Plot(label="Income vs Job Satisfaction")

        gr.Markdown("#### Data Tables")
        table_dropdown = gr.Dropdown(
            label="Select a table to view",
            choices=[],
            interactive=True,
        )
        table_display = gr.Dataframe(
            label="Table Preview",
            interactive=False,
        )

        def _on_refresh():
            """Refresh KPI cards, charts, the table dropdown and the preview."""
            kpi, c1, c2, c3 = refresh_dashboard()
            # The gallery figures are not displayed on this tab.
            _, dd, df = refresh_gallery()
            return kpi, c1, c2, c3, dd, df

        refresh_btn.click(
            _on_refresh,
            outputs=[kpi_html, chart_satisfaction, chart_correlation, chart_income,
                     table_dropdown, table_display],
        )
        table_dropdown.change(
            on_table_select,
            inputs=[table_dropdown],
            outputs=[table_display],
        )

    # --- Tab 3: chat that selects a chart or table -------------------------
    with gr.Tab('"AI" Dashboard'):
        _ai_status = (
            "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
            else "**LLM active.**" if LLM_ENABLED
            else "Using **keyword matching**. Upgrade options: "
                 "set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
                 "or set `HF_API_KEY` for direct LLM access."
        )
        gr.Markdown(
            "### Ask questions, get interactive visualisations\n\n"
            f"Type a question and the system will pick the right interactive chart or table. {_ai_status}"
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    label="Conversation",
                    height=380,
                )
                user_input = gr.Textbox(
                    label="Ask about your data",
                    # Examples match what the app can actually show.
                    placeholder="e.g. Show me the top countries / Show the correlation heatmap / How does income relate to satisfaction?",
                    lines=1,
                )
                gr.Examples(
                    examples=[
                        "Show me the top countries by job satisfaction",
                        "Show me the correlation heatmap",
                        "How does income relate to job satisfaction?",
                        "Which countries have the lowest job satisfaction?",
                        "Give me a dataset overview",
                        "Show me the top 10 countries",
                    ],
                    inputs=user_input,
                )

            with gr.Column(scale=1):
                ai_figure = gr.Plot(
                    label="Interactive Chart",
                )
                ai_table = gr.Dataframe(
                    label="Data Table",
                    interactive=False,
                )

        user_input.submit(
            ai_chat,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input, ai_figure, ai_table],
        )


# Start the server; allowed_paths lets Gradio serve files under BASE_DIR.
demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
|
|