XRachel committed on
Commit
9c9cad5
·
verified ·
1 Parent(s): 6f46588

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +27 -0
  2. app.py +586 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Non-interactive apt, no .pyc files, unbuffered Python output.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Gradio bind address and port (the port matches EXPOSE below).
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

RUN apt-get update && apt-get install -y --no-install-recommends r-base r-base-dev build-essential curl git libcurl4-openssl-dev libssl-dev libxml2-dev && rm -rf /var/lib/apt/lists/*

# R packages used in ranalysis.ipynb
# install.packages() only emits a warning when a package fails to build, so
# check the result explicitly and abort the image build if anything is missing.
RUN R -e "pkgs <- c('forecast','ggplot2','jsonlite','readr','dplyr','tidyr','stringr','lubridate','broom'); install.packages(pkgs, repos='https://cloud.r-project.org'); missing <- setdiff(pkgs, rownames(installed.packages())); if (length(missing) > 0) stop('R packages failed to install: ', paste(missing, collapse = ', '))"

WORKDIR /app

COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Optional R kernel for notebooks
RUN R -e "install.packages('IRkernel', repos='https://cloud.r-project.org/')"
RUN R -e "IRkernel::installspec(user = FALSE)"

COPY . /app

EXPOSE 7860
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import time
5
+ import traceback
6
+ from pathlib import Path
7
+ from typing import Dict, Any, List, Optional, Tuple
8
+
9
+ import pandas as pd
10
+ import gradio as gr
11
+ import papermill as pm
12
+
13
+ # Optional LLM (HuggingFace Inference API)
14
+ try:
15
+ from huggingface_hub import InferenceClient
16
+ except Exception:
17
+ InferenceClient = None
18
+
19
+ # =========================================================
20
+ # CONFIG
21
+ # =========================================================
22
+
23
# Directory containing this file; every other path is derived from it.
BASE_DIR = Path(__file__).resolve().parent

# Notebook file names, overridable through environment variables.
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()
NB3 = os.environ.get("NB3", "ranalysis.ipynb").strip()

# Executed notebooks go to RUNS_DIR; figures and tables are read from ART_DIR.
RUNS_DIR = BASE_DIR.joinpath("runs")
ART_DIR = BASE_DIR.joinpath("artifacts")
PY_FIG_DIR = ART_DIR.joinpath("py", "figures")
PY_TAB_DIR = ART_DIR.joinpath("py", "tables")
R_FIG_DIR = ART_DIR.joinpath("r", "figures")
R_TAB_DIR = ART_DIR.joinpath("r", "tables")

# Numeric limits, each overridable through an environment variable.
PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))

# Settings for the optional LLM backend.
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()

# The LLM is used only when a key is set and huggingface_hub was importable.
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
if LLM_ENABLED:
    llm_client = InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
else:
    llm_client = None
50
+
51
+ # =========================================================
52
+ # HELPERS
53
+ # =========================================================
54
+
55
def ensure_dirs():
    """Create the run and artifact directories if they do not exist yet."""
    required = (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR, R_FIG_DIR, R_TAB_DIR)
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
58
+
59
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
61
+
62
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last *n* characters of *text*.

    Args:
        text: Source string; ``None`` or ``""`` yields ``""``.
        n: Maximum number of trailing characters to keep.

    Returns:
        The trailing slice of *text*, or ``""`` when *n* is not positive.
    """
    # text[-0:] would be the whole string and a negative n would slice from
    # the front, so treat every non-positive limit as "keep nothing".
    if not text or n <= 0:
        return ""
    return text[-n:]
64
+
65
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
    """List file names in *dir_path* whose lower-cased suffix is in *exts*.

    Returns the names sorted, or an empty list when *dir_path* is not a
    directory. Sub-directories are ignored.
    """
    names: List[str] = []
    if dir_path.is_dir():
        for entry in dir_path.iterdir():
            if entry.is_file() and entry.suffix.lower() in exts:
                names.append(entry.name)
        names.sort()
    return names
69
+
70
def _read_csv(path: Path) -> pd.DataFrame:
    """Read at most ``MAX_PREVIEW_ROWS`` data rows of the CSV at *path*."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
72
+
73
def _read_json(path: Path):
    """Parse the UTF-8 JSON file at *path* and return the decoded object."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
76
+
77
def artifacts_index() -> Dict[str, Any]:
    """Return the figure and table file names available per analysis scope.

    The result maps ``"python"`` and ``"r"`` to a dict with the keys
    ``"figures"`` and ``"tables"``, each holding a sorted list of file names.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    table_exts = (".csv", ".json")
    locations = {
        "python": (PY_FIG_DIR, PY_TAB_DIR),
        "r": (R_FIG_DIR, R_TAB_DIR),
    }
    return {
        scope: {
            "figures": _ls(fig_dir, image_exts),
            "tables": _ls(tab_dir, table_exts),
        }
        for scope, (fig_dir, tab_dir) in locations.items()
    }
88
+
89
+ # =========================================================
90
+ # PIPELINE RUNNERS
91
+ # =========================================================
92
+
93
def run_notebook(nb_name: str) -> str:
    """Execute one notebook with papermill and save the executed copy.

    Args:
        nb_name: Notebook path relative to ``BASE_DIR``.

    Returns:
        ``"Executed <nb_name>"`` on success, or ``"ERROR: <nb_name> not found."``
        when the notebook file does not exist. Execution errors raised by
        papermill propagate to the caller.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    if not nb_in.is_file():
        return f"ERROR: {nb_name} not found."
    # Use only the file name: a notebook configured with a sub-path
    # ("sub/nb.ipynb") would otherwise produce an output path inside a
    # directory that does not exist under RUNS_DIR.
    nb_out = RUNS_DIR / f"run_{stamp()}_{nb_in.name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
109
+
110
def run_datacreation() -> str:
    """Run the data-creation notebook and list the CSV files in ``BASE_DIR``.

    Returns:
        A log string starting with ``"OK"`` on success or ``"FAILED"`` when
        the notebook is missing or its execution raised.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook signals a missing notebook by returning an "ERROR: ..."
        # string instead of raising; do not report that as success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        listing = "\n".join(f" - {c}" for c in csvs)
        return f"OK {log}\n\nCSVs now in /app:\n" + listing
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
117
+
118
def _run_analysis_step(nb_name: str, scope: str) -> str:
    """Run an analysis notebook and summarise the artifacts found for *scope*.

    Args:
        nb_name: Notebook to execute via ``run_notebook``.
        scope: Key of ``artifacts_index()`` to report (``"python"`` or ``"r"``).

    Returns:
        A log string starting with ``"OK"`` on success or ``"FAILED"`` when
        the notebook is missing or its execution raised.
    """
    try:
        log = run_notebook(nb_name)
        # run_notebook signals a missing notebook by returning an "ERROR: ..."
        # string instead of raising; do not report that as success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        produced = artifacts_index()[scope]
        figs = produced["figures"]
        tabs = produced["tables"]
        return (
            f"OK {log}\n\n"
            f"Figures: {', '.join(figs) or '(none)'}\n"
            f"Tables: {', '.join(tabs) or '(none)'}"
        )
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"


def run_pythonanalysis() -> str:
    """Run the Python analysis notebook and report its figures and tables."""
    return _run_analysis_step(NB2, "python")


def run_r() -> str:
    """Run the R analysis notebook and report its figures and tables."""
    return _run_analysis_step(NB3, "r")
145
+
146
def run_full_pipeline() -> str:
    """Run the three pipeline steps in order and return their combined log.

    Every step is run even if an earlier one reported a failure; each step's
    output is preceded by a banner and steps are separated by a blank line.
    """
    rule = "=" * 50
    steps = (
        ("STEP 1/3: Data Creation (web scraping + synthetic data)", run_datacreation),
        ("STEP 2/3: Python Analysis (sentiment, ARIMA, dashboard)", run_pythonanalysis),
        ("STEP 3/3: R Analysis (ETS/ARIMA forecasting)", run_r),
    )
    logs = []
    for position, (title, runner) in enumerate(steps):
        if position:
            logs.append("")
        logs.extend([rule, title, rule])
        logs.append(runner())
    return "\n".join(logs)
163
+
164
+ # =========================================================
165
+ # GALLERY LOADERS
166
+ # =========================================================
167
+
168
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return ``(filepath, caption)`` pairs for the Gallery.

    Python figures come first, then R figures, each group sorted by file
    name. The same image extensions as ``artifacts_index`` are accepted so
    that every figure it lists also shows up in the gallery.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    items: List[Tuple[str, str]] = []
    for label, fig_dir in (("Python", PY_FIG_DIR), ("R", R_FIG_DIR)):
        for name in _ls(fig_dir, image_exts):
            title = Path(name).stem.replace("_", " ").title()
            items.append((str(fig_dir / name), f"{label} | {title}"))
    return items
176
+
177
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a CSV or JSON table preview without ever raising.

    Args:
        path: File to load; a ``.json`` suffix (any case) selects the JSON
            reader, everything else is read as CSV.

    Returns:
        A DataFrame with at most ``MAX_PREVIEW_ROWS`` rows, or a one-row
        DataFrame with an ``"error"`` column when loading failed.
    """
    try:
        # Compare case-insensitively, matching how _ls filters file names.
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            # A single JSON object becomes a one-row table.
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Apply the same preview limit that _read_csv applies to CSVs.
            return frame.head(MAX_PREVIEW_ROWS)
        return _read_csv(path)
    except Exception as e:
        return pd.DataFrame([{"error": str(e)}])
187
+
188
def refresh_gallery():
    """Reload figures and tables when the user clicks Refresh on the Gallery tab.

    Returns a 3-tuple: the gallery items, a dropdown update carrying the
    available ``"<scope>/<file>"`` table choices (first one selected), and
    the DataFrame of that first table (empty when there are no tables).
    """
    figures = _load_all_figures()
    idx = artifacts_index()

    table_choices = [
        f"{scope}/{name}"
        for scope in ("python", "r")
        for name in idx[scope]["tables"]
    ]
    first_choice = table_choices[0] if table_choices else None

    if first_choice is None:
        default_df = pd.DataFrame()
    else:
        scope, name = first_choice.split("/", 1)
        base = PY_TAB_DIR if scope == "python" else R_TAB_DIR
        default_df = _load_table_safe(base / name)

    dropdown_update = gr.update(choices=table_choices, value=first_choice)
    return figures or [], dropdown_update, default_df
209
+
210
def on_table_select(choice: str):
    """Load the table named by a ``"<scope>/<file>"`` dropdown choice.

    Returns the table preview, or a one-row DataFrame carrying a hint or an
    error message when the choice is empty, malformed, or points nowhere.
    """
    if not choice or "/" not in choice:
        return pd.DataFrame([{"hint": "Select a table above."}])

    scope, name = choice.split("/", 1)
    if scope == "python":
        base = PY_TAB_DIR
    elif scope == "r":
        base = R_TAB_DIR
    else:
        return pd.DataFrame([{"error": f"Unknown scope: {scope}"}])

    path = base / name
    if path.exists():
        return _load_table_safe(path)
    return pd.DataFrame([{"error": f"File not found: {path}"}])
221
+
222
+ # =========================================================
223
+ # KPI LOADER
224
+ # =========================================================
225
+
226
def load_kpis() -> Dict[str, Any]:
    """Load the KPI summary written by the Python analysis, if present.

    ``kpis.json`` is looked up in the Python tables directory first and in
    the Python figures directory second.

    Returns:
        The first candidate that parses to a JSON object, or ``{}`` when no
        candidate exists, parses, or holds an object.
    """
    for candidate in (PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = _read_json(candidate)
        except (OSError, ValueError):
            # Unreadable or malformed file (JSONDecodeError is a ValueError):
            # best effort, try the next candidate.
            continue
        # Callers use dict methods on the result, so ignore lists or scalars.
        if isinstance(data, dict):
            return data
    return {}
234
+
235
+ # =========================================================
236
+ # AI DASHBOARD (Tab 3) -- LLM picks what to display
237
+ # =========================================================
238
+
239
+ DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
240
+ The user asks questions or requests about their data. You have access to pre-computed
241
+ artifacts from Python and R analysis pipelines.
242
+
243
+ AVAILABLE ARTIFACTS (only reference ones that exist):
244
+ {artifacts_json}
245
+
246
+ KPI SUMMARY: {kpis_json}
247
+
248
+ YOUR JOB:
249
+ 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
250
+ 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
251
+ the dashboard which artifact to display. The JSON must have this shape:
252
+ {{"show": "figure"|"table"|"none", "scope": "python"|"r", "filename": "..."}}
253
+
254
+ - Use "show": "figure" to display a chart image.
255
+ - Use "show": "table" to display a CSV/JSON table.
256
+ - Use "show": "none" if no artifact is relevant.
257
+
258
+ RULES:
259
+ - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
260
+ - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
261
+ - If the user asks about R regression, the R notebook focuses on forecasting, show accuracy_table.csv.
262
+ - If the user asks about forecast accuracy or model comparison, show accuracy_table.csv or forecast_compare.png.
263
+ - If the user asks about top sellers, show top_titles_by_units_sold.csv.
264
+ - If the user asks a general data question, pick the most relevant artifact.
265
+ - Keep your answer concise (2-4 sentences), then the JSON block.
266
+ """
267
+
268
JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
# Reasoning models such as the default DeepSeek-R1 can emit their chain of
# thought inside <think>...</think>; an unterminated block (response cut off
# by max_tokens) is matched up to the end of the text.
_THINK_BLOCK_RE = re.compile(r"<think>.*?(?:</think>|\Z)", re.DOTALL | re.IGNORECASE)


def _parse_display_directive(text: str) -> Dict[str, str]:
    """Extract the display directive from an LLM response.

    The prompt asks for the directive at the END of the answer, so the last
    fenced ```json block wins; if no fenced block parses, the last bare
    ``{... "show" ...}`` object is used. Reasoning inside ``<think>`` tags is
    ignored so that draft JSON in it cannot be mistaken for the directive.

    Args:
        text: Raw model output (``None`` is treated as empty).

    Returns:
        A dict containing only string values and always a ``"show"`` key;
        ``{"show": "none"}`` when nothing usable is found.
    """
    visible = _THINK_BLOCK_RE.sub("", text or "")
    fenced = [m.group(1) for m in JSON_BLOCK_RE.finditer(visible)]
    bare = [m.group(0) for m in FALLBACK_JSON_RE.finditer(visible)]
    for candidate in fenced[::-1] + bare[::-1]:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            # Drop non-string values (null, lists, ...) so callers can use
            # them safely as dict keys and path components.
            directive = {k: v for k, v in parsed.items() if isinstance(v, str)}
            directive.setdefault("show", "none")
            return directive
    return {"show": "none"}


def _clean_response(text: str) -> str:
    """Strip reasoning and JSON directive blocks from the displayed response."""
    visible = _THINK_BLOCK_RE.sub("", text or "")
    return JSON_BLOCK_RE.sub("", visible).strip()
289
+
290
def ai_chat(user_msg: str, history: list):
    """Handle one chat turn on the AI Dashboard tab.

    Args:
        user_msg: Text entered by the user.
        history: Previous conversation as a list of ``{"role", "content"}``
            dicts (may be ``None`` or empty).

    Returns:
        A 4-tuple ``(history, "", figure, table)``: the updated history, an
        empty string that clears the input box, the path of the figure to
        show or ``None``, and the table DataFrame to show or ``None``.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None

    idx = artifacts_index()
    kpis = load_kpis()

    if not LLM_ENABLED:
        reply, directive = _keyword_fallback(user_msg, idx, kpis)
    else:
        system = DASHBOARD_SYSTEM.format(
            artifacts_json=json.dumps(idx, indent=2),
            kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
        )
        msgs = [{"role": "system", "content": system}]
        for entry in (history or [])[-6:]:
            # Forward only well-formed text turns, reduced to role/content;
            # UI-side extras or non-dict entries must not reach the API.
            if (
                isinstance(entry, dict)
                and entry.get("role") in ("user", "assistant")
                and isinstance(entry.get("content"), str)
            ):
                msgs.append({"role": entry["role"], "content": entry["content"]})
        msgs.append({"role": "user", "content": user_msg})

        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME,
                messages=msgs,
                temperature=0.3,
                max_tokens=600,
                stream=False,
            )
            raw = (
                r["choices"][0]["message"]["content"]
                if isinstance(r, dict)
                else r.choices[0].message.content
            )
            if not raw:
                # Treat an empty completion like any other LLM failure.
                raise ValueError("empty response from model")
            directive = _parse_display_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply = f"LLM error: {e}. Falling back to keyword matching."
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb

    fig_out = None
    tab_out = None
    show = directive.get("show", "none")
    scope = directive.get("scope", "")
    fname = directive.get("filename", "")

    if show in ("figure", "table") and scope and fname:
        kind = "figures" if show == "figure" else "tables"
        bases = {
            "figure": {"python": PY_FIG_DIR, "r": R_FIG_DIR},
            "table": {"python": PY_TAB_DIR, "r": R_TAB_DIR},
        }
        # The file name originates from model output, which the user's prompt
        # can steer. Accept it only if it is one of the indexed artifact
        # names, so values like "../../secret.csv" can never be served.
        known = idx[scope][kind] if isinstance(scope, str) and scope in idx else []
        if isinstance(fname, str) and fname in known:
            target = bases[show][scope] / fname
            if show == "figure":
                fig_out = str(target)
            else:
                tab_out = _load_table_safe(target)
        else:
            reply += f"\n\n*(Could not find {show}: {scope}/{fname})*"

    new_history = (history or []) + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ]

    return new_history, "", fig_out, tab_out
356
+
357
def _mentions(text: str, keywords) -> bool:
    """Return True if any keyword occurs in *text* starting at a word boundary.

    Keywords act as prefixes ("compar" matches "comparison") but no longer
    match inside other words ("lm" in "film", "ets" in "markets").
    """
    return any(re.search(r"\b" + re.escape(k), text) for k in keywords)


def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
    """Simple keyword matcher used when the LLM is unavailable.

    Args:
        msg: The user's message.
        idx: Artifact index as returned by ``artifacts_index()``.
        kpis: KPI dict (may be empty).

    Returns:
        ``(reply_text, directive)`` where *directive* has the same shape as
        the JSON block requested from the LLM.
    """
    msg_lower = msg.lower()

    if not any(idx[s]["figures"] or idx[s]["tables"] for s in ("python", "r")):
        return (
            "No artifacts found yet. Please run the pipeline first (Tab 1), "
            "then come back here to explore the results.",
            {"show": "none"},
        )

    kpi_text = ""
    if kpis and isinstance(kpis, dict):
        total = kpis.get("total_units_sold", 0)
        try:
            total_text = f"{float(total):,.0f}"
        except (TypeError, ValueError):
            # Non-numeric KPI value: show it verbatim instead of crashing.
            total_text = str(total)
        kpi_text = (
            f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
            f"**{kpis.get('n_months', '?')}** months, with **{total_text}** total units sold."
        )

    r_figures = idx.get("r", {}).get("figures", [])

    if _mentions(msg_lower, ("trend", "monthly sale")):
        return (
            f"Here are the sales trends for sampled titles. {kpi_text}",
            {"show": "figure", "scope": "python", "filename": "sales_trends_sampled_titles.png"},
        )

    if _mentions(msg_lower, ("sentiment", "review", "positive", "negative")):
        return (
            f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
            {"show": "figure", "scope": "python", "filename": "sentiment_distribution_sampled_titles.png"},
        )

    if _mentions(msg_lower, ("arima", "forecast", "predict")):
        if _mentions(msg_lower, ("compar", "ets", "accuracy")):
            if "forecast_compare.png" in r_figures:
                return (
                    "Here is the ARIMA+Fourier vs ETS forecast comparison from the R analysis.",
                    {"show": "figure", "scope": "r", "filename": "forecast_compare.png"},
                )
        return (
            f"Here are the ARIMA forecasts for sampled titles from the Python analysis. {kpi_text}",
            {"show": "figure", "scope": "python", "filename": "arima_forecasts_sampled_titles.png"},
        )

    if _mentions(msg_lower, ("regression", "lm", "coefficient", "price effect", "rating effect")):
        return (
            "The R notebook focuses on forecasting rather than regression. "
            "Here is the forecast accuracy comparison instead.",
            {"show": "table", "scope": "r", "filename": "accuracy_table.csv"},
        )

    # "sell the most" / "most sold" make the UI example
    # "Which titles sell the most?" resolve to the top-sellers table.
    if _mentions(msg_lower, ("top", "best sell", "bestsell", "popular", "rank", "sell the most", "most sold")):
        return (
            f"Here are the top-selling titles by units sold. {kpi_text}",
            {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
        )

    if _mentions(msg_lower, ("accuracy", "benchmark", "rmse", "mape")):
        return (
            "Here is the forecast accuracy comparison (ARIMA+Fourier vs ETS) from the R analysis.",
            {"show": "table", "scope": "r", "filename": "accuracy_table.csv"},
        )

    if _mentions(msg_lower, ("r analysis", "r output", "r result")):
        # Without the comparison plot, fall through to the generic answers.
        if "forecast_compare.png" in r_figures:
            return (
                "Here is the main R output: forecast model comparison plot.",
                {"show": "figure", "scope": "r", "filename": "forecast_compare.png"},
            )

    if _mentions(msg_lower, ("dashboard", "overview", "summary", "kpi")):
        return (
            f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
            "forecast accuracy, or top sellers to see specific visualizations.",
            {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
        )

    return (
        f"I can show you various analyses. {kpi_text}\n\n"
        "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
        "**forecast accuracy**, **top sellers**, or **dashboard overview**.",
        {"show": "none"},
    )
439
+
440
+ # =========================================================
441
+ # CSS LOADER (robust injection via <style> tag)
442
+ # =========================================================
443
+
444
def load_css() -> str:
    """Return the contents of ``style.css`` next to this file, or ``""`` if absent."""
    css_path = BASE_DIR / "style.css"
    if not css_path.exists():
        return ""
    return css_path.read_text(encoding="utf-8")
449
+
450
+ # =========================================================
451
+ # UI
452
+ # =========================================================
453
+
454
# Make sure the run/artifact directories exist before the UI is built.
ensure_dirs()

with gr.Blocks(title="RX12 Workshop App") as demo:
    # Inject the optional stylesheet through a <style> tag.
    css_text = load_css()
    if css_text.strip():
        gr.HTML(f"<style>{css_text}</style>")

    gr.Markdown(
        "# RX12 - Intro to Python and R - Workshop App\n"
        "*The app to integrate the three notebooks in to get a functioning blueprint of the group project's final product*",
        elem_id="escp_title",
    )

    # -----------------------------------------------------------
    # TAB 1 -- Pipeline Runner: one button per step plus "run all"
    # -----------------------------------------------------------
    with gr.Tab("Pipeline Runner"):
        gr.Markdown("")

        with gr.Row():
            with gr.Column(scale=1):
                btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
                gr.Markdown("")
            with gr.Column(scale=1):
                btn_nb2 = gr.Button("Step 2a: Python Analysis", variant="secondary")
                gr.Markdown("")
            with gr.Column(scale=1):
                btn_r = gr.Button("Step 2b: R Analysis", variant="secondary")
                gr.Markdown("")

        with gr.Row():
            btn_all = gr.Button("Run All 3 Steps", variant="primary")

        run_log = gr.Textbox(
            label="Execution Log",
            lines=18,
            max_lines=30,
            interactive=False,
        )

        # Every runner returns a log string that replaces the textbox content.
        btn_nb1.click(run_datacreation, outputs=[run_log])
        btn_nb2.click(run_pythonanalysis, outputs=[run_log])
        btn_r.click(run_r, outputs=[run_log])
        btn_all.click(run_full_pipeline, outputs=[run_log])

    # -----------------------------------------------------------
    # TAB 2 -- Results Gallery: figures plus a table preview
    # -----------------------------------------------------------
    with gr.Tab("Results Gallery"):
        gr.Markdown(
            "### All generated artifacts\n\n"
            "After running the pipeline, click **Refresh** to load all figures and tables. "
            "Figures are shown in the gallery; select a table from the dropdown to inspect it."
        )

        refresh_btn = gr.Button("Refresh Gallery", variant="primary")

        gr.Markdown("#### Figures")
        gallery = gr.Gallery(
            label="All Figures (Python + R)",
            columns=2,
            height=480,
            object_fit="contain",
        )

        gr.Markdown("#### Tables")
        table_dropdown = gr.Dropdown(
            label="Select a table to view",
            choices=[],
            interactive=True,
        )
        table_display = gr.Dataframe(
            label="Table Preview",
            interactive=False,
        )

        refresh_btn.click(
            refresh_gallery,
            outputs=[gallery, table_dropdown, table_display],
        )
        table_dropdown.change(
            on_table_select,
            inputs=[table_dropdown],
            outputs=[table_display],
        )

    # -----------------------------------------------------------
    # TAB 3 -- AI Dashboard: chat on the left, artifact on the right
    # -----------------------------------------------------------
    with gr.Tab('"AI" Dashboard'):
        # Tell the user which answering mode is active.
        if LLM_ENABLED:
            llm_status_note = "*LLM is active.*"
        else:
            llm_status_note = (
                "*No API key detected — using keyword matching. "
                "Set `HF_API_KEY` in Space secrets for full LLM support.*"
            )
        gr.Markdown(
            "### Ask questions, get visualisations\n\n"
            "Describe what you want to see and the AI will pick the right chart or table. "
            + llm_status_note
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(label="Conversation", height=380)
                user_input = gr.Textbox(
                    label="Ask about your data",
                    placeholder="e.g. Show me sales trends / What drives revenue? / Compare forecast models",
                    lines=1,
                )
                example_prompts = [
                    "Show me the sales trends",
                    "What does the sentiment look like?",
                    "Which titles sell the most?",
                    "Show the forecast accuracy comparison",
                    "Compare the ARIMA and ETS forecasts",
                    "Give me a dashboard overview",
                ]
                gr.Examples(
                    examples=example_prompts,
                    inputs=user_input,
                )

            with gr.Column(scale=1):
                ai_figure = gr.Image(label="Visualisation", height=350)
                ai_table = gr.Dataframe(label="Data Table", interactive=False)

        # ai_chat returns (history, cleared input, figure, table).
        user_input.submit(
            ai_chat,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input, ai_figure, ai_table],
        )
585
+
586
# Gradio serves any file located under an allowed path to every visitor.
# The only files this app hands to the UI by path are the figures under
# ART_DIR, so allow just that directory rather than the whole app folder
# (which also holds the source code, the notebooks and the generated CSVs).
demo.launch(allowed_paths=[str(ART_DIR)])