{ "nbformat": 4, "nbformat_minor": 5, "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": {}, "source": "# **šŸ¤– Data Analysis & Visualization**" }, { "cell_type": "markdown", "metadata": {}, "source": "## **1.** šŸ“¦ Install required packages" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "!pip install pandas matplotlib seaborn numpy scipy" }, { "cell_type": "markdown", "metadata": {}, "source": "## **2.** āœ…ļø Load & inspect the dataset" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. Initial setup*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import pandas as pd\nimport numpy as np\nimport json, re, warnings\nfrom collections import Counter\nfrom pathlib import Path\nwarnings.filterwarnings(\"ignore\")\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. āœ‹šŸ»šŸ›‘ā›”ļø Create the df dataframe from the ai_bubble_clean.csv file*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "df = pd.read_csv(\"ai_bubble_clean.csv\")\ndf[\"Date\"] = pd.to_datetime(df[\"Date\"])\ndf[\"YearMonth\"] = pd.to_datetime(df[\"YearMonth\"])" }, { "cell_type": "markdown", "metadata": {}, "source": "### *c. āœ‹šŸ»šŸ›‘ā›”ļø Visualize the first few lines of df*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "df.head()" }, { "cell_type": "markdown", "metadata": {}, "source": "### *d. 
Run a quality check on the dataset*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "def quality_check(df, name=\"DataFrame\"):\n print(f\"\\nšŸ” Quality Check Report for: {name}\")\n print(\"=\" * (25 + len(name)))\n print(f\"\\nšŸ“ Shape: {df.shape}\")\n print(\"\\nšŸ”  Column Types:\")\n print(df.dtypes)\n print(\"\\nā“ Missing Values:\")\n print(df.isnull().sum())\n print(f\"\\nšŸ“‹ Duplicate Rows: {df.duplicated().sum()}\")\n print(\"\\nšŸ“Š Summary Statistics:\")\n display(df.describe(include=\"all\").transpose())\n print(\"\\nšŸ‘€ Sample Rows:\")\n display(df.sample(min(5, len(df))))\n\nquality_check(df, \"AI Bubble Sentiment Dataset\")\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **3.** šŸ“Š Set up output folders and plot style" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. Create artifact folders for the Hugging Face dashboard*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import matplotlib\nmatplotlib.use(\"Agg\")\nimport matplotlib.pyplot as plt\nimport matplotlib.dates as mdates\nfrom matplotlib.ticker import MaxNLocator\nimport seaborn as sns\n\nART_DIR = Path(\"artifacts\")\nPY_FIG = ART_DIR / \"py\" / \"figures\"\nPY_TAB = ART_DIR / \"py\" / \"tables\"\n\nfor p in [PY_FIG, PY_TAB]:\n p.mkdir(parents=True, exist_ok=True)\n\nPALETTE = {\"bullish\": \"#2ecc71\", \"neutral\": \"#3498db\", \"bearish\": \"#e74c3c\"}\nESCP_PURPLE = \"#2e0052\"\nsns.set_theme(style=\"whitegrid\", font_scale=1.1)\n\nprint(\"āœ… Output folders:\")\nprint(\" -\", PY_FIG.resolve())\nprint(\" -\", PY_TAB.resolve())\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **4.** 🧭 Overall sentiment distribution" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. 
Compute sentiment, platform and topic counts*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "sent_counts = df[\"Sentiment\"].value_counts().reindex([\"bullish\",\"neutral\",\"bearish\"])\nplat_counts = df[\"Platform\"].value_counts().head(6)\ntopic_counts = df[\"Topic\"].value_counts()\nprint(sent_counts)\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. Plot the 3-panel overview figure*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, axes = plt.subplots(1, 3, figsize=(16, 5))\nfig.suptitle(\"AI Bubble Sentiment — Dataset Overview\", fontsize=15,\n fontweight=\"bold\", color=ESCP_PURPLE)\n\n# Pie — sentiment\naxes[0].pie(sent_counts,\n labels=[f\"{s.title()}\\n{n}\" for s, n in zip(sent_counts.index, sent_counts)],\n colors=[PALETTE[s] for s in sent_counts.index],\n autopct=\"%1.1f%%\", startangle=90,\n wedgeprops={\"edgecolor\":\"white\",\"linewidth\":2})\naxes[0].set_title(\"Sentiment Distribution\", fontweight=\"bold\")\n\n# Bar — platform\naxes[1].barh(plat_counts.index[::-1], plat_counts.values[::-1],\n color=ESCP_PURPLE, alpha=0.8)\naxes[1].set_title(\"Comments by Platform\", fontweight=\"bold\")\naxes[1].set_xlabel(\"Number of Comments\")\nfor i, v in enumerate(plat_counts.values[::-1]):\n axes[1].text(v + 0.3, i, str(v), va=\"center\", fontsize=9)\n\n# Bar — topic\naxes[2].bar(topic_counts.index, topic_counts.values,\n color=[\"#9b59b6\",\"#3498db\",\"#e67e22\",\"#1abc9c\"],\n edgecolor=\"white\", linewidth=1.5)\naxes[2].set_title(\"Comments by Topic\", fontweight=\"bold\")\naxes[2].set_ylabel(\"Count\"); axes[2].set_xlabel(\"Topic\")\nfor i, v in enumerate(topic_counts.values):\n axes[2].text(i, v + 0.3, str(v), ha=\"center\", fontsize=9)\n\nplt.tight_layout()\nplt.savefig(PY_FIG / \"01_overview_distributions.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **5.** šŸ“ˆ Sentiment over time" }, { 
"cell_type": "markdown", "metadata": {}, "source": "### *a. Aggregate monthly comment counts per sentiment*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "monthly = (df.groupby([\"YearMonth\",\"Sentiment\"]).size()\n .unstack(fill_value=0)\n .reindex(columns=[\"bullish\",\"neutral\",\"bearish\"], fill_value=0))\nmonthly.index = pd.to_datetime(monthly.index)\n\nmonthly_score = df.groupby(\"YearMonth\")[\"SentScore\"].mean()\nmonthly_score.index = pd.to_datetime(monthly_score.index)\nrolling_score = monthly_score.rolling(3, min_periods=1).mean()\n\nmonthly_out = monthly.copy()\nmonthly_out[\"avg_score\"] = monthly_score\nmonthly_out.index = monthly_out.index.strftime(\"%Y-%m\")\nmonthly_out.reset_index(inplace=True)\nmonthly_out.columns = [\"month\",\"bullish\",\"neutral\",\"bearish\",\"avg_score\"]\nmonthly_out.to_csv(PY_TAB / \"monthly_sentiment.csv\", index=False)\nprint(monthly_out.head())\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. 
Plot stacked area + rolling sentiment score*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 9), sharex=True)\nfig.suptitle(\"AI Bubble Sentiment Over Time\", fontsize=15,\n fontweight=\"bold\", color=ESCP_PURPLE)\n\nax1.stackplot(monthly.index,\n monthly[\"bullish\"], monthly[\"neutral\"], monthly[\"bearish\"],\n labels=[\"Bullish\",\"Neutral\",\"Bearish\"],\n colors=[PALETTE[\"bullish\"],PALETTE[\"neutral\"],PALETTE[\"bearish\"]], alpha=0.75)\nax1.set_ylabel(\"Number of Comments\")\nax1.set_title(\"Monthly Comment Volume by Sentiment\", fontweight=\"bold\")\nax1.legend(loc=\"upper left\", framealpha=0.8)\nax1.yaxis.set_major_locator(MaxNLocator(integer=True))\n\nax2.axhline(0, color=\"black\", lw=0.8, ls=\"--\", alpha=0.6)\nax2.fill_between(rolling_score.index, rolling_score, 0,\n where=(rolling_score >= 0), interpolate=True,\n color=PALETTE[\"bullish\"], alpha=0.4, label=\"Bullish zone\")\nax2.fill_between(rolling_score.index, rolling_score, 0,\n where=(rolling_score < 0), interpolate=True,\n color=PALETTE[\"bearish\"], alpha=0.4, label=\"Bearish zone\")\nax2.plot(rolling_score.index, rolling_score, color=\"black\", lw=1.5, label=\"3-month avg\")\nax2.set_ylabel(\"Avg Sentiment Score\\n(+1=bullish, -1=bearish)\")\nax2.set_xlabel(\"Date\")\nax2.set_title(\"Rolling Average Sentiment Score (3-month window)\", fontweight=\"bold\")\nax2.legend(loc=\"upper left\", framealpha=0.8)\nax2.xaxis.set_major_formatter(mdates.DateFormatter(\"%b '%y\"))\nax2.xaxis.set_major_locator(mdates.MonthLocator(interval=4))\nplt.setp(ax2.xaxis.get_majorticklabels(), rotation=45, ha=\"right\")\n\nplt.tight_layout()\nplt.savefig(PY_FIG / \"02_sentiment_over_time.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **6.** šŸ”„ Sentiment breakdown by topic" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. 
āœ‹šŸ»šŸ›‘ā›”ļø Compute cross-tabulation of Topic vs Sentiment*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "cross = pd.crosstab(df[\"Topic\"], df[\"Sentiment\"])[[\"bullish\",\"neutral\",\"bearish\"]]\ncross_pct = cross.div(cross.sum(axis=1), axis=0) * 100\ncross.reset_index().to_csv(PY_TAB / \"sentiment_by_topic.csv\", index=False)\nprint(cross)\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. Plot absolute counts and % heatmaps side by side*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))\nfig.suptitle(\"Sentiment vs. Topic\", fontsize=15, fontweight=\"bold\", color=ESCP_PURPLE)\n\nsns.heatmap(cross, annot=True, fmt=\"d\", cmap=\"YlOrRd\",\n linewidths=0.5, linecolor=\"white\", cbar_kws={\"label\":\"Count\"}, ax=ax1)\nax1.set_title(\"Absolute Comment Counts\", fontweight=\"bold\")\n\nsns.heatmap(cross_pct, annot=True, fmt=\".1f\", cmap=\"RdYlGn\",\n linewidths=0.5, linecolor=\"white\",\n cbar_kws={\"label\":\"% within Topic\"}, ax=ax2, vmin=0, vmax=60)\nax2.set_title(\"% of Comments per Topic\", fontweight=\"bold\")\n\nplt.tight_layout()\nplt.savefig(PY_FIG / \"03_sentiment_by_topic.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **7.** 🌐 Sentiment breakdown by platform" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. 
Compute platform cross-tabulation*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "top_platforms = df[\"Platform\"].value_counts().head(6).index\ndf_plat = df[df[\"Platform\"].isin(top_platforms)]\nplat_cross = pd.crosstab(df_plat[\"Platform\"], df_plat[\"Sentiment\"])[[\"bullish\",\"neutral\",\"bearish\"]]\nplat_pct = plat_cross.div(plat_cross.sum(axis=1), axis=0) * 100\nplat_cross.reset_index().to_csv(PY_TAB / \"sentiment_by_platform.csv\", index=False)\nprint(plat_pct)\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. Plot stacked bar chart by platform*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, ax = plt.subplots(figsize=(12, 6))\nplat_pct.plot(kind=\"bar\", stacked=True, ax=ax,\n color=[PALETTE[\"bullish\"],PALETTE[\"neutral\"],PALETTE[\"bearish\"]],\n edgecolor=\"white\", linewidth=0.8)\nax.set_title(\"Sentiment Distribution by Platform (%)\", fontsize=14,\n fontweight=\"bold\", color=ESCP_PURPLE)\nax.set_xlabel(\"Platform\"); ax.set_ylabel(\"% of Comments\")\nax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha=\"right\")\nax.legend(title=\"Sentiment\", bbox_to_anchor=(1.01, 1), loc=\"upper left\")\nax.axhline(50, color=\"black\", ls=\"--\", alpha=0.4, lw=0.9)\nplt.tight_layout()\nplt.savefig(PY_FIG / \"04_sentiment_by_platform.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **8.** šŸ“… Yearly sentiment shift" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. Aggregate by year*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "yearly = pd.crosstab(df[\"Year\"], df[\"Sentiment\"])[[\"bullish\",\"neutral\",\"bearish\"]]\nyearly_pct = yearly.div(yearly.sum(axis=1), axis=0) * 100\nyearly.reset_index().to_csv(PY_TAB / \"yearly_sentiment.csv\", index=False)\nprint(yearly_pct)\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. 
āœ‹šŸ»šŸ›‘ā›”ļø Plot volume and share grouped bars side by side*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\nfig.suptitle(\"How Sentiment Has Shifted Year-over-Year\", fontsize=14,\n fontweight=\"bold\", color=ESCP_PURPLE)\n\nyearly.plot(kind=\"bar\", ax=ax1,\n color=[PALETTE[\"bullish\"],PALETTE[\"neutral\"],PALETTE[\"bearish\"]], edgecolor=\"white\")\nax1.set_title(\"Comment Volume by Year\", fontweight=\"bold\")\nax1.set_xticklabels(yearly.index, rotation=0); ax1.legend(title=\"Sentiment\")\n\nyearly_pct.plot(kind=\"bar\", stacked=True, ax=ax2,\n color=[PALETTE[\"bullish\"],PALETTE[\"neutral\"],PALETTE[\"bearish\"]], edgecolor=\"white\")\nax2.set_title(\"Sentiment Share by Year (%)\", fontweight=\"bold\")\nax2.set_xticklabels(yearly_pct.index, rotation=0)\nax2.legend(title=\"Sentiment\", bbox_to_anchor=(1.01, 1))\nplt.tight_layout()\nplt.savefig(PY_FIG / \"05_yearly_sentiment_shift.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **9.** šŸ”¤ Word frequency by sentiment" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. 
Define stopwords and top_words function*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "STOPWORDS = {\n \"the\",\"a\",\"an\",\"is\",\"it\",\"in\",\"of\",\"and\",\"to\",\"for\",\"on\",\"are\",\"that\",\n \"this\",\"with\",\"as\",\"but\",\"not\",\"be\",\"at\",\"by\",\"or\",\"from\",\"have\",\"has\",\n \"will\",\"was\",\"were\",\"been\",\"they\",\"their\",\"we\",\"our\",\"i\",\"you\",\"he\",\"she\",\n \"its\",\"so\",\"if\",\"than\",\"more\",\"just\",\"can\",\"about\",\"what\",\"which\",\"would\",\n \"also\",\"there\",\"these\",\"those\",\"all\",\"some\",\"any\",\"up\",\"how\",\"very\",\"much\",\n \"when\",\"who\",\"one\",\"my\",\"do\",\"had\",\"get\",\"out\",\"even\",\"into\",\"like\",\"no\",\n \"after\",\"them\",\"your\",\"such\",\"because\",\"am\",\"over\",\"does\",\"make\",\"only\",\"really\"\n}\n\ndef top_words(texts, n=20):\n words = []\n for t in texts:\n words.extend(re.findall(r\"\\b[a-z]{3,}\\b\", str(t).lower()))\n return Counter(w for w in words if w not in STOPWORDS).most_common(n)\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. 
āœ‹šŸ»šŸ›‘ā›”ļø Plot top 20 words for each sentiment label*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, axes = plt.subplots(1, 3, figsize=(18, 7))\nfig.suptitle(\"Top Words by Sentiment\", fontsize=14, fontweight=\"bold\", color=ESCP_PURPLE)\n\nfor ax, sent in zip(axes, [\"bullish\",\"neutral\",\"bearish\"]):\n pairs = top_words(df[df[\"Sentiment\"] == sent][\"Comment\"])\n words, freqs = zip(*pairs)\n ax.barh(list(words)[::-1], list(freqs)[::-1],\n color=PALETTE[sent], alpha=0.85, edgecolor=\"white\")\n ax.set_title(f\"{sent.title()} Comments\", fontweight=\"bold\", color=PALETTE[sent])\n ax.set_xlabel(\"Frequency\")\n\nplt.tight_layout()\nplt.savefig(PY_FIG / \"06_word_frequency_by_sentiment.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **10.** šŸ“ AI Bubble Risk Score" }, { "cell_type": "markdown", "metadata": {}, "source": "### *a. Define bubble_risk function — bearish share of (bullish+bearish) per month*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "def bubble_risk(group):\n b = (group == \"bearish\").sum()\n u = (group == \"bullish\").sum()\n total = b + u\n return b / total if total > 0 else np.nan\n\nmonthly_risk = df.groupby(\"YearMonth\")[\"Sentiment\"].apply(bubble_risk)\nmonthly_risk.index = pd.to_datetime(monthly_risk.index)\nrolling_risk = monthly_risk.rolling(3, min_periods=1).mean()\n\npd.DataFrame({\n \"month\": rolling_risk.index.strftime(\"%Y-%m\"),\n \"bubble_risk_score\": rolling_risk.round(3).values\n}).to_csv(PY_TAB / \"bubble_risk_score.csv\", index=False)\nprint(rolling_risk.tail())\n" }, { "cell_type": "markdown", "metadata": {}, "source": "### *b. 
Plot bubble risk score over time*" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "fig, ax = plt.subplots(figsize=(13, 5))\nax.fill_between(rolling_risk.index, rolling_risk, 0.5,\n where=(rolling_risk > 0.5), interpolate=True,\n color=\"#e74c3c\", alpha=0.3, label=\"Bearish-dominant\")\nax.fill_between(rolling_risk.index, rolling_risk, 0.5,\n where=(rolling_risk <= 0.5), interpolate=True,\n color=\"#2ecc71\", alpha=0.3, label=\"Bullish-dominant\")\nax.plot(rolling_risk.index, rolling_risk, color=\"#2c3e50\", lw=2,\n label=\"Risk score (3-mo avg)\")\nax.axhline(0.5, color=\"gray\", ls=\"--\", lw=1, alpha=0.7, label=\"Neutral threshold\")\nax.set_ylim(0, 1)\nax.set_ylabel(\"Bubble Risk Score\\n(0 = all bullish, 1 = all bearish)\")\nax.set_title(\"AI Bubble Risk Score Over Time\", fontsize=14,\n fontweight=\"bold\", color=ESCP_PURPLE)\nax.xaxis.set_major_formatter(mdates.DateFormatter(\"%b '%y\"))\nax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))\nplt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha=\"right\")\nax.legend(framealpha=0.8)\nplt.tight_layout()\nplt.savefig(PY_FIG / \"07_bubble_risk_score.png\", dpi=150)\nplt.show()\n" }, { "cell_type": "markdown", "metadata": {}, "source": "## **11.** šŸ’¾ Save Python outputs for the Hugging Face dashboard" }, { "cell_type": "markdown", "metadata": {}, "source": "This section exports **HF-ready artifacts** into the folder structure the app expects:\n- `artifacts/py/figures/` — all chart images\n- `artifacts/py/tables/` — tables and KPI JSON\n" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from scipy.stats import chi2_contingency\n\n# Chi-square test\nchi2_stat, p, dof, _ = chi2_contingency(cross.values)\npd.DataFrame({\n \"Test\": [\"Chi-Square (Sentiment vs Topic)\"],\n \"Chi2_Statistic\": [round(chi2_stat, 3)],\n \"p_value\": [round(p, 4)],\n \"Degrees_of_Freedom\": [dof],\n \"Significant_alpha_05\": [\"Yes\" if p 
< 0.05 else \"No\"],\n}).to_csv(PY_TAB / \"chi_square_result.csv\", index=False)\n\n# KPIs JSON\nlatest_3mo = df[df[\"Date\"] >= df[\"Date\"].max() - pd.DateOffset(months=3)]\nlatest_risk = (latest_3mo[\"Sentiment\"]==\"bearish\").sum() / max(\n (latest_3mo[\"Sentiment\"]==\"bearish\").sum() +\n (latest_3mo[\"Sentiment\"]==\"bullish\").sum(), 1)\n\nkpis = {\n \"total_comments\": int(len(df)),\n \"date_range\": f\"{df['Date'].min().strftime('%b %Y')} – {df['Date'].max().strftime('%b %Y')}\",\n \"n_platforms\": int(df[\"Platform\"].nunique()),\n \"n_topics\": int(df[\"Topic\"].nunique()),\n \"pct_bearish\": round((df[\"Sentiment\"]==\"bearish\").mean()*100, 1),\n \"pct_bullish\": round((df[\"Sentiment\"]==\"bullish\").mean()*100, 1),\n \"pct_neutral\": round((df[\"Sentiment\"]==\"neutral\").mean()*100, 1),\n \"avg_sentiment_score\": round(df[\"SentScore\"].mean(), 3),\n \"latest_bubble_risk\": round(float(latest_risk), 3),\n \"chi2_p_value\": round(p, 4),\n \"most_bearish_topic\": str(cross_pct[\"bearish\"].idxmax()),\n \"most_bullish_topic\": str(cross_pct[\"bullish\"].idxmax()),\n \"dominant_platform\": str(df[\"Platform\"].value_counts().index[0]),\n}\nwith open(PY_TAB / \"kpis.json\", \"w\") as f:\n json.dump(kpis, f, indent=2)\n\nprint(\"āœ… All Python artifacts saved\")\nprint(f\" Figures : {len(list(PY_FIG.glob('*.png')))}\")\nprint(f\" Tables : {len(list(PY_TAB.glob('*.csv')) + list(PY_TAB.glob('*.json')))}\")\nfor k, v in kpis.items():\n print(f\" {k}: {v}\")\n" }, { "cell_type": "markdown", "metadata": {}, "source": "āœ… **Output for R notebook**: `ai_bubble_clean.csv` in the working directory (produced by `datacreation.ipynb`)." 
}, { "cell_type": "markdown", "metadata": {}, "source": "## **12.** ā¬‡ļø Download all Python artifacts" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# ── Download all Python artifacts as a ZIP ────────────────────────────────────\nimport shutil, zipfile, os\nfrom pathlib import Path\n\nzip_path = \"python_analysis_artifacts.zip\"\n\nwith zipfile.ZipFile(zip_path, \"w\", zipfile.ZIP_DEFLATED) as zf:\n for p in Path(\"artifacts/py\").rglob(\"*\"):\n if p.is_file():\n zf.write(p, p.relative_to(\"artifacts/py\"))\n\nprint(\"šŸ“¦ ZIP contents:\")\nwith zipfile.ZipFile(zip_path, \"r\") as zf:\n for name in sorted(zf.namelist()):\n print(f\" {name}\")\n\n# Colab: triggers a browser download\n# HuggingFace / local: the ZIP is saved next to the notebook\ntry:\n from google.colab import files\n files.download(zip_path)\n print(\"\\nāœ… Download started!\")\nexcept ImportError:\n print(f\"\\nāœ… Saved as: {Path(zip_path).resolve()}\")\n" } ] }