{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Food dashboard artifacts\n",
    "\n",
    "Reads `clean_food_products.csv` and `synthetic_food_reviews.csv`, then writes the dashboard inputs:\n",
    "\n",
    "- tables (KPIs, health-label counts, mean nutrition per health label, Nutri-Score vs health label, recommendations) to `artifacts/py/tables`\n",
    "- figures (health-label distribution, Nutri-Score vs health label) to `artifacts/py/figures`\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import json\n",
    "from pathlib import Path\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# All inputs and outputs are resolved relative to BASE_DIR.\n",
    "BASE_DIR = Path('.')\n",
    "ART_DIR = BASE_DIR / 'artifacts' / 'py'\n",
    "FIG_DIR = ART_DIR / 'figures'\n",
    "TAB_DIR = ART_DIR / 'tables'\n",
    "\n",
    "# Create the output folders up front so later to_csv / savefig calls do not fail on a missing directory.\n",
    "FIG_DIR.mkdir(parents=True, exist_ok=True)\n",
    "TAB_DIR.mkdir(parents=True, exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Load the inputs through BASE_DIR so they move together with the outputs.\n",
    "df_food = pd.read_csv(BASE_DIR / 'clean_food_products.csv')\n",
    "# NOTE: df_reviews is only inspected for its shape here; no later cell in this notebook reads it.\n",
    "df_reviews = pd.read_csv(BASE_DIR / 'synthetic_food_reviews.csv')\n",
    "\n",
    "print(df_food.shape)\n",
    "print(df_reviews.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# KPI calculation\n",
    "# Values are cast to built-in int / float so json.dump can serialise them.\n",
    "kpis = {\n",
    "    'n_products': int(len(df_food)),\n",
    "    'avg_calories_per_100g': round(float(df_food['energy-kcal_100g'].mean()), 2),\n",
    "    'healthy_count': int((df_food['health_label'] == 'healthy').sum()),\n",
    "    'unhealthy_count': int((df_food['health_label'] == 'unhealthy').sum())\n",
    "}\n",
    "\n",
    "with open(TAB_DIR / 'kpis.json', 'w') as f:\n",
    "    json.dump(kpis, f, indent=2)\n",
    "\n",
    "kpis\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Save main dashboard table\n",
    "df_food.to_csv(TAB_DIR / 'food_dashboard.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Health label counts\n",
    "health_counts = df_food['health_label'].value_counts().reset_index()\n",
    "# Name the columns explicitly so the CSV header is fixed by this cell.\n",
    "health_counts.columns = ['health_label', 'count']\n",
    "health_counts.to_csv(TAB_DIR / 'health_label_counts.csv', index=False)\n",
    "health_counts\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Nutrition by health label: mean of each nutrient column per health_label group\n",
    "nutrition = df_food.groupby('health_label')[[\n",
    "    'energy-kcal_100g',\n",
    "    'sugars_100g',\n",
    "    'fat_100g',\n",
    "    'salt_100g',\n",
    "    'proteins_100g',\n",
    "    'fiber_100g'\n",
    "]].mean().reset_index()\n",
    "\n",
    "nutrition.to_csv(TAB_DIR / 'nutrition_by_health_label.csv', index=False)\n",
    "nutrition\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Nutri-score vs health label\n",
    "# Rows graded 'unknown' or 'not-applicable' are excluded from the comparison.\n",
    "df_compare = df_food[~df_food['nutriscore_grade'].isin(['unknown','not-applicable'])]\n",
    "\n",
    "# Computed once here; the chart cell below plots the same table.\n",
    "nutri_counts = pd.crosstab(df_compare['nutriscore_grade'], df_compare['health_label'])\n",
    "\n",
    "# Long format: one row per (nutriscore_grade, health_label) pair with its count.\n",
    "nutri = nutri_counts.reset_index()\n",
    "nutri_long = nutri.melt(id_vars='nutriscore_grade', var_name='health_label', value_name='count')\n",
    "\n",
    "nutri_long.to_csv(TAB_DIR / 'nutriscore_vs_health.csv', index=False)\n",
    "nutri_long\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Recommendations\n",
    "df_food[['product_name','health_label','nutriscore_grade']].to_csv(\n",
    "    TAB_DIR / 'recommendations.csv', index=False\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Charts\n",
    "# Draw on explicitly created axes. DataFrame.plot opens a figure of its own when\n",
    "# no ax is passed, which would ignore the figsize requested here and leave an\n",
    "# empty figure open after plt.close().\n",
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "df_food['health_label'].value_counts().plot(kind='bar', ax=ax)\n",
    "ax.set_title('Health Label Distribution')\n",
    "fig.tight_layout()\n",
    "fig.savefig(FIG_DIR / 'health_label_distribution.png')\n",
    "plt.close(fig)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "nutri_counts.plot(kind='bar', stacked=True, ax=ax)\n",
    "ax.set_title('Nutri-Score vs Health Label')\n",
    "fig.tight_layout()\n",
    "fig.savefig(FIG_DIR / 'nutriscore_vs_health.png')\n",
    "plt.close(fig)\n"
   ]
  }
 ]
}