{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "gJkQRrz-DLPm" }, "source": [ "# **Introduction & Team**\n", "\n", "The overall goal of this project is to analyze and uncover hidden patterns in the wine quality dataset (1,599 wines described by 11 physicochemical measurements and a quality score) using unsupervised learning techniques. By applying data preparation, dimensionality reduction, and clustering analysis, we aim to provide data-driven insights and recommendations that can help stakeholders make better strategic decisions and grow their business.\n", "\n", "This project has been carried out by the following participants:\n", "\n", "- Amisha Magar (amagar25@student.aau.dk)\n", "- Riya Pokharel (rpokha25@student.aau.dk)\n", "- Sristee Rai (skulun25@student.aau.dk)" ] }, { "cell_type": "markdown", "metadata": { "id": "Gbk9rUc5LXD2" }, "source": [ "# Group A" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 474 }, "id": "HSAttP0t9xgr", "outputId": "3a29ed34-ccaf-4fc2-ac5e-66dc99e51af6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1599, 12)\n", "fixed acidity 0\n", "volatile acidity 0\n", "citric acid 0\n", "residual sugar 0\n", "chlorides 0\n", "free sulfur dioxide 0\n", "total sulfur dioxide 0\n", "density 0\n", "pH 0\n", "sulphates 0\n", "alcohol 0\n", "quality 0\n", "dtype: int64\n" ] }, { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"wine_data\",\n \"rows\": 1599,\n \"fields\": [\n {\n \"column\": \"fixed acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7410963181277006,\n \"min\": 4.6,\n \"max\": 15.9,\n \"num_unique_values\": 96,\n \"samples\": [\n 5.3,\n 12.7,\n 12.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volatile acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.17905970415353498,\n \"min\": 0.12,\n \"max\": 1.58,\n \"num_unique_values\": 143,\n \"samples\": [\n 1.025,\n 0.4,\n 0.87\n 
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"citric acid\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.19480113740531785,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 80,\n \"samples\": [\n 0.37,\n 0.0,\n 0.09\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"residual sugar\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4099280595072805,\n \"min\": 0.9,\n \"max\": 15.5,\n \"num_unique_values\": 91,\n \"samples\": [\n 11.0,\n 3.0,\n 15.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chlorides\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.047065302010090154,\n \"min\": 0.012,\n \"max\": 0.611,\n \"num_unique_values\": 153,\n \"samples\": [\n 0.096,\n 0.343,\n 0.159\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"free sulfur dioxide\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10.46015696980973,\n \"min\": 1.0,\n \"max\": 72.0,\n \"num_unique_values\": 60,\n \"samples\": [\n 11.0,\n 9.0,\n 32.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total sulfur dioxide\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32.89532447829901,\n \"min\": 6.0,\n \"max\": 289.0,\n \"num_unique_values\": 144,\n \"samples\": [\n 68.0,\n 35.0,\n 101.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"density\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0018873339538425559,\n \"min\": 0.99007,\n \"max\": 1.00369,\n \"num_unique_values\": 436,\n \"samples\": [\n 0.99974,\n 1.0001,\n 0.99471\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pH\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.15438646490354266,\n \"min\": 2.74,\n \"max\": 4.01,\n \"num_unique_values\": 89,\n \"samples\": [\n 3.07,\n 3.0,\n 3.15\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sulphates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.16950697959010977,\n \"min\": 0.33,\n \"max\": 2.0,\n \"num_unique_values\": 96,\n \"samples\": [\n 1.07,\n 1.04,\n 1.18\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alcohol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0656675818473926,\n \"min\": 8.4,\n \"max\": 14.9,\n \"num_unique_values\": 65,\n \"samples\": [\n 8.5,\n 9.95,\n 9.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"quality\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 3,\n \"max\": 8,\n \"num_unique_values\": 6,\n \"samples\": [\n 5,\n 6,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "wine_data" }, "text/html": [ "\n", "
| \n", " | fixed acidity | \n", "volatile acidity | \n", "citric acid | \n", "residual sugar | \n", "chlorides | \n", "free sulfur dioxide | \n", "total sulfur dioxide | \n", "density | \n", "pH | \n", "sulphates | \n", "alcohol | \n", "quality | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "7.4 | \n", "0.70 | \n", "0.00 | \n", "1.9 | \n", "0.076 | \n", "11.0 | \n", "34.0 | \n", "0.9978 | \n", "3.51 | \n", "0.56 | \n", "9.4 | \n", "5 | \n", "
| 1 | \n", "7.8 | \n", "0.88 | \n", "0.00 | \n", "2.6 | \n", "0.098 | \n", "25.0 | \n", "67.0 | \n", "0.9968 | \n", "3.20 | \n", "0.68 | \n", "9.8 | \n", "5 | \n", "
| 2 | \n", "7.8 | \n", "0.76 | \n", "0.04 | \n", "2.3 | \n", "0.092 | \n", "15.0 | \n", "54.0 | \n", "0.9970 | \n", "3.26 | \n", "0.65 | \n", "9.8 | \n", "5 | \n", "
| 3 | \n", "11.2 | \n", "0.28 | \n", "0.56 | \n", "1.9 | \n", "0.075 | \n", "17.0 | \n", "60.0 | \n", "0.9980 | \n", "3.16 | \n", "0.58 | \n", "9.8 | \n", "6 | \n", "
| 4 | \n", "7.4 | \n", "0.70 | \n", "0.00 | \n", "1.9 | \n", "0.076 | \n", "11.0 | \n", "34.0 | \n", "0.9978 | \n", "3.51 | \n", "0.56 | \n", "9.4 | \n", "5 | \n", "