{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ec455bd3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Active code page: 1252\n", "Requirement already satisfied: requests in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.32.5)\n", "Requirement already satisfied: pandas in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.3.2)\n", "Requirement already satisfied: numpy in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.3.2)\n", "Collecting matplotlib\n", " Using cached matplotlib-3.10.7-cp313-cp313-win_amd64.whl.metadata (11 kB)\n", "Collecting plotly\n", " Downloading plotly-6.3.1-py3-none-any.whl.metadata (8.5 kB)\n", "Requirement already satisfied: streamlit in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (1.49.1)\n", "Requirement already satisfied: langchain in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (0.3.27)\n", "Requirement already satisfied: transformers in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (4.56.1)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (3.4.3)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (2025.8.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 
in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2025.2)\n", "Collecting contourpy>=1.0.1 (from matplotlib)\n", " Using cached contourpy-1.3.3-cp313-cp313-win_amd64.whl.metadata (5.5 kB)\n", "Collecting cycler>=0.10 (from matplotlib)\n", " Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", "Collecting fonttools>=4.22.0 (from matplotlib)\n", " Using cached fonttools-4.60.1-cp313-cp313-win_amd64.whl.metadata (114 kB)\n", "Collecting kiwisolver>=1.3.1 (from matplotlib)\n", " Using cached kiwisolver-1.4.9-cp313-cp313-win_amd64.whl.metadata (6.4 kB)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from matplotlib) (23.2)\n", "Requirement already satisfied: pillow>=8 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from matplotlib) (11.3.0)\n", "Collecting pyparsing>=3 (from matplotlib)\n", " Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)\n", "Requirement already satisfied: narwhals>=1.15.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from plotly) (2.4.0)\n", "Requirement already satisfied: altair!=5.4.0,!=5.4.1,<6,>=4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (5.5.0)\n", "Requirement already satisfied: blinker<2,>=1.5.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (1.9.0)\n", "Requirement already satisfied: cachetools<7,>=4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.2.0)\n", "Requirement already satisfied: click<9,>=7.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (8.2.1)\n", "Requirement already satisfied: protobuf<7,>=3.20 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.32.0)\n", "Requirement already satisfied: pyarrow>=7.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (21.0.0)\n", 
"Requirement already satisfied: tenacity<10,>=8.1.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (8.5.0)\n", "Requirement already satisfied: toml<2,>=0.10.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (0.10.2)\n", "Requirement already satisfied: typing-extensions<5,>=4.4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (4.15.0)\n", "Requirement already satisfied: watchdog<7,>=2.1.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.0.0)\n", "Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (3.1.45)\n", "Requirement already satisfied: pydeck<1,>=0.8.0b4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (0.9.1)\n", "Requirement already satisfied: tornado!=6.5.0,<7,>=6.0.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.5.2)\n", "Requirement already satisfied: langchain-core<1.0.0,>=0.3.72 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.3.76)\n", "Requirement already satisfied: langchain-text-splitters<1.0.0,>=0.3.9 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.3.11)\n", "Requirement already satisfied: langsmith>=0.1.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.4.27)\n", "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (2.11.7)\n", "Requirement already satisfied: SQLAlchemy<3,>=1.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (2.0.43)\n", "Requirement already satisfied: PyYAML>=5.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (6.0.2)\n", "Requirement already satisfied: filelock in 
c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (3.19.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.34.4)\n", "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (2025.9.1)\n", "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.22.0)\n", "Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.6.2)\n", "Requirement already satisfied: tqdm>=4.27 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: jinja2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (3.1.6)\n", "Requirement already satisfied: jsonschema>=3.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (4.25.1)\n", "Requirement already satisfied: colorama in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from click<9,>=7.0->streamlit) (0.4.6)\n", "Requirement already satisfied: gitdb<5,>=4.0.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.12)\n", "Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (2024.2.0)\n", "Requirement already satisfied: jsonpatch<2.0,>=1.33 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain-core<1.0.0,>=0.3.72->langchain) (1.33)\n", "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from 
langsmith>=0.1.17->langchain) (0.28.1)\n", "Requirement already satisfied: orjson>=3.9.14 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (3.11.3)\n", "Requirement already satisfied: requests-toolbelt>=1.0.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (1.0.0)\n", "Requirement already satisfied: zstandard>=0.23.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (0.24.0)\n", "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.4.1)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", "Requirement already satisfied: greenlet>=1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.2.4)\n", "Requirement already satisfied: smmap<6,>=3.0.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.2)\n", "Requirement already satisfied: anyio in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (4.10.0)\n", "Requirement already satisfied: httpcore==1.* in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (1.0.9)\n", "Requirement already satisfied: h11>=0.16 in 
c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (0.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jinja2->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (3.0.2)\n", "Requirement already satisfied: jsonpointer>=1.9 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonpatch<2.0,>=1.33->langchain-core<1.0.0,>=0.3.72->langchain) (3.0.0)\n", "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (25.3.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (2025.9.1)\n", "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (0.36.2)\n", "Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (0.27.1)\n", "Requirement already satisfied: sniffio>=1.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from anyio->httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (1.3.1)\n", "Using cached matplotlib-3.10.7-cp313-cp313-win_amd64.whl (8.1 MB)\n", "Downloading plotly-6.3.1-py3-none-any.whl (9.8 MB)\n", " ---------------------------------------- 0.0/9.8 MB ? 
eta -:--:--\n", " ---- ----------------------------------- 1.0/9.8 MB 7.0 MB/s eta 0:00:02\n", " ---- ----------------------------------- 1.0/9.8 MB 7.0 MB/s eta 0:00:02\n", " ---------- ----------------------------- 2.6/9.8 MB 4.5 MB/s eta 0:00:02\n", " ------------- -------------------------- 3.4/9.8 MB 4.4 MB/s eta 0:00:02\n", " ----------------- ---------------------- 4.2/9.8 MB 4.3 MB/s eta 0:00:02\n", " -------------------- ------------------- 5.0/9.8 MB 4.2 MB/s eta 0:00:02\n", " ----------------------- ---------------- 5.8/9.8 MB 4.1 MB/s eta 0:00:01\n", " --------------------------- ------------ 6.8/9.8 MB 4.1 MB/s eta 0:00:01\n", " ------------------------------ --------- 7.6/9.8 MB 4.1 MB/s eta 0:00:01\n", " ---------------------------------- ----- 8.4/9.8 MB 4.1 MB/s eta 0:00:01\n", " ------------------------------------- -- 9.2/9.8 MB 4.0 MB/s eta 0:00:01\n", " --------------------------------------- 9.7/9.8 MB 4.0 MB/s eta 0:00:01\n", " ---------------------------------------- 9.8/9.8 MB 3.9 MB/s eta 0:00:00\n", "Using cached contourpy-1.3.3-cp313-cp313-win_amd64.whl (226 kB)\n", "Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", "Using cached fonttools-4.60.1-cp313-cp313-win_amd64.whl (2.3 MB)\n", "Using cached kiwisolver-1.4.9-cp313-cp313-win_amd64.whl (73 kB)\n", "Using cached pyparsing-3.2.5-py3-none-any.whl (113 kB)\n", "Installing collected packages: pyparsing, plotly, kiwisolver, fonttools, cycler, contourpy, matplotlib\n", "Successfully installed contourpy-1.3.3 cycler-0.12.1 fonttools-4.60.1 kiwisolver-1.4.9 matplotlib-3.10.7 plotly-6.3.1 pyparsing-3.2.5\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [
 "# Environment setup: run once, then restart the kernel if anything was installed.\n",
 "# Uses the explicit %pip magic so packages land in the running kernel's environment,\n",
 "# and pins the versions recorded in this cell's last install log so a re-run\n",
 "# reproduces the same environment. -q keeps the install log out of the notebook.\n",
 "%pip install -q requests==2.32.5 pandas==2.3.2 numpy==2.3.2 matplotlib==3.10.7 plotly==6.3.1 streamlit==1.49.1 langchain==0.3.27 transformers==4.56.1\n"
] }, { "cell_type": "code", "execution_count": 6, "id": "42263596", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "šŸ” Dataset Loaded Successfully!\n", "Total Rows: 
203\n", "Total Columns: 25\n", "\n" ] }, { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "index", "rawType": "int64", "type": "integer" }, { "name": "state_name", "rawType": "object", "type": "string" }, { "name": "district_name", "rawType": "object", "type": "string" }, { "name": "crop_year", "rawType": "int64", "type": "integer" }, { "name": "season", "rawType": "object", "type": "string" }, { "name": "crop", "rawType": "object", "type": "string" }, { "name": "area_", "rawType": "float64", "type": "float" }, { "name": "production_", "rawType": "float64", "type": "float" }, { "name": "subdivision", "rawType": "object", "type": "string" }, { "name": "jan", "rawType": "float64", "type": "float" }, { "name": "feb", "rawType": "float64", "type": "float" }, { "name": "mar", "rawType": "float64", "type": "float" }, { "name": "apr", "rawType": "float64", "type": "float" }, { "name": "may", "rawType": "float64", "type": "float" }, { "name": "jun", "rawType": "float64", "type": "float" }, { "name": "jul", "rawType": "float64", "type": "float" }, { "name": "aug", "rawType": "float64", "type": "float" }, { "name": "sep", "rawType": "float64", "type": "float" }, { "name": "oct", "rawType": "float64", "type": "float" }, { "name": "nov", "rawType": "float64", "type": "float" }, { "name": "dec", "rawType": "float64", "type": "float" }, { "name": "annual", "rawType": "float64", "type": "float" }, { "name": "jf", "rawType": "float64", "type": "float" }, { "name": "mam", "rawType": "float64", "type": "float" }, { "name": "jjas", "rawType": "float64", "type": "float" }, { "name": "ond", "rawType": "float64", "type": "float" } ], "ref": "633f3f12-0965-479f-8a47-3a1c5a2b8105", "rows": [ [ "0", "andaman and nicobar islands", "NICOBARS", "2000", "Kharif", "Arecanut", "1254.0", "2000.0", "andaman & nicobar islands", "53.0", "59.0", "171.3", "218.1", "422.8", "357.0", "176.3", "460.8", "250.1", "321.2", "158.3", "115.2", "2763.2", "112.0", 
"812.2", "1244.2", "594.7" ], [ "1", "andaman and nicobar islands", "NICOBARS", "2000", "Kharif", "Other Kharif pulses", "2.0", "1.0", "andaman & nicobar islands", "53.0", "59.0", "171.3", "218.1", "422.8", "357.0", "176.3", "460.8", "250.1", "321.2", "158.3", "115.2", "2763.2", "112.0", "812.2", "1244.2", "594.7" ], [ "2", "andaman and nicobar islands", "NICOBARS", "2000", "Kharif", "Rice", "102.0", "321.0", "andaman & nicobar islands", "53.0", "59.0", "171.3", "218.1", "422.8", "357.0", "176.3", "460.8", "250.1", "321.2", "158.3", "115.2", "2763.2", "112.0", "812.2", "1244.2", "594.7" ], [ "3", "andaman and nicobar islands", "NICOBARS", "2000", "Whole Year", "Banana", "176.0", "641.0", "andaman & nicobar islands", "53.0", "59.0", "171.3", "218.1", "422.8", "357.0", "176.3", "460.8", "250.1", "321.2", "158.3", "115.2", "2763.2", "112.0", "812.2", "1244.2", "594.7" ], [ "4", "andaman and nicobar islands", "NICOBARS", "2000", "Whole Year", "Cashewnut", "720.0", "165.0", "andaman & nicobar islands", "53.0", "59.0", "171.3", "218.1", "422.8", "357.0", "176.3", "460.8", "250.1", "321.2", "158.3", "115.2", "2763.2", "112.0", "812.2", "1244.2", "594.7" ] ], "shape": { "columns": 25, "rows": 5 } }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state_namedistrict_namecrop_yearseasoncroparea_production_subdivisionjanfeb...augsepoctnovdecannualjfmamjjasond
0andaman and nicobar islandsNICOBARS2000KharifArecanut1254.02000.0andaman & nicobar islands53.059.0...460.8250.1321.2158.3115.22763.2112.0812.21244.2594.7
1andaman and nicobar islandsNICOBARS2000KharifOther Kharif pulses2.01.0andaman & nicobar islands53.059.0...460.8250.1321.2158.3115.22763.2112.0812.21244.2594.7
2andaman and nicobar islandsNICOBARS2000KharifRice102.0321.0andaman & nicobar islands53.059.0...460.8250.1321.2158.3115.22763.2112.0812.21244.2594.7
3andaman and nicobar islandsNICOBARS2000Whole YearBanana176.0641.0andaman & nicobar islands53.059.0...460.8250.1321.2158.3115.22763.2112.0812.21244.2594.7
4andaman and nicobar islandsNICOBARS2000Whole YearCashewnut720.0165.0andaman & nicobar islands53.059.0...460.8250.1321.2158.3115.22763.2112.0812.21244.2594.7
\n", "

5 rows × 25 columns

\n", "
" ], "text/plain": [ " state_name district_name crop_year season \\\n", "0 andaman and nicobar islands NICOBARS 2000 Kharif \n", "1 andaman and nicobar islands NICOBARS 2000 Kharif \n", "2 andaman and nicobar islands NICOBARS 2000 Kharif \n", "3 andaman and nicobar islands NICOBARS 2000 Whole Year \n", "4 andaman and nicobar islands NICOBARS 2000 Whole Year \n", "\n", " crop area_ production_ subdivision jan \\\n", "0 Arecanut 1254.0 2000.0 andaman & nicobar islands 53.0 \n", "1 Other Kharif pulses 2.0 1.0 andaman & nicobar islands 53.0 \n", "2 Rice 102.0 321.0 andaman & nicobar islands 53.0 \n", "3 Banana 176.0 641.0 andaman & nicobar islands 53.0 \n", "4 Cashewnut 720.0 165.0 andaman & nicobar islands 53.0 \n", "\n", " feb ... aug sep oct nov dec annual jf mam jjas \\\n", "0 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n", "1 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n", "2 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n", "3 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n", "4 59.0 ... 
460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n", "\n", " ond \n", "0 594.7 \n", "1 594.7 \n", "2 594.7 \n", "3 594.7 \n", "4 594.7 \n", "\n", "[5 rows x 25 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "šŸ“Š Columns in dataset:\n", "['state_name', 'district_name', 'crop_year', 'season', 'crop', 'area_', 'production_', 'subdivision', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n", "\n", "šŸ›ļø Unique States in Dataset:\n", "- andaman and nicobar islands\n", "\n", "🌾 Unique Crops in Dataset:\n", "- Arecanut\n", "- Arhar/Tur\n", "- Banana\n", "- Black pepper\n", "- Cashewnut\n", "- Coconut\n", "- Dry chillies\n", "- Dry ginger\n", "- Groundnut\n", "- Maize\n", "- Moong(Green Gram)\n", "- Other Kharif pulses\n", "- Rice\n", "- Sugarcane\n", "- Sunflower\n", "- Sweet potato\n", "- Tapioca\n", "- Turmeric\n", "- Urad\n", "- other oilseeds\n", "... (Total 20 unique crops)\n", "\n", "šŸ“… Crop Year Range: 2000 - 2010\n", "\n", "šŸ“ˆ Number of unique crops per state:\n" ] }, { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "index", "rawType": "int64", "type": "integer" }, { "name": "state_name", "rawType": "object", "type": "string" }, { "name": "unique_crops", "rawType": "int64", "type": "integer" } ], "ref": "16808a21-275e-4e3e-a212-22ed467e3c22", "rows": [ [ "0", "andaman and nicobar islands", "20" ] ], "shape": { "columns": 2, "rows": 1 } }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state_nameunique_crops
0andaman and nicobar islands20
\n", "
" ], "text/plain": [ " state_name unique_crops\n", "0 andaman and nicobar islands 20" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# -----------------------------------------------\n",
 "# 📘 Project Samarth - Phase 1: Data Discovery\n",
 "# -----------------------------------------------\n",
 "\n",
 "from pathlib import Path\n",
 "\n",
 "import pandas as pd\n",
 "\n",
 "# Folder holding every CSV this notebook reads. The loading cells below build\n",
 "# their paths from DATA_DIR, so this is the only location to change when the\n",
 "# notebook is run from another machine or directory.\n",
 "DATA_DIR = Path(\"../hybrid_dataset\")  # adjust if needed\n",
 "\n",
 "# ✅ Load merged dataset\n",
 "file_path = DATA_DIR / \"merged_agri_rainfall.csv\"\n",
 "df = pd.read_csv(file_path)\n",
 "\n",
 "# ✅ Basic info\n",
 "print(\"🔍 Dataset Loaded Successfully!\")\n",
 "print(f\"Total Rows: {len(df)}\")\n",
 "print(f\"Total Columns: {len(df.columns)}\\n\")\n",
 "\n",
 "# ✅ Display first few rows\n",
 "display(df.head())\n",
 "\n",
 "# ✅ Show all available columns\n",
 "print(\"📊 Columns in dataset:\")\n",
 "print(df.columns.tolist())\n",
 "\n",
 "# ✅ Check unique states\n",
 "if \"state_name\" in df.columns:\n",
 "    states = sorted(df[\"state_name\"].dropna().unique().tolist())\n",
 "    print(\"\\n🏛️ Unique States in Dataset:\")\n",
 "    for s in states:\n",
 "        print(\"-\", s)\n",
 "\n",
 "# ✅ Check unique crops\n",
 "if \"crop\" in df.columns:\n",
 "    crops = sorted(df[\"crop\"].dropna().unique().tolist())\n",
 "    print(\"\\n🌾 Unique Crops in Dataset:\")\n",
 "    for c in crops[:20]:  # limit to first 20\n",
 "        print(\"-\", c)\n",
 "    print(f\"... (Total {len(crops)} unique crops)\")\n",
 "\n",
 "# ✅ Check year range\n",
 "if \"crop_year\" in df.columns:\n",
 "    min_year, max_year = int(df[\"crop_year\"].min()), int(df[\"crop_year\"].max())\n",
 "    print(f\"\\n📅 Crop Year Range: {min_year} - {max_year}\")\n",
 "\n",
 "# ✅ Quick count by state and crop\n",
 "if {\"state_name\", \"crop\"} <= set(df.columns):\n",
 "    summary = (\n",
 "        df.groupby(\"state_name\")[\"crop\"]\n",
 "        .nunique()\n",
 "        .sort_values(ascending=False)\n",
 "        .reset_index()\n",
 "        .rename(columns={\"crop\": \"unique_crops\"})\n",
 "    )\n",
 "    print(\"\\n📈 Number of unique crops per state:\")\n",
 "    display(summary)\n"
] }, { "cell_type": "code", "execution_count": 5, "id": "49e9d168", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Columns available in this file:\n", "['subdivision', 'year', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n" ] } ], "source": [
 "# Inspect the column names of the raw IMD rainfall file.\n",
 "# The frame gets its own name so the merged dataset held in `df` is not\n",
 "# overwritten when the notebook is run top to bottom.\n",
 "# Relies on `pd` and `DATA_DIR` from the data-discovery cell above.\n",
 "imd_preview_df = pd.read_csv(DATA_DIR / \"imd_rainfall_data.csv\")\n",
 "\n",
 "print(\"Columns available in this file:\")\n",
 "print(imd_preview_df.columns.tolist())\n"
] }, { "cell_type": "code", "execution_count": 7, "id": "1f616bd6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "āœ… Datasets Loaded Successfully!\n", "\n", "Agriculture Data Shape: (5000, 7)\n", "IMD Rainfall Data Shape: (2000, 19)\n" ] } ], "source": [
 "# Paths to the two raw source files, resolved against DATA_DIR instead of a\n",
 "# user-specific absolute Windows path.\n",
 "# NOTE(review): assumes DATA_DIR is the same hybrid_dataset folder the old\n",
 "# absolute paths pointed at - confirm against your checkout.\n",
 "agri_path = DATA_DIR / \"agriculture_data.csv\"\n",
 "imd_path = DATA_DIR / \"imd_rainfall_data.csv\"\n",
 "\n",
 "# Load both datasets\n",
 "agri_df = pd.read_csv(agri_path)\n",
 "imd_df = pd.read_csv(imd_path)\n",
 "\n",
 "print(\"✅ Datasets Loaded Successfully!\\n\")\n",
 "print(f\"Agriculture Data Shape: {agri_df.shape}\")\n",
 "print(f\"IMD Rainfall Data Shape: {imd_df.shape}\")\n"
] }, { "cell_type": "code", "execution_count": 8, "id": "485e1cd8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "🌾 Agriculture Data Columns:\n", "['state_name', 'district_name', 'crop_year', 'season', 'crop', 'area_', 'production_']\n", "\n", "ā˜ļø IMD Rainfall Data Columns:\n", "['subdivision', 'year', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n", "\n", "šŸ“Š Agriculture Data Sample:\n" ] }, { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "index", "rawType": "int64", "type": "integer" }, { "name": "state_name", "rawType": "object", "type": "string" }, { "name": "district_name", "rawType": "object", "type": "string" }, { "name": "crop_year", "rawType": "int64", "type": "integer" }, { "name": "season", "rawType": "object", "type": "string" }, { "name": "crop", "rawType": "object", "type": "string" }, { "name": "area_", "rawType": "float64", "type": "float" }, { "name": "production_", "rawType": "float64", "type": "float" } ], "ref": "0d1e680d-db1a-4e1b-a014-078e92fcf760", "rows": [ [ "0", "Andaman and Nicobar Islands", "NICOBARS", "2000", "Kharif", "Arecanut", "1254.0", "2000.0" ], [ "1", "Andaman and Nicobar Islands", "NICOBARS", "2000", "Kharif", "Other Kharif pulses", "2.0", "1.0" ], [ "2", "Andaman and Nicobar Islands", "NICOBARS", "2000", "Kharif", "Rice", "102.0", "321.0" ], [ "3", "Andaman and Nicobar Islands", "NICOBARS", "2000", "Whole Year", "Banana", "176.0", "641.0" ], [ "4", "Andaman and Nicobar Islands", "NICOBARS", "2000", "Whole Year", "Cashewnut", "720.0", "165.0" ] ], "shape": { "columns": 7, "rows": 5 } }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state_namedistrict_namecrop_yearseasoncroparea_production_
0Andaman and Nicobar IslandsNICOBARS2000KharifArecanut1254.02000.0
1Andaman and Nicobar IslandsNICOBARS2000KharifOther Kharif pulses2.01.0
2Andaman and Nicobar IslandsNICOBARS2000KharifRice102.0321.0
3Andaman and Nicobar IslandsNICOBARS2000Whole YearBanana176.0641.0
4Andaman and Nicobar IslandsNICOBARS2000Whole YearCashewnut720.0165.0
\n", "
" ], "text/plain": [ " state_name district_name crop_year season \\\n", "0 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n", "1 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n", "2 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n", "3 Andaman and Nicobar Islands NICOBARS 2000 Whole Year \n", "4 Andaman and Nicobar Islands NICOBARS 2000 Whole Year \n", "\n", " crop area_ production_ \n", "0 Arecanut 1254.0 2000.0 \n", "1 Other Kharif pulses 2.0 1.0 \n", "2 Rice 102.0 321.0 \n", "3 Banana 176.0 641.0 \n", "4 Cashewnut 720.0 165.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "šŸŒ¦ļø IMD Rainfall Data Sample:\n" ] }, { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "index", "rawType": "int64", "type": "integer" }, { "name": "subdivision", "rawType": "object", "type": "string" }, { "name": "year", "rawType": "int64", "type": "integer" }, { "name": "jan", "rawType": "float64", "type": "float" }, { "name": "feb", "rawType": "float64", "type": "float" }, { "name": "mar", "rawType": "float64", "type": "float" }, { "name": "apr", "rawType": "float64", "type": "float" }, { "name": "may", "rawType": "float64", "type": "float" }, { "name": "jun", "rawType": "float64", "type": "float" }, { "name": "jul", "rawType": "float64", "type": "float" }, { "name": "aug", "rawType": "float64", "type": "float" }, { "name": "sep", "rawType": "float64", "type": "float" }, { "name": "oct", "rawType": "float64", "type": "float" }, { "name": "nov", "rawType": "float64", "type": "float" }, { "name": "dec", "rawType": "float64", "type": "float" }, { "name": "annual", "rawType": "float64", "type": "float" }, { "name": "jf", "rawType": "float64", "type": "float" }, { "name": "mam", "rawType": "float64", "type": "float" }, { "name": "jjas", "rawType": "float64", "type": "float" }, { "name": "ond", "rawType": "float64", "type": "float" } ], "ref": 
"032f16ab-7b47-4a43-bcbc-1c17f86ef3bb", "rows": [ [ "0", "Andaman & Nicobar Islands", "1901", "49.2", "87.1", "29.2", "2.3", "528.8", "517.5", "365.1", "481.1", "332.6", "388.5", "558.2", "33.6", "3373.2", "136.3", "560.3", "1696.3", "980.3" ], [ "1", "Andaman & Nicobar Islands", "1902", "0.0", "159.8", "12.2", "0.0", "446.1", "537.1", "228.9", "753.7", "666.2", "197.2", "359.0", "160.5", "3520.7", "159.8", "458.3", "2185.9", "716.7" ], [ "2", "Andaman & Nicobar Islands", "1903", "12.7", "144.0", "0.0", "1.0", "235.1", "479.9", "728.4", "326.7", "339.0", "181.2", "284.4", "225.0", "2957.4", "156.7", "236.1", "1874.0", "690.6" ], [ "3", "Andaman & Nicobar Islands", "1904", "9.4", "14.7", "0.0", "202.4", "304.5", "495.1", "502.0", "160.1", "820.4", "222.2", "308.7", "40.1", "3079.6", "24.1", "506.9", "1977.6", "571.0" ], [ "4", "Andaman & Nicobar Islands", "1905", "1.3", "0.0", "3.3", "26.9", "279.5", "628.7", "368.7", "330.5", "297.0", "260.7", "25.4", "344.7", "2566.7", "1.3", "309.7", "1624.9", "630.8" ] ], "shape": { "columns": 19, "rows": 5 } }, "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subdivisionyearjanfebmaraprmayjunjulaugsepoctnovdecannualjfmamjjasond
0Andaman & Nicobar Islands190149.287.129.22.3528.8517.5365.1481.1332.6388.5558.233.63373.2136.3560.31696.3980.3
1Andaman & Nicobar Islands19020.0159.812.20.0446.1537.1228.9753.7666.2197.2359.0160.53520.7159.8458.32185.9716.7
2Andaman & Nicobar Islands190312.7144.00.01.0235.1479.9728.4326.7339.0181.2284.4225.02957.4156.7236.11874.0690.6
3Andaman & Nicobar Islands19049.414.70.0202.4304.5495.1502.0160.1820.4222.2308.740.13079.624.1506.91977.6571.0
4Andaman & Nicobar Islands19051.30.03.326.9279.5628.7368.7330.5297.0260.725.4344.72566.71.3309.71624.9630.8
\n", "
" ], "text/plain": [ " subdivision year jan feb mar apr may jun \\\n", "0 Andaman & Nicobar Islands 1901 49.2 87.1 29.2 2.3 528.8 517.5 \n", "1 Andaman & Nicobar Islands 1902 0.0 159.8 12.2 0.0 446.1 537.1 \n", "2 Andaman & Nicobar Islands 1903 12.7 144.0 0.0 1.0 235.1 479.9 \n", "3 Andaman & Nicobar Islands 1904 9.4 14.7 0.0 202.4 304.5 495.1 \n", "4 Andaman & Nicobar Islands 1905 1.3 0.0 3.3 26.9 279.5 628.7 \n", "\n", " jul aug sep oct nov dec annual jf mam jjas \\\n", "0 365.1 481.1 332.6 388.5 558.2 33.6 3373.2 136.3 560.3 1696.3 \n", "1 228.9 753.7 666.2 197.2 359.0 160.5 3520.7 159.8 458.3 2185.9 \n", "2 728.4 326.7 339.0 181.2 284.4 225.0 2957.4 156.7 236.1 1874.0 \n", "3 502.0 160.1 820.4 222.2 308.7 40.1 3079.6 24.1 506.9 1977.6 \n", "4 368.7 330.5 297.0 260.7 25.4 344.7 2566.7 1.3 309.7 1624.9 \n", "\n", " ond \n", "0 980.3 \n", "1 716.7 \n", "2 690.6 \n", "3 571.0 \n", "4 630.8 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print(\"\\n🌾 Agriculture Data Columns:\")\n", "print(agri_df.columns.tolist())\n", "\n", "print(\"\\nā˜ļø IMD Rainfall Data Columns:\")\n", "print(imd_df.columns.tolist())\n", "\n", "print(\"\\nšŸ“Š Agriculture Data Sample:\")\n", "display(agri_df.head(5))\n", "\n", "print(\"\\nšŸŒ¦ļø IMD Rainfall Data Sample:\")\n", "display(imd_df.head(5))\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "4d48c1f1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "šŸ›ļø Unique States in Agriculture Data:\n", "['Andaman and Nicobar Islands', 'Andhra Pradesh']\n", "\n", "šŸ“… Year Range in Agriculture Data:\n", "1997 → 2014\n", "\n", "🌾 Top 10 Crops:\n", "['Arecanut', 'Other Kharif pulses', 'Rice', 'Banana', 'Cashewnut', 'Coconut', 'Dry ginger', 'Sugarcane', 'Sweet potato', 'Tapioca']\n", "\n", "šŸ›ļø Unique Subdivisions in IMD Rainfall Data:\n", "['Andaman & Nicobar Islands', 'Arunachal Pradesh', 'Assam & Meghalaya', 'Naga Mani Mizo Tripura', 'Sub Himalayan West 
Bengal & Sikkim', 'Gangetic West Bengal', 'Orissa', 'Jharkhand', 'Bihar', 'East Uttar Pradesh', 'West Uttar Pradesh', 'Uttarakhand', 'Haryana Delhi & Chandigarh', 'Punjab', 'Himachal Pradesh', 'Jammu & Kashmir', 'West Rajasthan', 'East Rajasthan']\n", "\n", "šŸ“… Year Range in IMD Rainfall Data:\n", "1901 → 2017\n" ] } ], "source": [ "# ---- AGRICULTURE ----\n", "print(\"\\nšŸ›ļø Unique States in Agriculture Data:\")\n", "print(agri_df['state_name'].unique().tolist())\n", "\n", "print(\"\\nšŸ“… Year Range in Agriculture Data:\")\n", "if 'crop_year' in agri_df.columns:\n", " print(int(agri_df['crop_year'].min()), \"→\", int(agri_df['crop_year'].max()))\n", "\n", "print(\"\\n🌾 Top 10 Crops:\")\n", "print(agri_df['crop'].unique().tolist()[:10])\n", "\n", "# ---- IMD RAINFALL ----\n", "print(\"\\nšŸ›ļø Unique Subdivisions in IMD Rainfall Data:\")\n", "print(imd_df['subdivision'].unique().tolist())\n", "\n", "print(\"\\nšŸ“… Year Range in IMD Rainfall Data:\")\n", "if 'year' in imd_df.columns:\n", " print(int(imd_df['year'].min()), \"→\", int(imd_df['year'].max()))\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "8cfe6ee8", "metadata": {}, "outputs": [], "source": [
"# Compare agriculture state names with IMD subdivision names.\n",
"# The sources spell the same region differently ('Andaman and Nicobar Islands'\n",
"# vs 'Andaman & Nicobar Islands'), so lowercasing and trimming alone reports\n",
"# zero overlap; '&' is mapped to 'and' and whitespace is collapsed first.\n",
"def normalize_region_names(names):\n",
"    \"\"\"Return the set of normalised, non-null region names from a string Series.\"\"\"\n",
"    cleaned = (\n",
"        names.dropna()  # NaN would make sorted() fail on mixed float/str\n",
"        .str.lower()\n",
"        .str.replace('&', ' and ', regex=False)\n",
"        .str.replace(r'\\s+', ' ', regex=True)\n",
"        .str.strip()\n",
"    )\n",
"    return set(cleaned)\n",
"\n",
"\n",
"agri_states = normalize_region_names(agri_df['state_name'])\n",
"imd_subdiv = normalize_region_names(imd_df['subdivision'])\n",
"\n",
"common = sorted(agri_states & imd_subdiv)\n",
"missing_from_imd = sorted(agri_states - imd_subdiv)\n",
"\n",
"print(f\"\\n✅ Common Names Found Between Agriculture & IMD Data ({len(common)}):\")\n",
"print(common[:10])\n",
"\n",
"print(f\"\\n⚠️ States in Agriculture but not in IMD ({len(missing_from_imd)}):\")\n",
"print(missing_from_imd[:10])\n"
] },
{ "cell_type": "code", "execution_count": null, "id": "21653121", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "myenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.0" } }, "nbformat": 4, "nbformat_minor": 5 }