{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "ec455bd3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Active code page: 1252\n",
"Requirement already satisfied: requests in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.32.5)\n",
"Requirement already satisfied: pandas in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.3.2)\n",
"Requirement already satisfied: numpy in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (2.3.2)\n",
"Collecting matplotlib\n",
" Using cached matplotlib-3.10.7-cp313-cp313-win_amd64.whl.metadata (11 kB)\n",
"Collecting plotly\n",
" Downloading plotly-6.3.1-py3-none-any.whl.metadata (8.5 kB)\n",
"Requirement already satisfied: streamlit in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (1.49.1)\n",
"Requirement already satisfied: langchain in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (0.3.27)\n",
"Requirement already satisfied: transformers in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (4.56.1)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (3.4.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from requests) (2025.8.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pandas) (2025.2)\n",
"Collecting contourpy>=1.0.1 (from matplotlib)\n",
" Using cached contourpy-1.3.3-cp313-cp313-win_amd64.whl.metadata (5.5 kB)\n",
"Collecting cycler>=0.10 (from matplotlib)\n",
" Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
"Collecting fonttools>=4.22.0 (from matplotlib)\n",
" Using cached fonttools-4.60.1-cp313-cp313-win_amd64.whl.metadata (114 kB)\n",
"Collecting kiwisolver>=1.3.1 (from matplotlib)\n",
" Using cached kiwisolver-1.4.9-cp313-cp313-win_amd64.whl.metadata (6.4 kB)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from matplotlib) (23.2)\n",
"Requirement already satisfied: pillow>=8 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from matplotlib) (11.3.0)\n",
"Collecting pyparsing>=3 (from matplotlib)\n",
" Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)\n",
"Requirement already satisfied: narwhals>=1.15.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from plotly) (2.4.0)\n",
"Requirement already satisfied: altair!=5.4.0,!=5.4.1,<6,>=4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (5.5.0)\n",
"Requirement already satisfied: blinker<2,>=1.5.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (1.9.0)\n",
"Requirement already satisfied: cachetools<7,>=4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.2.0)\n",
"Requirement already satisfied: click<9,>=7.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (8.2.1)\n",
"Requirement already satisfied: protobuf<7,>=3.20 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.32.0)\n",
"Requirement already satisfied: pyarrow>=7.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (21.0.0)\n",
"Requirement already satisfied: tenacity<10,>=8.1.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (8.5.0)\n",
"Requirement already satisfied: toml<2,>=0.10.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (0.10.2)\n",
"Requirement already satisfied: typing-extensions<5,>=4.4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (4.15.0)\n",
"Requirement already satisfied: watchdog<7,>=2.1.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.0.0)\n",
"Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (3.1.45)\n",
"Requirement already satisfied: pydeck<1,>=0.8.0b4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (0.9.1)\n",
"Requirement already satisfied: tornado!=6.5.0,<7,>=6.0.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from streamlit) (6.5.2)\n",
"Requirement already satisfied: langchain-core<1.0.0,>=0.3.72 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.3.76)\n",
"Requirement already satisfied: langchain-text-splitters<1.0.0,>=0.3.9 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.3.11)\n",
"Requirement already satisfied: langsmith>=0.1.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (0.4.27)\n",
"Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (2.11.7)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (2.0.43)\n",
"Requirement already satisfied: PyYAML>=5.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain) (6.0.2)\n",
"Requirement already satisfied: filelock in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (3.19.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.34.4)\n",
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (2025.9.1)\n",
"Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.22.0)\n",
"Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (0.6.2)\n",
"Requirement already satisfied: tqdm>=4.27 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from transformers) (4.67.1)\n",
"Requirement already satisfied: jinja2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (3.1.6)\n",
"Requirement already satisfied: jsonschema>=3.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (4.25.1)\n",
"Requirement already satisfied: colorama in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from click<9,>=7.0->streamlit) (0.4.6)\n",
"Requirement already satisfied: gitdb<5,>=4.0.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.12)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (2024.2.0)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langchain-core<1.0.0,>=0.3.72->langchain) (1.33)\n",
"Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (0.28.1)\n",
"Requirement already satisfied: orjson>=3.9.14 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (3.11.3)\n",
"Requirement already satisfied: requests-toolbelt>=1.0.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (1.0.0)\n",
"Requirement already satisfied: zstandard>=0.23.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from langsmith>=0.1.17->langchain) (0.24.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.33.2 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.33.2)\n",
"Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.4.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Requirement already satisfied: greenlet>=1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.2.4)\n",
"Requirement already satisfied: smmap<6,>=3.0.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (5.0.2)\n",
"Requirement already satisfied: anyio in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (4.10.0)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (1.0.9)\n",
"Requirement already satisfied: h11>=0.16 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (0.16.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jinja2->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (3.0.2)\n",
"Requirement already satisfied: jsonpointer>=1.9 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonpatch<2.0,>=1.33->langchain-core<1.0.0,>=0.3.72->langchain) (3.0.0)\n",
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (25.3.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (2025.9.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (0.36.2)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from jsonschema>=3.0->altair!=5.4.0,!=5.4.1,<6,>=4.0->streamlit) (0.27.1)\n",
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\satya\\anaconda3\\envs\\myenv\\lib\\site-packages (from anyio->httpx<1,>=0.23.0->langsmith>=0.1.17->langchain) (1.3.1)\n",
"Using cached matplotlib-3.10.7-cp313-cp313-win_amd64.whl (8.1 MB)\n",
"Downloading plotly-6.3.1-py3-none-any.whl (9.8 MB)\n",
" ---------------------------------------- 0.0/9.8 MB ? eta -:--:--\n",
" ---- ----------------------------------- 1.0/9.8 MB 7.0 MB/s eta 0:00:02\n",
" ---- ----------------------------------- 1.0/9.8 MB 7.0 MB/s eta 0:00:02\n",
" ---------- ----------------------------- 2.6/9.8 MB 4.5 MB/s eta 0:00:02\n",
" ------------- -------------------------- 3.4/9.8 MB 4.4 MB/s eta 0:00:02\n",
" ----------------- ---------------------- 4.2/9.8 MB 4.3 MB/s eta 0:00:02\n",
" -------------------- ------------------- 5.0/9.8 MB 4.2 MB/s eta 0:00:02\n",
" ----------------------- ---------------- 5.8/9.8 MB 4.1 MB/s eta 0:00:01\n",
" --------------------------- ------------ 6.8/9.8 MB 4.1 MB/s eta 0:00:01\n",
" ------------------------------ --------- 7.6/9.8 MB 4.1 MB/s eta 0:00:01\n",
" ---------------------------------- ----- 8.4/9.8 MB 4.1 MB/s eta 0:00:01\n",
" ------------------------------------- -- 9.2/9.8 MB 4.0 MB/s eta 0:00:01\n",
" --------------------------------------- 9.7/9.8 MB 4.0 MB/s eta 0:00:01\n",
" ---------------------------------------- 9.8/9.8 MB 3.9 MB/s eta 0:00:00\n",
"Using cached contourpy-1.3.3-cp313-cp313-win_amd64.whl (226 kB)\n",
"Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
"Using cached fonttools-4.60.1-cp313-cp313-win_amd64.whl (2.3 MB)\n",
"Using cached kiwisolver-1.4.9-cp313-cp313-win_amd64.whl (73 kB)\n",
"Using cached pyparsing-3.2.5-py3-none-any.whl (113 kB)\n",
"Installing collected packages: pyparsing, plotly, kiwisolver, fonttools, cycler, contourpy, matplotlib\n",
"Successfully installed contourpy-1.3.3 cycler-0.12.1 fonttools-4.60.1 kiwisolver-1.4.9 matplotlib-3.10.7 plotly-6.3.1 pyparsing-3.2.5\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Install the notebook's dependencies into the environment of the running kernel.\n",
"# `%pip` is the explicit IPython magic, so the cell does not depend on automagic.\n",
"# Versions are pinned to the ones recorded in this cell's saved output so a\n",
"# fresh environment reproduces the same package set.\n",
"%pip install requests==2.32.5 pandas==2.3.2 numpy==2.3.2 matplotlib==3.10.7 plotly==6.3.1 streamlit==1.49.1 langchain==0.3.27 transformers==4.56.1\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "42263596",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"š Dataset Loaded Successfully!\n",
"Total Rows: 203\n",
"Total Columns: 25\n",
"\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "state_name",
"rawType": "object",
"type": "string"
},
{
"name": "district_name",
"rawType": "object",
"type": "string"
},
{
"name": "crop_year",
"rawType": "int64",
"type": "integer"
},
{
"name": "season",
"rawType": "object",
"type": "string"
},
{
"name": "crop",
"rawType": "object",
"type": "string"
},
{
"name": "area_",
"rawType": "float64",
"type": "float"
},
{
"name": "production_",
"rawType": "float64",
"type": "float"
},
{
"name": "subdivision",
"rawType": "object",
"type": "string"
},
{
"name": "jan",
"rawType": "float64",
"type": "float"
},
{
"name": "feb",
"rawType": "float64",
"type": "float"
},
{
"name": "mar",
"rawType": "float64",
"type": "float"
},
{
"name": "apr",
"rawType": "float64",
"type": "float"
},
{
"name": "may",
"rawType": "float64",
"type": "float"
},
{
"name": "jun",
"rawType": "float64",
"type": "float"
},
{
"name": "jul",
"rawType": "float64",
"type": "float"
},
{
"name": "aug",
"rawType": "float64",
"type": "float"
},
{
"name": "sep",
"rawType": "float64",
"type": "float"
},
{
"name": "oct",
"rawType": "float64",
"type": "float"
},
{
"name": "nov",
"rawType": "float64",
"type": "float"
},
{
"name": "dec",
"rawType": "float64",
"type": "float"
},
{
"name": "annual",
"rawType": "float64",
"type": "float"
},
{
"name": "jf",
"rawType": "float64",
"type": "float"
},
{
"name": "mam",
"rawType": "float64",
"type": "float"
},
{
"name": "jjas",
"rawType": "float64",
"type": "float"
},
{
"name": "ond",
"rawType": "float64",
"type": "float"
}
],
"ref": "633f3f12-0965-479f-8a47-3a1c5a2b8105",
"rows": [
[
"0",
"andaman and nicobar islands",
"NICOBARS",
"2000",
"Kharif",
"Arecanut",
"1254.0",
"2000.0",
"andaman & nicobar islands",
"53.0",
"59.0",
"171.3",
"218.1",
"422.8",
"357.0",
"176.3",
"460.8",
"250.1",
"321.2",
"158.3",
"115.2",
"2763.2",
"112.0",
"812.2",
"1244.2",
"594.7"
],
[
"1",
"andaman and nicobar islands",
"NICOBARS",
"2000",
"Kharif",
"Other Kharif pulses",
"2.0",
"1.0",
"andaman & nicobar islands",
"53.0",
"59.0",
"171.3",
"218.1",
"422.8",
"357.0",
"176.3",
"460.8",
"250.1",
"321.2",
"158.3",
"115.2",
"2763.2",
"112.0",
"812.2",
"1244.2",
"594.7"
],
[
"2",
"andaman and nicobar islands",
"NICOBARS",
"2000",
"Kharif",
"Rice",
"102.0",
"321.0",
"andaman & nicobar islands",
"53.0",
"59.0",
"171.3",
"218.1",
"422.8",
"357.0",
"176.3",
"460.8",
"250.1",
"321.2",
"158.3",
"115.2",
"2763.2",
"112.0",
"812.2",
"1244.2",
"594.7"
],
[
"3",
"andaman and nicobar islands",
"NICOBARS",
"2000",
"Whole Year",
"Banana",
"176.0",
"641.0",
"andaman & nicobar islands",
"53.0",
"59.0",
"171.3",
"218.1",
"422.8",
"357.0",
"176.3",
"460.8",
"250.1",
"321.2",
"158.3",
"115.2",
"2763.2",
"112.0",
"812.2",
"1244.2",
"594.7"
],
[
"4",
"andaman and nicobar islands",
"NICOBARS",
"2000",
"Whole Year",
"Cashewnut",
"720.0",
"165.0",
"andaman & nicobar islands",
"53.0",
"59.0",
"171.3",
"218.1",
"422.8",
"357.0",
"176.3",
"460.8",
"250.1",
"321.2",
"158.3",
"115.2",
"2763.2",
"112.0",
"812.2",
"1244.2",
"594.7"
]
],
"shape": {
"columns": 25,
"rows": 5
}
},
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state_name | \n",
" district_name | \n",
" crop_year | \n",
" season | \n",
" crop | \n",
" area_ | \n",
" production_ | \n",
" subdivision | \n",
" jan | \n",
" feb | \n",
" ... | \n",
" aug | \n",
" sep | \n",
" oct | \n",
" nov | \n",
" dec | \n",
" annual | \n",
" jf | \n",
" mam | \n",
" jjas | \n",
" ond | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" andaman and nicobar islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Arecanut | \n",
" 1254.0 | \n",
" 2000.0 | \n",
" andaman & nicobar islands | \n",
" 53.0 | \n",
" 59.0 | \n",
" ... | \n",
" 460.8 | \n",
" 250.1 | \n",
" 321.2 | \n",
" 158.3 | \n",
" 115.2 | \n",
" 2763.2 | \n",
" 112.0 | \n",
" 812.2 | \n",
" 1244.2 | \n",
" 594.7 | \n",
"
\n",
" \n",
" | 1 | \n",
" andaman and nicobar islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Other Kharif pulses | \n",
" 2.0 | \n",
" 1.0 | \n",
" andaman & nicobar islands | \n",
" 53.0 | \n",
" 59.0 | \n",
" ... | \n",
" 460.8 | \n",
" 250.1 | \n",
" 321.2 | \n",
" 158.3 | \n",
" 115.2 | \n",
" 2763.2 | \n",
" 112.0 | \n",
" 812.2 | \n",
" 1244.2 | \n",
" 594.7 | \n",
"
\n",
" \n",
" | 2 | \n",
" andaman and nicobar islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Rice | \n",
" 102.0 | \n",
" 321.0 | \n",
" andaman & nicobar islands | \n",
" 53.0 | \n",
" 59.0 | \n",
" ... | \n",
" 460.8 | \n",
" 250.1 | \n",
" 321.2 | \n",
" 158.3 | \n",
" 115.2 | \n",
" 2763.2 | \n",
" 112.0 | \n",
" 812.2 | \n",
" 1244.2 | \n",
" 594.7 | \n",
"
\n",
" \n",
" | 3 | \n",
" andaman and nicobar islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Whole Year | \n",
" Banana | \n",
" 176.0 | \n",
" 641.0 | \n",
" andaman & nicobar islands | \n",
" 53.0 | \n",
" 59.0 | \n",
" ... | \n",
" 460.8 | \n",
" 250.1 | \n",
" 321.2 | \n",
" 158.3 | \n",
" 115.2 | \n",
" 2763.2 | \n",
" 112.0 | \n",
" 812.2 | \n",
" 1244.2 | \n",
" 594.7 | \n",
"
\n",
" \n",
" | 4 | \n",
" andaman and nicobar islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Whole Year | \n",
" Cashewnut | \n",
" 720.0 | \n",
" 165.0 | \n",
" andaman & nicobar islands | \n",
" 53.0 | \n",
" 59.0 | \n",
" ... | \n",
" 460.8 | \n",
" 250.1 | \n",
" 321.2 | \n",
" 158.3 | \n",
" 115.2 | \n",
" 2763.2 | \n",
" 112.0 | \n",
" 812.2 | \n",
" 1244.2 | \n",
" 594.7 | \n",
"
\n",
" \n",
"
\n",
"
5 rows Ć 25 columns
\n",
"
"
],
"text/plain": [
" state_name district_name crop_year season \\\n",
"0 andaman and nicobar islands NICOBARS 2000 Kharif \n",
"1 andaman and nicobar islands NICOBARS 2000 Kharif \n",
"2 andaman and nicobar islands NICOBARS 2000 Kharif \n",
"3 andaman and nicobar islands NICOBARS 2000 Whole Year \n",
"4 andaman and nicobar islands NICOBARS 2000 Whole Year \n",
"\n",
" crop area_ production_ subdivision jan \\\n",
"0 Arecanut 1254.0 2000.0 andaman & nicobar islands 53.0 \n",
"1 Other Kharif pulses 2.0 1.0 andaman & nicobar islands 53.0 \n",
"2 Rice 102.0 321.0 andaman & nicobar islands 53.0 \n",
"3 Banana 176.0 641.0 andaman & nicobar islands 53.0 \n",
"4 Cashewnut 720.0 165.0 andaman & nicobar islands 53.0 \n",
"\n",
" feb ... aug sep oct nov dec annual jf mam jjas \\\n",
"0 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n",
"1 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n",
"2 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n",
"3 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n",
"4 59.0 ... 460.8 250.1 321.2 158.3 115.2 2763.2 112.0 812.2 1244.2 \n",
"\n",
" ond \n",
"0 594.7 \n",
"1 594.7 \n",
"2 594.7 \n",
"3 594.7 \n",
"4 594.7 \n",
"\n",
"[5 rows x 25 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"š Columns in dataset:\n",
"['state_name', 'district_name', 'crop_year', 'season', 'crop', 'area_', 'production_', 'subdivision', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n",
"\n",
"šļø Unique States in Dataset:\n",
"- andaman and nicobar islands\n",
"\n",
"š¾ Unique Crops in Dataset:\n",
"- Arecanut\n",
"- Arhar/Tur\n",
"- Banana\n",
"- Black pepper\n",
"- Cashewnut\n",
"- Coconut\n",
"- Dry chillies\n",
"- Dry ginger\n",
"- Groundnut\n",
"- Maize\n",
"- Moong(Green Gram)\n",
"- Other Kharif pulses\n",
"- Rice\n",
"- Sugarcane\n",
"- Sunflower\n",
"- Sweet potato\n",
"- Tapioca\n",
"- Turmeric\n",
"- Urad\n",
"- other oilseeds\n",
"... (Total 20 unique crops)\n",
"\n",
"š
Crop Year Range: 2000 - 2010\n",
"\n",
"š Number of unique crops per state:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "state_name",
"rawType": "object",
"type": "string"
},
{
"name": "unique_crops",
"rawType": "int64",
"type": "integer"
}
],
"ref": "16808a21-275e-4e3e-a212-22ed467e3c22",
"rows": [
[
"0",
"andaman and nicobar islands",
"20"
]
],
"shape": {
"columns": 2,
"rows": 1
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state_name | \n",
" unique_crops | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" andaman and nicobar islands | \n",
" 20 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state_name unique_crops\n",
"0 andaman and nicobar islands 20"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# -----------------------------------------------\n",
"# Project Samarth - Phase 1: Data Discovery\n",
"#\n",
"# Loads the merged dataset and prints a quick profile of it: row/column\n",
"# counts, a preview, the column list, unique states and crops, the crop-year\n",
"# range, and the number of distinct crops per state.\n",
"# -----------------------------------------------\n",
"\n",
"import pandas as pd\n",
"\n",
"# Maximum number of crop names listed before the output is truncated.\n",
"MAX_CROPS_SHOWN = 20\n",
"\n",
"# Load merged dataset (path is relative to the notebook's working directory)\n",
"file_path = \"../hybrid_dataset/merged_agri_rainfall.csv\"  # adjust if needed\n",
"df = pd.read_csv(file_path)\n",
"\n",
"# Basic info\n",
"print(\"Dataset Loaded Successfully!\")\n",
"print(f\"Total Rows: {len(df)}\")\n",
"print(f\"Total Columns: {len(df.columns)}\\n\")\n",
"\n",
"# Display first few rows (display() is the rich-output helper the notebook kernel provides)\n",
"display(df.head())\n",
"\n",
"# Show all available columns\n",
"print(\"Columns in dataset:\")\n",
"print(df.columns.tolist())\n",
"\n",
"# Check unique states (each section is skipped when its column is absent)\n",
"if \"state_name\" in df.columns:\n",
"    states = sorted(df[\"state_name\"].dropna().unique().tolist())\n",
"    print(\"\\nUnique States in Dataset:\")\n",
"    for s in states:\n",
"        print(\"-\", s)\n",
"\n",
"# Check unique crops\n",
"if \"crop\" in df.columns:\n",
"    crops = sorted(df[\"crop\"].dropna().unique().tolist())\n",
"    print(\"\\nUnique Crops in Dataset:\")\n",
"    for c in crops[:MAX_CROPS_SHOWN]:\n",
"        print(\"-\", c)\n",
"    # Only print the \"...\" continuation marker when names were actually cut off.\n",
"    truncation_marker = \"... \" if len(crops) > MAX_CROPS_SHOWN else \"\"\n",
"    print(f\"{truncation_marker}(Total {len(crops)} unique crops)\")\n",
"\n",
"# Check year range\n",
"if \"crop_year\" in df.columns:\n",
"    # Drop missing years first: int() of a NaN min/max would raise ValueError.\n",
"    crop_years = df[\"crop_year\"].dropna()\n",
"    if crop_years.empty:\n",
"        print(\"\\nCrop Year Range: no valid years found\")\n",
"    else:\n",
"        min_year, max_year = int(crop_years.min()), int(crop_years.max())\n",
"        print(f\"\\nCrop Year Range: {min_year} - {max_year}\")\n",
"\n",
"# Quick count by state and crop (needs both columns to be present)\n",
"if {\"state_name\", \"crop\"} <= set(df.columns):\n",
"    summary = (\n",
"        df.groupby(\"state_name\")[\"crop\"]\n",
"        .nunique()\n",
"        .sort_values(ascending=False)\n",
"        .reset_index()\n",
"        .rename(columns={\"crop\": \"unique_crops\"})\n",
"    )\n",
"    print(\"\\nNumber of unique crops per state:\")\n",
"    display(summary)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "49e9d168",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Columns available in this file:\n",
"['subdivision', 'year', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Inspect the column names of the IMD rainfall CSV.\n",
"# The path is relative to the notebook's working directory (the same\n",
"# ../hybrid_dataset folder the merged dataset is read from) rather than an\n",
"# absolute path that only exists on one machine.\n",
"# NOTE(review): this rebinds `df`, replacing whatever frame was stored under\n",
"# that name before this cell ran.\n",
"df = pd.read_csv(\"../hybrid_dataset/imd_rainfall_data.csv\")\n",
"\n",
"print(\"Columns available in this file:\")\n",
"print(df.columns.tolist())\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1f616bd6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ā
Datasets Loaded Successfully!\n",
"\n",
"Agriculture Data Shape: (5000, 7)\n",
"IMD Rainfall Data Shape: (2000, 19)\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Paths to the source CSVs, relative to the notebook's working directory\n",
"# (the same ../hybrid_dataset folder the merged dataset is read from), so the\n",
"# notebook is not tied to one user's machine.\n",
"agri_path = \"../hybrid_dataset/agriculture_data.csv\"\n",
"imd_path = \"../hybrid_dataset/imd_rainfall_data.csv\"\n",
"\n",
"# Load both datasets\n",
"agri_df = pd.read_csv(agri_path)\n",
"imd_df = pd.read_csv(imd_path)\n",
"\n",
"# Report the (rows, columns) shape of each frame as a quick sanity check.\n",
"print(\"Datasets Loaded Successfully!\\n\")\n",
"print(f\"Agriculture Data Shape: {agri_df.shape}\")\n",
"print(f\"IMD Rainfall Data Shape: {imd_df.shape}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "485e1cd8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"š¾ Agriculture Data Columns:\n",
"['state_name', 'district_name', 'crop_year', 'season', 'crop', 'area_', 'production_']\n",
"\n",
"āļø IMD Rainfall Data Columns:\n",
"['subdivision', 'year', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'annual', 'jf', 'mam', 'jjas', 'ond']\n",
"\n",
"š Agriculture Data Sample:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "state_name",
"rawType": "object",
"type": "string"
},
{
"name": "district_name",
"rawType": "object",
"type": "string"
},
{
"name": "crop_year",
"rawType": "int64",
"type": "integer"
},
{
"name": "season",
"rawType": "object",
"type": "string"
},
{
"name": "crop",
"rawType": "object",
"type": "string"
},
{
"name": "area_",
"rawType": "float64",
"type": "float"
},
{
"name": "production_",
"rawType": "float64",
"type": "float"
}
],
"ref": "0d1e680d-db1a-4e1b-a014-078e92fcf760",
"rows": [
[
"0",
"Andaman and Nicobar Islands",
"NICOBARS",
"2000",
"Kharif",
"Arecanut",
"1254.0",
"2000.0"
],
[
"1",
"Andaman and Nicobar Islands",
"NICOBARS",
"2000",
"Kharif",
"Other Kharif pulses",
"2.0",
"1.0"
],
[
"2",
"Andaman and Nicobar Islands",
"NICOBARS",
"2000",
"Kharif",
"Rice",
"102.0",
"321.0"
],
[
"3",
"Andaman and Nicobar Islands",
"NICOBARS",
"2000",
"Whole Year",
"Banana",
"176.0",
"641.0"
],
[
"4",
"Andaman and Nicobar Islands",
"NICOBARS",
"2000",
"Whole Year",
"Cashewnut",
"720.0",
"165.0"
]
],
"shape": {
"columns": 7,
"rows": 5
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state_name | \n",
" district_name | \n",
" crop_year | \n",
" season | \n",
" crop | \n",
" area_ | \n",
" production_ | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Andaman and Nicobar Islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Arecanut | \n",
" 1254.0 | \n",
" 2000.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" Andaman and Nicobar Islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Other Kharif pulses | \n",
" 2.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" Andaman and Nicobar Islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Kharif | \n",
" Rice | \n",
" 102.0 | \n",
" 321.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" Andaman and Nicobar Islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Whole Year | \n",
" Banana | \n",
" 176.0 | \n",
" 641.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" Andaman and Nicobar Islands | \n",
" NICOBARS | \n",
" 2000 | \n",
" Whole Year | \n",
" Cashewnut | \n",
" 720.0 | \n",
" 165.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state_name district_name crop_year season \\\n",
"0 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n",
"1 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n",
"2 Andaman and Nicobar Islands NICOBARS 2000 Kharif \n",
"3 Andaman and Nicobar Islands NICOBARS 2000 Whole Year \n",
"4 Andaman and Nicobar Islands NICOBARS 2000 Whole Year \n",
"\n",
" crop area_ production_ \n",
"0 Arecanut 1254.0 2000.0 \n",
"1 Other Kharif pulses 2.0 1.0 \n",
"2 Rice 102.0 321.0 \n",
"3 Banana 176.0 641.0 \n",
"4 Cashewnut 720.0 165.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"š¦ļø IMD Rainfall Data Sample:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "subdivision",
"rawType": "object",
"type": "string"
},
{
"name": "year",
"rawType": "int64",
"type": "integer"
},
{
"name": "jan",
"rawType": "float64",
"type": "float"
},
{
"name": "feb",
"rawType": "float64",
"type": "float"
},
{
"name": "mar",
"rawType": "float64",
"type": "float"
},
{
"name": "apr",
"rawType": "float64",
"type": "float"
},
{
"name": "may",
"rawType": "float64",
"type": "float"
},
{
"name": "jun",
"rawType": "float64",
"type": "float"
},
{
"name": "jul",
"rawType": "float64",
"type": "float"
},
{
"name": "aug",
"rawType": "float64",
"type": "float"
},
{
"name": "sep",
"rawType": "float64",
"type": "float"
},
{
"name": "oct",
"rawType": "float64",
"type": "float"
},
{
"name": "nov",
"rawType": "float64",
"type": "float"
},
{
"name": "dec",
"rawType": "float64",
"type": "float"
},
{
"name": "annual",
"rawType": "float64",
"type": "float"
},
{
"name": "jf",
"rawType": "float64",
"type": "float"
},
{
"name": "mam",
"rawType": "float64",
"type": "float"
},
{
"name": "jjas",
"rawType": "float64",
"type": "float"
},
{
"name": "ond",
"rawType": "float64",
"type": "float"
}
],
"ref": "032f16ab-7b47-4a43-bcbc-1c17f86ef3bb",
"rows": [
[
"0",
"Andaman & Nicobar Islands",
"1901",
"49.2",
"87.1",
"29.2",
"2.3",
"528.8",
"517.5",
"365.1",
"481.1",
"332.6",
"388.5",
"558.2",
"33.6",
"3373.2",
"136.3",
"560.3",
"1696.3",
"980.3"
],
[
"1",
"Andaman & Nicobar Islands",
"1902",
"0.0",
"159.8",
"12.2",
"0.0",
"446.1",
"537.1",
"228.9",
"753.7",
"666.2",
"197.2",
"359.0",
"160.5",
"3520.7",
"159.8",
"458.3",
"2185.9",
"716.7"
],
[
"2",
"Andaman & Nicobar Islands",
"1903",
"12.7",
"144.0",
"0.0",
"1.0",
"235.1",
"479.9",
"728.4",
"326.7",
"339.0",
"181.2",
"284.4",
"225.0",
"2957.4",
"156.7",
"236.1",
"1874.0",
"690.6"
],
[
"3",
"Andaman & Nicobar Islands",
"1904",
"9.4",
"14.7",
"0.0",
"202.4",
"304.5",
"495.1",
"502.0",
"160.1",
"820.4",
"222.2",
"308.7",
"40.1",
"3079.6",
"24.1",
"506.9",
"1977.6",
"571.0"
],
[
"4",
"Andaman & Nicobar Islands",
"1905",
"1.3",
"0.0",
"3.3",
"26.9",
"279.5",
"628.7",
"368.7",
"330.5",
"297.0",
"260.7",
"25.4",
"344.7",
"2566.7",
"1.3",
"309.7",
"1624.9",
"630.8"
]
],
"shape": {
"columns": 19,
"rows": 5
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subdivision | \n",
" year | \n",
" jan | \n",
" feb | \n",
" mar | \n",
" apr | \n",
" may | \n",
" jun | \n",
" jul | \n",
" aug | \n",
" sep | \n",
" oct | \n",
" nov | \n",
" dec | \n",
" annual | \n",
" jf | \n",
" mam | \n",
" jjas | \n",
" ond | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Andaman & Nicobar Islands | \n",
" 1901 | \n",
" 49.2 | \n",
" 87.1 | \n",
" 29.2 | \n",
" 2.3 | \n",
" 528.8 | \n",
" 517.5 | \n",
" 365.1 | \n",
" 481.1 | \n",
" 332.6 | \n",
" 388.5 | \n",
" 558.2 | \n",
" 33.6 | \n",
" 3373.2 | \n",
" 136.3 | \n",
" 560.3 | \n",
" 1696.3 | \n",
" 980.3 | \n",
"
\n",
" \n",
" | 1 | \n",
" Andaman & Nicobar Islands | \n",
" 1902 | \n",
" 0.0 | \n",
" 159.8 | \n",
" 12.2 | \n",
" 0.0 | \n",
" 446.1 | \n",
" 537.1 | \n",
" 228.9 | \n",
" 753.7 | \n",
" 666.2 | \n",
" 197.2 | \n",
" 359.0 | \n",
" 160.5 | \n",
" 3520.7 | \n",
" 159.8 | \n",
" 458.3 | \n",
" 2185.9 | \n",
" 716.7 | \n",
"
\n",
" \n",
" | 2 | \n",
" Andaman & Nicobar Islands | \n",
" 1903 | \n",
" 12.7 | \n",
" 144.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 235.1 | \n",
" 479.9 | \n",
" 728.4 | \n",
" 326.7 | \n",
" 339.0 | \n",
" 181.2 | \n",
" 284.4 | \n",
" 225.0 | \n",
" 2957.4 | \n",
" 156.7 | \n",
" 236.1 | \n",
" 1874.0 | \n",
" 690.6 | \n",
"
\n",
" \n",
" | 3 | \n",
" Andaman & Nicobar Islands | \n",
" 1904 | \n",
" 9.4 | \n",
" 14.7 | \n",
" 0.0 | \n",
" 202.4 | \n",
" 304.5 | \n",
" 495.1 | \n",
" 502.0 | \n",
" 160.1 | \n",
" 820.4 | \n",
" 222.2 | \n",
" 308.7 | \n",
" 40.1 | \n",
" 3079.6 | \n",
" 24.1 | \n",
" 506.9 | \n",
" 1977.6 | \n",
" 571.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" Andaman & Nicobar Islands | \n",
" 1905 | \n",
" 1.3 | \n",
" 0.0 | \n",
" 3.3 | \n",
" 26.9 | \n",
" 279.5 | \n",
" 628.7 | \n",
" 368.7 | \n",
" 330.5 | \n",
" 297.0 | \n",
" 260.7 | \n",
" 25.4 | \n",
" 344.7 | \n",
" 2566.7 | \n",
" 1.3 | \n",
" 309.7 | \n",
" 1624.9 | \n",
" 630.8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subdivision year jan feb mar apr may jun \\\n",
"0 Andaman & Nicobar Islands 1901 49.2 87.1 29.2 2.3 528.8 517.5 \n",
"1 Andaman & Nicobar Islands 1902 0.0 159.8 12.2 0.0 446.1 537.1 \n",
"2 Andaman & Nicobar Islands 1903 12.7 144.0 0.0 1.0 235.1 479.9 \n",
"3 Andaman & Nicobar Islands 1904 9.4 14.7 0.0 202.4 304.5 495.1 \n",
"4 Andaman & Nicobar Islands 1905 1.3 0.0 3.3 26.9 279.5 628.7 \n",
"\n",
" jul aug sep oct nov dec annual jf mam jjas \\\n",
"0 365.1 481.1 332.6 388.5 558.2 33.6 3373.2 136.3 560.3 1696.3 \n",
"1 228.9 753.7 666.2 197.2 359.0 160.5 3520.7 159.8 458.3 2185.9 \n",
"2 728.4 326.7 339.0 181.2 284.4 225.0 2957.4 156.7 236.1 1874.0 \n",
"3 502.0 160.1 820.4 222.2 308.7 40.1 3079.6 24.1 506.9 1977.6 \n",
"4 368.7 330.5 297.0 260.7 25.4 344.7 2566.7 1.3 309.7 1624.9 \n",
"\n",
" ond \n",
"0 980.3 \n",
"1 716.7 \n",
"2 690.6 \n",
"3 571.0 \n",
"4 630.8 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(\"\\nš¾ Agriculture Data Columns:\")\n",
"print(agri_df.columns.tolist())\n",
"\n",
"print(\"\\nāļø IMD Rainfall Data Columns:\")\n",
"print(imd_df.columns.tolist())\n",
"\n",
"print(\"\\nš Agriculture Data Sample:\")\n",
"display(agri_df.head(5))\n",
"\n",
"print(\"\\nš¦ļø IMD Rainfall Data Sample:\")\n",
"display(imd_df.head(5))\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4d48c1f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"šļø Unique States in Agriculture Data:\n",
"['Andaman and Nicobar Islands', 'Andhra Pradesh']\n",
"\n",
"š
Year Range in Agriculture Data:\n",
"1997 ā 2014\n",
"\n",
"š¾ Top 10 Crops:\n",
"['Arecanut', 'Other Kharif pulses', 'Rice', 'Banana', 'Cashewnut', 'Coconut', 'Dry ginger', 'Sugarcane', 'Sweet potato', 'Tapioca']\n",
"\n",
"šļø Unique Subdivisions in IMD Rainfall Data:\n",
"['Andaman & Nicobar Islands', 'Arunachal Pradesh', 'Assam & Meghalaya', 'Naga Mani Mizo Tripura', 'Sub Himalayan West Bengal & Sikkim', 'Gangetic West Bengal', 'Orissa', 'Jharkhand', 'Bihar', 'East Uttar Pradesh', 'West Uttar Pradesh', 'Uttarakhand', 'Haryana Delhi & Chandigarh', 'Punjab', 'Himachal Pradesh', 'Jammu & Kashmir', 'West Rajasthan', 'East Rajasthan']\n",
"\n",
"š
Year Range in IMD Rainfall Data:\n",
"1901 ā 2017\n"
]
}
],
"source": [
"# ---- AGRICULTURE ----\n",
"print(\"\\nšļø Unique States in Agriculture Data:\")\n",
"print(agri_df['state_name'].unique().tolist())\n",
"\n",
"print(\"\\nš
Year Range in Agriculture Data:\")\n",
"if 'crop_year' in agri_df.columns:\n",
" print(int(agri_df['crop_year'].min()), \"ā\", int(agri_df['crop_year'].max()))\n",
"\n",
"print(\"\\nš¾ Top 10 Crops:\")\n",
"print(agri_df['crop'].unique().tolist()[:10])\n",
"\n",
"# ---- IMD RAINFALL ----\n",
"print(\"\\nšļø Unique Subdivisions in IMD Rainfall Data:\")\n",
"print(imd_df['subdivision'].unique().tolist())\n",
"\n",
"print(\"\\nš
Year Range in IMD Rainfall Data:\")\n",
"if 'year' in imd_df.columns:\n",
" print(int(imd_df['year'].min()), \"ā\", int(imd_df['year'].max()))\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8cfe6ee8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"ā
Common Names Found Between Agriculture & IMD Data (0):\n",
"[]\n",
"\n",
"ā ļø States in Agriculture but not in IMD (2):\n",
"['andaman and nicobar islands', 'andhra pradesh']\n"
]
}
],
"source": [
"# Lowercase and trim for consistency\n",
"agri_states = set(agri_df['state_name'].str.lower().str.strip().unique())\n",
"imd_subdiv = set(imd_df['subdivision'].str.lower().str.strip().unique())\n",
"\n",
"common = sorted(agri_states.intersection(imd_subdiv))\n",
"\n",
"print(f\"\\nā
Common Names Found Between Agriculture & IMD Data ({len(common)}):\")\n",
"print(common[:10])\n",
"\n",
"missing_from_imd = sorted(list(agri_states - imd_subdiv))\n",
"print(f\"\\nā ļø States in Agriculture but not in IMD ({len(missing_from_imd)}):\")\n",
"print(missing_from_imd[:10])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21653121",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}