Copy a token from your Hugging Face\ntokens page and paste it below. Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAIjCAYAAAAQrVEdAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANRVJREFUeJzt3X98znX////7sbEffhybn5ud5kcoRJxG2yoiy6HkTJbo50hU5xArpDT0S3FWEvGuLmdL5f2WisLZWPPrU5Y0yY+apIlODubHdjBssx3fP3z3ujjasIOtQ0+36+VyXE7H6/Xc63gcx3mR2+U4XsdrNrfb7RYAAIAB/Hw9AAAAQGUhbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAXJIGDx6sZs2a+XoMAH8xhA0Ar9hstgrdVq9e7etRPaxevdpjvsDAQIWFhal79+568cUXlZOTc8HH/vHHHzV58mTt2rWr8ga+CPPnz9eMGTN8PQbgEzZ+VxQAb3zwwQce9+fNm6e0tDS9//77HttvvvlmhYWFXfDjFBUVqaSkRIGBgRd8jDOtXr1aPXr00KhRo9SlSxcVFxcrJydH69at05IlSxQSEqKPPvpIN910k9fH/vjjjzVgwACtWrVK3bt3r5R5L8Ztt92mrVu3XjKhBfyZqvl6AAB/Lffdd5/H/W+++UZpaWlltv/R8ePHVaNGjQo/TvXq1S9ovvPp2rWr7rzzTo9tP/zwg3r16qX4+Hj9+OOPatSoUZU8NoCqx0dRACpd9+7d1a5dO2VmZqpbt26qUaOGnnrqKUnSZ599pj59+igiIkKBgYFq0aKFnnvuORUXF3sc44/n2OzatUs2m03/+te/9NZbb6lFixYKDAxUly5dtGHDhouat0OHDpoxY4Zyc3M1a9Ysa/tvv/2mf/7zn7rqqqsUHBysevXqacCAAR7vhKSkpGjAgAGSpB49epT5KK6iz3fHjh2Kj49XeHi4goKC1LhxYw0aNEh5eXke6z744ANFRUUpODhYdevW1aBBg7Rnzx5rf/fu3bVs2TL99ttv1iycq4TLCe/YAKgShw4d0i233KJBgwbpvvvusz6WSklJUa1atZSUlKRatWpp5cqVSk5Olsvl0vTp08973Pnz5+vo0aN6+OGHZbPZNG3aNPXv31+//vrrRb3Lc+edd2ro0KFasWKFXnjhBUnShg0btG7dOg0aNEiNGzfWrl27NGfOHHXv3l0//vijatSooW7dumnUqFGaOXOmnnrqKbVp00aSrP+tyPMtLCyUw+FQQUGBRo4cqfDwcP33v//V0qVLlZubq5CQEEnSCy+8oGeeeUZ33XWXHnroIeXk5OiNN95Qt27d9P333ys0NFRPP/208vLy9Pvvv+u1116TJNWqVeuCXxfgL8cNABchMTHR/cf/lNx4441uSe65c+eWWX/8+PEy2x5++GF3jRo13CdPnrS2JSQkuJs2bWrdz87Odkty16tXz3348GFr+2effeaW5F6yZMk551y1apVbknvhwoVnXdOhQwd3nTp1zjlrRkaGW5J73rx51raFCxe6JblXrVpVZn1Fnu/3339/3tl27drl9vf3d7/wwgse27ds2eKuVq2ax/Y+ffp4vHbA5YSPogBUicDAQA0ZMqTM9uDgYOvPR48e1cGDB9W1a1cdP35cWVlZ5z3uwIEDVadOHet+165dJUm//vrrRc9cq1YtHT16tNxZi4qKdOjQIbVs2VKhoaHauHFjhY5Zkedb+o7M8uXLdfz48XKP8+mnn6qkpER33XWXDh48aN3Cw8PVqlUrrVq1yuvnC5iIj6IAVIm//e1vCggIKLN927ZtmjhxolauXCmXy+Wx74/nk5SnSZMmHvdLI+fIkSMXMe1px44dU+3ata37J06c0NSpU/Xuu+/qv//9r9xnfIm0IrNKFXu+zZs3V1JSkl599VV9+OGH6tq1q/
7xj3/ovvvus6Jnx44dcrvdatWqVbmPU1UnWwN/NYQNgCpx5jsVpXJzc3XjjTfKbrfr2WefVYsWLRQUFKSNGzdq/PjxKikpOe9x/f39y93uvsgrVxQVFennn39Wu3btrG0jR47Uu+++q9GjRys2NlYhISGy2WwaNGhQhWb15vm+8sorGjx4sD777DOtWLFCo0aN0tSpU/XNN9+ocePGKikpkc1m0xdffFHua8B5NMBphA2AP83q1at16NAhffrpp+rWrZu1PTs724dTnfbxxx/rxIkTcjgcHtsSEhL0yiuvWNtOnjyp3Nxcj5+12WzlHtPb59u+fXu1b99eEydO1Lp163T99ddr7ty5ev7559WiRQu53W41b95cV1555Tmfy9nmAS4HnGMD4E9T+k7Dme+uFBYW6s033/TVSJJOX8dm9OjRqlOnjhITE63t/v7+Zd4JeuONN8p8VbtmzZqSVCZ4Kvp8XS6XTp065bGtffv28vPzU0FBgSSpf//+8vf315QpU8rM5Ha7dejQIY95KvpRGWAa3rEB8Ke57rrrVKdOHSUkJGjUqFGy2Wx6//33L/pjJG/8v//3/3Ty5EkVFxfr0KFD+vrrr/X5558rJCREixYtUnh4uLX2tttu0/vvv6+QkBC1bdtWGRkZ+vLLL1WvXj2PY3bs2FH+/v56+eWXlZeXp8DAQN10000Vfr4rV67UiBEjNGDAAF155ZU6deqU3n//ffn7+ys+Pl6S1KJFCz3//POaMGGCdu3apX79+ql27drKzs7WokWLNHz4cD3xxBOSpKioKC1YsEBJSUnq0qWLatWqpb59+1bxKwtcInz0bSwAhjjb172vvvrqctd//fXX7piYGHdwcLA7IiLCPW7cOPfy5cvLfF36bF/3nj59epljSnJPmjTpnHOWft279Fa9enV3gwYN3N26dXO/8MIL7gMHDpT5mSNHjriHDBnirl+/vrtWrVpuh8PhzsrKcjdt2tSdkJDgsfbtt992X3HFFW5/f3+P51KR5/vrr7+6H3zwQXeLFi3cQUFB7rp167p79Ojh/vLLL8vM9Mknn7hvuOEGd82aNd01a9Z0t27d2p2YmOjevn27tebYsWPue+65xx0aGuqWxFe/cVnhd0UBAABjcI4NAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIzBBfr+RCUlJdq7d69q167NJc8BAPCC2+3W0aNHFRERIT+/s78vQ9j8ifbu3avIyEhfjwEAwF/Wnj171Lhx47PuJ2z+RLVr15Z0+v8Uu93u42kAAPjrcLlcioyMtP4tPRvC5k9U+vGT3W4nbAAAuADnO5WDk4cBAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGKOarwdA5YkaO8/XIwBVLnP6A74eAcAljHdsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAyfhs3UqVPVpUsX1a5dWw0bNlS/fv20fft2jzXdu3eXzWbzuD3yyCMea3bv3q0+ffqoRo0aatiwocaOHatTp055rFm9erU6deqkwMBAtWzZUikpKWXmmT17tpo1a6agoCBFR0
fr22+/9dh/8uRJJSYmql69eqpVq5bi4+O1f//+ynkxAADARfNp2KxZs0aJiYn65ptvlJaWpqKiIvXq1Uv5+fke64YNG6Z9+/ZZt2nTpln7iouL1adPHxUWFmrdunV67733lJKSouTkZGtNdna2+vTpox49emjTpk0aPXq0HnroIS1fvtxas2DBAiUlJWnSpEnauHGjOnToIIfDoQMHDlhrxowZoyVLlmjhwoVas2aN9u7dq/79+1fhKwQAALxhc7vdbl8PUSonJ0cNGzbUmjVr1K1bN0mn37Hp2LGjZsyYUe7PfPHFF7rtttu0d+9ehYWFSZLmzp2r8ePHKycnRwEBARo/fryWLVumrVu3Wj83aNAg5ebmKjU1VZIUHR2tLl26aNasWZKkkpISRUZGauTIkXryySeVl5enBg0aaP78+brzzjslSVlZWWrTpo0yMjIUExNz3ufncrkUEhKivLw82e32C36dziZq7LxKPyZwqcmc/oCvRwDgAxX9N/SSOscmLy9PklS3bl2P7R9++KHq16+vdu3aacKECTp+/Li1LyMjQ+3bt7eiRpIcDodcLpe2bdtmrYmLi/M4psPhUEZGhiSpsLBQmZmZHmv8/PwUFxdnrcnMzFRRUZHHmtatW6tJkybWmj8qKCiQy+XyuAEAgKpTzdcDlCopKdHo0aN1/fXXq127dtb2e+65R02bNlVERIQ2b96s8ePHa/v27fr0008lSU6n0yNqJFn3nU7nOde4XC6dOHFCR44cUXFxcblrsrKyrGMEBAQoNDS0zJrSx/mjqVOnasqUKV6+EgAA4EJdMmGTmJiorVu36quvvvLYPnz4cOvP7du3V6NGjdSzZ0/t3LlTLVq0+LPH9MqECROUlJRk3Xe5XIqMjPThRAAAmO2S+ChqxIgRWrp0qVatWqXGjRufc210dLQk6ZdffpEkhYeHl/lmUun98PDwc66x2+0KDg5W/fr15e/vX+6aM49RWFio3Nzcs675o8DAQNntdo8bAACoOj4NG7fbrREjRmjRokVauXKlmjdvft6f2bRpkySpUaNGkqTY2Fht2bLF49tLaWlpstvtatu2rbUmPT3d4zhpaWmKjY2VJAUEBCgqKspjTUlJidLT0601UVFRql69usea7du3a/fu3dYaAADgWz79KCoxMVHz58/XZ599ptq1a1vnqoSEhCg4OFg7d+7U/Pnzdeutt6pevXravHmzxowZo27duumaa66RJPXq1Utt27bV/fffr2nTpsnpdGrixIlKTExUYGCgJOmRRx7RrFmzNG7cOD344INauXKlPvroIy1btsyaJSkpSQkJCercubOuvfZazZgxQ/n5+RoyZIg109ChQ5WUlKS6devKbrdr5MiRio2NrdA3ogAAQNXzadjMmTNH0umvdJ/p3Xff1eDBgxUQEKAvv/zSiozIyEjFx8dr4sSJ1lp/f38tXbpUjz76qGJjY1WzZk0lJCTo2WeftdY0b95cy5Yt05gxY/T666+rcePGeuedd+RwOKw1AwcOVE5OjpKTk+V0OtWxY0elpqZ6nFD82muvyc/PT/Hx8SooKJDD4dCbb75ZRa8OAADw1iV1HRvTcR0b4OJxHRvg8vSXvI4NAADAxSBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQ
gbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMn4bN1KlT1aVLF9WuXVsNGzZUv379tH37do81J0+eVGJiourVq6datWopPj5e+/fv91ize/du9enTRzVq1FDDhg01duxYnTp1ymPN6tWr1alTJwUGBqply5ZKSUkpM8/s2bPVrFkzBQUFKTo6Wt9++63XswAAAN/xadisWbNGiYmJ+uabb5SWlqaioiL16tVL+fn51poxY8ZoyZIlWrhwodasWaO9e/eqf//+1v7i4mL16dNHhYWFWrdund577z2lpKQoOTnZWpOdna0+ffqoR48e2rRpk0aPHq2HHnpIy5cvt9YsWLBASUlJmjRpkjZu3KgOHTrI4XDowIEDFZ4FAAD4ls3tdrt9PUSpnJwcNWzYUGvWrFG3bt2Ul5enBg0aaP78+brzzjslSVlZWWrTpo0yMjIUExOjL774Qrfddpv27t2rsLAwSdLcuXM1fvx45eTkKCAgQOPHj9eyZcu0detW67EGDRqk3NxcpaamSpKio6PVpUsXzZo1S5JUUlKiyMhIjRw5Uk8++WSFZvmjgoICFRQUWPddLpciIyOVl5cnu91e6a9f1Nh5lX5M4FKTOf0BX48AwAdcLpdCQkLO+2/oJXWOTV5eniSpbt26kqTMzEwVFRUpLi7OWtO6dWs1adJEGRkZkqSMjAy1b9/eihpJcjgccrlc2rZtm7XmzGOUrik9RmFhoTIzMz3W+Pn5KS4uzlpTkVn+aOrUqQoJCbFukZGRF/bCAACACrlkwqakpESjR4/W9ddfr3bt2kmSnE6nAgICFBoa6rE2LCxMTqfTWnNm1JTuL913rjUul0snTpzQwYMHVVxcXO6aM49xvln+aMKECcrLy7Nue/bsqeCrAQAALkQ1Xw9QKjExUVu3btVXX33l61EqTWBgoAIDA309BgAAl41L4h2bESNGaOnSpVq1apUaN25sbQ8PD1dhYaFyc3M91u/fv1/h4eHWmj9+M6n0/vnW2O12BQcHq379+vL39y93zZnHON8sAADAt3waNm63WyNGjNCiRYu0cuVKNW/e3GN/VFSUqlevrvT0dGvb9u3btXv3bsXGxkqSYmNjtWXLFo9vL6Wlpclut6tt27bWmjOPUbqm9BgBAQGKioryWFNSUqL09HRrTUVmAQAAvuXTj6ISExM1f/58ffbZZ6pdu7Z1rkpISIiCg4MVEhKioUOHKikpSXXr1pXdbtfIkSMVGxtrfQupV69eatu2re6//35NmzZNTqdTEydOVGJiovUx0COPPKJZs2Zp3LhxevDBB7Vy5Up99NFHWrZsmTVLUlKSEhIS1LlzZ1177bWaMWOG8vPzNWTIEGum880CAAB8y6dhM2fOHElS9+7dPba/++67Gjx4sCTptddek5+fn+Lj41VQUCCHw6E333zTWuvv76+lS5fq0UcfVWxsrGrWrKmEhAQ9++yz1prmzZtr2bJlGjNmjF5//XU1btxY77zzjhwOh7Vm4MCBysnJUXJyspxOpzp27KjU1FSPE4rPNwsAAPCtS+o6Nqar6HfwLxTXscHlgOvYAJenv+R1bAAAAC4GYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYx
A2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMIbXYXPixAkdP37cuv/bb79pxowZWrFiRaUOBgAA4C2vw+b222/XvHnzJEm5ubmKjo7WK6+8ottvv11z5syp9AEBAAAqyuuw2bhxo7p27SpJ+vjjjxUWFqbffvtN8+bN08yZMyt9QAAAgIryOmyOHz+u2rVrS5JWrFih/v37y8/PTzExMfrtt98qfUAAAICK8jpsWrZsqcWLF2vPnj1avny5evXqJUk6cOCA7HZ7pQ8IAABQUV6HTXJysp544gk1a9ZM1157rWJjYyWdfvfm73//e6UPCAAAUFHVvP2BO++8UzfccIP27dunDh06WNt79uypO+64o1KHAwAA8MYFXccmPDxctWvXVlpamk6cOCFJ6tKli1q3bl2pwwEAAHjD67A5dOiQevbsqSuvvFK33nqr9u3bJ0kaOnSoHn/88UofEAAAoKK8DpsxY8aoevXq2r17t2rUqGFtHzhwoFJTUyt1OAAAAG94fY7NihUrtHz5cjVu3Nhje6tWrfi6NwAA8Cmv37HJz8/3eKem1OHDhxUYGFgpQwEAAFwIr8Oma9eu1q9UkCSbzaaSkhJNmzZNPXr0qNThAAAAvOF12EybNk1vvfWWbrnlFhUWFmrcuHFq166d1q5dq5dfftmrY61du1Z9+/ZVRESEbDabFi9e7LF/8ODBstlsHrfevXt7rDl8+LDuvfde2e12hYaGaujQoTp27JjHms2bN6tr164KCgpSZGSkpk2bVmaWhQsXqnXr1goKClL79u31n//8x2O/2+1WcnKyGjVqpODgYMXFxWnHjh1ePV8AAFC1vA6bdu3a6eeff9YNN9yg22+/Xfn5+erfv7++//57tWjRwqtj5efnq0OHDpo9e/ZZ1/Tu3Vv79u2zbv/7v//rsf/ee+/Vtm3blJaWpqVLl2rt2rUaPny4td/lcqlXr15q2rSpMjMzNX36dE2ePFlvvfWWtWbdunW6++67NXToUH3//ffq16+f+vXrp61bt1prpk2bppkzZ2ru3Llav369atasKYfDoZMnT3r1nAEAQNWxud1ut6+HkE5/pLVo0SL169fP2jZ48GDl5uaWeSen1E8//aS2bdtqw4YN6ty5syQpNTVVt956q37//XdFRERozpw5evrpp+V0OhUQECBJevLJJ7V48WJlZWVJOv2Nrvz8fC1dutQ6dkxMjDp27Ki5c+fK7XYrIiJCjz/+uJ544glJUl5ensLCwpSSkqJBgwZV6Dm6XC6FhIQoLy+vSn79RNTYeedfBPzFZU5/wNcjAPCBiv4b6vW3otauXXvO/d26dfP2kOe0evVqNWzYUHXq1NFNN92k559/XvXq1ZMkZWRkKDQ01IoaSYqLi5Ofn5/Wr1+vO+64QxkZGerWrZsVNZLkcDj08ssv68iRI6pTp44yMjKUlJTk8bgOh8MKquzsbDmdTsXFxVn7Q0JCFB0drYyMjLOGTUFBgQoKCqz7Lpfrol8PAABwdl6HTffu3ctss9ls1p+Li4svaqAz9e7dW/3791fz5s21c+dOPfXUU7rllluUkZEhf39/OZ1ONWzY0ONnqlWrprp168rpdEqSnE6nmjdv7rEmLCzM2lenTh05nU5r25lrzjzGmT9X3pryTJ06VVOmTLmAZw4AAC6E12Fz5MgRj/tFRUX6/vvv9cwzz+iFF16otMEkebwT0r59e11zzTVq0aKFVq9erZ49e1bqY1WFCRMmeLwT5HK5FBkZ6cOJAAAwm9dhExISUmbbzTffrICAACUlJSkzM7NSBivPFVdcofr16+uXX35Rz549FR4ergMHDnisOXXqlA4fPqzw8HBJp3+v1f79+z3WlN4/35oz95dua9Sokceajh07nnXewMBAru0DAMCf6IJ+CWZ5wsLCtH379so6XLl+//13HTp0yIqL2NhY5ebmesTUypUrVVJSou
joaGvN2rVrVVRUZK1JS0vTVVddpTp16lhr0tPTPR4rLS1NsbGxkqTmzZsrPDzcY43L5dL69eutNQAAwPe8fsdm8+bNHvfdbrf27dunl1566ZzvXpTn2LFj+uWXX6z72dnZ2rRpk+rWrau6detqypQpio+PV3h4uHbu3Klx48apZcuWcjgckqQ2bdqod+/eGjZsmObOnauioiKNGDFCgwYNUkREhCTpnnvu0ZQpUzR06FCNHz9eW7du1euvv67XXnvNetzHHntMN954o1555RX16dNH//d//6fvvvvO+kq4zWbT6NGj9fzzz6tVq1Zq3ry5nnnmGUVERHh8iwsAAPiW12HTsWNH2Ww2/fFb4jExMfr3v//t1bG+++47j6sVl56PkpCQoDlz5mjz5s167733lJubq4iICPXq1UvPPfecx8c7H374oUaMGKGePXvKz89P8fHxmjlzprU/JCREK1asUGJioqKiolS/fn0lJyd7XOvmuuuu0/z58zVx4kQ99dRTatWqlRYvXqx27dpZa8aNG6f8/HwNHz5cubm5uuGGG5SamqqgoCCvnjMAAKg6Xl/H5o+/6NLPz08NGjTgH/gK4Do2wMXjOjbA5anKrmPTtGnTixoMAACgqlQobM78aOd8Ro0adcHDAAAAXIwKhc2ZJ9qei81mI2wAAIDPVChssrOzq3oOAACAi1Zp17EBAADwNa9PHpZOXyjv888/1+7du1VYWOix79VXX62UwQAAALzlddikp6frH//4h6644gplZWWpXbt22rVrl9xutzp16lQVMwIAAFSI1x9FTZgwQU888YS2bNmioKAgffLJJ9qzZ49uvPFGDRgwoCpmBAAAqBCvw+ann37SAw+cvkBWtWrVdOLECdWqVUvPPvusXn755UofEAAAoKK8DpuaNWta59U0atRIO3futPYdPHiw8iYDAADwktfn2MTExOirr75SmzZtdOutt+rxxx/Xli1b9OmnnyomJqYqZgQAAKgQr8Pm1Vdf1bFjxyRJU6ZM0bFjx7RgwQK1atWKb0QBAACf8jpsXnzxRd13332STn8sNXfu3EofCgAA4EJ4fY5NTk6OevfurcjISI0dO1Y//PBDVcwFAADgNa/D5rPPPtO+ffv0zDPPaMOGDerUqZOuvvpqvfjii9q1a1cVjAgAAFAxF/QrFerUqaPhw4dr9erV+u233zR48GC9//77atmyZWXPBwAAUGEX9buiioqK9N1332n9+vXatWuXwsLCKmsuAAAAr11Q2KxatUrDhg1TWFiYBg8eLLvdrqVLl+r333+v7PkAAAAqzOtvRf3tb3/T4cOH1bt3b7311lvq27evAgMDq2I2AAAAr3gdNpMnT9aAAQMUGhpaBeMAAABcOK/DZtiwYVUxBwAAwEW7qJOHAQAALiWEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADCGT8Nm7dq16tu3ryIiImSz2bR48WKP/W63W8nJyWrUqJGCg4MVFxenHTt2eKw5fPiw7r33XtntdoWGhmro0KE6duyYx5rNmzera9euCgoKUmRkpKZNm1ZmloULF6p169YKCgpS+/bt9Z///MfrWQAAgG/5NGzy8/PVoUMHzZ49u9z906ZN08yZMzV37lytX79eNWvWlMPh0MmTJ6019957r7Zt26a0tDQtXbpUa9eu1fDhw639Lp
dLvXr1UtOmTZWZmanp06dr8uTJeuutt6w169at0913362hQ4fq+++/V79+/dSvXz9t3brVq1kAAIBv2dxut9vXQ0iSzWbTokWL1K9fP0mn3yGJiIjQ448/rieeeEKSlJeXp7CwMKWkpGjQoEH66aef1LZtW23YsEGdO3eWJKWmpurWW2/V77//roiICM2ZM0dPP/20nE6nAgICJElPPvmkFi9erKysLEnSwIEDlZ+fr6VLl1rzxMTEqGPHjpo7d26FZqkIl8ulkJAQ5eXlyW63V8rrdqaosfMq/ZjApSZz+gO+HgGAD1T039BL9hyb7OxsOZ1OxcXFWdtCQkIUHR2tjIwMSVJGRoZCQ0OtqJGkuLg4+fn5af369daabt26WVEjSQ6HQ9u3b9eRI0esNWc+Tuma0sepyCzlKSgokMvl8rgBAICqc8mGjdPplCSFhYV5bA8LC7P2OZ1ONWzY0GN/tWrVVLduXY815R3jzMc425oz959vlvJMnTpVISEh1i0yMvI8zxoAAFyMSzZsTDBhwgTl5eVZtz179vh6JAAAjHbJhk14eLgkaf/+/R7b9+/fb+0LDw/XgQMHPPafOnVKhw8f9lhT3jHOfIyzrTlz//lmKU9gYKDsdrvHDQAAVJ1LNmyaN2+u8PBwpaenW9tcLpfWr1+v2NhYSVJsbKxyc3OVmZlprVm5cqVKSkoUHR1trVm7dq2KioqsNWlpabrqqqtUp04da82Zj1O6pvRxKjILAADwPZ+GzbFjx7Rp0yZt2rRJ0umTdDdt2qTdu3fLZrNp9OjRev755/X5559ry5YteuCBBxQREWF9c6pNmzbq3bu3hg0bpm+//VZff/21RowYoUGDBikiIkKSdM899yggIEBDhw7Vtm3btGDBAr3++utKSkqy5njssceUmpqqV155RVlZWZo8ebK+++47jRgxQpIqNAsAAPC9ar588O+++049evSw7pfGRkJCglJSUjRu3Djl5+dr+PDhys3N1Q033KDU1FQFBQVZP/Phhx9qxIgR6tmzp/z8/BQfH6+ZM2da+0NCQrRixQolJiYqKipK9evXV3Jysse1bq677jrNnz9fEydO1FNPPaVWrVpp8eLFateunbWmIrMAAADfumSuY3M54Do2wMXjOjbA5ekvfx0bAAAAbxE2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADDGJR02kydPls1m87i1bt3a2n/y5EklJiaqXr16qlWrluLj47V//36PY+zevVt9+vRRjRo11LBhQ40dO1anTp3yWLN69Wp16tRJgYGBatmypVJSUsrMMnv2bDVr1kxBQUGKjo7Wt99+WyXPGQAAXLhLOmwk6eqrr9a+ffus21dffWXtGzNmjJYsWaKFCxdqzZo12rt3r/r372/tLy4uVp8+fVRYWKh169bpvffeU0pKipKTk6012dnZ6tOnj3r06KFNmzZp9OjReuihh7R8+XJrzYIFC5SUlKRJkyZp48aN6tChgxwOhw4cOPDnvAgAAKBCbG632+3rIc
5m8uTJWrx4sTZt2lRmX15enho0aKD58+frzjvvlCRlZWWpTZs2ysjIUExMjL744gvddttt2rt3r8LCwiRJc+fO1fjx45WTk6OAgACNHz9ey5Yt09atW61jDxo0SLm5uUpNTZUkRUdHq0uXLpo1a5YkqaSkRJGRkRo5cqSefPLJCj8fl8ulkJAQ5eXlyW63X+jLclZRY+dV+jGBS03m9Ad8PQIAH6jov6GX/Ds2O3bsUEREhK644grde++92r17tyQpMzNTRUVFiouLs9a2bt1aTZo0UUZGhiQpIyND7du3t6JGkhwOh1wul7Zt22atOfMYpWtKj1FYWKjMzEyPNX5+foqLi7PWnE1BQYFcLpfHDQAAVJ1LOmyio6OVkpKi1NRUzZkzR9nZ2eratauOHj0qp9OpgIAAhYaGevxMWFiYnE6nJMnpdHpETen+0n3nWuNyuXTixAkdPHhQxcXF5a4pPcbZTJ06VSEhIdYtMjLS69cAAABUXDVfD3Aut9xyi/Xna665RtHR0WratKk++ugjBQcH+3CyipkwYYKSkpKs+y6Xi7gBAKAKXdLv2PxRaGiorrzySv3yyy8KDw9XYWGhcnNzPdbs379f4eHhkqTw8PAy35IqvX++NXa7XcHBwapfv778/f3LXVN6jLMJDAyU3W73uAEAgKrzlwqbY8eOaefOnWrUqJGioqJUvXp1paenW/u3b9+u3bt3KzY2VpIUGxurLVu2eHx7KS0tTXa7XW3btrXWnHmM0jWlxwgICFBUVJTHmpKSEqWnp1trAADApeGSDpsnnnhCa9as0a5du7Ru3Trdcccd8vf31913362QkBANHTpUSUlJWrVqlTIzMzVkyBDFxsYqJiZGktSrVy+1bdtW999/v3744QctX75cEydOVGJiogIDAyVJjzzyiH799VeNGzdOWVlZevPNN/XRRx9pzJgx1hxJSUl6++239d577+mnn37So48+qvz8fA0ZMsQnrwsAACjfJX2Oze+//667775bhw4dUoMGDXTDDTfom2++UYMGDSRJr732mvz8/BQfH6+CggI5HA69+eab1s/7+/tr6dKlevTRRxUbG6uaNWsqISFBzz77rLWmefPmWrZsmcaMGaPXX39djRs31jvvvCOHw2GtGThwoHJycpScnCyn06mOHTsqNTW1zAnFAADAty7p69iYhuvYABeP69gAlydjrmMDAABQUYQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGNU8/UAAHC5iBo7z9cjAFUuc/oDPn183rEBAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGAAAYg7ABAADGIGwAAIAxCBsAAGAMwgYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbL82ePVvNmjVTUFCQoqOj9e233/p6JAAA8P8jbLywYMECJSUladKkSdq4caM6dOggh8OhAwcO+Ho0AAAgwsYrr776qoYNG6YhQ4aobdu2mjt3rmrUqKF///vfvh4NAABIqubrAf4qCgsLlZmZqQkTJljb/Pz8FBcXp4yMjHJ/pqCgQAUFBdb9vLw8SZLL5aqSGYsLTlTJcYFLSVX9/fkz8HcUl4Oq+jtaely3233OdYRNBR08eFDFxcUKCwvz2B
4WFqasrKxyf2bq1KmaMmVKme2RkZFVMiNwOQh54xFfjwDgHKr67+jRo0cVEhJy1v2ETRWaMGGCkpKSrPslJSU6fPiw6tWrJ5vN5sPJUBlcLpciIyO1Z88e2e12X48D4A/4O2oWt9uto0ePKiIi4pzrCJsKql+/vvz9/bV//36P7fv371d4eHi5PxMYGKjAwECPbaGhoVU1InzEbrfzH03gEsbfUXOc652aUpw8XEEBAQGKiopSenq6ta2kpETp6emKjY314WQAAKAU79h4ISkpSQkJCercubOuvfZazZgxQ/n5+RoyZIivRwMAACJsvDJw4EDl5OQoOTlZTqdTHTt2VGpqapkTinF5CAwM1KRJk8p83Ajg0sDf0cuTzX2+700BAAD8RXCODQAAMAZhAwAAjEHYAAAAYxA2AADAGIQNcIFmz56tZs2aKSgoSNHR0fr22299PRIASWvXrlXfvn0VEREhm82mxYsX+3ok/IkIG+ACLFiwQElJSZo0aZI2btyoDh06yOFw6MCBA74eDbjs5efnq0OHDpo9e7avR4EP8HVv4AJER0erS5cumjVrlqTTV6GOjIzUyJEj9eSTT/p4OgClbDabFi1apH79+vl6FPxJeMcG8FJhYaEyMzMVFxdnbfPz81NcXJwyMjJ8OBkAgLABvHTw4EEVFxeXueJ0WFiYnE6nj6YCAEiEDQAAMAhhA3ipfv368vf31/79+z2279+/X+Hh4T6aCgAgETaA1wICAhQVFaX09HRrW0lJidLT0xUbG+vDyQAA/HZv4AIkJSUpISFBnTt31rXXXqsZM2YoPz9fQ4YM8fVowGXv2LFj+uWXX6z72dnZ2rRpk+rWrasmTZr4cDL8Gfi6N3CBZs2apenTp8vpdKpjx46aOXOmoqOjfT0WcNlbvXq1evToUWZ7QkKCUlJS/vyB8KcibAAAgDE4xwYAABiDsAEAAMYgbAAAgDEIGwAAYAzCBgAAGIOwAQAAxiBsAACAMQgbAABgDMIGwF9a9+7dNXr06AqtXb16tWw2m3Jzcy/qMZs1a6YZM2Zc1DEAVA3CBgAAGIOwAQAAxiBsABjj/fffV+fOnVW7dm2Fh4frnnvu0YEDB8qs+/rrr3XNNdcoKChIMTEx2rp1q8f+r776Sl27dlVwcLAiIyM1atQo5efnl/uYbrdbkydPVpMmTRQYGKiIiAiNGjWqSp4fgPMjbAAYo6ioSM8995x++OEHLV68WLt27dLgwYPLrBs7dqxeeeUVbdiwQQ0aNFDfvn1VVFQkSdq5c6d69+6t+Ph4bd68WQsWLNBXX32lESNGlPuYn3zyiV577TX9z//8j3bs2KHFixerffv2Vfk0AZxDNV8PAACV5cEHH7T+fMUVV2jmzJnq0qWLjh07plq1aln7Jk2apJtvvlmS9N5776lx48ZatGiR7rrrLk2dOlX33nuvdUJyq1atNHPmTN14442aM2eOgoKCPB5z9+7dCg8PV1xcnKpXr64mTZro2muvrfonC6BcvGMDwBiZmZnq27evmjRpotq1a+vGG2+UdDo+zhQbG2v9uW7durrqqqv0008/SZJ++OEHpaSkqFatWtbN4XCopKRE2dnZZR5zwIABOnHihK644goNGzZMixYt0qlTp6rwWQI4F8IGgBHy8/PlcDhkt9v14YcfasOGDVq0aJEkqbCwsMLHOXbsmB5++GFt2rTJuv3www/asWOHWrRoUWZ9ZGSktm/frjfffFPBwcH65z//qW7dulkfbQH4c/FRFAAjZGVl6dChQ3rppZcUGRkpSfruu+/KXfvNN9+oSZMmkqQjR47o559/Vps2bSRJnTp10o8//qiWLVtW+LGDg4PVt29f9e3bV4mJiWrdurW2bNmiTp06XeSzAuAtwgaAEZo0aaKAgAC98cYbeuSRR7R161Y999xz5a599tlnVa9ePYWFhenpp59W/fr11a9fP0nS+PHjFRMToxEjRuihhx5SzZo19eOPPyotLU2zZs
0qc6yUlBQVFxcrOjpaNWrU0AcffKDg4GA1bdq0Kp8ugLPgoygARmjQoIFSUlK0cOFCtW3bVi+99JL+9a9/lbv2pZde0mOPPaaoqCg5nU4tWbJEAQEBkqRrrrlGa9as0c8//6yuXbvq73//u5KTkxUREVHusUJDQ/X222/r+uuv1zXXXKMvv/xSS5YsUb169arsuQI4O5vb7Xb7eggAAIDKwDs2AADAGIQNAAAwBmEDAACMQdgAAABjEDYAAMAYhA0AADAGYQMAAIxB2AAAAGMQNgAAwBiEDQAAMAZhAwAAjPH/AVSrgG7nWmoXAAAAAElFTkSuQmCC\n"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"### 1. Gestión de la Vivienda\n",
"\n",
"Voy a eliminar las columnas `_MODE` y `_MEDI` para quedarme solo con las `_AVG`, ya que dan el mismo tipo de información"
],
"metadata": {
"id": "lG4hjq_SUFsn"
}
},
{
"cell_type": "code",
"source": [
"# Collect every column whose name ends in '_MODE' or '_MEDI'\n",
"# (str.endswith accepts a tuple of suffixes).\n",
"columnas_eliminar = [nombre for nombre in df_train.columns if nombre.endswith(('_MODE', '_MEDI'))]\n",
"\n",
"# Remove those columns from df_train in place and report how many were dropped.\n",
"df_train.drop(labels=columnas_eliminar, axis=1, inplace=True)\n",
"print(f\"Columnas eliminadas por redundancia: {len(columnas_eliminar)}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pZ8BGVerUXV8",
"outputId": "34ec58e2-afd4-4d80-f7e2-39ebb7686965"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Columnas eliminadas por redundancia: 33\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 2. Gestión de OWN_CAR_AGE\n",
"\n",
"Si el valor de esta columna es nulo, voy a asumir que es 0, ya que esto es consistente con no tener coche o con tener un coche nuevo sin dato"
],
"metadata": {
"id": "AGyHbwrtU3lm"
}
},
{
"cell_type": "code",
"source": [
"# Replace missing OWN_CAR_AGE values with 0.\n",
"# The filled Series is assigned back to the column instead of calling\n",
"# df_train[col].fillna(..., inplace=True): that form is chained assignment on an\n",
"# intermediate object, raises a FutureWarning, and stops updating df_train in pandas 3.0.\n",
"df_train['OWN_CAR_AGE'] = df_train['OWN_CAR_AGE'].fillna(0)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OV7FihYbVE9O",
"outputId": "5479a7a6-259a-43ec-9b77-17da246d09f2"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/tmp/ipython-input-786619477.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train['OWN_CAR_AGE'].fillna(0, inplace=True)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 3. Gestión de OCCUPATION_TYPE\n",
"\n",
"En esta columna voy a rellenar los nulos con una nueva categoría llamada 'Unknown'"
],
"metadata": {
"id": "2bg9oJWQVbGm"
}
},
{
"cell_type": "code",
"source": [
"# Replace missing OCCUPATION_TYPE values with the explicit category 'Unknown'.\n",
"# The filled Series is assigned back to the column instead of calling\n",
"# df_train[col].fillna(..., inplace=True): that form is chained assignment on an\n",
"# intermediate object, raises a FutureWarning, and stops updating df_train in pandas 3.0.\n",
"df_train['OCCUPATION_TYPE'] = df_train['OCCUPATION_TYPE'].fillna('Unknown')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5tERMTqyVlkC",
"outputId": "61ddec11-6b6d-4696-d53c-fe87e3492c00"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/tmp/ipython-input-552318821.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train['OCCUPATION_TYPE'].fillna('Unknown', inplace=True)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 4. Imputación Inteligente (Voy a crear una columna nueva llamada FLAG para indicar el riesgo + aplicación de la MEDIANA)"
],
"metadata": {
"id": "O28gQBQ5brts"
}
},
{
"cell_type": "code",
"source": [
"# 1. Identify the float64/int64 columns that STILL contain missing values.\n",
"#    The numeric selection is computed once and reused for both the column labels and the null mask.\n",
"numericas = df_train.select_dtypes(include=['float64', 'int64'])\n",
"cols_numericas_con_nulos = numericas.columns[numericas.isnull().any()]\n",
"\n",
"print(f\"Imputamos {len(cols_numericas_con_nulos)} columnas numericas...\")\n",
"\n",
"for col in cols_numericas_con_nulos:\n",
"    # A. Add a 0/1 flag column '<col>_ISNAN' marking the rows where the value was missing.\n",
"    #    It must be created before imputing, otherwise the missingness information is lost.\n",
"    df_train[f'{col}_ISNAN'] = df_train[col].isna().astype(int)\n",
"\n",
"    # B. Impute with the column median (robust to outliers).\n",
"    #    The result is assigned back to the column: df_train[col].fillna(..., inplace=True)\n",
"    #    is chained assignment, raises a FutureWarning and stops updating df_train in pandas 3.0.\n",
"    mediana = df_train[col].median()\n",
"    df_train[col] = df_train[col].fillna(mediana)\n",
"\n",
"print(\"Imputacion completa\")\n",
"\n",
"# Final check of the cleaning step: count the nulls left in the whole dataset\n",
"# (this also counts non-numeric columns, which this cell does not impute).\n",
"print(f\"Total de nulos restantes en el dataset: {df_train.isnull().sum().sum()}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pg1VFSVBcDWx",
"outputId": "ba5623be-5a82-4661-c237-324d3b7a5e9f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Imputamos 31 columnas numericas...\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n",
"/tmp/ipython-input-2544851836.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" df_train[col].fillna(mediana, inplace=True)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Imputacion completa\n",
"Total de nulos restantes en el dataset: 1292\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 5. Gestión de NAME_TYPE_SUITE (Categórica)\n",
"\n",
"En esta columna voy a rellenar los valores nulos con la moda (el valor que más se repite)."
],
"metadata": {
"id": "fbcZvJZneLJU"
}
},
{
"cell_type": "code",
"source": [
"# Categorical imputation: fill the missing NAME_TYPE_SUITE entries with the\n",
"# column's mode, read as the entry labelled 0 of the Series returned by mode().\n",
"moda_suite = df_train['NAME_TYPE_SUITE'].mode().loc[0]\n",
"df_train['NAME_TYPE_SUITE'] = df_train['NAME_TYPE_SUITE'].fillna(value=moda_suite)\n",
"\n",
"# Report which category was used as the replacement value.\n",
"print(f\"Valor usado para rellenar: {moda_suite}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZaT2N8coeWPJ",
"outputId": "2bb4bd52-a00f-4ca3-ef89-042080e0abe5"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Valor usado para rellenar: Unaccompanied\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 6. Verificación final de nulos"
],
"metadata": {
"id": "O-yXhgCPepj2"
}
},
{
"cell_type": "code",
"source": [
"# Final check: per-column null counts of df_train, added up into one total.\n",
"total_nulos = df_train.isna().sum(axis=0).sum()\n",
"print(f\"Total de nulos en el dataset: {total_nulos}\")\n",
"\n",
"# Report whether the dataset is completely free of missing values.\n",
"print(\"Dataset totalmente limpio\" if total_nulos == 0 else \"Aun quedan nulos\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UlX4snXNetZU",
"outputId": "a396d283-8f7d-4ad4-bb87-ffacfa6b001a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Total de nulos en el dataset: 0\n",
"Dataset totalmente limpio\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Preview the first 10 rows of the column-description table.\n",
"df_column_description.head(n=10)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 363
},
"id": "HnytRL-ms37a",
"outputId": "1e9b0bd6-ce54-44c5-9309-ea34bb72c043"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Unnamed: 0 Table Row \\\n",
"0 1 application_{train|test}.csv SK_ID_CURR \n",
"1 2 application_{train|test}.csv TARGET \n",
"2 5 application_{train|test}.csv NAME_CONTRACT_TYPE \n",
"3 6 application_{train|test}.csv CODE_GENDER \n",
"4 7 application_{train|test}.csv FLAG_OWN_CAR \n",
"5 8 application_{train|test}.csv FLAG_OWN_REALTY \n",
"6 9 application_{train|test}.csv CNT_CHILDREN \n",
"7 10 application_{train|test}.csv AMT_INCOME_TOTAL \n",
"8 11 application_{train|test}.csv AMT_CREDIT \n",
"9 12 application_{train|test}.csv AMT_ANNUITY \n",
"\n",
" Description Special \n",
"0 ID of loan in our sample NaN \n",
"1 Target variable (1 - client with payment diffi... NaN \n",
"2 Identification if loan is cash or revolving NaN \n",
"3 Gender of the client NaN \n",
"4 Flag if the client owns a car NaN \n",
"5 Flag if client owns a house or flat NaN \n",
"6 Number of children the client has NaN \n",
"7 Income of the client NaN \n",
"8 Credit amount of the loan NaN \n",
"9 Loan annuity NaN "
],
"text/html": [
"\n",
"