diff --git "a/processed_data/left.ipynb" "b/processed_data/left.ipynb"
new file mode 100644--- /dev/null
+++ "b/processed_data/left.ipynb"
@@ -0,0 +1,2659 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "retRenUTV_zY"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "66a5bb7c",
+ "outputId": "43d093be-b202-481e-f42e-4e75f331c4d4"
+ },
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Load the ProjectConfiguration export.\n",
+ "# on_bad_lines='warn' still drops malformed rows, but reports each one so that\n",
+ "# any data loss is visible instead of silent.\n",
+ "project_df_config = pd.read_csv('/content/ProjectConfiguration.csv', sep=',', on_bad_lines='warn')\n",
+ "display(project_df_config.head())"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id projectId propertyCategory \\\n",
+ "0 cmf53kkzz000ivcu89r5399s4 cmf53kkzy000fvcu8tx8jwjmr RESIDENTIAL \n",
+ "1 cmf53kl00000kvcu86ivy65di cmf53kkzy000fvcu8tx8jwjmr RESIDENTIAL \n",
+ "2 cmf5r6hv20004vxpt0l657blu cmf5r6hv00001vxptnfichhfl RESIDENTIAL \n",
+ "3 cmfawdrnq000avc18qcvaxzi9 cmfawdrno0007vc18l0fm0z2j RESIDENTIAL \n",
+ "4 cmfawdrnr000dvc188hupv4yy cmfawdrno0007vc18l0fm0z2j RESIDENTIAL \n",
+ "\n",
+ " type customBHK \n",
+ "0 1BHK \n",
+ "1 2BHK \n",
+ "2 2BHK 2BHK \n",
+ "3 1BHK 1BHK \n",
+ "4 3BHK 3BHK "
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " projectId | \n",
+ " propertyCategory | \n",
+ " type | \n",
+ " customBHK | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf53kkzz000ivcu89r5399s4 | \n",
+ " cmf53kkzy000fvcu8tx8jwjmr | \n",
+ " RESIDENTIAL | \n",
+ " 1BHK | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmf53kl00000kvcu86ivy65di | \n",
+ " cmf53kkzy000fvcu8tx8jwjmr | \n",
+ " RESIDENTIAL | \n",
+ " 2BHK | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmf5r6hv20004vxpt0l657blu | \n",
+ " cmf5r6hv00001vxptnfichhfl | \n",
+ " RESIDENTIAL | \n",
+ " 2BHK | \n",
+ " 2BHK | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfawdrnq000avc18qcvaxzi9 | \n",
+ " cmfawdrno0007vc18l0fm0z2j | \n",
+ " RESIDENTIAL | \n",
+ " 1BHK | \n",
+ " 1BHK | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmfawdrnr000dvc188hupv4yy | \n",
+ " cmfawdrno0007vc18l0fm0z2j | \n",
+ " RESIDENTIAL | \n",
+ " 3BHK | \n",
+ " 3BHK | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"display(project_df_config\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"id \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"cmf53kl00000kvcu86ivy65di\",\n \"cmfawdrnr000dvc188hupv4yy\",\n \"cmf5r6hv20004vxpt0l657blu\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" projectId \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" cmf53kkzy000fvcu8tx8jwjmr\",\n \" cmf5r6hv00001vxptnfichhfl\",\n \" cmfawdrno0007vc18l0fm0z2j\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" propertyCategory\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" RESIDENTIAL \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" type \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" 1BHK \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" customBHK\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \" 2BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Load the ProjectConfigurationVariant export.\n",
+ "# on_bad_lines='warn' still drops malformed rows, but reports each one so that\n",
+ "# any data loss is visible instead of silent.\n",
+ "project_df_config_var = pd.read_csv('/content/ProjectConfigurationVariant.csv', sep=',', on_bad_lines='warn')\n",
+ "display(project_df_config_var.head())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 313
+ },
+ "id": "t4eD2J_oWxQA",
+ "outputId": "adbbc6e4-4464-4733-e227-ca88b4604888"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id \"configurationId\" \"bathrooms\" \\\n",
+ "0 cmf5r6hv20005vxpt3yfnl2qp \"cmf5r6hv20004vxpt0l657blu\" \"12\" \n",
+ "1 cmf5r6hv20006vxptcx3lmm05 \"cmf5r6hv20004vxpt0l657blu\" \"3\" \n",
+ "2 cmfawdrnq000bvc188680qjyx \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "3 cmfawdrnr000cvc1897rpsu1b \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "4 cmfawdrnr000evc18jwvlery4 \"cmfawdrnr000dvc188hupv4yy\" \"3\" \n",
+ "\n",
+ " \"privateBathrooms\" \"publicBathrooms\" \"balcony\" \"furnishedType\" \\\n",
+ "0 \"3\" \"UNFURNISHED\" \n",
+ "1 \"2\" \"UNFURNISHED\" \n",
+ "2 \"1\" \"UNFURNISHED\" \n",
+ "3 \"1\" \"UNFURNISHED\" \n",
+ "4 \"2\" \"UNFURNISHED\" \n",
+ "\n",
+ " \"furnishingType\" \\\n",
+ "0 \"[]\" ... \n",
+ "1 \"[]\" ... \n",
+ "2 \"[]\" ... \n",
+ "3 \"[]\" ... \n",
+ "4 \"[]\" ... \n",
+ "\n",
+ " \"lift\" \"ageOfProperty\" \"parkingType\" \"listingType\" \\\n",
+ "0 \"false\" \"Sell\" \n",
+ "1 \"false\" \"Sell\" \n",
+ "2 \"false\" \"Sell\" \n",
+ "3 \"false\" \"Sell\" \n",
+ "4 \"false\" \"Sell\" \n",
+ "\n",
+ " \"floorPlanImage\" \\\n",
+ "0 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \n",
+ "1 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \n",
+ "2 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \n",
+ "3 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \n",
+ "4 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \n",
+ "\n",
+ " \"carpetArea\" \"price\" \\\n",
+ "0 \"972\" \"120000000\" \n",
+ "1 \"188.73\" \"210000000\" \n",
+ "2 \"426.57\" \"13000000\" \n",
+ "3 \"460.8\" \"15000000\" \n",
+ "4 \"893.08\" \"29000000\" \n",
+ "\n",
+ " \"propertyImages\" \\\n",
+ "0 \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "1 \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "2 \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "3 \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "4 \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "\n",
+ " \"maintenanceCharges\" \"aboutProperty\" \\\n",
+ "0 \"faded \" \"about property \" \n",
+ "1 \"fsdaffdsafsfdddsa\" \n",
+ "2 \"na\" \n",
+ "3 \"na\" \n",
+ "4 \"na\" \n",
+ "\n",
+ " \"createdAt\" \"updatedAt\" \n",
+ "0 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" \n",
+ "1 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" \n",
+ "2 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" \n",
+ "3 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" \n",
+ "4 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " \"configurationId\" | \n",
+ " \"bathrooms\" | \n",
+ " \"privateBathrooms\" | \n",
+ " \"publicBathrooms\" | \n",
+ " \"balcony\" | \n",
+ " \"furnishedType\" | \n",
+ " \"furnishingType\" | \n",
+ " \"lift\" | \n",
+ " \"ageOfProperty\" | \n",
+ " \"parkingType\" | \n",
+ " \"listingType\" | \n",
+ " \"floorPlanImage\" | \n",
+ " \"carpetArea\" | \n",
+ " \"price\" | \n",
+ " \"propertyImages\" | \n",
+ " \"maintenanceCharges\" | \n",
+ " \"aboutProperty\" | \n",
+ " \"createdAt\" | \n",
+ " \"updatedAt\" | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf5r6hv20005vxpt3yfnl2qp | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"12\" | \n",
+ " | \n",
+ " | \n",
+ " \"3\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"972\" | \n",
+ " \"120000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " \"faded \" | \n",
+ " \"about property \" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmf5r6hv20006vxptcx3lmm05 | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"188.73\" | \n",
+ " \"210000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"fsdaffdsafsfdddsa\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmfawdrnq000bvc188680qjyx | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"426.57\" | \n",
+ " \"13000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfawdrnr000cvc1897rpsu1b | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"460.8\" | \n",
+ " \"15000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmfawdrnr000evc18jwvlery4 | \n",
+ " \"cmfawdrnr000dvc188hupv4yy\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"893.08\" | \n",
+ " \"29000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"display(project_df_config_var\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"id \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"cmf5r6hv20006vxptcx3lmm05\",\n \"cmfawdrnr000evc18jwvlery4\",\n \"cmfawdrnq000bvc188680qjyx\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"configurationId\\\" \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"cmf5r6hv20004vxpt0l657blu\\\"\",\n \" \\\"cmfawdrnq000avc18qcvaxzi9\\\"\",\n \" \\\"cmfawdrnr000dvc188hupv4yy\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"bathrooms\\\"\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"12\\\" \",\n \" \\\"3\\\" \",\n \" \\\"1\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"privateBathrooms\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"publicBathrooms\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"balcony\\\"\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"3\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"furnishedType\\\" \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"UNFURNISHED\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"furnishingType\\\" \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"[]\\\" \"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"lift\\\" \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"false\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"ageOfProperty\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"parkingType\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"listingType\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"Sell\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"floorPlanImage\\\" \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"https://pub-d28896f69c604ec5aa743cb0397740d9.r2.dev/1757011238542-2d39a8b06669406b.jpg\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"carpetArea\\\"\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"188.73\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"price\\\" \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"210000000\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"propertyImages\\\" \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"[\\\"\\\"https://pub-d28896f69c604ec5aa743cb0397740d9.r2.dev/1757011238542-bd3d1a40c2d7fadb.jpg\\\"\\\"]\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \" \\\"maintenanceCharges\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"aboutProperty\\\" \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"about property \\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"createdAt\\\" \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \\\"2025-09-08 09:06:36.995\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" \\\"updatedAt\\\"\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \\\"2025-09-08 09:06:36.995\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Clean up column names by removing quotes and spaces\n",
+ "project_df_config_var.columns = project_df_config_var.columns.str.replace('\"', '').str.strip()\n",
+ "project_df_config.columns = project_df_config.columns.str.strip()\n",
+ "\n",
+ "# The cell values are padded with spaces and, in the variant file, wrapped in\n",
+ "# literal double quotes, exactly like the headers. Normalise both join keys,\n",
+ "# otherwise no key ever matches and every configuration column comes back NaN.\n",
+ "variant_rows = project_df_config_var.assign(\n",
+ "    configurationId=project_df_config_var['configurationId'].astype('string').str.strip().str.strip('\"').str.strip()\n",
+ ")\n",
+ "configuration_rows = project_df_config.assign(\n",
+ "    id=project_df_config['id'].astype('string').str.strip().str.strip('\"').str.strip()\n",
+ ")\n",
+ "\n",
+ "# Perform the left merge: keep every variant row and attach its configuration\n",
+ "merged_config_df = pd.merge(variant_rows, configuration_rows, left_on='configurationId', right_on='id', how='left')\n",
+ "\n",
+ "# Report how many variant rows found no configuration (id_y is the configuration id)\n",
+ "print(f\"{merged_config_df['id_y'].isna().sum()} of {len(merged_config_df)} variant rows have no matching configuration.\")\n",
+ "\n",
+ "# Display the first few rows of the merged dataframe\n",
+ "display(merged_config_df.head())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 342
+ },
+ "id": "pcPvJV8gZCjt",
+ "outputId": "c8f97191-b8e3-4c7a-b47f-ac0f5c049706"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id_x configurationId bathrooms \\\n",
+ "0 cmf5r6hv20005vxpt3yfnl2qp \"cmf5r6hv20004vxpt0l657blu\" \"12\" \n",
+ "1 cmf5r6hv20006vxptcx3lmm05 \"cmf5r6hv20004vxpt0l657blu\" \"3\" \n",
+ "2 cmfawdrnq000bvc188680qjyx \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "3 cmfawdrnr000cvc1897rpsu1b \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "4 cmfawdrnr000evc18jwvlery4 \"cmfawdrnr000dvc188hupv4yy\" \"3\" \n",
+ "\n",
+ " privateBathrooms publicBathrooms balcony furnishedType \\\n",
+ "0 \"3\" \"UNFURNISHED\" \n",
+ "1 \"2\" \"UNFURNISHED\" \n",
+ "2 \"1\" \"UNFURNISHED\" \n",
+ "3 \"1\" \"UNFURNISHED\" \n",
+ "4 \"2\" \"UNFURNISHED\" \n",
+ "\n",
+ " furnishingType lift \\\n",
+ "0 \"[]\" ... \"false\" \n",
+ "1 \"[]\" ... \"false\" \n",
+ "2 \"[]\" ... \"false\" \n",
+ "3 \"[]\" ... \"false\" \n",
+ "4 \"[]\" ... \"false\" \n",
+ "\n",
+ " ageOfProperty ... propertyImages \\\n",
+ "0 ... \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "1 ... \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "2 ... \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "3 ... \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "4 ... \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "\n",
+ " maintenanceCharges aboutProperty \\\n",
+ "0 \"faded \" \"about property \" \n",
+ "1 \"fsdaffdsafsfdddsa\" \n",
+ "2 \"na\" \n",
+ "3 \"na\" \n",
+ "4 \"na\" \n",
+ "\n",
+ " createdAt updatedAt id_y projectId \\\n",
+ "0 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" NaN NaN \n",
+ "1 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" NaN NaN \n",
+ "2 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "3 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "4 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "\n",
+ " propertyCategory type customBHK \n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 25 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " configurationId | \n",
+ " bathrooms | \n",
+ " privateBathrooms | \n",
+ " publicBathrooms | \n",
+ " balcony | \n",
+ " furnishedType | \n",
+ " furnishingType | \n",
+ " lift | \n",
+ " ageOfProperty | \n",
+ " ... | \n",
+ " propertyImages | \n",
+ " maintenanceCharges | \n",
+ " aboutProperty | \n",
+ " createdAt | \n",
+ " updatedAt | \n",
+ " id_y | \n",
+ " projectId | \n",
+ " propertyCategory | \n",
+ " type | \n",
+ " customBHK | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf5r6hv20005vxpt3yfnl2qp | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"12\" | \n",
+ " | \n",
+ " | \n",
+ " \"3\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " \"faded \" | \n",
+ " \"about property \" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmf5r6hv20006vxptcx3lmm05 | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"fsdaffdsafsfdddsa\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmfawdrnq000bvc188680qjyx | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfawdrnr000cvc1897rpsu1b | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmfawdrnr000evc18jwvlery4 | \n",
+ " \"cmfawdrnr000dvc188hupv4yy\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 25 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "f230f8e3",
+ "outputId": "1299b902-56f1-4c77-d96a-8a5a4f36cd72"
+ },
+ "source": [
+ "# Summarise the dimensions (rows, columns) of merged_config_df\n",
+ "print('The merged_config_df has {} rows and {} columns.'.format(*merged_config_df.shape))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "The merged_config_df has 78 rows and 25 columns.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Write merged_config_df to CSV in the working directory, omitting the row index\n",
+ "merged_config_df.to_csv(path_or_buf='final_merged_data100.csv', index=False)"
+ ],
+ "metadata": {
+ "id": "hDr5uJqvbg2Y"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Load the ProjectAddress export.\n",
+ "# on_bad_lines='warn' still drops malformed rows, but reports each one so that\n",
+ "# any data loss is visible instead of silent.\n",
+ "project_adr_df = pd.read_csv('/content/ProjectAddress.csv', sep=',', on_bad_lines='warn')\n",
+ "display(project_adr_df.head())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 310
+ },
+ "id": "kkdwonEdcbgf",
+ "outputId": "b3cd5648-6818-4ac8-8b16-c1e383cf98af"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id projectId \\\n",
+ "0 cmf53kl01000nvcu8ibut7fka cmf53kkzy000fvcu8tx8jwjmr \n",
+ "1 cmfc6pq1n000cvca0gpdjstzf cmfc6pq1k0001vca0ikzb258m \n",
+ "2 cmfc79ip8001lvca0o1ls6fuz cmfc79ip5001cvca0qht6o44p \n",
+ "3 cmfcccifw004vvca0mvtcgxos cmfcccifs004nvca0b7im3r5a \n",
+ "4 cmff8swbn0007vxp7bhj7cmqv cmff8swbm0001vxp7pe3neibi \n",
+ "\n",
+ " landmark \\\n",
+ "0 Babys school \n",
+ "1 JBCN International School Mulund \n",
+ "2 JBCN International School Parel \n",
+ "3 Lodha Xperia Mall \n",
+ "4 sdfgb \n",
+ "\n",
+ " fullAddress \\\n",
+ "0 Mumbai chembur ... \n",
+ "1 Prataprao Gujar Rd Neelam Nagar Mulund East M... \n",
+ "2 AVENUE 15 Ramesh Barrel Supplying Company K.T... \n",
+ "3 64C5+C63 Dombivli East Dombivli Maharashtra 4... \n",
+ "4 asdfgh ... \n",
+ "\n",
+ " pincode \n",
+ "0 411017 \n",
+ "1 400081 \n",
+ "2 400015 \n",
+ "3 421201 \n",
+ "4 123456 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " projectId | \n",
+ " landmark | \n",
+ " fullAddress | \n",
+ " pincode | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf53kl01000nvcu8ibut7fka | \n",
+ " cmf53kkzy000fvcu8tx8jwjmr | \n",
+ " Babys school | \n",
+ " Mumbai chembur ... | \n",
+ " 411017 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmfc6pq1n000cvca0gpdjstzf | \n",
+ " cmfc6pq1k0001vca0ikzb258m | \n",
+ " JBCN International School Mulund | \n",
+ " Prataprao Gujar Rd Neelam Nagar Mulund East M... | \n",
+ " 400081 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmfc79ip8001lvca0o1ls6fuz | \n",
+ " cmfc79ip5001cvca0qht6o44p | \n",
+ " JBCN International School Parel | \n",
+ " AVENUE 15 Ramesh Barrel Supplying Company K.T... | \n",
+ " 400015 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfcccifw004vvca0mvtcgxos | \n",
+ " cmfcccifs004nvca0b7im3r5a | \n",
+ " Lodha Xperia Mall | \n",
+ " 64C5+C63 Dombivli East Dombivli Maharashtra 4... | \n",
+ " 421201 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmff8swbn0007vxp7bhj7cmqv | \n",
+ " cmff8swbm0001vxp7pe3neibi | \n",
+ " sdfgb | \n",
+ " asdfgh ... | \n",
+ " 123456 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"display(project_adr_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"id \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"cmfc6pq1n000cvca0gpdjstzf\",\n \"cmff8swbn0007vxp7bhj7cmqv\",\n \"cmfc79ip8001lvca0o1ls6fuz\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" projectId \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" cmfc6pq1k0001vca0ikzb258m\",\n \" cmff8swbm0001vxp7pe3neibi\",\n \" cmfc79ip5001cvca0qht6o44p\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" landmark \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" JBCN International School Mulund \",\n \" sdfgb \",\n \" JBCN International School Parel \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" fullAddress \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" Prataprao Gujar Rd Neelam Nagar Mulund East Mumbai Maharashtra 400081 \",\n \" asdfgh \",\n \" AVENUE 15 Ramesh Barrel Supplying Company K.T.Gupta Wadi S.P.Murai Rd behind Sewri Road Sewri W Maharashtra 400015 \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \" pincode\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 127590,\n \"min\": 123456,\n \"max\": 421201,\n \"num_unique_values\": 5,\n \"samples\": [\n 400081,\n 123456,\n 400015\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "d3c5cc71",
+ "outputId": "bf2b1a0b-b8b0-4744-85a0-48cd3c29bfa2"
+ },
+ "source": [
+ "# Inspect the column labels of both frames before joining them\n",
+ "print(\n",
+ "    f\"Columns in merged_config_df: {merged_config_df.columns}\\n\"\n",
+ "    f\"Columns in project_adr_df: {project_adr_df.columns}\"\n",
+ ")"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Columns in merged_config_df: Index(['id_x', 'configurationId', 'bathrooms', 'privateBathrooms',\n",
+ " 'publicBathrooms', 'balcony', 'furnishedType', 'furnishingType', 'lift',\n",
+ " 'ageOfProperty', 'parkingType', 'listingType', 'floorPlanImage',\n",
+ " 'carpetArea', 'price', 'propertyImages', 'maintenanceCharges',\n",
+ " 'aboutProperty', 'createdAt', 'updatedAt', 'id_y', 'projectId',\n",
+ " 'propertyCategory', 'type', 'customBHK'],\n",
+ " dtype='object')\n",
+ "Columns in project_adr_df: Index(['id ', ' projectId ',\n",
+ " ' landmark ',\n",
+ " ' fullAddress ',\n",
+ " ' pincode'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 342
+ },
+ "id": "91c5028d",
+ "outputId": "5c9b438d-1134-4f93-c67c-3d9113abf76e"
+ },
+ "source": [
+ "# Clean up column names by removing surrounding spaces\n",
+ "merged_config_df.columns = merged_config_df.columns.str.strip()\n",
+ "project_adr_df.columns = project_adr_df.columns.str.strip()\n",
+ "\n",
+ "# The address file pads its values with spaces just like its headers, so\n",
+ "# normalise projectId on both sides; otherwise the keys never compare equal\n",
+ "# and every address column comes back NaN.\n",
+ "merged_rows = merged_config_df.assign(\n",
+ "    projectId=merged_config_df['projectId'].astype('string').str.strip().str.strip('\"').str.strip()\n",
+ ")\n",
+ "address_rows = project_adr_df.assign(\n",
+ "    projectId=project_adr_df['projectId'].astype('string').str.strip().str.strip('\"').str.strip()\n",
+ ")\n",
+ "\n",
+ "# Perform the left merge: keep every merged row and attach its project address\n",
+ "final_merged_df = pd.merge(merged_rows, address_rows, on='projectId', how='left')\n",
+ "\n",
+ "# Report how many rows found no address ('id' is the address row id)\n",
+ "print(f\"{final_merged_df['id'].isna().sum()} of {len(final_merged_df)} rows have no matching address.\")\n",
+ "\n",
+ "# Display the first few rows of the merged dataframe\n",
+ "display(final_merged_df.head())"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id_x configurationId bathrooms \\\n",
+ "0 cmf5r6hv20005vxpt3yfnl2qp \"cmf5r6hv20004vxpt0l657blu\" \"12\" \n",
+ "1 cmf5r6hv20006vxptcx3lmm05 \"cmf5r6hv20004vxpt0l657blu\" \"3\" \n",
+ "2 cmfawdrnq000bvc188680qjyx \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "3 cmfawdrnr000cvc1897rpsu1b \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "4 cmfawdrnr000evc18jwvlery4 \"cmfawdrnr000dvc188hupv4yy\" \"3\" \n",
+ "\n",
+ " privateBathrooms publicBathrooms balcony furnishedType \\\n",
+ "0 \"3\" \"UNFURNISHED\" \n",
+ "1 \"2\" \"UNFURNISHED\" \n",
+ "2 \"1\" \"UNFURNISHED\" \n",
+ "3 \"1\" \"UNFURNISHED\" \n",
+ "4 \"2\" \"UNFURNISHED\" \n",
+ "\n",
+ " furnishingType lift \\\n",
+ "0 \"[]\" ... \"false\" \n",
+ "1 \"[]\" ... \"false\" \n",
+ "2 \"[]\" ... \"false\" \n",
+ "3 \"[]\" ... \"false\" \n",
+ "4 \"[]\" ... \"false\" \n",
+ "\n",
+ " ageOfProperty ... updatedAt id_y projectId \\\n",
+ "0 ... \"2025-09-04 18:42:08.748\" NaN NaN \n",
+ "1 ... \"2025-09-04 18:42:08.748\" NaN NaN \n",
+ "2 ... \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "3 ... \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "4 ... \"2025-09-08 09:06:36.995\" NaN NaN \n",
+ "\n",
+ " propertyCategory type customBHK id landmark fullAddress pincode \n",
+ "0 NaN NaN NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 29 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " configurationId | \n",
+ " bathrooms | \n",
+ " privateBathrooms | \n",
+ " publicBathrooms | \n",
+ " balcony | \n",
+ " furnishedType | \n",
+ " furnishingType | \n",
+ " lift | \n",
+ " ageOfProperty | \n",
+ " ... | \n",
+ " updatedAt | \n",
+ " id_y | \n",
+ " projectId | \n",
+ " propertyCategory | \n",
+ " type | \n",
+ " customBHK | \n",
+ " id | \n",
+ " landmark | \n",
+ " fullAddress | \n",
+ " pincode | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf5r6hv20005vxpt3yfnl2qp | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"12\" | \n",
+ " | \n",
+ " | \n",
+ " \"3\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmf5r6hv20006vxptcx3lmm05 | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmfawdrnq000bvc188680qjyx | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfawdrnr000cvc1897rpsu1b | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmfawdrnr000evc18jwvlery4 | \n",
+ " \"cmfawdrnr000dvc188hupv4yy\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " ... | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 29 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# DataFrame.shape is a (rows, columns) tuple, so it unpacks straight into the message template.\n",
+ "print(\"The final_merged_df has {} rows and {} columns.\".format(*final_merged_df.shape))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "r9-uyxyMfqoz",
+ "outputId": "67e7116b-02b4-43ad-9149-fbae9ce23efd"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "The final_merged_df has 78 rows and 29 columns.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Write the merged frame to a CSV in the current working directory, omitting the index column.\n",
+ "final_merged_df.to_csv(path_or_buf='final_merged_data200.csv', index=False)"
+ ],
+ "metadata": {
+ "id": "l4SW0LqJgX9C"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Load the project table; on_bad_lines='skip' silently drops malformed rows instead of raising.\n",
+ "project_df = pd.read_csv('/content/project.csv', sep=',', on_bad_lines='skip')\n",
+ "# Preview the frame loaded above. The cell previously displayed project_df_config_var,\n",
+ "# so any saved output showing configuration-variant columns is stale until this cell is re-run.\n",
+ "display(project_df.head())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 313
+ },
+ "id": "o_GW3ua-cqC-",
+ "outputId": "5f6e7769-8734-46e1-c758-2a58ff937310"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " id configurationId bathrooms \\\n",
+ "0 cmf5r6hv20005vxpt3yfnl2qp \"cmf5r6hv20004vxpt0l657blu\" \"12\" \n",
+ "1 cmf5r6hv20006vxptcx3lmm05 \"cmf5r6hv20004vxpt0l657blu\" \"3\" \n",
+ "2 cmfawdrnq000bvc188680qjyx \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "3 cmfawdrnr000cvc1897rpsu1b \"cmfawdrnq000avc18qcvaxzi9\" \"1\" \n",
+ "4 cmfawdrnr000evc18jwvlery4 \"cmfawdrnr000dvc188hupv4yy\" \"3\" \n",
+ "\n",
+ " privateBathrooms publicBathrooms balcony furnishedType \\\n",
+ "0 \"3\" \"UNFURNISHED\" \n",
+ "1 \"2\" \"UNFURNISHED\" \n",
+ "2 \"1\" \"UNFURNISHED\" \n",
+ "3 \"1\" \"UNFURNISHED\" \n",
+ "4 \"2\" \"UNFURNISHED\" \n",
+ "\n",
+ " furnishingType lift \\\n",
+ "0 \"[]\" ... \"false\" \n",
+ "1 \"[]\" ... \"false\" \n",
+ "2 \"[]\" ... \"false\" \n",
+ "3 \"[]\" ... \"false\" \n",
+ "4 \"[]\" ... \"false\" \n",
+ "\n",
+ " ageOfProperty parkingType listingType \\\n",
+ "0 \"Sell\" \n",
+ "1 \"Sell\" \n",
+ "2 \"Sell\" \n",
+ "3 \"Sell\" \n",
+ "4 \"Sell\" \n",
+ "\n",
+ " floorPlanImage carpetArea \\\n",
+ "0 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \"972\" \n",
+ "1 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \"188.73\" \n",
+ "2 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \"426.57\" \n",
+ "3 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \"460.8\" \n",
+ "4 \"https://pub-d28896f69c604ec5aa743cb0397740d9... \"893.08\" \n",
+ "\n",
+ " price propertyImages \\\n",
+ "0 \"120000000\" \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "1 \"210000000\" \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "2 \"13000000\" \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "3 \"15000000\" \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "4 \"29000000\" \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... \n",
+ "\n",
+ " maintenanceCharges aboutProperty \\\n",
+ "0 \"faded \" \"about property \" \n",
+ "1 \"fsdaffdsafsfdddsa\" \n",
+ "2 \"na\" \n",
+ "3 \"na\" \n",
+ "4 \"na\" \n",
+ "\n",
+ " createdAt updatedAt \n",
+ "0 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" \n",
+ "1 \"2025-09-04 18:42:08.748\" \"2025-09-04 18:42:08.748\" \n",
+ "2 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" \n",
+ "3 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" \n",
+ "4 \"2025-09-08 09:06:36.995\" \"2025-09-08 09:06:36.995\" "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " configurationId | \n",
+ " bathrooms | \n",
+ " privateBathrooms | \n",
+ " publicBathrooms | \n",
+ " balcony | \n",
+ " furnishedType | \n",
+ " furnishingType | \n",
+ " lift | \n",
+ " ageOfProperty | \n",
+ " parkingType | \n",
+ " listingType | \n",
+ " floorPlanImage | \n",
+ " carpetArea | \n",
+ " price | \n",
+ " propertyImages | \n",
+ " maintenanceCharges | \n",
+ " aboutProperty | \n",
+ " createdAt | \n",
+ " updatedAt | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cmf5r6hv20005vxpt3yfnl2qp | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"12\" | \n",
+ " | \n",
+ " | \n",
+ " \"3\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"972\" | \n",
+ " \"120000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " \"faded \" | \n",
+ " \"about property \" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cmf5r6hv20006vxptcx3lmm05 | \n",
+ " \"cmf5r6hv20004vxpt0l657blu\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"188.73\" | \n",
+ " \"210000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"fsdaffdsafsfdddsa\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ " \"2025-09-04 18:42:08.748\" | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cmfawdrnq000bvc188680qjyx | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"426.57\" | \n",
+ " \"13000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cmfawdrnr000cvc1897rpsu1b | \n",
+ " \"cmfawdrnq000avc18qcvaxzi9\" | \n",
+ " \"1\" | \n",
+ " | \n",
+ " | \n",
+ " \"1\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"460.8\" | \n",
+ " \"15000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cmfawdrnr000evc18jwvlery4 | \n",
+ " \"cmfawdrnr000dvc188hupv4yy\" | \n",
+ " \"3\" | \n",
+ " | \n",
+ " | \n",
+ " \"2\" | \n",
+ " \"UNFURNISHED\" | \n",
+ " \"[]\" ... | \n",
+ " \"false\" | \n",
+ " | \n",
+ " | \n",
+ " \"Sell\" | \n",
+ " \"https://pub-d28896f69c604ec5aa743cb0397740d9... | \n",
+ " \"893.08\" | \n",
+ " \"29000000\" | \n",
+ " \"[\"\"https://pub-d28896f69c604ec5aa743cb039774... | \n",
+ " | \n",
+ " \"na\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ " \"2025-09-08 09:06:36.995\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"display(project_df_config_var\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"cmf5r6hv20006vxptcx3lmm05\",\n \"cmfawdrnr000evc18jwvlery4\",\n \"cmfawdrnq000bvc188680qjyx\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"configurationId\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"cmf5r6hv20004vxpt0l657blu\\\"\",\n \" \\\"cmfawdrnq000avc18qcvaxzi9\\\"\",\n \" \\\"cmfawdrnr000dvc188hupv4yy\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bathrooms\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"12\\\" \",\n \" \\\"3\\\" \",\n \" \\\"1\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"privateBathrooms\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"publicBathrooms\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"balcony\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"3\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"furnishedType\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"UNFURNISHED\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"furnishingType\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"[]\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"lift\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"false\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ageOfProperty\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"parkingType\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"listingType\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \" \\\"Sell\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"floorPlanImage\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"https://pub-d28896f69c604ec5aa743cb0397740d9.r2.dev/1757011238542-2d39a8b06669406b.jpg\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"carpetArea\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"188.73\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"210000000\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"propertyImages\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \" \\\"[\\\"\\\"https://pub-d28896f69c604ec5aa743cb0397740d9.r2.dev/1757011238542-bd3d1a40c2d7fadb.jpg\\\"\\\"]\\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"maintenanceCharges\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"aboutProperty\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \" \\\"about property \\\" \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"createdAt\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \\\"2025-09-08 09:06:36.995\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"updatedAt\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \" \\\"2025-09-08 09:06:36.995\\\"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "giZvUiPMdIQB"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "9U3IJrNeijE9"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "WjLvmDQ8ijBd"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file