search-engine

Runtime error

App Files Files Community

IAMTFRMZA

Michelangiolo committed on Jun 29, 2023

Commit

3e23545

0 Parent(s):

Duplicate from goliathaiconsulting/airbnb-search-engine

Browse files

Co-authored-by: Mazzeschi <Michelangiolo@users.noreply.huggingface.co>

Files changed (7) hide show

.gitattributes +34 -0
Airbnb_Open_Data.csv +0 -0
README.md +13 -0
airbnb.ipynb +604 -0
app.py +91 -0
df_encoded.parquet +3 -0
history.ipynb +107 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Airbnb_Open_Data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Airbnb Search Engine
+emoji: 🐢
+colorFrom: green
+colorTo: green
+sdk: gradio
+sdk_version: 3.23.0
+app_file: app.py
+pinned: false
+duplicated_from: goliathaiconsulting/airbnb-search-engine
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

airbnb.ipynb ADDED Viewed

	@@ -0,0 +1,604 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['id', 'NAME', 'host id', 'host name', 'neighbourhood group',\n",
+       "       'neighbourhood', 'lat', 'long', 'country', 'country code',\n",
+       "       'instant_bookable', 'cancellation_policy', 'room type',\n",
+       "       'Construction year', 'price', 'service fee', 'minimum nights',\n",
+       "       'number of reviews', 'last review', 'reviews per month',\n",
+       "       'review rate number', 'calculated host listings count',\n",
+       "       'availability 365', 'house_rules', 'license'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ardit\\AppData\\Local\\Temp\\ipykernel_25752\\2207992772.py:4: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  df = pd.read_csv('Airbnb_Open_Data.csv')\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import random\n",
+    "\n",
+    "df = pd.read_csv('Airbnb_Open_Data.csv')\n",
+    "df = df.drop('host_identity_verified', axis=1)\n",
+    "df['description'] = df['NAME']\n",
+    "df['price'] = df['price'].dropna().apply(lambda x : int(x[1:].strip().replace(',', '')))\n",
+    "df['sq. meters'] = df['price'].apply(lambda x : random.choices([25, 40, 45, 55, 60, 70], weights=[5, 5, 4, 3, 2, 1])[0])\n",
+    "df = df[['price', 'sq. meters', 'description', 'neighbourhood group', 'host name', 'cancellation_policy', 'house_rules']]\n",
+    "df = df[df['house_rules']!='#NAME?'].dropna().reset_index(drop=True)\n",
+    "df = df[0:10000]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 10000/10000 [17:37<00:00,  9.45it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>price</th>\n",
+       "      <th>sq. meters</th>\n",
+       "      <th>description</th>\n",
+       "      <th>neighbourhood group</th>\n",
+       "      <th>host name</th>\n",
+       "      <th>cancellation_policy</th>\n",
+       "      <th>house_rules</th>\n",
+       "      <th>text_vector_</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>966.0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>Clean &amp; quiet apt home by the park</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Madaline</td>\n",
+       "      <td>strict</td>\n",
+       "      <td>Clean up and treat the home the way you'd like...</td>\n",
+       "      <td>[-0.047521110624074936, 0.03044620156288147, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>142.0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>Skylit Midtown Castle</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Jenna</td>\n",
+       "      <td>moderate</td>\n",
+       "      <td>Pet friendly but please confirm with me if the...</td>\n",
+       "      <td>[-0.04690079391002655, 0.061329323798418045, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>620.0</td>\n",
+       "      <td>45</td>\n",
+       "      <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Elise</td>\n",
+       "      <td>flexible</td>\n",
+       "      <td>I encourage you to use my kitchen, cooking and...</td>\n",
+       "      <td>[0.00039011164335533977, 0.018310122191905975,...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>204.0</td>\n",
+       "      <td>55</td>\n",
+       "      <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Lyndon</td>\n",
+       "      <td>moderate</td>\n",
+       "      <td>Please no smoking in the house, porch or on th...</td>\n",
+       "      <td>[-0.04602213576436043, 0.015605293214321136, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>577.0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>Large Cozy 1 BR Apartment In Midtown East</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Michelle</td>\n",
+       "      <td>flexible</td>\n",
+       "      <td>No smoking, please, and no drugs.</td>\n",
+       "      <td>[-0.04859349876642227, -0.01263828668743372, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9995</th>\n",
+       "      <td>745.0</td>\n",
+       "      <td>60</td>\n",
+       "      <td>Upper West Side 1BR next to subway/Central Park</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Doreen</td>\n",
+       "      <td>strict</td>\n",
+       "      <td>Our Herbivorian House manual with detailed rul...</td>\n",
+       "      <td>[-0.0346745029091835, -0.005859952419996262, 0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9996</th>\n",
+       "      <td>1135.0</td>\n",
+       "      <td>45</td>\n",
+       "      <td>Modern and Bright Studio Apt in Williamsburg</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Shannon</td>\n",
+       "      <td>strict</td>\n",
+       "      <td>No smoking please!</td>\n",
+       "      <td>[-0.016586357727646828, 0.020517650991678238, ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9997</th>\n",
+       "      <td>59.0</td>\n",
+       "      <td>45</td>\n",
+       "      <td>Holiday in Trendy Williamsburg Apt!</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Peter</td>\n",
+       "      <td>strict</td>\n",
+       "      <td>We suggest you use email or texting contact us...</td>\n",
+       "      <td>[-0.05095353722572327, 0.08510775864124298, -0...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9998</th>\n",
+       "      <td>1055.0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>Greenwich Village| Private Queen room</td>\n",
+       "      <td>Manhattan</td>\n",
+       "      <td>Kelly</td>\n",
+       "      <td>flexible</td>\n",
+       "      <td>Please treat this house as if it is your own. ...</td>\n",
+       "      <td>[0.00017118529649451375, 0.010939894244074821,...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9999</th>\n",
+       "      <td>285.0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>Comfortable bedroom in spacious apt</td>\n",
+       "      <td>Brooklyn</td>\n",
+       "      <td>Arthur</td>\n",
+       "      <td>strict</td>\n",
+       "      <td>Please, No smoking and no pets. We do require ...</td>\n",
+       "      <td>[-0.01795135624706745, -0.029596544802188873, ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10000 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       price  sq. meters                                       description  \\\n",
+       "0      966.0          25                Clean & quiet apt home by the park   \n",
+       "1      142.0          25                             Skylit Midtown Castle   \n",
+       "2      620.0          45               THE VILLAGE OF HARLEM....NEW YORK !   \n",
+       "3      204.0          55  Entire Apt: Spacious Studio/Loft by central park   \n",
+       "4      577.0          25         Large Cozy 1 BR Apartment In Midtown East   \n",
+       "...      ...         ...                                               ...   \n",
+       "9995   745.0          60   Upper West Side 1BR next to subway/Central Park   \n",
+       "9996  1135.0          45      Modern and Bright Studio Apt in Williamsburg   \n",
+       "9997    59.0          45               Holiday in Trendy Williamsburg Apt!   \n",
+       "9998  1055.0          25             Greenwich Village| Private Queen room   \n",
+       "9999   285.0          25               Comfortable bedroom in spacious apt   \n",
+       "\n",
+       "     neighbourhood group host name cancellation_policy  \\\n",
+       "0               Brooklyn  Madaline              strict   \n",
+       "1              Manhattan     Jenna            moderate   \n",
+       "2              Manhattan     Elise            flexible   \n",
+       "3              Manhattan    Lyndon            moderate   \n",
+       "4              Manhattan  Michelle            flexible   \n",
+       "...                  ...       ...                 ...   \n",
+       "9995           Manhattan    Doreen              strict   \n",
+       "9996            Brooklyn   Shannon              strict   \n",
+       "9997            Brooklyn     Peter              strict   \n",
+       "9998           Manhattan     Kelly            flexible   \n",
+       "9999            Brooklyn    Arthur              strict   \n",
+       "\n",
+       "                                            house_rules  \\\n",
+       "0     Clean up and treat the home the way you'd like...   \n",
+       "1     Pet friendly but please confirm with me if the...   \n",
+       "2     I encourage you to use my kitchen, cooking and...   \n",
+       "3     Please no smoking in the house, porch or on th...   \n",
+       "4                     No smoking, please, and no drugs.   \n",
+       "...                                                 ...   \n",
+       "9995  Our Herbivorian House manual with detailed rul...   \n",
+       "9996                                 No smoking please!   \n",
+       "9997  We suggest you use email or texting contact us...   \n",
+       "9998  Please treat this house as if it is your own. ...   \n",
+       "9999  Please, No smoking and no pets. We do require ...   \n",
+       "\n",
+       "                                           text_vector_  \n",
+       "0     [-0.047521110624074936, 0.03044620156288147, 0...  \n",
+       "1     [-0.04690079391002655, 0.061329323798418045, 0...  \n",
+       "2     [0.00039011164335533977, 0.018310122191905975,...  \n",
+       "3     [-0.04602213576436043, 0.015605293214321136, 0...  \n",
+       "4     [-0.04859349876642227, -0.01263828668743372, 0...  \n",
+       "...                                                 ...  \n",
+       "9995  [-0.0346745029091835, -0.005859952419996262, 0...  \n",
+       "9996  [-0.016586357727646828, 0.020517650991678238, ...  \n",
+       "9997  [-0.05095353722572327, 0.08510775864124298, -0...  \n",
+       "9998  [0.00017118529649451375, 0.010939894244074821,...  \n",
+       "9999  [-0.01795135624706745, -0.029596544802188873, ...  \n",
+       "\n",
+       "[10000 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from tqdm import tqdm\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "tqdm.pandas()\n",
+    "\n",
+    "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
+    "\n",
+    "#encode df version: for small dataset only\n",
+    "df['text_vector_'] = df['description'].progress_apply(lambda x : model.encode(x).tolist())\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_parquet('df_encoded.parquet')\n",
+    "df['neighbourhood group'][0:2500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Manhattan')\n",
+    "df['neighbourhood group'][2500:5000] = df['neighbourhood group'][0:2500].apply(lambda x : 'Brooklyn')\n",
+    "df['neighbourhood group'][5000:7500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Queens')\n",
+    "df['neighbourhood group'][7500:] = df['neighbourhood group'][0:2500].apply(lambda x : 'Bronx')\n",
+    "df['location'] = df['neighbourhood group']\n",
+    "df = df[['price', 'sq. meters', 'description', 'location', 'host name', 'cancellation_policy', 'house_rules', 'text_vector_']]\n",
+    "df = df.reset_index(drop=True)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.neighbors import NearestNeighbors\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "\n",
+    "# df = df.read_parquet('df_encoded.parquet')\n",
+    "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
+    "\n",
+    "#prepare model\n",
+    "# nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gradio as gr\n",
+    "import statistics\n",
+    "\n",
+    "def closest_number(x):\n",
+    "    closest_numbers = [10, 20, 30, 40]\n",
+    "    closest_number = closest_numbers[0]\n",
+    "    min_distance = abs(x - closest_number)\n",
+    "    for number in closest_numbers[1:]:\n",
+    "        distance = abs(x - number)\n",
+    "        if distance < min_distance:\n",
+    "            closest_number = number\n",
+    "            min_distance = distance\n",
+    "    return closest_number\n",
+    "\n",
+    "def search(df, query):\n",
+    "    product = model.encode(query).tolist()\n",
+    "    # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n",
+    "\n",
+    "    nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
+    "    distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n",
+    "\n",
+    "    #print out the description of every recommended product\n",
+    "    df_search = df.iloc[list(indices)[0]].drop(['text_vector_'], axis=1) #.sort_values('avgFeedbackScore', ascending=False)\n",
+    "\n",
+    "    return df_search.sort_values('price', ascending=False)\n",
+    "\n",
+    "def filter_df(df, column_name, filter_type, filter_value):\n",
+    "    if filter_type == '==':\n",
+    "        df_filtered = df[df[column_name]==filter_value]\n",
+    "    elif filter_type == '>=':\n",
+    "        df_filtered = df[df[column_name]>=filter_value]\n",
+    "    elif filter_type == '<=':\n",
+    "        df_filtered = df[df[column_name]<=filter_value]\n",
+    "    return df_filtered\n",
+    "\n",
+    "history = list()\n",
+    "def predict(input1, input2, input3, input4):\n",
+    "    history.append([input1, input2, input3, input4])\n",
+    "\n",
+    "    print(history)\n",
+    "    df_location = filter_df(df, 'location', '==', input3)\n",
+    "    df_size = filter_df(df_location, 'sq. meters', '==', input2)\n",
+    "    df_price = filter_df(df_size, 'price', '<=', input1)\n",
+    "    df_result = search(df_price, input4)\n",
+    "\n",
+    "    prediction = [\n",
+    "        round(statistics.mean([x[0] for x in history])), #price\n",
+    "        closest_number(statistics.mean([x[1] for x in history])), #square room\n",
+    "        statistics.mode([x[2] for x in history]) #state\n",
+    "    ]\n",
+    "\n",
+    "    return df_result, prediction\n",
+    "\n",
+    "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
+    "    gr.Markdown(\n",
+    "    \"\"\"\n",
+    "    # Airbnb Search Engine\n",
+    "    \"\"\"\n",
+    "    )\n",
+    "    input1 = gr.Slider(100, 1200, value=700, step_size=100, label=\"Max Price\")\n",
+    "    input2 = gr.Radio([25, 40, 45, 55, 60, 70], multiselect=False, label='square meters', value=45)\n",
+    "    input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], multiselect=False, label='State', value='Queens')\n",
+    "    input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')\n",
+    "\n",
+    "    btn = gr.Button(value=\"Search for a Room\")\n",
+    "    output1 = gr.Dataframe()\n",
+    "    output2 = gr.Textbox(label='prediction for the next search')\n",
+    "    # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
+    "    btn.click(predict, [input1, input2, input3, input4], [output1, output2])\n",
+    "demo.launch(share=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.system('pip install openpyxl')\n",
+    "os.system('pip install sentence-transformers')\n",
+    "import pandas as pd\n",
+    "import gradio as gr\n",
+    "import statistics\n",
+    "from sklearn.neighbors import NearestNeighbors\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "\n",
+    "df = pd.read_parquet('df_encoded.parquet')\n",
+    "df['neighbourhood group'][0:2500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Manhattan')\n",
+    "df['neighbourhood group'][2500:5000] = df['neighbourhood group'][0:2500].apply(lambda x : 'Brooklyn')\n",
+    "df['neighbourhood group'][5000:7500] = df['neighbourhood group'][0:2500].apply(lambda x : 'Queens')\n",
+    "df['neighbourhood group'][7500:] = df['neighbourhood group'][0:2500].apply(lambda x : 'Bronx')\n",
+    "df['location'] = df['neighbourhood group']\n",
+    "df = df[['price', 'sq. meters', 'description', 'location', 'host name', 'cancellation_policy', 'house_rules', 'text_vector_']]\n",
+    "df = df.reset_index(drop=True)\n",
+    "df\n",
+    "\n",
+    "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
+    "\n",
+    "#prepare model #we run it anew in the search function every time, after the initial filtering\n",
+    "# nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
+    "\n",
+    "def closest_number(x):\n",
+    "    closest_numbers = [25, 40, 45, 55, 60, 70]\n",
+    "    closest_number = closest_numbers[0]\n",
+    "    min_distance = abs(x - closest_number)\n",
+    "    for number in closest_numbers[1:]:\n",
+    "        distance = abs(x - number)\n",
+    "        if distance < min_distance:\n",
+    "            closest_number = number\n",
+    "            min_distance = distance\n",
+    "    return closest_number\n",
+    "\n",
+    "def search(df, query):\n",
+    "    product = model.encode(query).tolist()\n",
+    "    # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n",
+    "\n",
+    "    nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
+    "    distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n",
+    "\n",
+    "    #print out the description of every recommended product\n",
+    "    df_search = df.iloc[list(indices)[0]].drop(['text_vector_'], axis=1) #.sort_values('avgFeedbackScore', ascending=False)\n",
+    "\n",
+    "    return df_search.sort_values('price', ascending=False)\n",
+    "\n",
+    "def filter_df(df, column_name, filter_type, filter_value):\n",
+    "    if filter_type == '==':\n",
+    "        df_filtered = df[df[column_name]==filter_value]\n",
+    "    elif filter_type == '>=':\n",
+    "        df_filtered = df[df[column_name]>=filter_value]\n",
+    "    elif filter_type == '<=':\n",
+    "        df_filtered = df[df[column_name]<=filter_value]\n",
+    "    return df_filtered"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict(history, input1, input2, input3, input4):\n",
+    "    history.append([input1, input2, input3, input4])\n",
+    "\n",
+    "    print(history)\n",
+    "    df_location = filter_df(df, 'location', '==', input3)\n",
+    "    df_size = filter_df(df_location, 'sq. meters', '==', input2)\n",
+    "    df_price = filter_df(df_size, 'price', '<=', input1)\n",
+    "    df_result = search(df_price, input4)\n",
+    "\n",
+    "    prediction = [\n",
+    "        round(statistics.mean([x[0] for x in history])), #price\n",
+    "        closest_number(statistics.mean([x[1] for x in history])), #square meters\n",
+    "        statistics.mode([x[2] for x in history]) #state\n",
+    "    ]\n",
+    "\n",
+    "    print(history)\n",
+    "\n",
+    "    return df_result, prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Slider, please remove them: {'step_size': 100}\n",
+      "  warnings.warn(\n",
+      "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7863\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
+      "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
+      "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!'], [700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n",
+      "[[700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!'], [700, 45, 'Brooklyn', 'I want to take a break from work 😴!!!']]\n"
+     ]
+    }
+   ],
+   "source": [
+    "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
+    "    history = gr.Variable(value=[]) #beginning\n",
+    "    gr.Markdown(\n",
+    "    \"\"\"\n",
+    "    # Airbnb Search Engine\n",
+    "    \"\"\"\n",
+    "    )\n",
+    "    input1 = gr.Slider(100, 1200, value=700, step_size=100, label=\"Max Price\")\n",
+    "    input2 = gr.Radio([25, 40, 45, 55, 60, 70], multiselect=False, label='square meters', value=45)\n",
+    "    input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], multiselect=False, label='State', value='Brooklyn')\n",
+    "    input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')\n",
+    "\n",
+    "    btn = gr.Button(value=\"Search for a Room\")\n",
+    "    output1 = gr.Dataframe()\n",
+    "    output2 = gr.Textbox(label='prediction for the next search')\n",
+    "    # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
+    "    btn.click(predict, [history, input1, input2, input3, input4], [output1, output2])\n",
+    "demo.launch(share=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+os.system('pip install openpyxl')
+os.system('pip install sentence-transformers')
+import pandas as pd
+import gradio as gr
+import statistics
+from sklearn.neighbors import NearestNeighbors
+from sentence_transformers import SentenceTransformer
# Load the listings together with their pre-computed description embeddings
# (stored per row in the 'text_vector_' column).
df = pd.read_parquet('df_encoded.parquet')

# Relabel the boroughs in fixed positional bands so that every choice offered
# by the UI has listings behind it.  The write goes through ``.iloc`` on the
# frame itself: the chained form ``df[col][a:b] = ...`` assigns through an
# intermediate Series, which pandas warns about (SettingWithCopyWarning) and
# which does not modify ``df`` at all under copy-on-write.  Assigning a scalar
# also removes the need for the last band to be exactly 2500 rows long.
_borough_column = df.columns.get_loc('neighbourhood group')
for _start, _stop, _borough in (
    (0, 2500, 'Manhattan'),
    (2500, 5000, 'Brooklyn'),
    (5000, 7500, 'Queens'),
    (7500, None, 'Bronx'),
):
    df.iloc[_start:_stop, _borough_column] = _borough

df['location'] = df['neighbourhood group']
df = df[['price', 'sq. meters', 'description', 'location', 'host name', 'cancellation_policy', 'house_rules', 'text_vector_']]
df = df.reset_index(drop=True)

# Sentence encoder used for the free-text query ('all-MiniLM-L6-v2' is a
# lighter alternative).
model = SentenceTransformer('all-mpnet-base-v2')

# No nearest-neighbour index is built here: search() fits one on every call,
# because it must only cover the rows that survive the filters.
def closest_number(x, candidates=(25, 40, 45, 55, 60, 70)):
    """Return the entry of *candidates* that is nearest to *x*.

    Args:
        x: The number to snap onto the allowed values.
        candidates: Non-empty iterable of allowed values.  The default is the
            set of room sizes offered by the 'square meters' selector.

    Returns:
        The candidate with the smallest absolute distance to *x*.  When two
        candidates are equally close, the one listed first wins.

    Raises:
        ValueError: If *candidates* is empty.
    """
    # min() keeps the first of several equally small keys, which matches the
    # strict "<" comparison of a manual scan.
    return min(candidates, key=lambda candidate: abs(x - candidate))
def search(df, query, n_neighbors=3):
    """Return the listings whose description embedding is closest to *query*.

    Args:
        df: Listings to search.  Must contain a 'text_vector_' column holding
            one embedding per row and a 'price' column.
        query: Free-text query; it is encoded with the module-level ``model``.
        n_neighbors: Maximum number of listings to return (default 3).

    Returns:
        A DataFrame with at most *n_neighbors* rows, without the
        'text_vector_' column, sorted by 'price' in descending order.  It is
        empty when *df* has no rows.
    """
    # NearestNeighbors raises when asked for more neighbours than there are
    # fitted samples, which happens as soon as the filters leave fewer than
    # ``n_neighbors`` rows.  Clamp the request and skip the model entirely
    # when there is nothing to search.
    n_neighbors = min(n_neighbors, len(df))
    if n_neighbors < 1:
        return df.iloc[0:0].drop(['text_vector_'], axis=1)

    query_vector = model.encode(query).tolist()

    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
    nbrs.fit(df['text_vector_'].values.tolist())
    # kneighbors returns one row of positional indices per query vector.
    _, indices = nbrs.kneighbors([query_vector])

    df_search = df.iloc[indices[0]].drop(['text_vector_'], axis=1)
    return df_search.sort_values('price', ascending=False)
def filter_df(df, column_name, filter_type, filter_value):
    """Return the rows of *df* whose *column_name* satisfies a comparison.

    Args:
        df: DataFrame to filter.
        column_name: Name of the column to compare.
        filter_type: One of '==', '>=' or '<='.
        filter_value: Value the column is compared against.

    Returns:
        The rows of *df* for which ``df[column_name] <filter_type> filter_value``
        holds.

    Raises:
        ValueError: If *filter_type* is not one of the supported operators.
    """
    import operator

    comparisons = {
        '==': operator.eq,
        '>=': operator.ge,
        '<=': operator.le,
    }
    try:
        compare = comparisons[filter_type]
    except KeyError:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"unsupported filter_type {filter_type!r}; expected one of {sorted(comparisons)}"
        ) from None
    return df[compare(df[column_name], filter_value)]
def predict(history, input1, input2, input3, input4):
    """Run one search and guess the settings of the next one.

    Args:
        history: List of previous searches; the current one is appended to it
            in place as ``[input1, input2, input3, input4]``.
        input1: Upper bound applied to the 'price' column.
        input2: Value the 'sq. meters' column must equal.
        input3: Value the 'location' column must equal.
        input4: Free-text query handed to ``search``.

    Returns:
        A pair ``(df_result, prediction)``: the search result for the filtered
        listings, and a three-item list holding the rounded mean of all
        recorded prices, the allowed size closest to the mean recorded size,
        and the most frequently recorded location.
    """
    history.append([input1, input2, input3, input4])
    print(history)

    # Narrow the listings step by step: location, then size, then price cap.
    matching = filter_df(df, 'location', '==', input3)
    matching = filter_df(matching, 'sq. meters', '==', input2)
    matching = filter_df(matching, 'price', '<=', input1)
    df_result = search(matching, input4)

    # Summarise everything searched so far, the current request included.
    past_prices = [entry[0] for entry in history]
    past_sizes = [entry[1] for entry in history]
    past_locations = [entry[2] for entry in history]
    mean_price = round(statistics.mean(past_prices))
    typical_size = closest_number(statistics.mean(past_sizes))
    usual_location = statistics.mode(past_locations)

    return df_result, [mean_price, typical_size, usual_location]
with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
    # Search history handed to predict() as its first argument.  gr.State is
    # the current name of the component; gr.Variable is a deprecated alias.
    history = gr.State(value=[])
    gr.Markdown(
    """
    # Airbnb Search Engine
    """
    )
    # ``step`` is the Slider keyword for the increment; ``step_size`` is not a
    # Slider parameter and was ignored with a warning.
    input1 = gr.Slider(100, 1200, value=700, step=100, label="Max Price")
    # Radio is single-choice by nature and has no ``multiselect`` parameter.
    input2 = gr.Radio([25, 40, 45, 55, 60, 70], label='square meters', value=45)
    input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], label='State', value='Brooklyn')
    input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')
    btn = gr.Button(value="Search for a Room")
    output1 = gr.Dataframe()
    output2 = gr.Textbox(label='prediction for the next search')
    # predict() returns (result table, prediction), matching the two outputs.
    btn.click(predict, [history, input1, input2, input3, input4], [output1, output2])
demo.launch(share=False)

df_encoded.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efe09f27cabb790b1de79ba1483bceded0499ef48627bde47756b1905dd72a91
+size 48169491

history.ipynb ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[40]\n",
+      "[40, 30]\n",
+      "[40, 30, 10]\n",
+      "[40, 30, 10, 10]\n",
+      "[40, 30, 10, 10, 10]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import gradio as gr\n",
+    "import statistics\n",
+    "\n",
+    "def predict(history, input1):\n",
+    "    history.append(input1)\n",
+    "\n",
+    "    print(history)\n",
+    "    total = statistics.mean(history)\n",
+    "\n",
+    "    return total\n",
+    "\n",
+    "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
+    "    gr.Markdown(\n",
+    "    \"\"\"\n",
+    "    # Gradio with History\n",
+    "    \"\"\"\n",
+    "    )\n",
+    "    history = gr.Variable(value=[]) #beginning\n",
+    "    input1 = gr.Radio([10, 20, 30, 40, 50], multiselect=False, label='value')\n",
+    "    btn = gr.Button(value=\"Search for a Room\")\n",
+    "    output1 = gr.Textbox(label='value')\n",
+    "    # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
+    "    btn.click(predict, [history, input1], [output1])\n",
+    "demo.launch(share=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}