Spaces:
Runtime error
Runtime error
| import os | |
| os.system('pip install openpyxl') | |
| os.system('pip install sentence-transformers') | |
| import pandas as pd | |
| import gradio as gr | |
| import statistics | |
| from sklearn.neighbors import NearestNeighbors | |
| from sentence_transformers import SentenceTransformer | |
# Load the pre-encoded listings. The 'text_vector_' column holds the vector
# that search() feeds to the nearest-neighbour index.
df = pd.read_parquet('df_encoded.parquet')

# Overwrite 'neighbourhood group' by row position so the four locations
# offered in the UI are all populated. The assignment goes through .iloc in
# a single indexing step: the chained form df[col][a:b] = ... writes to an
# intermediate Series, which pandas warns about and which is a no-op under
# copy-on-write. Assigning a scalar also works for a tail of any length.
_group_col = df.columns.get_loc('neighbourhood group')
df.iloc[0:2500, _group_col] = 'Manhattan'
df.iloc[2500:5000, _group_col] = 'Brooklyn'
df.iloc[5000:7500, _group_col] = 'Queens'
df.iloc[7500:, _group_col] = 'Bronx'
df['location'] = df['neighbourhood group']

# Keep only the columns shown in the results table plus the search vector.
df = df[['price', 'sq. meters', 'description', 'location', 'host name', 'cancellation_policy', 'house_rules', 'text_vector_']]
df = df.reset_index(drop=True)

# Sentence encoder used to embed the user's query
# (alternative checkpoint: 'all-MiniLM-L6-v2').
model = SentenceTransformer('all-mpnet-base-v2')

# NOTE: the nearest-neighbour index is not built here; search() fits it on
# every call, after the location/size/price filters have been applied.
def closest_number(x, candidates=(25, 40, 45, 55, 60, 70)):
    """Return the element of *candidates* closest to *x*.

    Args:
        x: The number to snap to the nearest candidate.
        candidates: Non-empty sequence of allowed values. Defaults to the
            square-meter options offered in the UI.

    Returns:
        The candidate with the smallest absolute distance to *x*. On a tie
        the candidate that appears first in *candidates* wins.

    Raises:
        ValueError: If *candidates* is empty.
    """
    # min() keeps the first minimal element, which matches the strict
    # "distance < min_distance" comparison of a manual scan.
    return min(candidates, key=lambda candidate: abs(x - candidate))
def search(df, query, n_results=3):
    """Return the rows of *df* whose vectors are nearest to the encoded *query*.

    The query text is encoded with the module-level ``model`` and compared
    against ``df['text_vector_']`` using a ball-tree nearest-neighbour index
    that is fitted on *df* for this call.

    Args:
        df: Candidate listings. Must contain the 'text_vector_' and 'price'
            columns.
        query: Free-text search query.
        n_results: Maximum number of rows to return (default 3).

    Returns:
        A DataFrame with at most *n_results* rows, without the
        'text_vector_' column, sorted by 'price' in descending order. The
        result is empty when *df* has no rows or *n_results* is below 1.
    """
    if df.empty or n_results < 1:
        # Nothing to rank: NearestNeighbors cannot be fitted on zero samples
        # and rejects a neighbour count below 1.
        return df.iloc[0:0].drop(['text_vector_'], axis=1)

    query_vector = model.encode(query).tolist()

    # kneighbors() raises when asked for more neighbours than fitted samples,
    # which happens whenever the upstream filters leave fewer rows than
    # requested.
    n_neighbors = min(n_results, len(df))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(
        df['text_vector_'].values.tolist()
    )
    _, indices = nbrs.kneighbors([query_vector])

    # indices has one row per query vector; we only sent one.
    df_search = df.iloc[indices[0]].drop(['text_vector_'], axis=1)
    return df_search.sort_values('price', ascending=False)
def filter_df(df, column_name, filter_type, filter_value):
    """Return the rows of *df* whose *column_name* satisfies a comparison.

    Args:
        df: DataFrame to filter.
        column_name: Name of the column to compare.
        filter_type: Comparison operator as a string: '==', '>=' or '<='.
        filter_value: Value each entry of the column is compared against.

    Returns:
        A DataFrame containing only the rows for which
        ``df[column_name] <filter_type> filter_value`` holds.

    Raises:
        ValueError: If *filter_type* is not one of the supported operators.
    """
    column = df[column_name]
    if filter_type == '==':
        mask = column == filter_value
    elif filter_type == '>=':
        mask = column >= filter_value
    elif filter_type == '<=':
        mask = column <= filter_value
    else:
        # Fail loudly instead of falling through to an unbound local.
        raise ValueError(
            f"unsupported filter_type {filter_type!r}; expected '==', '>=' or '<='"
        )
    return df[mask]
def predict(history, input1, input2, input3, input4):
    """Run one search and guess the settings of the next one.

    Args:
        history: List of earlier searches; the current one is appended to it
            in place as ``[input1, input2, input3, input4]``.
        input1: Maximum price.
        input2: Required size in square meters (exact match).
        input3: Required location (exact match).
        input4: Free-text query passed to search().

    Returns:
        A ``(df_result, prediction)`` tuple. ``df_result`` is what search()
        returns for the filtered listings. ``prediction`` is a three-item
        list built from the whole history: the rounded mean price, the
        allowed size closest to the mean size, and the most common location.
    """
    history.append([input1, input2, input3, input4])
    print(history)

    # Narrow the listings step by step: location, then size, then price cap.
    candidates = filter_df(df, 'location', '==', input3)
    candidates = filter_df(candidates, 'sq. meters', '==', input2)
    candidates = filter_df(candidates, 'price', '<=', input1)
    df_result = search(candidates, input4)

    # Transpose the history into one sequence per input; the query texts
    # are not used for the prediction.
    past_prices, past_sizes, past_locations, _ = zip(*history)
    mean_price = round(statistics.mean(past_prices))  # price
    typical_size = closest_number(statistics.mean(past_sizes))  # square meters
    usual_location = statistics.mode(past_locations)  # state

    return df_result, [mean_price, typical_size, usual_location]
# Build the search UI: four inputs, a button, and two outputs wired to predict().
with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
    # Search history handed to predict() on every click; starts empty.
    # gr.State is the supported name (gr.Variable was its deprecated alias).
    history = gr.State(value=[])

    gr.Markdown(
        """
        # Airbnb Search Engine
        """
    )

    # Slider's increment parameter is `step`; `step_size` is not a Slider option.
    input1 = gr.Slider(100, 1200, value=700, step=100, label="Max Price")
    # Radio is single-choice by design and has no `multiselect` option.
    input2 = gr.Radio([25, 40, 45, 55, 60, 70], label='square meters', value=45)
    input3 = gr.Radio(['Manhattan', 'Brooklyn', 'Queens', 'Bronx'], label='State', value='Brooklyn')
    input4 = gr.Textbox(label='Query', value='I want to take a break from work 😴!!!')

    btn = gr.Button(value="Search for a Room")

    output1 = gr.Dataframe()
    output2 = gr.Textbox(label='prediction for the next search')

    btn.click(predict, [history, input1, input2, input3, input4], [output1, output2])

demo.launch(share=False)