Spaces:
Sleeping
Sleeping
| import pickle | |
| import pandas as pd | |
| import shap | |
| from shap.plots._force_matplotlib import draw_additive_plot | |
| import gradio as gr | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import xgboost as xgb | |
| from scipy import stats | |
| from gradio import Interface | |
| from gradio.components import Markdown, Row, Column, Slider, Dropdown, CheckboxGroup, Button, Textbox, Dataframe | |
| from category_encoders import TargetEncoder | |
# Load DataFrames
# bb_df backs the "similar beers" lookup; bb_df_percentile supplies the rating
# distribution that predictions are ranked against.
bb_df = pd.read_csv('beer_brewery_imputed.csv')
bb_df_percentile = pd.read_csv('bb_df_testing.csv')

# Example rows offered in the UI. The ratings-count column is not a model
# input, so it is dropped from the working copy (the full table is kept too).
aslin_example_df_full = pd.read_csv('App_Example_Aslin_Update.csv')
aslin_example_df = aslin_example_df_full.drop(columns=['Number of Ratings Beer'])
# The ABV input is expressed in percent ("ABV %"), so scale the example values
# by 100 to match it.
aslin_example_df['ABV'] = 100 * aslin_example_df['ABV']
# Load pickle files
# NOTE: unpickling can execute arbitrary code. Only load artifacts that come
# from a trusted source; never point these paths at user-supplied files.
def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    with open(path, 'rb') as file:
        return pickle.load(file)


unique_breweries_list = _load_pickle('unique_brewery_file.pickle')
loaded_model_regressor = _load_pickle("XGB_Untappd_regressor_FlavorBreakout.pkl")
loaded_model = _load_pickle("XGB_Untappd_4_classifier_FlavorBreakout.pkl")
loaded_enc_regressor = _load_pickle("target_encoder_regressor_flavorbreakout.pkl")
loaded_enc_classification = _load_pickle("target_encoder_classification_flavorbreakout.pkl")
# Define choices
# Allowed values for the categorical dropdowns in the UI.
region_choices = [
    'Far West', 'Great Lakes', 'Mideast', 'Non-Con', 'Northeast',
    'OTHER', 'Plains', 'Rocky Mountain', 'Southeast', 'Southwest',
]
style_choices = [
    'Altbier',
    'Barleywine - American',
    'Belgian Blonde',
    'Blonde Ale',
    'Bock - Doppelbock',
    'Brown Ale - American',
    'Cream Ale',
    'Dark Ale',
    'IPA - American',
    'Lager - American',
    'Pilsner - German',
    'Stout - American',
    'Wheat Beer - American Pale Wheat',
]
brewery_style_choices = [
    'Brew Pub', 'Cidery', 'Contract Brewery', 'Macro Brewery',
    'Micro Brewery', 'Nano Brewery', 'OTHER', 'Regional Brewery',
]
state_choices = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
    'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA',
    'MD', 'ME', 'MI', 'MISSING', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND',
    'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA',
    'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI',
    'WV', 'WY',
]

# Define flavors and hops
# Each name doubles as a 0/1 feature column of the model input, in this order.
flavors_list = [
    'Apple', 'Apricot', 'Berry', 'Bitter', 'Caramel', 'Chocolate', 'Citrus',
    'Clove', 'Coffee', 'Dry', 'Earthy', 'Fig', 'Floral', 'Fruity', 'Funky',
    'Grapefruit', 'Hazelnut', 'Herbal', 'Malt', 'Nutmeg', 'Nutty', 'Peach',
    'Pear', 'Peat', 'Pepper', 'Pine', 'Plum', 'Resin', 'Salty', 'Smoky',
    'Sour', 'Spicy', 'Strawberry', 'Sweet', 'Tart', 'Toast', 'Toffee',
    'Tropical', 'Vanilla',
]
hops_list = [
    'Amarillo', 'Cascade', 'Centennial', 'Chinook', 'Citra', 'Columbus',
    'Crystal', 'Fuggle', 'Galaxy', 'Golding', 'Hallertau', 'Magnum',
    'Mosaic', 'Noble', 'Nugget', 'Saaz', 'Simcoe', 'Tettnang', 'Warrior',
    'Willamette',
]
# Setup SHAP
# Built once at import time from the regressor so each prediction can reuse it.
explainer = shap.Explainer(
    loaded_model_regressor,
)
# Function to extract selected items
def extract_selected_items(row, items_list, prefix):
    """Return the names in *items_list* that are flagged with 1 in *row*.

    Args:
        row: Mapping-like record indexed by ``prefix + name`` for every name
            in *items_list*.
        items_list: Candidate names, checked in order.
        prefix: String prepended to each name to form the lookup key.

    Returns:
        A list of the names whose entry in *row* equals 1, in the order they
        appear in *items_list*.
    """
    return [
        name
        for name in items_list
        if row[prefix + name] == 1
    ]
# Function to filter beers
def filter_beers(style, state):
    """Return up to five of the most-rated beers of a given style and state.

    Rows are read from the module-level ``bb_df`` table, ranked by
    ``'Number of Ratings Beer'`` (highest first) and formatted for display.

    Args:
        style: Value matched for equality against the ``'Style'`` column.
        state: Value matched for equality against the ``'State'`` column.

    Returns:
        A DataFrame of at most five rows with the columns ``Brewery``,
        ``Beer Name``, ``Avg Rating`` (rounded to 2 decimals), ``# Ratings``
        (string with thousands separators), ``Style``, ``ABV`` (string such
        as ``'6.5%'``), ``IBU`` (int) and ``State``. It is empty when nothing
        matches.
    """
    same_style_and_state = (bb_df['Style'] == style) & (bb_df['State'] == state)
    # Beers with no recorded ABV are left out; ABV is not otherwise used to
    # narrow the matches.
    candidates = bb_df[same_style_and_state].dropna(subset=['ABV'])

    display_columns = [
        'Brewery', 'Beer Name', 'Average Rating Beer', 'Number of Ratings Beer',
        'Style', 'ABV', 'IBU', 'State',
    ]
    top_rated = candidates.sort_values(by='Number of Ratings Beer', ascending=False).head(5)
    # rename() returns a new frame, so the column assignments below do not
    # write into a slice of bb_df.
    limited_df = top_rated[display_columns].rename(
        columns={'Average Rating Beer': 'Avg Rating', 'Number of Ratings Beer': '# Ratings'}
    )

    # ABV is scaled by 100 here so it is displayed as a percentage.
    limited_df['ABV'] = (limited_df['ABV'] * 100).round(2).astype(str) + '%'
    limited_df['Avg Rating'] = limited_df['Avg Rating'].round(2)
    limited_df['IBU'] = limited_df['IBU'].astype(int)
    limited_df['# Ratings'] = limited_df['# Ratings'].map('{:,}'.format)
    return limited_df
# Main function
def main_func(BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group):
    """Predict a beer's rating and build every output shown in the UI.

    Args:
        BeerName: Display name; only used in the percentile heading.
        ABV: Alcohol by volume in percent (e.g. ``4.5``); divided by 100
            before being passed to the encoder/model.
        IBU: Bitterness value; converted to float.
        Style: Beer style, passed to the encoder and used for comparisons.
        BreweryStyle: Brewery style, passed to the encoder.
        Region: Region, passed to the encoder.
        State: State, passed to the encoder and used for comparisons.
        Flavor_Group: Collection of selected flavor names (``None`` is
            treated as no selection).
        Hop_Group: Collection of selected hop names (``None`` is treated as
            no selection).

    Returns:
        A 5-tuple ``(figure, similar_beers, score, percentiles, heading)``:
        the matplotlib figure holding the SHAP bar plot, the DataFrame from
        ``filter_beers(Style, State)``, the predicted rating rounded to two
        decimals as a string, a dict mapping each comparison-group label to a
        one-element list holding the prediction's percentile as a fraction
        (0-1), and the HTML heading for the percentile panel.
    """
    import html  # local import: only needed to escape the beer name below

    flavor_picks = [] if Flavor_Group is None else Flavor_Group
    hop_picks = [] if Hop_Group is None else Hop_Group

    # One-row frame: the six base features followed by a 0/1 flag per flavor
    # and per hop, in the order of flavors_list and hops_list.
    new_row = pd.DataFrame(
        columns=['ABV', 'IBU', 'Style', 'Brewery Style', 'Region', 'State'] + flavors_list + hops_list
    )
    new_row.loc[0] = (
        [float(ABV), float(IBU), Style, BreweryStyle, Region, State]
        + [1 if flavor in flavor_picks else 0 for flavor in flavors_list]
        + [1 if hop in hop_picks else 0 for hop in hops_list]
    )
    new_row[['ABV', 'IBU']] = new_row[['ABV', 'IBU']].astype(float)
    # ABV arrives in percent; presumably the encoder and model were fit on
    # fractions (confirm against the training data).
    new_row['ABV'] = new_row['ABV'] / 100

    # Only the regressor is run: its prediction and SHAP values are the only
    # model outputs this function returns.
    new_row_encoded_regressor = loaded_enc_regressor.transform(new_row.copy())
    score_predict = loaded_model_regressor.predict(new_row_encoded_regressor)[0]
    score_predict = round(score_predict, 2)
    score_predict_str = str(score_predict)

    # show=False leaves the bar plot on the current pyplot figure; grab that
    # figure to return it, then close it so pyplot does not keep it open.
    shap_values = explainer(new_row_encoded_regressor)
    shap.plots.bar(shap_values[0], max_display=7, order=shap.Explanation.abs, show_data='auto', show=False)
    plt.tight_layout()
    local_plot = plt.gcf()
    plt.close()

    similar_beers = filter_beers(Style, State)

    in_state = bb_df_percentile['State'] == State
    in_style = bb_df_percentile['Style'] == Style
    comparison_groups = {
        "Overall in USA": bb_df_percentile,
        f" Overall in {State}": bb_df_percentile[in_state],
        f"{Style}s in USA": bb_df_percentile[in_style],
        f"{Style}s in {State}": bb_df_percentile[in_style & in_state],
    }
    percentile_dict0 = {}
    for group_label, group_df in comparison_groups.items():
        # A percentile against an empty sample is undefined, so a group with
        # no beers is left out rather than reported with a meaningless value.
        if group_df.empty:
            continue
        percentile = stats.percentileofscore(group_df['Average Rating Beer'], score_predict)
        percentile_dict0[group_label] = [percentile.round(1) / 100]

    # The name is user-typed and ends up inside HTML, so escape it.
    safe_name = html.escape(str(BeerName), quote=False)
    title_text = f"<h2><center><b>{safe_name} Rating Prediction Percentiles</b></center></h2>"
    return local_plot, similar_beers, score_predict_str, percentile_dict0, title_text
# Create the UI
# Heading markup and introductory text rendered at the top of the page.
title = "<center><b>🍻 **Untappd Beer Rating Predictor**🍻</b></center>"
description1 = """
This app takes user-inputted beer characteristics to predict the most likely Untappd rating the beer would receive if brewed. The ratings are calculated on a 5-point scale using a model based on data pulled in June 2023.
- The model has a <b>Mean Absolute Error (MAE) of .12</b> and <b> Root Mean Squared Error (RMSE) of .16</b>
- The input variables in this model <b> explain 65% of the variation </b>in the Untappd beer score
"""
theme = gr.themes.Default()

# The Blocks title is the browser-tab title, so it gets plain text; `title`
# carries HTML/Markdown markup and is only used for the on-page heading.
with gr.Blocks(title="Untappd Beer Rating Predictor", theme=theme) as demo:
    Markdown(f"# {title}")
    Markdown(description1)
    submit_btn1 = gr.Button("Predict")

    with Row():
        # Left column: characteristics of the beer to score.
        with Column():
            Markdown("<h2><center><b>New Beer Characteristics</b></center></h2>")
            BeerName = Textbox(label='Beer Name', value='New Beer 1')
            ABV = Slider(label="ABV %", minimum=0, maximum=20, value=4.5, step=.1)
            IBU = Slider(label="IBU", minimum=0.0, maximum=200, value=41, step=1)
            # The default has to be one of the offered choices.
            Style = Dropdown(choices=style_choices, label="Beer Style", value='IPA - American')
            BreweryStyle = Dropdown(choices=brewery_style_choices, label="Brewery Style", value='Micro Brewery')
            Region = Dropdown(choices=region_choices, label="USA Region", value='Mideast')
            State = Dropdown(choices=state_choices, label="State", value='VA')
            Flavor_Group = CheckboxGroup(choices=flavors_list, label="Flavors")
            Hop_Group = CheckboxGroup(choices=hops_list, label="Hops")

        # Right column: prediction, explanation plot and percentiles.
        with gr.Column(visible=True) as output_col:
            Markdown("<h2><center><b>Untappd Rating Prediction</b></center></h2>")
            score_predict_str = gr.Label(label="XGBoost Regressor")
            Markdown("<h2><center><b>Characteristics Affecting Rating Prediction</b></center></h2>")
            local_plot = gr.Plot(label='Shap:')
            percentile_title = Markdown("<h2><center><b>Untappd Rating Percentiles</b></center></h2>")
            # No initial value: the label stays empty until a prediction runs.
            percentile_dict0 = gr.Label(label='Percentile', show_label=True)

    submit_btn2 = gr.Button("Predict")
    Markdown("""---""")

    with gr.Row():
        with gr.Column():
            Markdown("<h1><center><b>Similar Beers</b></center></h1>")
            similar_beers = Dataframe(label="", type="pandas")

    # Both buttons and the examples run the same prediction with the same
    # components, so the input/output lists are defined once.
    prediction_inputs = [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group]
    prediction_outputs = [local_plot, similar_beers, score_predict_str, percentile_dict0, percentile_title]

    for predict_button, endpoint_name in (
        (submit_btn1, "Untappd_Rating_Model"),
        (submit_btn2, "Untappd_Rating_Model1"),
    ):
        predict_button.click(
            main_func,
            prediction_inputs,
            prediction_outputs,
            api_name=endpoint_name,
        )

    # One example per row of the example table, in the order of
    # prediction_inputs; flavor/hop selections are rebuilt from the 0/1 columns.
    examples = [
        [
            row['Beer Name'], row['ABV'], row['IBU'], row['Style'],
            row['Brewery Style'], row['Region'], row['State'],
            extract_selected_items(row, flavors_list, ''),
            extract_selected_items(row, hops_list, ''),
        ]
        for _, row in aslin_example_df.iterrows()
    ]

    Markdown("<h1><center><b>Example Inputs from Aslin Beer Menu</b></center></h1>")
    # Only the first ten examples are offered.
    gr.Examples(
        examples=examples[:10],
        inputs=prediction_inputs,
        outputs=prediction_outputs,
        fn=main_func,
        cache_examples=True,
        label="Aslin Beer List",
    )

demo.launch()