Spaces:
Sleeping
Sleeping
File size: 11,000 Bytes
afeba3f dedfcd6 526349e 565f529 575c5fe 526349e afeba3f dedfcd6 afeba3f dedfcd6 46a5516 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 d217d7f dedfcd6 d217d7f dedfcd6 d217d7f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 46a5516 afeba3f 46a5516 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 d217d7f 17ebb50 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 46a5516 dc18fd6 dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 57842a6 dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 dc18fd6 afeba3f dedfcd6 afeba3f 46a5516 dedfcd6 afeba3f 46a5516 dedfcd6 afeba3f d217d7f dedfcd6 d217d7f 46a5516 dedfcd6 afeba3f dedfcd6 afeba3f dedfcd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import pickle
import pandas as pd
import shap
from shap.plots._force_matplotlib import draw_additive_plot
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from scipy import stats
from gradio import Interface
from gradio.components import Markdown, Row, Column, Slider, Dropdown, CheckboxGroup, Button, Textbox, Dataframe
from category_encoders import TargetEncoder
# Load DataFrames
# Reference beers searched by filter_beers() for the "Similar Beers" table.
bb_df = pd.read_csv('beer_brewery_imputed.csv')
# Rating distribution that main_func() ranks each prediction against.
bb_df_percentile = pd.read_csv('bb_df_testing.csv')
# Example beers offered as pre-filled inputs in the UI.
aslin_example_df_full = pd.read_csv('App_Example_Aslin_Update.csv')
aslin_example_df = aslin_example_df_full.drop(columns=['Number of Ratings Beer'])
# The ABV slider works in percent, while main_func() divides by 100 again
# before encoding, so the example values are scaled up here.
aslin_example_df['ABV'] = aslin_example_df['ABV'].mul(100)
# Load pickle files
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been deserialized.
    """
    # NOTE: unpickling can execute arbitrary code. Only load artifacts that
    # ship with this app; never point this at user-supplied files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


unique_breweries_list = _load_pickle('unique_brewery_file.pickle')
# Regression model (predicted rating) and classification model.
loaded_model_regressor = _load_pickle("XGB_Untappd_regressor_FlavorBreakout.pkl")
loaded_model = _load_pickle("XGB_Untappd_4_classifier_FlavorBreakout.pkl")
# Encoders applied to the input row before it is passed to each model.
loaded_enc_regressor = _load_pickle("target_encoder_regressor_flavorbreakout.pkl")
loaded_enc_classification = _load_pickle("target_encoder_classification_flavorbreakout.pkl")
# Define choices
# Options offered by the dropdowns in the UI.
region_choices = [
    'Far West',
    'Great Lakes',
    'Mideast',
    'Non-Con',
    'Northeast',
    'OTHER',
    'Plains',
    'Rocky Mountain',
    'Southeast',
    'Southwest',
]
style_choices = [
    'Altbier',
    'Barleywine - American',
    'Belgian Blonde',
    'Blonde Ale',
    'Bock - Doppelbock',
    'Brown Ale - American',
    'Cream Ale',
    'Dark Ale',
    'IPA - American',
    'Lager - American',
    'Pilsner - German',
    'Stout - American',
    'Wheat Beer - American Pale Wheat',
]
brewery_style_choices = [
    'Brew Pub',
    'Cidery',
    'Contract Brewery',
    'Macro Brewery',
    'Micro Brewery',
    'Nano Brewery',
    'OTHER',
    'Regional Brewery',
]
state_choices = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
    'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA',
    'MD', 'ME', 'MI', 'MISSING', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND',
    'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA',
    'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI',
    'WV', 'WY',
]
# Define flavors and hops
# Each name is both a checkbox label and a 0/1 column in the model input row.
flavors_list = [
    'Apple', 'Apricot', 'Berry', 'Bitter', 'Caramel', 'Chocolate',
    'Citrus', 'Clove', 'Coffee', 'Dry', 'Earthy', 'Fig', 'Floral',
    'Fruity', 'Funky', 'Grapefruit', 'Hazelnut', 'Herbal', 'Malt',
    'Nutmeg', 'Nutty', 'Peach', 'Pear', 'Peat', 'Pepper', 'Pine',
    'Plum', 'Resin', 'Salty', 'Smoky', 'Sour', 'Spicy', 'Strawberry',
    'Sweet', 'Tart', 'Toast', 'Toffee', 'Tropical', 'Vanilla',
]
hops_list = [
    'Amarillo', 'Cascade', 'Centennial', 'Chinook', 'Citra', 'Columbus',
    'Crystal', 'Fuggle', 'Galaxy', 'Golding', 'Hallertau', 'Magnum',
    'Mosaic', 'Noble', 'Nugget', 'Saaz', 'Simcoe', 'Tettnang', 'Warrior',
    'Willamette',
]
# Setup SHAP
# Build a single explainer around the regression model at start-up; main_func()
# calls it on each encoded input row and plots the resulting SHAP values.
explainer = shap.Explainer(loaded_model_regressor)
# Function to extract selected items
def extract_selected_items(row, items_list, prefix):
    """Return the items whose indicator field in *row* equals 1.

    Args:
        row: Mapping-like object (for example a DataFrame row) indexed by
            field name.
        items_list: Candidate item names, in the order they should be
            returned.
        prefix: String prepended to each item name to form the field name
            looked up in *row*.

    Returns:
        A list of the names from *items_list* for which
        ``row[prefix + item] == 1``, preserving the order of *items_list*.

    Raises:
        KeyError: If *row* has no field for one of the items.
    """
    return [item for item in items_list if row[prefix + item] == 1]
# Function to filter beers
def filter_beers(style, state, abv=None, abv_tolerance=0.03, beers_df=None):
    """Return up to five of the most-rated beers of one style from one state.

    Args:
        style: Value to match exactly against the ``Style`` column.
        state: Value to match exactly against the ``State`` column.
        abv: Optional reference ABV in the same units as the ``ABV`` column
            (a fraction: the column is multiplied by 100 for display). When
            given, only beers within ``abv_tolerance`` of it are kept. When
            ``None`` (the default) no ABV window is applied.
        abv_tolerance: Largest allowed absolute ABV difference when ``abv``
            is given. Defaults to 0.03.
        beers_df: DataFrame to search. Defaults to the module-level ``bb_df``.

    Returns:
        A DataFrame with the columns ``Brewery``, ``Beer Name``,
        ``Avg Rating``, ``# Ratings``, ``Style``, ``ABV``, ``IBU`` and
        ``State``, sorted by rating count (highest first) and limited to
        five rows. ``ABV`` is a percent string such as ``'6.5%'`` and
        ``# Ratings`` a string with thousands separators.
    """
    source_df = bb_df if beers_df is None else beers_df
    matches = source_df[(source_df['Style'] == style) & (source_df['State'] == state)].copy()

    # Rows without an ABV are excluded; with no reference ABV this is the only
    # ABV-based restriction.
    matches = matches[matches['ABV'].notna()]
    if abv is not None:
        matches = matches[(matches['ABV'] - abv).abs() <= abv_tolerance]

    display_columns = ['Brewery', 'Beer Name', 'Average Rating Beer', 'Number of Ratings Beer', 'Style', 'ABV', 'IBU', 'State']
    most_rated = matches.sort_values(by='Number of Ratings Beer', ascending=False).head(5)
    limited_df = most_rated[display_columns].rename(
        columns={'Average Rating Beer': 'Avg Rating', 'Number of Ratings Beer': '# Ratings'}
    )

    # Format the values for display in the results table.
    limited_df['ABV'] = (limited_df['ABV'] * 100).round(2).astype(str) + '%'
    limited_df['Avg Rating'] = limited_df['Avg Rating'].round(2)
    limited_df['IBU'] = limited_df['IBU'].astype(int)
    limited_df['# Ratings'] = limited_df['# Ratings'].apply(lambda count: '{:,}'.format(count))
    return limited_df
# Helper: percentile of a score within a reference rating distribution
def _rating_percentile(reference_ratings, score):
    """Return the percentile rank of *score* in *reference_ratings* as a 0-1 fraction.

    The percentile is rounded to one decimal place before being divided by
    100. If there are no reference ratings, or the percentile cannot be
    computed (NaN), 0.0 is returned so the value stays usable as a label
    confidence.
    """
    if len(reference_ratings) == 0:
        return 0.0
    percentile = stats.percentileofscore(reference_ratings, score)
    if np.isnan(percentile):
        return 0.0
    return percentile.round(1) / 100


# Main function
def main_func(BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group):
    """Predict the rating of a described beer and build everything the UI shows.

    Args:
        BeerName: Name used in the percentile section title.
        ABV: Alcohol by volume in percent; divided by 100 before encoding.
        IBU: Bitterness value, converted to float.
        Style: Beer style; also used to pick comparison beers.
        BreweryStyle: Brewery style category.
        Region: Region category.
        State: State code; also used to pick comparison beers.
        Flavor_Group: Iterable of selected names from ``flavors_list``
            (``None`` is treated as no selection).
        Hop_Group: Iterable of selected names from ``hops_list``
            (``None`` is treated as no selection).

    Returns:
        A 5-tuple ``(local_plot, similar_beers, score_predict_str,
        percentile_dict0, title_text)``:
        the SHAP bar-plot figure for this prediction, the DataFrame from
        ``filter_beers(Style, State)``, the predicted rating rounded to two
        decimals as a string, a dict mapping four comparison-group labels to
        the prediction's percentile in that group as a 0-1 fraction, and an
        HTML heading containing the beer name.
    """
    import html  # local import: only needed to escape the user-supplied name

    chosen_flavors = Flavor_Group or []
    chosen_hops = Hop_Group or []

    # One-row frame in the column order used throughout this file:
    # numeric/categorical fields, then one 0/1 flag per flavor and per hop.
    feature_columns = ['ABV', 'IBU', 'Style', 'Brewery Style', 'Region', 'State'] + flavors_list + hops_list
    new_row = pd.DataFrame(columns=feature_columns)
    new_row.loc[0] = (
        [float(ABV), float(IBU), Style, BreweryStyle, Region, State]
        + [1 if flavor in chosen_flavors else 0 for flavor in flavors_list]
        + [1 if hop in chosen_hops else 0 for hop in hops_list]
    )
    new_row[['ABV', 'IBU']] = new_row[['ABV', 'IBU']].astype(float)
    new_row['ABV'] = new_row['ABV'] / 100

    # NOTE: only the regressor feeds the UI outputs; the classification model
    # loaded at module level is not consulted here.
    new_row_encoded_regressor = loaded_enc_regressor.transform(new_row.copy())
    score_predict = round(loaded_model_regressor.predict(new_row_encoded_regressor)[0], 2)
    score_predict_str = str(score_predict)

    # SHAP bar plot of the features contributing most to this prediction.
    shap_values = explainer(new_row_encoded_regressor)
    shap.plots.bar(shap_values[0], max_display=7, order=shap.Explanation.abs, show_data='auto', show=False)
    plt.tight_layout()
    local_plot = plt.gcf()
    # Close the figure in pyplot so figures do not pile up across calls; the
    # Figure object itself is still returned.
    plt.close(local_plot)

    similar_beers = filter_beers(Style, State)

    # Rank the prediction against four groups of reference ratings.
    nr_state_p = State
    nr_style_p = Style
    in_state = bb_df_percentile['State'] == nr_state_p
    in_style = bb_df_percentile['Style'] == nr_style_p
    ratings = bb_df_percentile['Average Rating Beer']
    percentile_dict0 = {
        "Overall in USA": [_rating_percentile(ratings, score_predict)],
        f" Overall in {nr_state_p}": [_rating_percentile(ratings[in_state], score_predict)],
        f"{nr_style_p}s in USA": [_rating_percentile(ratings[in_style], score_predict)],
        f"{nr_style_p}s in {nr_state_p}": [_rating_percentile(ratings[in_style & in_state], score_predict)],
    }

    # The name is user input placed inside HTML markup, so escape it.
    safe_name = html.escape(str(BeerName), quote=False)
    title_text = f"<h2><center><b>{safe_name} Rating Prediction Percentiles</b></center></h2>"
    return local_plot, similar_beers, score_predict_str, percentile_dict0, title_text
# Create the UI
title = "<center><b>🍻 **Untappd Beer Rating Predictor**🍻</b></center>"
description1 = """
This app takes user-inputted beer characteristics to predict the most likely Untappd rating the beer would receive if brewed. The ratings are calculated on a 5-point scale using a model based on data pulled in June 2023.
- The model has a <b>Mean Absolute Error (MAE) of .12</b> and <b> Root Mean Squared Error (RMSE) of .16</b>
- The input variables in this model <b> explain 65% of the variation </b>in the Untappd beer score
"""
theme = gr.themes.Default()
# `title` above contains HTML/markdown for the on-page heading; the Blocks
# title is the browser-tab title, so it gets plain text instead.
with gr.Blocks(title="Untappd Beer Rating Predictor", theme=theme) as demo:
    Markdown(f"# {title}")
    Markdown(description1)
    submit_btn1 = gr.Button("Predict")
    with gr.Row():
        # Left column: the beer description entered by the user.
        with gr.Column():
            Markdown("<h2><center><b>New Beer Characteristics</b></center></h2>")
            BeerName = Textbox(label='Beer Name', value='New Beer 1')
            ABV = Slider(label="ABV %", minimum=0, maximum=20, value=4.5, step=.1)
            IBU = Slider(label="IBU", minimum=0.0, maximum=200, value=41, step=1)
            # The default must be one of style_choices.
            Style = Dropdown(choices=style_choices, label="Beer Style", value='IPA - American')
            BreweryStyle = Dropdown(choices=brewery_style_choices, label="Brewery Style", value='Micro Brewery')
            Region = Dropdown(choices=region_choices, label="USA Region", value='Mideast')
            State = Dropdown(choices=state_choices, label="State", value='VA')
            Flavor_Group = CheckboxGroup(choices=flavors_list, label="Flavors")
            Hop_Group = CheckboxGroup(choices=hops_list, label="Hops")
        # Right column: prediction, SHAP plot and percentiles.
        with gr.Column(visible=True) as output_col:
            Markdown("<h2><center><b>Untappd Rating Prediction</b></center></h2>")
            score_predict_str = gr.Label(label="XGBoost Regressor")
            Markdown("<h2><center><b>Characteristics Affecting Rating Prediction</b></center></h2>")
            local_plot = gr.Plot(label='Shap:')
            percentile_title = Markdown("<h2><center><b>Untappd Rating Percentiles</b></center></h2>")
            # NOTE(review): `value=float` passes the builtin as a callable;
            # presumably Gradio calls it on load, giving 0.0 - confirm intent.
            percentile_dict0 = gr.Label(label='Percentile', show_label=True, value=float)
    submit_btn2 = gr.Button("Predict")
    Markdown("""---""")
    with gr.Row():
        with gr.Column():
            Markdown("<h1><center><b>Similar Beers</b></center></h1>")
            similar_beers = Dataframe(label="", type="pandas")

    # Both buttons and the examples share the same wiring: inputs follow
    # main_func's parameter order, outputs follow its return order.
    model_inputs = [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group]
    model_outputs = [local_plot, similar_beers, score_predict_str, percentile_dict0, percentile_title]
    submit_btn1.click(main_func, model_inputs, model_outputs, api_name="Untappd_Rating_Model")
    submit_btn2.click(main_func, model_inputs, model_outputs, api_name="Untappd_Rating_Model1")

    # One example per row of the Aslin sheet; the 0/1 flavor and hop columns
    # are turned back into lists of selected names for the checkbox groups.
    examples = [
        [
            row['Beer Name'], row['ABV'], row['IBU'], row['Style'],
            row['Brewery Style'], row['Region'], row['State'],
            extract_selected_items(row, flavors_list, ''),
            extract_selected_items(row, hops_list, ''),
        ]
        for _, row in aslin_example_df.iterrows()
    ]
    Markdown("<h1><center><b>Example Inputs from Aslin Beer Menu</b></center></h1>")
    gr.Examples(
        examples[:10],
        model_inputs,
        model_outputs,
        main_func,
        cache_examples=True,
        label="Aslin Beer List",
    )
demo.launch()
|