chrisclark1016 committed on
Commit
dedfcd6
·
verified ·
1 Parent(s): effd463

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -226
app.py CHANGED
@@ -4,345 +4,195 @@ import shap
4
  from shap.plots._force_matplotlib import draw_additive_plot
5
  import gradio as gr
6
  import numpy as np
7
- import matplotlib
8
  import matplotlib.pyplot as plt
9
  import xgboost as xgb
10
- #import gradio.outputs as gro
11
  from scipy import stats
12
  from gradio import Interface
13
  from gradio.components import Markdown, Row, Column, Slider, Dropdown, CheckboxGroup, Button, Textbox, Dataframe
14
- import category_encoders
15
  from category_encoders import TargetEncoder
16
- from datasets import Dataset, load_dataset
17
 
18
-
19
- #Load DFs
20
-
21
- #Set up DF for filtered beers
22
  bb_df = pd.read_csv('beer_brewery_imputed.csv')
23
-
24
- #Set up DF for percentiles (currently only includes beers with 500 or more reviews)
25
  bb_df_percentile = pd.read_csv('bb_df_testing.csv')
26
-
27
- #Set up DF for Aslin Examples
28
  aslin_example_df_full = pd.read_csv('App_Example_Aslin_Update.csv')
29
  aslin_example_df = aslin_example_df_full.drop(['Number of Ratings Beer'], axis=1)
30
  aslin_example_df['ABV'] = aslin_example_df['ABV']*100
31
 
32
-
33
- #Open individual Brewery list
34
  with open('unique_brewery_file.pickle', 'rb') as file:
35
  unique_breweries_list = pickle.load(file)
36
-
37
-
38
- # load the regressor model from disk
39
  loaded_model_regressor = pickle.load(open("XGB_Untappd_regressor_FlavorBreakout.pkl", 'rb'))
40
- # load the classification model from disk
41
  loaded_model = pickle.load(open("XGB_Untappd_4_classifier_FlavorBreakout.pkl", 'rb'))
42
-
43
-
44
- #Categorical Variable Encoder
45
  loaded_enc_regressor = pickle.load(open("target_encoder_regressor_flavorbreakout.pkl", 'rb'))
46
  loaded_enc_classification = pickle.load(open("target_encoder_classification_flavorbreakout.pkl", 'rb'))
47
 
48
- #Define Choices
49
  region_choices = ['Far West','Great Lakes','Mideast','Non-Con','Northeast','OTHER','Plains','Rocky Mountain','Southeast','Southwest']
50
-
51
- style_choices = ['Altbier', 'Australian Sparkling Ale', 'Barleywine - American', 'Barleywine - English', 'Barleywine - Other', 'Belgian Blonde', 'Belgian Dubbel', 'Belgian Enkel / Patersbier', 'Belgian Quadrupel',
52
- 'Belgian Strong Dark Ale', 'Belgian Strong Golden Ale', 'Belgian Tripel', 'Bitter - Best', 'Bitter - Extra Special / Strong (ESB)', 'Bitter - Session / Ordinary', 'Bière de Champagne / Bière Brut',
53
- 'Black & Tan', 'Blonde Ale', 'Bock - Doppelbock', 'Bock - Eisbock', 'Bock - Hell / Maibock / Lentebock', 'Bock - Single / Traditional', 'Bock - Weizenbock', 'Bock - Weizendoppelbock', 'Brett Beer',
54
- 'Brown Ale - American', 'Brown Ale - Belgian', 'Brown Ale - English', 'Brown Ale - Imperial / Double', 'Brown Ale - Other', 'California Common', 'Chilli / Chile Beer', 'Cider - Dry', 'Cider - Graff',
55
- 'Cider - Herbed / Spiced / Hopped', 'Cider - Ice / Applewine', 'Cider - Other Fruit', 'Cider - Perry / Poiré', 'Cider - Rosé', 'Cider - Sweet', 'Cider - Traditional / Apfelwein', 'Corn Beer / Chicha de Jora',
56
- 'Cream Ale', 'Dark Ale', 'Farmhouse Ale - Bière de Coupage', 'Farmhouse Ale - Bière de Garde', 'Farmhouse Ale - Bière de Mars', 'Farmhouse Ale - Grisette', 'Farmhouse Ale - Other', 'Farmhouse Ale - Sahti',
57
- 'Farmhouse Ale - Saison', 'Festbier', 'Flavored Malt Beverage', 'Freeze-Distilled Beer', 'Fruit Beer', 'Gluten-Free', 'Golden Ale - American', 'Golden Ale - English', 'Golden Ale - Other', 'Grape Ale - Italian',
58
- 'Grape Ale - Other', 'Grodziskie / Grätzer', 'Hard Ginger Beer', 'Hard Kombucha / Jun', 'Hard Seltzer', 'Historical Beer - Adambier', 'Historical Beer - Broyhan', 'Historical Beer - Burton Ale', 'Historical Beer - Dampfbier',
59
- 'Historical Beer - Gruit / Ancient Herbed Ale', 'Historical Beer - Kentucky Common', 'Historical Beer - Kottbusser', 'Historical Beer - Kuit / Kuyt / Koyt', 'Historical Beer - Lichtenhainer',
60
- 'Historical Beer - Mumme', 'Historical Beer - Other', 'Historical Beer - Steinbier', 'Honey Beer', 'IPA - American', 'IPA - Belgian', 'IPA - Black / Cascadian Dark Ale', 'IPA - Brett',
61
- 'IPA - Brown', 'IPA - Brut', 'IPA - Cold', 'IPA - English', 'IPA - Farmhouse', 'IPA - Fruited', 'IPA - Imperial / Double', 'IPA - Imperial / Double Black', 'IPA - Imperial / Double Milkshake',
62
- 'IPA - Imperial / Double New England / Hazy', 'IPA - Milkshake', 'IPA - New England / Hazy', 'IPA - New Zealand', 'IPA - Other', 'IPA - Quadruple', 'IPA - Red', 'IPA - Rye', 'IPA - Session',
63
- 'IPA - Sour', 'IPA - Triple', 'IPA - Triple New England / Hazy', 'IPA - White / Wheat', 'Kellerbier / Zwickelbier', 'Koji / Ginjo Beer', 'Kvass', 'Kölsch', 'Lager - Amber / Red', 'Lager - American',
64
- 'Lager - American Amber / Red', 'Lager - American Light', 'Lager - Dark', 'Lager - Dortmunder / Export', 'Lager - Helles', 'Lager - IPL (India Pale Lager)', 'Lager - Japanese Rice', 'Lager - Leichtbier',
65
- 'Lager - Mexican', 'Lager - Munich Dunkel', 'Lager - Other', 'Lager - Pale', 'Lager - Strong', 'Lager - Vienna', 'Lager - Winter', 'Lambic - Framboise', 'Lambic - Fruit', 'Lambic - Gueuze', 'Lambic - Kriek',
66
- 'Lambic - Other', 'Lambic - Traditional', 'Malt Beer', 'Malt Liquor', 'Mead - Acerglyn / Maple Wine', 'Mead - Bochet', 'Mead - Braggot', 'Mead - Cyser', 'Mead - Melomel', 'Mead - Metheglin', 'Mead - Other',
67
- 'Mead - Pyment', 'Mead - Session / Short', 'Mead - Traditional', 'Mild - Dark', 'Mild - Light', 'Mild - Other', 'Märzen', 'Non-Alcoholic Beer - IPA', 'Non-Alcoholic Beer - Lager', 'Non-Alcoholic Beer - Other',
68
- 'Non-Alcoholic Beer - Porter / Stout', 'Non-Alcoholic Beer - Sour', 'Non-Alcoholic Beer - Wheat Beer', 'Old Ale', 'Other', 'Pale Ale - American', 'Pale Ale - Australian', 'Pale Ale - Belgian', 'Pale Ale - English',
69
- 'Pale Ale - Milkshake', 'Pale Ale - New England / Hazy', 'Pale Ale - New Zealand', 'Pale Ale - Other', 'Pale Ale - XPA (Extra Pale)', 'Pilsner - Czech / Bohemian', 'Pilsner - German', 'Pilsner - Imperial / Double',
70
- 'Pilsner - Italian', 'Pilsner - New Zealand', 'Pilsner - Other', 'Porter - American', 'Porter - Baltic', 'Porter - Coffee', 'Porter - English', 'Porter - Imperial / Double', 'Porter - Imperial / Double Baltic',
71
- 'Porter - Imperial / Double Coffee', 'Porter - Other', 'Pumpkin / Yam Beer', 'Rauchbier', 'Red Ale - American Amber / Red', 'Red Ale - Imperial / Double', 'Red Ale - Irish', 'Red Ale - Other', 'Roggenbier', 'Root Beer',
72
- 'Rye Beer', 'Rye Wine', 'Schwarzbier', 'Scotch Ale / Wee Heavy', 'Scottish Ale', 'Scottish Export Ale', 'Shandy / Radler', 'Smoked Beer', 'Sorghum / Millet Beer', 'Sour - Berliner Weisse', 'Sour - Flanders Oud Bruin',
73
- 'Sour - Flanders Red Ale', 'Sour - Fruited', 'Sour - Fruited Berliner Weisse', 'Sour - Fruited Gose', 'Sour - Other', 'Sour - Other Gose', 'Sour - Smoothie / Pastry', 'Sour - Traditional Gose', 'Specialty Grain',
74
- 'Spiced / Herbed Beer', 'Stout - American', 'Stout - Belgian', 'Stout - Coffee', 'Stout - English', 'Stout - Foreign / Export', 'Stout - Imperial / Double', 'Stout - Imperial / Double Coffee', 'Stout - Imperial / Double Milk',
75
- 'Stout - Imperial / Double Oatmeal', 'Stout - Imperial / Double Pastry', 'Stout - Imperial / Double White / Golden', 'Stout - Irish Dry', 'Stout - Milk / Sweet', 'Stout - Oatmeal', 'Stout - Other', 'Stout - Oyster',
76
- 'Stout - Pastry', 'Stout - Russian Imperial', 'Stout - White / Golden', 'Strong Ale - American', 'Strong Ale - English', 'Strong Ale - Other', 'Table Beer', 'Traditional Ale', 'Wheat Beer - American Pale Wheat',
77
- 'Wheat Beer - Dunkelweizen', 'Wheat Beer - Hefeweizen', 'Wheat Beer - Hefeweizen Light / Leicht', 'Wheat Beer - Kristallweizen', 'Wheat Beer - Other', 'Wheat Beer - Wheat Wine', 'Wheat Beer - Witbier / Blanche',
78
- 'Wild Ale - American', 'Wild Ale - Other', 'Winter Ale', 'Winter Warmer']
79
-
80
- brewery_style_choices = ['Brew Pub', 'Cidery', 'Contract Brewery','Macro Brewery','Micro Brewery',
81
- 'Nano Brewery', 'OTHER', 'Regional Brewery']
82
-
83
- # brewery_style_choices = ['Bar / Restaurant / Store', 'Brew Pub', 'Cidery', 'Contract Brewery', 'Home / Non-Commercial Brewery', 'Macro Brewery', 'Meadery',
84
- # 'Micro Brewery', 'Nano Brewery', 'OTHER', 'Regional Brewery']
85
-
86
- state_choices = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MISSING', 'MN', 'MO', 'MS', 'MT', 'NC',
87
- 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SAINT CROIX ISLAND VIRGIN ISLANDS', 'SAINT JOHN ISLAND VIRGIN ISLANDS', 'SAINT THOMAS ISLAND VIRGIN ISLANDS',
88
- 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
89
 
90
  # Define flavors and hops
91
- flavors_list = ['Apple', 'Apricot', 'Berry', 'Bitter', 'Caramel', 'Chocolate', 'Citrus', 'Clove', 'Coffee',
92
- 'Dry', 'Earthy', 'Fig', 'Floral', 'Fruity', 'Funky', 'Grapefruit', 'Hazelnut', 'Herbal',
93
- 'Malt', 'Nutmeg', 'Nutty', 'Peach', 'Pear', 'Peat', 'Pepper', 'Pine', 'Plum', 'Resin',
94
- 'Salty', 'Smoky', 'Sour', 'Spicy', 'Strawberry', 'Sweet', 'Tart', 'Toast', 'Toffee',
95
- 'Tropical', 'Vanilla']
96
-
97
- hops_list = ['Amarillo', 'Cascade', 'Centennial', 'Chinook', 'Citra', 'Columbus', 'Crystal', 'Fuggle',
98
- 'Galaxy', 'Golding', 'Hallertau', 'Magnum', 'Mosaic', 'Noble', 'Nugget', 'Saaz', 'Simcoe', 'Tettnang',
99
- 'Warrior', 'Willamette']
100
-
101
-
102
 
103
  # Setup SHAP
104
- explainer = shap.Explainer(loaded_model_regressor) # PLEASE DO NOT CHANGE THIS.
105
-
106
 
107
- #Extract Lists for Examples Function for checkboxes in examples
108
  def extract_selected_items(row, items_list, prefix):
109
- selected_items = []
110
- for item in items_list:
111
- if row[prefix + item] == 1: # access the DataFrame column directly with the item name
112
- selected_items.append(item)
113
- return selected_items
114
 
115
- #Set up similar beers function
116
  def filter_beers(style, state):
117
- target_abv = 0.03 # Set the target ABV value within the function
118
-
119
  filtered_df = bb_df[(bb_df['Style'] == style) & (bb_df['State'] == state)].copy()
120
  filtered_df['ABV_diff'] = abs(filtered_df['ABV'] - target_abv)
121
  filtered_df.loc[filtered_df['ABV_diff'] > target_abv, 'ABV_diff'] = target_abv
122
  filtered_df = filtered_df[filtered_df['ABV_diff'] <= target_abv]
123
  sorted_df = filtered_df.sort_values(by='Number of Ratings Beer', ascending=False)
124
  limited_df = sorted_df.head(5)[['Brewery', 'Beer Name', 'Average Rating Beer', 'Number of Ratings Beer', 'Style', 'ABV', 'IBU', 'State']]
125
-
126
  limited_df = limited_df.rename(columns={'Average Rating Beer': 'Avg Rating', 'Number of Ratings Beer': '# Ratings'})
127
-
128
  limited_df['ABV'] = (limited_df['ABV'] * 100).round(2).astype(str) + '%'
129
  limited_df['Avg Rating'] = limited_df['Avg Rating'].round(2)
130
  limited_df['IBU'] = limited_df['IBU'].astype(int)
131
- limited_df['# Ratings'] = limited_df['# Ratings'].apply(lambda x: '{:,}'.format(x)) # Add commas to # Ratings
132
-
133
  return limited_df
134
 
135
- # #Define percentiles before main function runs
136
- # percentile_overall = 0
137
- # percentile_state = 0
138
- # percentile_style_overall = 0
139
- # percentile_style_state = 0
140
-
141
  def main_func(BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group):
142
-
143
  flavors_selected = [flavor for flavor in flavors_list if flavor in Flavor_Group]
144
  hops_selected = [hop for hop in hops_list if hop in Hop_Group]
145
-
146
  new_row = pd.DataFrame(columns=['ABV', 'IBU', 'Style', 'Brewery Style', 'Region', 'State'] + flavors_list + hops_list)
147
  new_row.loc[0] = [float(ABV), float(IBU), Style, BreweryStyle, Region, State] + [1 if flavor in Flavor_Group else 0 for flavor in flavors_list] + [1 if hop in Hop_Group else 0 for hop in hops_list]
148
  new_row[['ABV', 'IBU']] = new_row[['ABV', 'IBU']].astype(float)
149
  new_row['ABV'] = new_row['ABV']/100
150
 
151
- # Transform the new_row using the loaded encoder
152
- new_row_class = new_row
153
- new_row_regress = new_row
154
  new_row_encoded_class = loaded_enc_classification.transform(new_row_class)
155
  new_row_encoded_regressor = loaded_enc_regressor.transform(new_row_regress)
156
 
157
  prob = loaded_model.predict_proba(new_row_encoded_class)
158
-
159
- score_predict = loaded_model_regressor.predict(new_row_encoded_regressor)
160
- score_predict = score_predict[0]
161
  score_predict = round(score_predict, 2)
162
  score_predict_str = str(score_predict)
163
- score_predict_float = float(score_predict)
164
-
165
 
166
- #Build SHAP
167
  shap_values = explainer(new_row_encoded_regressor)
168
-
169
  plot = shap.plots.bar(shap_values[0], max_display=7, order=shap.Explanation.abs, show_data='auto', show=False)
170
-
171
  plt.tight_layout()
172
  local_plot = plt.gcf()
173
  plt.close()
174
 
175
- #Build Similar Beers
176
- similar_beers = filter_beers(Style, State) # fetch beers matching the style and state
177
 
178
- #Read in variables for percentiles
179
  nr_state_p = new_row['State'][0]
180
  nr_style_p = new_row['Style'][0]
181
 
182
- #Show Percentiles for prediction
183
  overall_df = bb_df_percentile
184
  state_df = bb_df_percentile[bb_df_percentile['State'] == nr_state_p]
185
  style_overall_df = bb_df_percentile[bb_df_percentile['Style'] == nr_style_p]
186
  style_state_df = bb_df_percentile[(bb_df_percentile['Style'] == nr_style_p) & (bb_df_percentile['State'] == nr_state_p)]
187
 
188
- # Calculate the percentile of a beer
189
-
190
- percentile_overall = stats.percentileofscore(overall_df['Average Rating Beer'], score_predict_float).round(1)/100
191
- percentile_state = stats.percentileofscore(state_df['Average Rating Beer'], score_predict_float).round(1)/100
192
- percentile_style_overall = stats.percentileofscore(style_overall_df['Average Rating Beer'], score_predict_float).round(1)/100
193
- percentile_style_state = stats.percentileofscore(style_state_df['Average Rating Beer'], score_predict_float).round(1)/100
194
 
195
  percentile_dict0 = {
196
- "USA Overall": [percentile_overall],
197
- f"{nr_state_p} Overall": [percentile_state],
198
- f"{nr_style_p} in USA": [percentile_style_overall],
199
- f"{nr_style_p} in {nr_state_p}": [percentile_style_state]
 
 
200
 
201
- }
202
 
203
- # percentile_dict1 = {"Percentile Overall": percentile_overall}
204
- # percentile_dict2 = {f"Percentile {nr_state_p}": percentile_state}
205
- # percentile_dict3 = {f"Percentile {nr_style_p} Overall": percentile_style_overall}
206
- # percentile_dict4 = {f"Percentile {nr_style_p} {nr_state_p}": percentile_style_state}
207
-
208
- # #Convert to dataframe
209
- # percentile_df = pd.DataFrame(
210
- # { "type": ["USA Overall",f"{nr_state_p} Overall",f"USA by Style {nr_style_p}",f"{nr_state_p} by Style {nr_style_p}"],
211
- # "value": [percentile_overall,percentile_state,percentile_style_overall,percentile_style_state],})
212
-
213
-
214
-
215
- return local_plot, similar_beers,score_predict_str,percentile_dict0
216
 
217
 
218
-
219
- #"Below 4.0": float(prob[0][0]), "Above 4.0": 1-float(prob[0][0])},
220
-
221
- #,percentile_overall, percentile_state, percentile_style_overall, percentile_style_state
222
-
223
- # main_func('',.045, 41, 'IPA - Session', 'Micro Brewery', 'Far West', 'CA', [], [])
224
-
225
- ## Create the UI
226
  title = "<center><b>🍻 **Untappd Beer Rating Predictor**🍻</b></center>"
227
  description1 = """
228
- This app predicts beers scores based on Untappd data pulled in June 2023. <b> Mean Average Error (MAE) of .12</b> and <b> Root Mean Squared Error (RMSE) of .16</b>.
229
- The input variables in this model <b> explain 65% of the variation </b>in the Untappd beer score """
230
 
231
- # description2 = """
232
- # To use the app, click on one of the examples, or adjust the values of the seven beer score predictors, and click on Analyze. ✨
233
- # """
234
 
235
- theme = gr.themes.Default()#primary_hue="amber"
236
 
237
- with gr.Blocks(title=title, theme = theme) as demo:
 
 
238
  Markdown(f"# {title}")
239
  Markdown(description1)
240
- # Markdown("""---""")
241
- # Markdown(description2)
242
- # Markdown("""---""")
243
 
244
  submit_btn1 = gr.Button("Predict")
245
  with Row():
246
  with Column():
247
- # BeerName = gr.components.Textbox(label='Beer Name (not required)', value = 'New Beer 1')
248
- # ABV = gr.components.Slider(label="ABV %", minimum=0, maximum=20, value=4.5, step=.1)
249
- # IBU = gr.components.Slider(label="IBU", minimum=0.0, maximum=200, value=41, step=1)
250
- # Style = gr.components.Dropdown(choices=style_choices, label="Select Beer Style", value= 'IPA - Session')
251
- # BreweryStyle = gr.components.Dropdown(choices=brewery_style_choices, label="Select Brewery Style", value= 'Micro Brewery')
252
- # Region = gr.components.Dropdown(choices=region_choices, label="Select USA Region", value= 'Far West')
253
- # State = gr.components.Dropdown(choices=state_choices, label="Select State", value= 'CA')
254
- # # Grouped checkboxes
255
- # Flavor_Group = gr.components.CheckboxGroup(choices=flavors_list, label="Select Flavors")
256
- # Hop_Group = gr.components.CheckboxGroup(choices=hops_list, label="Select Hops")
257
-
258
-
259
- BeerName = Textbox(label='Beer Name (not required)', value = 'New Beer 1')
260
- ABV = Slider(label="ABV %", minimum=0, maximum=20, value=5.5, step=.1)
261
- IBU = Slider(label="IBU", minimum=0.0, maximum=200, value=40, step=1)
262
- Style = Dropdown(choices=style_choices, label="Select Beer Style", value='IPA - New England / Hazy')
263
- BreweryStyle = Dropdown(choices=brewery_style_choices, label="Select Brewery Style", value='Regional Brewery')
264
- Region = Dropdown(choices=region_choices, label="Select USA Region", value='Mideast')
265
- State = Dropdown(choices=state_choices, label="Select State", value='VA')
266
- # Grouped checkboxes
267
- Flavor_Group = CheckboxGroup(choices=flavors_list, label="Select Flavors")
268
- Hop_Group = CheckboxGroup(choices=hops_list, label="Select Hops")
269
-
270
 
271
- #CREATE OUTPUTS
272
  with gr.Column(visible=True) as output_col:
273
- gr.Markdown("<h2><center><b>Untappd Score Prediction</b></center></h2>")
274
  score_predict_str = gr.Label(label="XGBoost Regressor")
275
-
276
- gr.Markdown("<h2><center><b>Prediction Drivers</b></center></h2>")
277
- local_plot = gr.Plot(label = 'Shap:')
278
-
279
- gr.Markdown(f"<h2><center><b>Percentiles for Beer </b></center></h2>")
280
- percentile_dict0 = gr.Label(label ='test', show_label=False)
281
-
282
 
283
- #percentile_df = gr.BarPlot(title = 'test bar plot', x ="type", y="value", vertical=False).style(container=False,)
284
-
285
-
286
- #label = gr.Label(label = "Predicted Label")
287
-
288
-
289
  submit_btn2 = gr.Button("Predict")
290
-
291
  Markdown("""---""")
292
 
293
-
294
-
295
- # Create a separate row for the output of filter_beers function
296
  with gr.Row():
297
  with gr.Column():
298
- gr.Markdown("<h1><center><b>Similar Beers</b></center></h1>")
299
- #similar_beers_df = gro.Dataframe(label="", type="pandas")
300
  similar_beers = Dataframe(label="", type="pandas")
301
 
302
 
303
- #Button Click Events
304
  submit_btn1.click(
305
  main_func,
306
  [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group],
307
- [local_plot,similar_beers,score_predict_str,percentile_dict0],
 
308
  api_name="Untappd_Rating_Model")
309
 
310
  submit_btn2.click(
311
  main_func,
312
  [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group],
313
- [local_plot,similar_beers,score_predict_str,percentile_dict0],
 
314
  api_name="Untappd_Rating_Model1")
315
 
316
- #EXAMPLES
317
- #example_list = aslin_example_df.values.tolist()
318
-
319
- # Create examples
320
  examples = []
321
  for _, row in aslin_example_df.iterrows():
322
- example = []
323
-
324
- # Add other inputs to example here
325
- example.append(row['Beer Name'])
326
- example.append(row['ABV'])
327
- example.append(row['IBU'])
328
- example.append(row['Style'])
329
- example.append(row['Brewery Style'])
330
- example.append(row['Region'])
331
- example.append(row['State'])
332
-
333
- # Convert binary columns to lists of selected flavors and hops
334
- example.append(extract_selected_items(row, flavors_list, ''))
335
- example.append(extract_selected_items(row, hops_list, ''))
336
-
337
  examples.append(example)
338
 
339
- gr.Markdown("<h1><center><b>Aslin Beers Example Inputs</b></center></h1>")
340
- gr.Examples(examples[:50],
341
- [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group], # Flavor_Group, Hop_Group
342
- [local_plot, similar_beers, score_predict_str,percentile_dict0],
343
- main_func,
344
- cache_examples=True, label = "Aslin Beer List")
345
-
346
 
 
 
347
 
348
- demo.launch()
 
4
  from shap.plots._force_matplotlib import draw_additive_plot
5
  import gradio as gr
6
  import numpy as np
 
7
  import matplotlib.pyplot as plt
8
  import xgboost as xgb
 
9
  from scipy import stats
10
  from gradio import Interface
11
  from gradio.components import Markdown, Row, Column, Slider, Dropdown, CheckboxGroup, Button, Textbox, Dataframe
 
12
  from category_encoders import TargetEncoder
 
13
 
14
# Load DataFrames
# Beer catalogue searched by filter_beers for the "similar beers" table.
bb_df = pd.read_csv('beer_brewery_imputed.csv')
# Rating table that main_func compares the predicted score against.
bb_df_percentile = pd.read_csv('bb_df_testing.csv')
aslin_example_df_full = pd.read_csv('App_Example_Aslin_Update.csv')
aslin_example_df = aslin_example_df_full.drop(['Number of Ratings Beer'], axis=1)
# The examples feed the "ABV %" input, and main_func divides that input by 100,
# so the example ABV values are scaled up to percent here.
aslin_example_df['ABV'] = aslin_example_df['ABV']*100


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards.

    NOTE: unpickling can execute arbitrary code, so this must only be pointed
    at artifact files that ship with the app.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


# Load pickle files
unique_breweries_list = _load_pickle('unique_brewery_file.pickle')
loaded_model_regressor = _load_pickle("XGB_Untappd_regressor_FlavorBreakout.pkl")
loaded_model = _load_pickle("XGB_Untappd_4_classifier_FlavorBreakout.pkl")
loaded_enc_regressor = _load_pickle("target_encoder_regressor_flavorbreakout.pkl")
loaded_enc_classification = _load_pickle("target_encoder_classification_flavorbreakout.pkl")
29
 
30
# Define choices
region_choices = ['Far West','Great Lakes','Mideast','Non-Con','Northeast','OTHER','Plains','Rocky Mountain','Southeast','Southwest']

# Complete style list. It has to contain every value the UI uses as a dropdown
# default or loads from the example rows, so it must not be abbreviated.
style_choices = [
    'Altbier', 'Australian Sparkling Ale', 'Barleywine - American', 'Barleywine - English', 'Barleywine - Other',
    'Belgian Blonde', 'Belgian Dubbel', 'Belgian Enkel / Patersbier', 'Belgian Quadrupel',
    'Belgian Strong Dark Ale', 'Belgian Strong Golden Ale', 'Belgian Tripel', 'Bitter - Best',
    'Bitter - Extra Special / Strong (ESB)', 'Bitter - Session / Ordinary', 'Bière de Champagne / Bière Brut',
    'Black & Tan', 'Blonde Ale', 'Bock - Doppelbock', 'Bock - Eisbock', 'Bock - Hell / Maibock / Lentebock',
    'Bock - Single / Traditional', 'Bock - Weizenbock', 'Bock - Weizendoppelbock', 'Brett Beer',
    'Brown Ale - American', 'Brown Ale - Belgian', 'Brown Ale - English', 'Brown Ale - Imperial / Double',
    'Brown Ale - Other', 'California Common', 'Chilli / Chile Beer', 'Cider - Dry', 'Cider - Graff',
    'Cider - Herbed / Spiced / Hopped', 'Cider - Ice / Applewine', 'Cider - Other Fruit', 'Cider - Perry / Poiré',
    'Cider - Rosé', 'Cider - Sweet', 'Cider - Traditional / Apfelwein', 'Corn Beer / Chicha de Jora',
    'Cream Ale', 'Dark Ale', 'Farmhouse Ale - Bière de Coupage', 'Farmhouse Ale - Bière de Garde',
    'Farmhouse Ale - Bière de Mars', 'Farmhouse Ale - Grisette', 'Farmhouse Ale - Other', 'Farmhouse Ale - Sahti',
    'Farmhouse Ale - Saison', 'Festbier', 'Flavored Malt Beverage', 'Freeze-Distilled Beer', 'Fruit Beer',
    'Gluten-Free', 'Golden Ale - American', 'Golden Ale - English', 'Golden Ale - Other', 'Grape Ale - Italian',
    'Grape Ale - Other', 'Grodziskie / Grätzer', 'Hard Ginger Beer', 'Hard Kombucha / Jun', 'Hard Seltzer',
    'Historical Beer - Adambier', 'Historical Beer - Broyhan', 'Historical Beer - Burton Ale',
    'Historical Beer - Dampfbier', 'Historical Beer - Gruit / Ancient Herbed Ale',
    'Historical Beer - Kentucky Common', 'Historical Beer - Kottbusser', 'Historical Beer - Kuit / Kuyt / Koyt',
    'Historical Beer - Lichtenhainer', 'Historical Beer - Mumme', 'Historical Beer - Other',
    'Historical Beer - Steinbier', 'Honey Beer', 'IPA - American', 'IPA - Belgian',
    'IPA - Black / Cascadian Dark Ale', 'IPA - Brett', 'IPA - Brown', 'IPA - Brut', 'IPA - Cold', 'IPA - English',
    'IPA - Farmhouse', 'IPA - Fruited', 'IPA - Imperial / Double', 'IPA - Imperial / Double Black',
    'IPA - Imperial / Double Milkshake', 'IPA - Imperial / Double New England / Hazy', 'IPA - Milkshake',
    'IPA - New England / Hazy', 'IPA - New Zealand', 'IPA - Other', 'IPA - Quadruple', 'IPA - Red', 'IPA - Rye',
    'IPA - Session', 'IPA - Sour', 'IPA - Triple', 'IPA - Triple New England / Hazy', 'IPA - White / Wheat',
    'Kellerbier / Zwickelbier', 'Koji / Ginjo Beer', 'Kvass', 'Kölsch', 'Lager - Amber / Red', 'Lager - American',
    'Lager - American Amber / Red', 'Lager - American Light', 'Lager - Dark', 'Lager - Dortmunder / Export',
    'Lager - Helles', 'Lager - IPL (India Pale Lager)', 'Lager - Japanese Rice', 'Lager - Leichtbier',
    'Lager - Mexican', 'Lager - Munich Dunkel', 'Lager - Other', 'Lager - Pale', 'Lager - Strong',
    'Lager - Vienna', 'Lager - Winter', 'Lambic - Framboise', 'Lambic - Fruit', 'Lambic - Gueuze',
    'Lambic - Kriek', 'Lambic - Other', 'Lambic - Traditional', 'Malt Beer', 'Malt Liquor',
    'Mead - Acerglyn / Maple Wine', 'Mead - Bochet', 'Mead - Braggot', 'Mead - Cyser', 'Mead - Melomel',
    'Mead - Metheglin', 'Mead - Other', 'Mead - Pyment', 'Mead - Session / Short', 'Mead - Traditional',
    'Mild - Dark', 'Mild - Light', 'Mild - Other', 'Märzen', 'Non-Alcoholic Beer - IPA',
    'Non-Alcoholic Beer - Lager', 'Non-Alcoholic Beer - Other', 'Non-Alcoholic Beer - Porter / Stout',
    'Non-Alcoholic Beer - Sour', 'Non-Alcoholic Beer - Wheat Beer', 'Old Ale', 'Other', 'Pale Ale - American',
    'Pale Ale - Australian', 'Pale Ale - Belgian', 'Pale Ale - English', 'Pale Ale - Milkshake',
    'Pale Ale - New England / Hazy', 'Pale Ale - New Zealand', 'Pale Ale - Other', 'Pale Ale - XPA (Extra Pale)',
    'Pilsner - Czech / Bohemian', 'Pilsner - German', 'Pilsner - Imperial / Double', 'Pilsner - Italian',
    'Pilsner - New Zealand', 'Pilsner - Other', 'Porter - American', 'Porter - Baltic', 'Porter - Coffee',
    'Porter - English', 'Porter - Imperial / Double', 'Porter - Imperial / Double Baltic',
    'Porter - Imperial / Double Coffee', 'Porter - Other', 'Pumpkin / Yam Beer', 'Rauchbier',
    'Red Ale - American Amber / Red', 'Red Ale - Imperial / Double', 'Red Ale - Irish', 'Red Ale - Other',
    'Roggenbier', 'Root Beer', 'Rye Beer', 'Rye Wine', 'Schwarzbier', 'Scotch Ale / Wee Heavy', 'Scottish Ale',
    'Scottish Export Ale', 'Shandy / Radler', 'Smoked Beer', 'Sorghum / Millet Beer', 'Sour - Berliner Weisse',
    'Sour - Flanders Oud Bruin', 'Sour - Flanders Red Ale', 'Sour - Fruited', 'Sour - Fruited Berliner Weisse',
    'Sour - Fruited Gose', 'Sour - Other', 'Sour - Other Gose', 'Sour - Smoothie / Pastry',
    'Sour - Traditional Gose', 'Specialty Grain', 'Spiced / Herbed Beer', 'Stout - American', 'Stout - Belgian',
    'Stout - Coffee', 'Stout - English', 'Stout - Foreign / Export', 'Stout - Imperial / Double',
    'Stout - Imperial / Double Coffee', 'Stout - Imperial / Double Milk', 'Stout - Imperial / Double Oatmeal',
    'Stout - Imperial / Double Pastry', 'Stout - Imperial / Double White / Golden', 'Stout - Irish Dry',
    'Stout - Milk / Sweet', 'Stout - Oatmeal', 'Stout - Other', 'Stout - Oyster', 'Stout - Pastry',
    'Stout - Russian Imperial', 'Stout - White / Golden', 'Strong Ale - American', 'Strong Ale - English',
    'Strong Ale - Other', 'Table Beer', 'Traditional Ale', 'Wheat Beer - American Pale Wheat',
    'Wheat Beer - Dunkelweizen', 'Wheat Beer - Hefeweizen', 'Wheat Beer - Hefeweizen Light / Leicht',
    'Wheat Beer - Kristallweizen', 'Wheat Beer - Other', 'Wheat Beer - Wheat Wine',
    'Wheat Beer - Witbier / Blanche', 'Wild Ale - American', 'Wild Ale - Other', 'Winter Ale', 'Winter Warmer',
]

brewery_style_choices = ['Brew Pub', 'Cidery', 'Contract Brewery','Macro Brewery','Micro Brewery', 'Nano Brewery', 'OTHER', 'Regional Brewery']
state_choices = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MISSING', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # Define flavors and hops
37
# Flavor names; main_func builds one 0/1 feature column per entry, in this order.
flavors_list = [
    'Apple', 'Apricot', 'Berry', 'Bitter', 'Caramel', 'Chocolate', 'Citrus', 'Clove',
    'Coffee', 'Dry', 'Earthy', 'Fig', 'Floral', 'Fruity', 'Funky', 'Grapefruit',
    'Hazelnut', 'Herbal', 'Malt', 'Nutmeg', 'Nutty', 'Peach', 'Pear', 'Peat',
    'Pepper', 'Pine', 'Plum', 'Resin', 'Salty', 'Smoky', 'Sour', 'Spicy',
    'Strawberry', 'Sweet', 'Tart', 'Toast', 'Toffee', 'Tropical', 'Vanilla',
]
# Hop names; they follow the flavor columns in the feature row, in this order.
hops_list = [
    'Amarillo', 'Cascade', 'Centennial', 'Chinook', 'Citra', 'Columbus', 'Crystal',
    'Fuggle', 'Galaxy', 'Golding', 'Hallertau', 'Magnum', 'Mosaic', 'Noble',
    'Nugget', 'Saaz', 'Simcoe', 'Tettnang', 'Warrior', 'Willamette',
]
 
 
 
 
 
 
 
 
 
39
 
40
  # Setup SHAP
41
# Built once at import time from the regressor; main_func calls this same
# explainer object on every prediction to get the per-feature contributions.
explainer = shap.Explainer(loaded_model_regressor)
 
42
 
43
def extract_selected_items(row, items_list, prefix):
    """Return the names from *items_list* that are switched on in *row*.

    For every name, the entry ``row[prefix + name]`` is looked up; the name is
    kept when that entry equals 1. The result preserves the order of
    *items_list*. A missing key raises whatever *row*'s lookup raises.
    """
    chosen = []
    for name in items_list:
        indicator = row[prefix + name]
        if indicator == 1:
            chosen.append(name)
    return chosen
 
 
 
 
46
 
47
def filter_beers(style, state, target_abv=None, abv_tolerance=0.03):
    """Return up to five of the most-rated catalogue beers of one style and state.

    Args:
        style: Value matched exactly against the 'Style' column of ``bb_df``.
        state: Value matched exactly against the 'State' column of ``bb_df``.
        target_abv: Optional ABV to centre an ABV window on, in the same units
            as the 'ABV' column (a fraction; it is multiplied by 100 for
            display below). When None (the default) no window is applied,
            which is what the two-argument call has always produced.
        abv_tolerance: Half-width of the ABV window; only used when
            ``target_abv`` is given.

    Returns:
        A DataFrame with columns Brewery, Beer Name, Avg Rating, # Ratings,
        Style, ABV, IBU and State, sorted by number of ratings (descending)
        and formatted for display. Empty when nothing matches.
    """
    filtered_df = bb_df[(bb_df['Style'] == style) & (bb_df['State'] == state)].copy()

    if target_abv is None:
        # The previous implementation clamped every ABV difference to the
        # threshold before filtering on it, so its only real effect was to
        # drop rows without an ABV. Keep exactly that for existing callers.
        filtered_df = filtered_df[filtered_df['ABV'].notna()]
    else:
        abv_diff = (filtered_df['ABV'] - target_abv).abs()
        filtered_df = filtered_df[abv_diff <= abv_tolerance]

    sorted_df = filtered_df.sort_values(by='Number of Ratings Beer', ascending=False)
    limited_df = sorted_df.head(5)[['Brewery', 'Beer Name', 'Average Rating Beer', 'Number of Ratings Beer', 'Style', 'ABV', 'IBU', 'State']]
    limited_df = limited_df.rename(columns={'Average Rating Beer': 'Avg Rating', 'Number of Ratings Beer': '# Ratings'})

    # Display formatting: ABV as a percent string, thousands separators on counts.
    limited_df['ABV'] = (limited_df['ABV'] * 100).round(2).astype(str) + '%'
    limited_df['Avg Rating'] = limited_df['Avg Rating'].round(2)
    limited_df['IBU'] = limited_df['IBU'].astype(int)
    limited_df['# Ratings'] = limited_df['# Ratings'].apply(lambda x: '{:,}'.format(x))
    return limited_df
62
 
63
def main_func(BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group):
    """Predict the rating for one beer and build every output the UI displays.

    Args:
        BeerName: Name shown in the percentile heading; not a model feature.
        ABV: Alcohol by volume in percent (e.g. 4.5); divided by 100 before
            it is handed to the encoder.
        IBU: Bitterness value; converted with ``float``.
        Style, BreweryStyle, Region, State: Categorical feature values.
        Flavor_Group: Collection of selected flavor names; each entry of
            ``flavors_list`` becomes a 0/1 column.
        Hop_Group: Collection of selected hop names; each entry of
            ``hops_list`` becomes a 0/1 column.

    Returns:
        A 5-tuple ``(local_plot, similar_beers, score_predict_str,
        percentile_dict0, title_text)``: the SHAP bar-plot figure, the
        DataFrame from ``filter_beers``, the predicted score rounded to two
        decimals as a string, a ``{label: fraction}`` dict of percentiles
        (0-1 scale), and the HTML heading for the percentile section.
        Comparison groups that contain no beers are left out of the dict.
    """
    import html
    import math

    # Single-row feature frame, columns in the order the encoder is given.
    new_row = pd.DataFrame(columns=['ABV', 'IBU', 'Style', 'Brewery Style', 'Region', 'State'] + flavors_list + hops_list)
    new_row.loc[0] = [float(ABV), float(IBU), Style, BreweryStyle, Region, State] + [1 if flavor in Flavor_Group else 0 for flavor in flavors_list] + [1 if hop in Hop_Group else 0 for hop in hops_list]
    new_row[['ABV', 'IBU']] = new_row[['ABV', 'IBU']].astype(float)
    new_row['ABV'] = new_row['ABV']/100

    new_row_encoded_regressor = loaded_enc_regressor.transform(new_row.copy())

    score_predict = loaded_model_regressor.predict(new_row_encoded_regressor)[0]
    score_predict = round(score_predict, 2)
    score_predict_str = str(score_predict)

    # SHAP bar plot of the strongest drivers for this one prediction.
    shap_values = explainer(new_row_encoded_regressor)
    shap.plots.bar(shap_values[0], max_display=7, order=shap.Explanation.abs, show_data='auto', show=False)
    plt.tight_layout()
    local_plot = plt.gcf()
    # Grab the figure first, then close it so pyplot does not keep it as the
    # current figure for the next request.
    plt.close()

    similar_beers = filter_beers(Style, State)

    # Comparison groups for the percentile read-out. The label strings are
    # user-facing and kept exactly as before.
    same_state = bb_df_percentile['State'] == State
    same_style = bb_df_percentile['Style'] == Style
    comparison_groups = [
        ("Overall in USA", bb_df_percentile),
        (f" Overall in {State}", bb_df_percentile[same_state]),
        (f"{Style}s in USA", bb_df_percentile[same_style]),
        (f"{Style}s in {State}", bb_df_percentile[same_style & same_state]),
    ]

    percentile_dict0 = {}
    for label, group_df in comparison_groups:
        ratings = group_df['Average Rating Beer']
        if ratings.empty:
            # No beers to compare against (e.g. a style nobody brews in that
            # state): a percentile is undefined, so skip the entry.
            continue
        percentile = float(stats.percentileofscore(ratings, score_predict))
        if math.isnan(percentile):
            continue
        # Plain float on a 0-1 scale, one value per label.
        percentile_dict0[label] = round(percentile, 1) / 100

    # BeerName is free text typed by the user; escape it before embedding in HTML.
    safe_name = html.escape(str(BeerName))
    title_text = f"<h2><center><b>{safe_name} Rating Percentile Comparison</b></center></h2>"

    return local_plot, similar_beers, score_predict_str, percentile_dict0, title_text
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
# Create the UI
# Heading markup shown at the top of the page.
title = "<center><b>🍻 **Untappd Beer Rating Predictor**🍻</b></center>"
# Intro text rendered as Markdown under the heading. MAE is the Mean
# Absolute Error.
description1 = """
This app takes user-inputted beer characteristics to predict the most likely Untappd rating the beer would receive if brewed. The ratings are calculated on a 5-point scale using a model based on data pulled in June 2023.

- <b> Mean Absolute Error (MAE) of .12</b> and <b> Root Mean Squared Error (RMSE) of .16</b>
- The input variables in this model <b> explain 65% of the variation </b>in the Untappd beer score

"""
127
 
128
theme = gr.themes.Default()

# NOTE(review): the nesting of the layout below is reconstructed from the
# order of the statements - confirm it against the running app.
# The Blocks `title` is the browser-tab title, so it gets plain text; the HTML
# heading in `title` is rendered through Markdown instead.
with gr.Blocks(title="Untappd Beer Rating Predictor", theme=theme) as demo:
    Markdown(f"# {title}")
    Markdown(description1)

    submit_btn1 = gr.Button("Predict")
    with Row():
        # Input column: one component per main_func argument.
        with Column():
            Markdown("<h2><center><b>New Beer Characteristics</b></center></h2>")
            BeerName = Textbox(label='Beer Name', value='New Beer 1')
            ABV = Slider(label="ABV %", minimum=0, maximum=20, value=4.5, step=.1)
            IBU = Slider(label="IBU", minimum=0.0, maximum=200, value=41, step=1)
            Style = Dropdown(choices=style_choices, label="Beer Style", value='IPA - Session')
            BreweryStyle = Dropdown(choices=brewery_style_choices, label="Brewery Style", value='Micro Brewery')
            Region = Dropdown(choices=region_choices, label="USA Region", value='Far West')
            State = Dropdown(choices=state_choices, label="State", value='CA')
            Flavor_Group = CheckboxGroup(choices=flavors_list, label="Flavors")
            Hop_Group = CheckboxGroup(choices=hops_list, label="Hops")

        # Output column: one component per value returned by main_func.
        with gr.Column(visible=True) as output_col:
            Markdown("<h2><center><b>Untappd Rating Prediction</b></center></h2>")
            score_predict_str = gr.Label(label="XGBoost Regressor")
            Markdown("<h2><center><b>Characteristics Affecting Rating Prediction</b></center></h2>")
            local_plot = gr.Plot(label='Shap:')
            percentile_title = Markdown(f"<h2><center><b>Untappd Rating Percentiles</b></center></h2>")
            # NOTE(review): `value=float` passes the builtin itself, not a
            # number - presumably Gradio calls it to get 0.0; confirm intent.
            percentile_dict0 = gr.Label(label='Percentile', show_label=False, value=float)

    submit_btn2 = gr.Button("Predict")
    Markdown("""---""")

    with gr.Row():
        with gr.Column():
            Markdown("<h1><center><b>Similar Beers</b></center></h1>")
            similar_beers = Dataframe(label="", type="pandas")

    # Both buttons and the examples share the same wiring; the lists follow
    # main_func's parameter order and return order respectively.
    model_inputs = [BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group]
    model_outputs = [local_plot, similar_beers, score_predict_str, percentile_dict0, percentile_title]

    submit_btn1.click(
        main_func,
        model_inputs,
        model_outputs,
        api_name="Untappd_Rating_Model")

    submit_btn2.click(
        main_func,
        model_inputs,
        model_outputs,
        api_name="Untappd_Rating_Model1")

    # One example per row of the example table: scalar inputs first, then the
    # flavor and hop selections rebuilt from the row's 0/1 columns.
    examples = [
        [
            row['Beer Name'], row['ABV'], row['IBU'], row['Style'],
            row['Brewery Style'], row['Region'], row['State'],
            extract_selected_items(row, flavors_list, ''),
            extract_selected_items(row, hops_list, ''),
        ]
        for _, row in aslin_example_df.iterrows()
    ]

    Markdown("<h1><center><b>Example Inputs from Aslin Beer Menu</b></center></h1>")
    gr.Examples(examples[:10],
                model_inputs,
                model_outputs,
                main_func,
                cache_examples=True, label="Aslin Beer List")

demo.launch()