Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from PIL import Image | |
| import requests | |
| import hopsworks | |
| import joblib | |
| import pandas as pd | |
| import random | |
| import numpy as np | |
| from io import BytesIO | |
| from great_expectations.dataset import PandasDataset | |
| from great_expectations.core import ExpectationSuite, ExpectationConfiguration | |
| from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
import os

# Read the Hopsworks API key from the environment instead of committing it to
# source control. When the variable is unset, ``None`` is passed on and
# ``hopsworks.login`` is left to resolve credentials on its own.
# NOTE(review): the key that used to be hard-coded on this line has been
# published and should be revoked and rotated.
api = os.environ.get("HOPSWORKS_API_KEY")
project = hopsworks.login(api_key_value=api)
fs = project.get_feature_store()
mr = project.get_model_registry()

# Download version 1 of the registered model and unpickle it. ``model`` first
# names the registry entry and is then rebound to the loaded estimator, which
# is the object the rest of this module calls ``predict`` on.
# NOTE(review): joblib.load unpickles arbitrary code; this is only safe while
# the contents of the model registry are trusted.
model = mr.get_model("wine_model_new", version=1)
model_dir = model.download()
model = joblib.load(os.path.join(model_dir, "wine_model_new.pkl"))
print("Model downloaded")
def generate_samples(num):
    """Create synthetic wines, validate them and append them to the feature store.

    Every sample is drawn from fixed uniform ranges using its index as the
    random seed, so the same ``num`` always yields the same rows. The rows
    are checked against a Great Expectations suite of per-column bounds and,
    only when every check passes, inserted into the ``wine_2`` (version 2)
    feature group.

    Args:
        num: Number of samples to generate; converted with ``int()``.

    Returns:
        The generated ``pandas.DataFrame`` (including the constant
        ``quality`` column) when validation succeeds, otherwise ``None``.
        ``None`` is also returned when ``num`` is not positive.
    """
    num_samples = int(num)
    if num_samples <= 0:
        # There is nothing to generate, validate or insert.
        print("Data validation failed:", "no samples requested")
        return None

    # (column, min_value, max_value) bounds every generated row must satisfy.
    column_bounds = (
        ("fixed_acidity", 3.5, 16.0),
        ("volatile_acidity", 0.06, 1.60),
        ("citric_acid", 0.0, 7.5),
        ("residual_sugar", 0.3, 66.0),
        ("chlorides", 0.00, 0.65),
        ("free_sulfur_dioxide", 0.8, 290.0),
        ("total_sulfur_dioxide", 5.5, 450.0),
        ("density", 0.95, 1.03),
        ("ph", 0.3, 4.5),
        ("sulphates", 0.2, 2.5),
        ("alcohol", 7.8, 15.5),
        ("quality", 0, 2),
    )

    suite = ExpectationSuite(expectation_suite_name="wine_dimensions")
    for column, min_val, max_val in column_bounds:
        suite.add_expectation(
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": column,
                    "min_value": min_val,
                    "max_value": max_val,
                },
            )
        )

    def generate_synthetic_wine(seed):
        """Return one synthetic wine as a dict, reproducible for ``seed``."""
        # A private generator yields the same values as seeding the module
        # level one, without resetting the RNG state shared by the process.
        rng = random.Random(seed)
        return {
            "fixed_acidity": rng.uniform(3.6, 15.0),
            "volatile_acidity": rng.uniform(0.061, 1.5),
            "citric_acid": rng.uniform(0.1, 7.0),
            "residual_sugar": rng.uniform(0.35, 60),
            "chlorides": rng.uniform(0.1, 0.60),
            "free_sulfur_dioxide": rng.uniform(0.85, 280),
            "total_sulfur_dioxide": rng.uniform(5.6, 400),
            "density": rng.uniform(0.98, 1),
            "ph": rng.uniform(0.35, 4.2),
            "sulphates": rng.uniform(0.3, 2.3),
            "alcohol": rng.uniform(7.9, 12.5),
            "quality": 1,  # every synthetic wine gets the same label
        }

    synthetic_wines_df = pd.DataFrame(
        [generate_synthetic_wine(seed) for seed in range(num_samples)]
    )

    # Validate the whole frame at once; a single failing column rejects it.
    results = PandasDataset(synthetic_wines_df).validate(
        expectation_suite=suite, result_format="SUMMARY"
    )
    if not results["success"]:
        print("Data validation failed:", results)
        return None

    # The feature group is only needed once the data is known to be valid.
    # NOTE(review): the key list spells "pH" while the DataFrame column is
    # "ph" - confirm that Hopsworks normalises the case of key names.
    wine_fg = fs.get_or_create_feature_group(
        name="wine_2",
        version=2,
        primary_key=["fixed_acidity", "volatile_acidity", "citric_acid",
                     "residual_sugar", "chlorides", "free_sulfur_dioxide",
                     "total_sulfur_dioxide", "density", "pH", "sulphates",
                     "alcohol", "quality"],
        description="For new wine data")
    wine_fg.insert(synthetic_wines_df, overwrite=False, operation="append")
    print("All synthetic wine data inserted successfully.")
    return synthetic_wines_df
def plot_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix of the given labels and return it as an image.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A PIL image opened from the PNG rendering of the annotated heatmap.
    """
    matrix = confusion_matrix(y_true, y_pred)

    figure, axes = plt.subplots(figsize=(10, 7))
    sns.heatmap(matrix, annot=True, fmt='d', cmap="crest", ax=axes)
    axes.set_xlabel('Predicted')
    axes.set_ylabel('True')

    # Render into memory so the plot can be handed over as a PIL image
    # without touching the file system.
    png_buffer = BytesIO()
    figure.savefig(png_buffer, format='png')
    plt.close(figure)
    png_buffer.seek(0)
    return Image.open(png_buffer)
def wine_predict(fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
                 chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
                 ph, sulphates, alcohol, num_samples, num_samples_matrix):
    """Evaluate the model and predict quality for one wine or a synthetic batch.

    The model is first scored on the leading ``num_samples_matrix`` rows of a
    fresh 80/20 test split of the ``wine_2`` (version 1) feature view. Then:

    * if ``num_samples`` equals ``-1`` the eleven feature arguments are
      predicted as a single wine, and rows from the feature view's batch data
      are returned as the sample table;
    * otherwise ``generate_samples(num_samples)`` creates (and stores) that
      many synthetic wines and the mean of their predictions is returned.

    Args:
        fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
        chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, ph,
        sulphates, alcohol: Feature values of the single wine to predict.
        num_samples: ``-1`` to predict the single wine, otherwise the number
            of synthetic wines to generate.
        num_samples_matrix: Number of test rows used for the confusion matrix
            and metrics; converted with ``int()``.

    Returns:
        A tuple ``(res, samples, confusion_matrix_plot, pred_results)``:
        ``res`` is the model output for the single wine, the mean prediction
        of the synthetic batch, or an explanatory message when the synthetic
        data was rejected; ``samples`` is a DataFrame of at most 50 rows;
        ``confusion_matrix_plot`` is a PIL image; ``pred_results`` is a
        one-row DataFrame of accuracy, precision, recall and F1.
    """
    print("Calling function")
    feature_view = fs.get_feature_view(name="wine_2", version=1)
    # Only the held-out part of the split is used for evaluation.
    _, X_test, _, y_test = feature_view.train_test_split(test_size=0.2)

    num_samples_matrix = int(num_samples_matrix)
    X_test = X_test[:num_samples_matrix]
    y_test = y_test[:num_samples_matrix]

    y_pred = model.predict(X_test)
    confusion_matrix_plot = plot_confusion_matrix(y_test, y_pred)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted')
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    # The 'Random Forest' label is hard-coded; it is not read from the model.
    pred_results = pd.DataFrame(
        [['Random Forest', acc, prec, rec, f1]],
        columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'])

    if num_samples == -1:
        df = pd.DataFrame(
            [[fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
              chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
              ph, sulphates, alcohol]],
            columns=['fixed_acidity', 'volatile_acidity', 'citric_acid',
                     'residual_sugar', 'chlorides', 'free_sulfur_dioxide',
                     'total_sulfur_dioxide', 'density', 'ph', 'sulphates',
                     'alcohol'])
        print("Predicting")
        print(df)
        # Reuse the feature view fetched above instead of requesting it again.
        synthetic_wines_df = feature_view.get_batch_data()[:50]
        res = model.predict(df)
    else:
        # Generating the samples also inserts them into the feature store.
        generated = generate_samples(num_samples)
        if generated is None:
            # generate_samples returns None when the data fails validation;
            # report that instead of failing on the missing DataFrame while
            # still returning the evaluation outputs computed above.
            res = "Synthetic data failed validation; no prediction was made"
            synthetic_wines_df = pd.DataFrame()
        else:
            features = generated.drop('quality', axis=1)
            res = model.predict(features).mean()
            synthetic_wines_df = features[:50]

    if not isinstance(synthetic_wines_df, pd.DataFrame):
        synthetic_wines_df = pd.DataFrame(synthetic_wines_df)
    return res, synthetic_wines_df, confusion_matrix_plot, pred_results
def update_dashboard(fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
                     chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
                     ph, sulphates, alcohol, num_samples, num_samples_matrix):
    """Run ``wine_predict`` and reorder its results for the dashboard.

    All arguments are forwarded unchanged and in the same order.

    Returns:
        A tuple ``(prediction, confusion_matrix_plot, pred_results,
        generated_samples)``, i.e. the values of ``wine_predict`` with the
        sample table moved from second to last position.
    """
    outcome = wine_predict(
        fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
        chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
        ph, sulphates, alcohol, num_samples, num_samples_matrix,
    )
    prediction, sample_table, matrix_image, metrics_table = outcome
    return prediction, matrix_image, metrics_table, sample_table
# Dashboard wiring: thirteen numeric inputs are passed positionally to
# update_dashboard, whose four return values fill the outputs in order.
# Components come from the top-level gradio namespace with ``value=`` for the
# initial value; the older ``gr.inputs`` / ``gr.outputs`` modules and their
# ``default=`` keyword are deprecated in Gradio 3 and absent from Gradio 4.
demo = gr.Interface(
    fn=update_dashboard,
    title="Wine Quality Predictive Analytics",
    description="Enter the wine characteristics or the number of samples to predict its quality, enter -1 to predict input sample",
    allow_flagging="never",
    inputs=[
        gr.Number(value=7.4, label="Fixed Acidity"),
        gr.Number(value=0.7, label="Volatile Acidity"),
        gr.Number(value=0.0, label="Citric Acid"),
        gr.Number(value=1.9, label="Residual Sugar"),
        gr.Number(value=0.076, label="Chlorides"),
        gr.Number(value=11, label="Free Sulfur Dioxide"),
        gr.Number(value=34, label="Total Sulfur Dioxide"),
        gr.Number(value=0.9978, label="Density"),
        gr.Number(value=3.51, label="pH"),
        gr.Number(value=0.56, label="Sulphates"),
        gr.Number(value=9.4, label="Alcohol"),
        # -1 selects single-wine prediction instead of sample generation.
        gr.Number(value=-1, label="Samples number to generate"),
        gr.Number(value=50, label="Samples number for confusion matrix"),
    ],
    outputs=[
        gr.Textbox(label="Predicted Quality"),
        gr.Image(label="Confusion Matrix", type="pil"),
        gr.Dataframe(label="Most Recent Prediction results", type="pandas"),
        gr.Dataframe(label="Most Recent Generated Wine Data", type="pandas"),
    ],
)
demo.launch(debug=True)