| import numpy as np
|
| import time
|
| import os, sys
|
|
|
| import pandas as pd
|
| from pathlib import Path
|
|
|
| from concrete.ml.deployment import FHEModelClient
|
|
|
| import requests
|
|
|
|
|
def to_json(python_object):
    """`json.dumps` ``default`` hook: encode a ``bytes`` value as a tagged dict.

    Anything that is not ``bytes`` is rejected, matching the contract of a
    JSON ``default`` serializer.
    """
    if not isinstance(python_object, bytes):
        raise TypeError(repr(python_object) + " is not JSON serializable")
    return {"__class__": "bytes", "__value__": list(python_object)}
|
|
|
|
|
def from_json(python_object):
    """JSON decode hook: turn a dict tagged by `to_json` back into ``bytes``.

    Objects without the ``"__class__"`` tag are returned unchanged.  (The
    original fell through and implicitly returned ``None`` for them,
    silently discarding the value.)
    """
    if "__class__" in python_object:
        return bytes(python_object["__value__"])
    return python_object
|
|
|
|
|
# Inference endpoint hosting the compiled FHE model.
API_URL = "https://h0cvbig1fkmf57eb.eu-west-1.aws.endpoints.huggingface.cloud"

# Fail fast with an actionable message: with a missing token,
# `None + str` would raise a confusing `TypeError` at this line instead.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token is None:
    raise RuntimeError("HF_TOKEN environment variable must be set to query the endpoint")

headers = {
    "Authorization": "Bearer " + _hf_token,
    "Content-Type": "application/json",
}
|
|
|
|
|
def query(payload, allowed_retries=2):
    """POST *payload* to the inference endpoint and return the decoded JSON.

    The endpoint occasionally answers with a transient ``{"error": ...}``
    body; the request is retried up to *allowed_retries* times before
    giving up with an AssertionError.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    # Decode once instead of re-parsing the response body on every access.
    result = response.json()

    if result is not None and "error" in result:
        if allowed_retries > 0:
            print(f"Warning, error {response=} {response.json()=} in the query, relaunching")
            return query(payload, allowed_retries - 1)
        # Explicit raise instead of `assert False`: asserts are stripped
        # under `python -O`, which would let errors pass silently.
        raise AssertionError(f"Got an error: {response=} {response.json()=}")

    return result
|
|
|
|
|
# Directory holding the client-side artifacts of the compiled FHE model
# (produced by a Concrete ML deployment; must exist before running this script).
path_to_model = Path("compiled_model")
|
|
|
|
|
| from sklearn.datasets import fetch_openml
|
| from sklearn.model_selection import train_test_split
|
| import numpy
|
|
|
|
|
# Load the credit-risk data.
# NOTE(review): BOTH reads point at the *Train* CSV — "TestData" is an exact
# copy of the training data, so the accuracy computed at the end of this
# script is measured on seen data.  Presumably the second path should be the
# corresponding ..._test_data.csv file — confirm and fix.
TrainData = pd.read_csv("/data/R_Module_Day_7.2_Credit_Risk_Train_data.csv")
TestData = pd.read_csv("/data/R_Module_Day_7.2_Credit_Risk_Train_data.csv")

# Tag each row with its origin so the two splits can be separated again
# after joint preprocessing.
TrainData["Source"] = "Train"
TestData["Source"] = "Test"

# Stack train and test so cleaning/encoding is applied consistently to both.
FullData = pd.concat([TrainData,TestData])
FullData.shape  # notebook-style inspection lines; no effect when run as a script

FullData.head()

FullData.describe()

FullData.Dependents.value_counts()

# "3+" is a catch-all category: map it to the number 3 and cast the column
# to float (NaNs survive the cast and are imputed below).
FullData.Dependents = np.where(FullData.Dependents == '3+',3,FullData.Dependents).astype(float)
FullData.Dependents.value_counts()
FullData.Dependents.dtype

FullData.isnull().sum()
|
|
|
|
|
# Impute missing predictor values using statistics computed on the Train
# split only (median for numeric, mode for categorical) so no Test-split
# information leaks into the imputation.
for col_name in list(FullData):
    # Identifier, target, and bookkeeping columns are never imputed.
    if col_name in ("Loan_ID", "Loan_Status", "Source"):
        continue
    if FullData[col_name].isnull().sum() == 0:
        continue
    train_col = FullData.loc[FullData.Source == "Train", col_name]
    if FullData[col_name].dtype != object:
        fill_value = train_col.median()
    else:
        fill_value = train_col.mode()[0]
    # Plain assignment instead of the chained
    # `FullData[col].fillna(..., inplace=True)`, which is deprecated and
    # unreliable under pandas Copy-on-Write.
    FullData[col_name] = FullData[col_name].fillna(fill_value)

FullData.isnull().sum()  # inspection: all predictor columns should now be complete
|
|
|
|
|
|
|
|
|
def _cap_at_train_percentile(df, column, percentiles):
    """Clip *column*'s upper tail at each given percentile of the Train rows.

    The percentiles are applied in order and the threshold is recomputed
    after each clip, reproducing the original sequential winsorization
    (net effect: a cap near the last percentile in the list).
    """
    for pct in percentiles:
        cap = np.percentile(df.loc[df.Source == "Train", column], pct)
        df[column] = np.where(df[column] > cap, cap, df[column])


# Winsorize the heavy-tailed numeric features; thresholds come from the
# Train split only (no Test-split leakage).
_cap_at_train_percentile(FullData, "ApplicantIncome", [99, 95, 90])
_cap_at_train_percentile(FullData, "CoapplicantIncome", [99, 95])
_cap_at_train_percentile(FullData, "LoanAmount", [99, 95, 90])
|
|
|
|
|
# One-hot encode the categorical predictors, leaving out the identifier,
# the bookkeeping column, and the target.
cat = FullData.columns[FullData.dtypes == object]
categorical_predictors = FullData[cat].drop(['Loan_ID', 'Source', 'Loan_Status'], axis=1)
Dummy = pd.get_dummies(categorical_predictors, drop_first=True)
Dummy.shape
Dummy.columns

# Attach the dummy columns alongside the original frame.
FullData2 = pd.concat([FullData, Dummy], axis=1)
FullData2.shape

# The raw categorical columns (now encoded) and the identifier are no
# longer needed.
Cols_To_Drop = ['Loan_ID', 'Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
FullData3 = FullData2.drop(Cols_To_Drop, axis=1).copy()
FullData3.columns
FullData3.shape
|
|
|
|
|
# Encode the target: 'N' (loan rejected) -> 1, anything else -> 0.
FullData3.Loan_Status = (FullData3.Loan_Status == 'N').astype(int)
FullData3.Loan_Status.value_counts()
FullData3.shape
FullData3.dtypes

# Split back into the original Train/Test frames, dropping the helper column.
Train = FullData3[FullData3.Source == "Train"].drop("Source", axis=1).copy()
Train.shape

Test = FullData3[FullData3.Source == "Test"].drop("Source", axis=1).copy()
Test.shape

# Separate features from the target for both splits.
X_train = Train.drop("Loan_Status", axis=1)
Y_train = Train["Loan_Status"].copy()
X_test = Test.drop("Loan_Status", axis=1)
Y_test = Test["Loan_Status"].copy()
|
|
|
|
|
fhemodel_client = FHEModelClient(path_to_model)

# Generate the client's keys.  The serialized evaluation keys are shared
# with the server; the private key never leaves the client.
fhemodel_client.generate_private_and_evaluation_keys()
evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()

# The evaluation keys can be very large, so they are uploaded in chunks.
packet_size = 1024 * 1024 * 100  # 100 MiB per request
evaluation_keys_remaining = evaluation_keys
uid = None
i = 0
is_first = True
is_finished = False

while not is_finished:
    # len() measures the actual payload size; the original's
    # sys.getsizeof() added the bytes-object header (and was even called
    # on None after the final chunk), reporting misleading sizes.
    if len(evaluation_keys_remaining) > packet_size:
        evaluation_keys_piece = evaluation_keys_remaining[:packet_size]
        evaluation_keys_remaining = evaluation_keys_remaining[packet_size:]
    else:
        evaluation_keys_piece = evaluation_keys_remaining
        evaluation_keys_remaining = b""
        is_finished = True

    print(
        f"Sending {i}-th piece of the key (remaining size is {len(evaluation_keys_remaining) / 1024:.2f} kbytes)"
    )
    i += 1

    if is_first:
        is_first = False
        # First chunk: the server creates a database entry for this client
        # and returns its uid.
        payload = {
            "inputs": "fake",
            "evaluation_keys": to_json(evaluation_keys_piece),
            "method": "save_key",
        }
        uid = query(payload)["uid"]
        print(f"Storing the key in the database under {uid=}")
    else:
        # Subsequent chunks are appended to the entry created above.
        payload = {
            "inputs": "fake",
            "evaluation_keys": to_json(evaluation_keys_piece),
            "method": "append_key",
            "uid": uid,
        }
        query(payload)
|
|
|
|
|
nb_good = 0
nb_samples = len(X_test)
verbose = True
time_start = time.time()
duration = 0
is_first = True

# Materialize the frames once: integer indexing on a DataFrame
# (`X_test[i]`) is a *column* lookup by label and raises a KeyError;
# positional row access is what is needed here.
X_test_np = X_test.to_numpy()
Y_test_np = Y_test.to_numpy()

for i in range(nb_samples):
    # Quantize and encrypt the sample client-side; the server only ever
    # sees ciphertext.
    encrypted_inputs = fhemodel_client.quantize_encrypt_serialize(X_test_np[i].reshape(1, -1))

    payload = {
        "inputs": "fake",
        "encrypted_inputs": to_json(encrypted_inputs),
        "method": "inference",
        "uid": uid,
    }

    if is_first:
        print(f"Size of the payload: {sys.getsizeof(payload) / 1024:.2f} kilobytes")
        is_first = False

    # Track the latency of the remote FHE inference call only.
    duration_inference = -time.time()
    encrypted_prediction = query(payload)
    duration_inference += time.time()
    duration += duration_inference

    encrypted_prediction = from_json(encrypted_prediction)

    # Decrypt and dequantize locally, then pick the most likely class.
    prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
    prediction = np.argmax(prediction_proba)

    if verbose:
        print(
            f"for {i}-th input, {prediction=} with expected {Y_test_np[i]} in {duration_inference:.3f} seconds"
        )

    nb_good += Y_test_np[i] == prediction

print(f"Accuracy on {nb_samples} samples is {nb_good * 1. / nb_samples}")
print(f"Total time: {time.time() - time_start:.3f} seconds")
print(f"Duration per inference: {duration / nb_samples:.3f} seconds")