Spaces:

deeploy
/

AML

Running

App Files Files Community

adollbo committed on Feb 26, 2024

Commit

a22458b

1 Parent(s): 6877f18

added comments and ran black for proper formatting

Browse files

Files changed (1) hide show

utils.py +138 -61

utils.py CHANGED Viewed

@@ -8,28 +8,51 @@ import random
 from datetime import datetime, timedelta
 from babel.numbers import format_currency
-COL_NAMES = ['Transaction date',
- 'Transaction type',
- 'Amount transferred',
- 'Sender\'s initial balance',
- 'Sender\'s new balance',
- 'Recipient\'s initial balance',
- 'Recipient\'s new balance',
- "Sender exactly credited",
- "Receiver exactly credited",
- 'Large amount',
- 'Frequent receiver',
- 'Merchant receiver',
- 'Sender ID',
- 'Receiver ID']
-feature_texts = {0: "Date of transaction", 1: "Amount transferred", 2: "Initial balance of sender", 3: "New balance of sender",
-                 4: "Initial balance of recipient", 5: "New balance of recipient", 6: "Sender's balance was exactly credited",
-                   7: "Receiver's balance was exactly credited", 8: "Large amount", 9: "Frequent receiver of transactions", 10: "Receiver is merchant", 11: "Sender ID", 12: "Receiver ID",
-                   13: "Transaction type is Cash out", 14: "Transaction type is Transfer", 15: "Transaction type is Payment", 16: "Transaction type is Cash in", 17: "Transaction type is Debit"}
-CATEGORIES = np.array(['CASH_OUT', 'TRANSFER', 'PAYMENT', 'CASH_IN', 'DEBIT'])
 def transformation(input, categories):
     new_x = input
     cat = np.array(input[1])
@@ -38,21 +61,32 @@ def transformation(input, categories):
     match_index = np.where(categories == cat)[0]
     result_array[match_index] = 1
     new_x.extend(result_array.tolist())
-    python_objects = [np_type.item() if isinstance(np_type, np.generic) else np_type for np_type in new_x]
     return python_objects
 def get_request_body(datapoint):
     data = datapoint.iloc[0].tolist()
     instances = [int(x) if isinstance(x, (np.int32, np.int64)) else x for x in data]
-    request_body = {'instances': [instances]}
     return request_body
 def get_explainability_texts(shap_values, feature_texts):
     # Separate positive and negative values, keep indice as corresponds to key
     positive_dict = {index: val for index, val in enumerate(shap_values) if val > 0}
     # Sort dictionaries based on the magnitude of values
-    sorted_positive_indices = [index for index, _ in sorted(positive_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
     positive_texts = [feature_texts[x] for x in sorted_positive_indices]
     positive_texts = positive_texts[2:]
     sorted_positive_indices = sorted_positive_indices[2:]
@@ -62,37 +96,56 @@ def get_explainability_texts(shap_values, feature_texts):
     return positive_texts, sorted_positive_indices
 def random_past_date_from_last_year():
     one_year_ago = datetime.now() - timedelta(days=365)
     random_days = random.randint(0, (datetime.now() - one_year_ago).days)
     random_date = one_year_ago + timedelta(days=random_days)
-    return random_date.strftime('%Y-%m-%d')
 def get_explainability_values(pos_indices, data):
-    rounded_data = [round(value, 2) if isinstance(value, float) else value for value in data]
     transformed_data = transformation(input=rounded_data, categories=CATEGORIES)
     vals = []
     for idx in pos_indices:
-        if idx in range(6,11) or idx in range(13,18):
             val = str(bool(transformed_data[idx])).capitalize()
         else:
             val = transformed_data[idx]
         vals.append(val)
     return vals
-def modify_datapoint(datapoint): # should return list, with correct numbers/amounts, and date
     data = datapoint.iloc[0].tolist()
     data[0] = random_past_date_from_last_year()
     modified_amounts = data.copy()
     if any(val > 12000 for val in data[2:7]):
-        modified_amounts[2:7] = [value / 100 if value != 0 else 0 for value in data[2:7]]
     if any(val > 120000 for val in modified_amounts[2:7]):
         new_list = [value / 10 if value != 0 else 0 for value in modified_amounts[2:7]]
         modified_amounts[2:7] = new_list
-    rounded_data = [round(value, 2) if isinstance(value, float) else value for value in modified_amounts]
-    rounded_data[2:7] = [format_currency(value, 'EUR', locale='en_GB') for value in rounded_data[2:7]]
     return rounded_data
 def get_weights(shap_values, sorted_indices, target_sum=0.95):
     weights = [shap_values[x] for x in sorted_indices]
     total_sum = sum(weights)
@@ -100,6 +153,8 @@ def get_weights(shap_values, sorted_indices, target_sum=0.95):
     scaled_values = [val * (target_sum / total_sum) for val in weights]
     return scaled_values
 def get_fake_certainty():
     # Generate a random certainty between 75% and 99%
     fake_certainty = uniform(0.75, 0.99)
@@ -107,20 +162,28 @@ def get_fake_certainty():
     return formatted_fake_certainty
 def get_random_suspicious_transaction(data):
-    suspicious_data=data[data["isFraud"]==1]
-    max_n=len(suspicious_data)
-    random_nr=randrange(max_n)
-    suspicous_transaction = suspicious_data[random_nr-1:random_nr].drop("isFraud", axis=1)
     return suspicous_transaction
-def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, evaluation_input):
     """Send evaluation to Deeploy."""
     try:
         with st.spinner("Submitting response..."):
             # Call the explain endpoint as it also includes the prediction
-            client.evaluate(deployment_id, request_log_id, prediction_log_id, evaluation_input)
         return True
     except Exception as e:
         logging.error(e)
@@ -132,6 +195,7 @@ def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, ev
         st.write(f"Error message: {e}")
 def get_model_url():
     """Get model url and retrieve workspace id and deployment id from it"""
     model_url = st.text_area(
@@ -148,37 +212,50 @@ def get_model_url():
         deployment_id = ""
     return model_url, workspace_id, deployment_id
 def get_comment_explanation(certainty, explainability_texts, explainability_values):
-    cleaned = [x.replace(':', '') for x in explainability_texts]
-    fi = [f'{cleaned[i]} is {x}' for i, x in enumerate(explainability_values)]
-    fi.insert(0, 'Important suspicious features: ')
-    result = '\n'.join(fi)
-    comment = f"Model certainty is {certainty}" + '\n''\n' + result
     return comment
 def create_data_input_table(data, col_names):
     st.subheader("Transaction details")
     data[7:12] = [bool(value) for value in data[7:12]]
-    rounded_list = [round(value, 2) if isinstance(value, float) else value for value in data]
-    df = pd.DataFrame({"Feature name": col_names, "Value": rounded_list })
-    st.dataframe(df, hide_index=True, width=475, height=35*len(df)+38) # use_container_width=True
-# Create a function to generate a table
 def create_table(texts, values, weights, title):
-    df = pd.DataFrame({"Feature Explanation": texts, 'Value': values, 'Weight': weights})
-    st.markdown(f'#### {title}')  # Markdown for styling
-    st.dataframe(df, hide_index=True, width=475, column_config={
-                     'Weight': st.column_config.ProgressColumn(
-                         'Weight',
-                         width='small',
-                         format="%.2f",
-                         min_value=0,
-                         max_value=1
-                     )
-                 }) #use_container_width=True
-def ChangeButtonColour(widget_label, font_color, background_color='transparent'):
     htmlstr = f"""
         <script>
             var elements = window.parent.document.querySelectorAll('button');
@@ -190,4 +267,4 @@ def ChangeButtonColour(widget_label, font_color, background_color='transparent')
             }}
         </script>
         """
-    components.html(f"{htmlstr}", height=0, width=0)

 from datetime import datetime, timedelta
 from babel.numbers import format_currency
# Column names for data input: one display label per value of a datapoint row.
COL_NAMES = [
    "Transaction date",
    "Transaction type",
    "Amount transferred",
    "Sender's initial balance",
    "Sender's new balance",
    "Recipient's initial balance",
    "Recipient's new balance",
    "Sender exactly credited",
    "Receiver exactly credited",
    "Large amount",
    "Frequent receiver",
    "Merchant receiver",
    "Sender ID",
    "Receiver ID",
]

# Texts for explanation, keyed by feature index.
# Keys 13-17 are the one-hot transaction-type features and follow the order
# of CATEGORIES below.
feature_texts = {
    0: "Date of transaction",
    1: "Amount transferred",
    2: "Initial balance of sender",
    3: "New balance of sender",
    4: "Initial balance of recipient",
    5: "New balance of recipient",
    6: "Sender's balance was exactly credited",
    7: "Receiver's balance was exactly credited",
    8: "Large amount",
    9: "Frequent receiver of transactions",
    10: "Receiver is merchant",
    11: "Sender ID",
    12: "Receiver ID",
    13: "Transaction type is Cash out",
    14: "Transaction type is Transfer",
    15: "Transaction type is Payment",
    16: "Transaction type is Cash in",
    17: "Transaction type is Debit",
}

# Categories for one hot encoding of the transaction type.
CATEGORIES = np.array(["CASH_OUT", "TRANSFER", "PAYMENT", "CASH_IN", "DEBIT"])
+# one hot encoding
 def transformation(input, categories):
     new_x = input
     cat = np.array(input[1])
     match_index = np.where(categories == cat)[0]
     result_array[match_index] = 1
     new_x.extend(result_array.tolist())
+    python_objects = [
+        np_type.item() if isinstance(np_type, np.generic) else np_type
+        for np_type in new_x
+    ]
     return python_objects
# func to make the request body in the right format for the client
def get_request_body(datapoint):
    """Build the request body for the first row of ``datapoint``.

    Args:
        datapoint: pandas DataFrame; only the row at position 0 is used.

    Returns:
        A dict ``{"instances": [[v0, v1, ...]]}`` holding that row's values.
        NumPy integer scalars of any width are converted to plain ``int``;
        all other values are passed through unchanged.

    Raises:
        IndexError: if ``datapoint`` has no rows.
    """
    row = datapoint.iloc[0].tolist()
    # A row taken from a mixed-dtype frame can still contain NumPy integer
    # scalars; np.integer covers every width, not just int32/int64.
    instances = [int(x) if isinstance(x, np.integer) else x for x in row]
    return {"instances": [instances]}
+# func for sorting and retrieving the explanation texts
 def get_explainability_texts(shap_values, feature_texts):
     # Keep only the positive values; each keeps its original index because that index is the key into feature_texts
     positive_dict = {index: val for index, val in enumerate(shap_values) if val > 0}
     # Sort dictionaries based on the magnitude of values
+    sorted_positive_indices = [
+        index
+        for index, _ in sorted(
+            positive_dict.items(), key=lambda item: abs(item[1]), reverse=True
+        )
+    ]
     positive_texts = [feature_texts[x] for x in sorted_positive_indices]
     positive_texts = positive_texts[2:]
     sorted_positive_indices = sorted_positive_indices[2:]
     return positive_texts, sorted_positive_indices
# func to generate random date from the past year to replace var "steps" with
# in the input data, to make it more understandable
def random_past_date_from_last_year():
    """Return a random date from the last 365 days as a ``YYYY-MM-DD`` string.

    The date is drawn uniformly (by whole days) between 365 days ago and
    today, both inclusive, based on the local clock.
    """
    days_back = 365
    # Read the clock once so both ends of the window use the same instant.
    one_year_ago = datetime.now() - timedelta(days=days_back)
    random_date = one_year_ago + timedelta(days=random.randint(0, days_back))
    return random_date.strftime("%Y-%m-%d")
# func for retrieving the values for explanations, requires some data engineering
def get_explainability_values(pos_indices, data):
    """Return the display value of each feature listed in ``pos_indices``.

    Args:
        pos_indices: feature indices to look up, in output order.
        data: list of raw datapoint values; it is not modified.

    Returns:
        A list with one entry per index. Features 6-10 and 13-17 are
        rendered as the strings ``"True"``/``"False"``; all other features
        are returned as their value, with floats rounded to two decimals.
    """
    # Rounding builds a new list, so the one-hot step below cannot alter `data`.
    rounded_data = [
        round(value, 2) if isinstance(value, float) else value for value in data
    ]
    transformed_data = transformation(input=rounded_data, categories=CATEGORIES)
    vals = []
    for idx in pos_indices:
        val = transformed_data[idx]
        if idx in range(6, 11) or idx in range(13, 18):
            # str(bool(...)) is already "True"/"False"; no capitalize needed.
            val = str(bool(val))
        vals.append(val)
    return vals
# func to modify the values of currency to make it more similar to euro
def modify_datapoint(datapoint):
    """Turn the first row of ``datapoint`` into a display-ready list.

    Position 0 is replaced by a random date from the past year. The values
    at positions 2-6 are scaled down and formatted as EUR strings. Every
    other float is rounded to two decimals.

    Args:
        datapoint: pandas DataFrame; only the row at position 0 is used.

    Returns:
        A list with the date string, the formatted amounts and the
        remaining values.

    Raises:
        IndexError: if ``datapoint`` has no rows.
    """
    amount_cols = slice(2, 7)
    data = datapoint.iloc[0].tolist()
    data[0] = random_past_date_from_last_year()

    amounts = data[amount_cols]
    # Scale all amounts together so they stay consistent with each other:
    # first by 100 when any exceeds 12000, then by a further 10 when any
    # still exceeds 120000.
    if any(val > 12000 for val in amounts):
        amounts = [val / 100 for val in amounts]
    if any(val > 120000 for val in amounts):
        amounts = [val / 10 for val in amounts]
    data[amount_cols] = amounts

    rounded_data = [
        round(value, 2) if isinstance(value, float) else value for value in data
    ]
    rounded_data[amount_cols] = [
        format_currency(value, "EUR", locale="en_GB")
        for value in rounded_data[amount_cols]
    ]
    return rounded_data
+# func to retrieve the weights of the features to be presented as explanation
 def get_weights(shap_values, sorted_indices, target_sum=0.95):
     weights = [shap_values[x] for x in sorted_indices]
     total_sum = sum(weights)
     scaled_values = [val * (target_sum / total_sum) for val in weights]
     return scaled_values
+# func to generate a fake certainty for the model to make it more realistic
 def get_fake_certainty():
     # Generate a random certainty between 75% and 99%
     fake_certainty = uniform(0.75, 0.99)
     return formatted_fake_certainty
# func to get a datapoint marked as fraud in the dataset to be passed to the model
def get_random_suspicious_transaction(data):
    """Pick one random row flagged as fraud.

    Args:
        data: pandas DataFrame with an ``isFraud`` column.

    Returns:
        A one-row DataFrame, chosen uniformly among the rows where
        ``isFraud == 1``, with the ``isFraud`` column dropped.

    Raises:
        ValueError: if no row has ``isFraud == 1``.
    """
    suspicious_data = data[data["isFraud"] == 1]
    if suspicious_data.empty:
        raise ValueError("data contains no rows with isFraud == 1")
    random_nr = randrange(len(suspicious_data))
    # Slice [n : n + 1] keeps a one-row DataFrame. The earlier [n - 1 : n]
    # form returned an empty frame whenever randrange produced 0.
    suspicious_transaction = suspicious_data.iloc[random_nr : random_nr + 1]
    return suspicious_transaction.drop("isFraud", axis=1)
+# func to send the evaluation to Deeploy
+def send_evaluation(
+    client, deployment_id, request_log_id, prediction_log_id, evaluation_input
+):
     """Send evaluation to Deeploy."""
     try:
         with st.spinner("Submitting response..."):
             # Submit the evaluation for the logged request/prediction pair
+            client.evaluate(
+                deployment_id, request_log_id, prediction_log_id, evaluation_input
+            )
         return True
     except Exception as e:
         logging.error(e)
         st.write(f"Error message: {e}")
+# func to retrieve model url and important vars for Deeploy client
 def get_model_url():
     """Get model url and retrieve workspace id and deployment id from it"""
     model_url = st.text_area(
         deployment_id = ""
     return model_url, workspace_id, deployment_id
# func to create the prefilled text for the disagree button
def get_comment_explanation(certainty, explainability_texts, explainability_values):
    """Compose the prefilled comment text.

    Args:
        certainty: value shown after "Model certainty is".
        explainability_texts: feature descriptions; any ":" is removed.
        explainability_values: values paired position-wise with the texts.

    Returns:
        A string with the certainty line, a blank line, a header line and
        one "<text> is <value>" line per text/value pair. If the two lists
        differ in length, the surplus entries of the longer one are ignored.
    """
    labels = [text.replace(":", "") for text in explainability_texts]
    lines = ["Important suspicious features: "]
    lines.extend(
        f"{label} is {value}" for label, value in zip(labels, explainability_values)
    )
    return f"Model certainty is {certainty}\n\n" + "\n".join(lines)
# func to create the data input table
def create_data_input_table(data, col_names):
    """Render the "Transaction details" table in Streamlit.

    Args:
        data: list of values for one transaction; it is not modified.
        col_names: labels for the "Feature name" column, one per value.
    """
    st.subheader("Transaction details")
    # Work on a copy so the caller's list keeps its original values.
    values = list(data)
    # Positions 7-11 are shown as True/False instead of their raw value.
    values[7:12] = [bool(value) for value in values[7:12]]
    rounded_list = [
        round(value, 2) if isinstance(value, float) else value for value in values
    ]
    df = pd.DataFrame({"Feature name": col_names, "Value": rounded_list})
    # Height grows with the number of rows (35 per row plus a fixed 38),
    # presumably so the whole table shows without scrolling.
    # Alternative to the fixed width: use_container_width=True.
    st.dataframe(df, hide_index=True, width=475, height=35 * len(df) + 38)
# func to create the explanation table
def create_table(texts, values, weights, title):
    """Render an explanation table in Streamlit under a markdown heading.

    Args:
        texts: entries for the "Feature Explanation" column.
        values: entries for the "Value" column.
        weights: entries for the "Weight" column, drawn as progress bars
            on a 0-1 scale.
        title: heading text shown above the table.
    """
    df = pd.DataFrame(
        {"Feature Explanation": texts, "Value": values, "Weight": weights}
    )
    st.markdown(f"#### {title}")  # Markdown for styling
    weight_column = st.column_config.ProgressColumn(
        "Weight", width="small", format="%.2f", min_value=0, max_value=1
    )
    # Alternative to the fixed width: use_container_width=True.
    st.dataframe(
        df,
        hide_index=True,
        width=475,
        column_config={"Weight": weight_column},
    )
+# func to change button colors
+def ChangeButtonColour(widget_label, font_color, background_color="transparent"):
     htmlstr = f"""
         <script>
             var elements = window.parent.document.querySelectorAll('button');
             }}
         </script>
         """
+    components.html(f"{htmlstr}", height=0, width=0)