Spaces:
Sleeping
Sleeping
Updated app.py and utils.py
Browse files
- src/app/app.py +19 -33
- src/utils.py +20 -28
src/app/app.py
CHANGED
|
@@ -17,11 +17,13 @@ from typing import List
|
|
| 17 |
# Create an instance of FastAPI
|
| 18 |
app = FastAPI(debug=True)
|
| 19 |
|
|
|
|
| 20 |
DIRPATH = os.path.dirname(os.path.realpath(__file__))
|
| 21 |
|
|
|
|
| 22 |
model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
|
| 23 |
transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
|
| 24 |
-
properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', '
|
| 25 |
|
| 26 |
|
| 27 |
# Load the trained model, pipeline, and other properties
|
|
@@ -46,16 +48,15 @@ def check_health():
|
|
| 46 |
# Model information endpoint
|
| 47 |
@app.post('/model-info')
|
| 48 |
async def model_info():
|
| 49 |
-
model_name = model.__class__.__name__
|
| 50 |
-
model_params = model.get_params()
|
| 51 |
-
features = properties['train features']
|
| 52 |
-
print(features)
|
| 53 |
model_information = {'model info': {
|
| 54 |
'model name ': model_name,
|
| 55 |
'model parameters': model_params,
|
| 56 |
'train feature': features}
|
| 57 |
}
|
| 58 |
-
return model_information
|
| 59 |
|
| 60 |
|
| 61 |
# Prediction endpoint
|
|
@@ -70,14 +71,9 @@ async def predict(plasma_glucose: float, blood_work_result_1: float,
|
|
| 70 |
blood_work_result_2,blood_work_result_3,body_mass_index,
|
| 71 |
blood_work_result_4, age,insurance]], columns=return_columns())
|
| 72 |
|
| 73 |
-
data_copy = data.copy() # Create a copy of the dataframe
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
data_copy['Predicted Label'] = data_copy.apply(process_label, axis=1)
|
| 77 |
-
inputs = data.to_dict('index') # Convert dataframe to dictionary
|
| 78 |
-
outputs = data_copy[['Predicted Label']].to_dict('index')
|
| 79 |
-
response = {'inputs': inputs,
|
| 80 |
-
'outputs': outputs}
|
| 81 |
return response
|
| 82 |
|
| 83 |
|
|
@@ -88,11 +84,7 @@ async def predict_batch(inputs: Inputs):
|
|
| 88 |
data = pd.DataFrame(inputs.return_dict_inputs())
|
| 89 |
data_copy = data.copy() # Create a copy of the data
|
| 90 |
labels, probs = make_prediction(data, transformer, model) # Get the labels
|
| 91 |
-
|
| 92 |
-
data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1)
|
| 93 |
-
|
| 94 |
-
response = output_batch(data, data_labels)
|
| 95 |
-
|
| 96 |
return response
|
| 97 |
|
| 98 |
|
|
@@ -100,25 +92,19 @@ async def predict_batch(inputs: Inputs):
|
|
| 100 |
# Upload data endpoint
|
| 101 |
@app.post("/upload-data")
|
| 102 |
async def upload_data(file: UploadFile = File(...)):
|
| 103 |
-
file_type = file.content_type
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
valid_formats = ['text/csv', 'application/json']
|
| 107 |
-
|
| 108 |
if file_type not in valid_formats:
|
| 109 |
-
return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"})
|
| 110 |
|
| 111 |
else:
|
| 112 |
-
contents = await file.read()
|
| 113 |
-
data= process_json_csv(contents=contents,file_type=file_type, valid_formats=valid_formats)
|
| 114 |
-
data_copy = data.copy() # Create a copy of the data
|
| 115 |
labels, probs = make_prediction(data, transformer, model) # Get the labels
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
# print(data_dict.index)
|
| 120 |
|
| 121 |
-
return {'outputs': data_dict}
|
| 122 |
|
| 123 |
# Run the FastAPI application
|
| 124 |
if __name__ == '__main__':
|
|
|
|
| 17 |
# Create an instance of FastAPI
|
| 18 |
app = FastAPI(debug=True)
|
| 19 |
|
| 20 |
+
# get absolute path
|
| 21 |
DIRPATH = os.path.dirname(os.path.realpath(__file__))
|
| 22 |
|
| 23 |
+
# set path for pickle files
|
| 24 |
model_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'model-1.pkl')
|
| 25 |
transformer_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'preprocessor.pkl')
|
| 26 |
+
properties_path = os.path.join(DIRPATH, '..', 'assets', 'ml_components', 'other-components.pkl')
|
| 27 |
|
| 28 |
|
| 29 |
# Load the trained model, pipeline, and other properties
|
|
|
|
| 48 |
# Model information endpoint
|
| 49 |
@app.post('/model-info')
|
| 50 |
async def model_info():
|
| 51 |
+
model_name = model.__class__.__name__ # get model name
|
| 52 |
+
model_params = model.get_params() # get model parameters
|
| 53 |
+
features = properties['train features'] # get training features
|
|
|
|
| 54 |
model_information = {'model info': {
|
| 55 |
'model name ': model_name,
|
| 56 |
'model parameters': model_params,
|
| 57 |
'train feature': features}
|
| 58 |
}
|
| 59 |
+
return model_information # return model information
|
| 60 |
|
| 61 |
|
| 62 |
# Prediction endpoint
|
|
|
|
| 71 |
blood_work_result_2,blood_work_result_3,body_mass_index,
|
| 72 |
blood_work_result_4, age,insurance]], columns=return_columns())
|
| 73 |
|
| 74 |
+
# data_copy = data.copy() # Create a copy of the dataframe
|
| 75 |
+
labels, prob = make_prediction(data, transformer, model) # Get the labels
|
| 76 |
+
response = output_batch(data, labels) # output results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
return response
|
| 78 |
|
| 79 |
|
|
|
|
| 84 |
data = pd.DataFrame(inputs.return_dict_inputs())
|
| 85 |
data_copy = data.copy() # Create a copy of the data
|
| 86 |
labels, probs = make_prediction(data, transformer, model) # Get the labels
|
| 87 |
+
response = output_batch(data, labels) # output results
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
return response
|
| 89 |
|
| 90 |
|
|
|
|
| 92 |
# Upload data endpoint
|
| 93 |
@app.post("/upload-data")
|
| 94 |
async def upload_data(file: UploadFile = File(...)):
|
| 95 |
+
file_type = file.content_type # get the type of the uploaded file
|
| 96 |
+
valid_formats = ['text/csv', 'application/json'] # list of file formats the API accepts
|
|
|
|
|
|
|
|
|
|
| 97 |
if file_type not in valid_formats:
|
| 98 |
+
return JSONResponse(content={"error": f"Invalid file format. Must be one of: {', '.join(valid_formats)}"}) # return an error if file type is not included in the valid formats
|
| 99 |
|
| 100 |
else:
|
| 101 |
+
contents = await file.read() # read contents in file
|
| 102 |
+
data= process_json_csv(contents=contents,file_type=file_type, valid_formats=valid_formats) # process files
|
|
|
|
| 103 |
labels, probs = make_prediction(data, transformer, model) # Get the labels
|
| 104 |
+
response = output_batch(data, labels) # output results
|
| 105 |
+
|
| 106 |
+
return response
|
|
|
|
| 107 |
|
|
|
|
| 108 |
|
| 109 |
# Run the FastAPI application
|
| 110 |
if __name__ == '__main__':
|
src/utils.py
CHANGED
|
@@ -2,17 +2,12 @@ import pandas as pd
|
|
| 2 |
import numpy as np
|
| 3 |
import pickle
|
| 4 |
from io import StringIO
|
| 5 |
-
from
|
| 6 |
-
# from cachetools import cached, TTLCache
|
| 7 |
|
| 8 |
-
|
| 9 |
-
# cache = TTLCache(maxsize=5, ttl=3600,) # Cache with a maximum size of 5 and a TTL of 1 hour
|
| 10 |
-
|
| 11 |
-
# # # Load the model
|
| 12 |
-
# @cached(cache)
|
| 13 |
def load_pickle(filename):
|
| 14 |
-
with open(filename, 'rb') as file:
|
| 15 |
-
contents = pickle.load(file)
|
| 16 |
return contents
|
| 17 |
|
| 18 |
|
|
@@ -52,14 +47,14 @@ def combine_cats_nums(transformed_data, full_pipeline):
|
|
| 52 |
|
| 53 |
def make_prediction(data, transformer, model):
|
| 54 |
new_columns = return_columns()
|
| 55 |
-
dict_new_old_cols = dict(zip(data.columns, new_columns))
|
| 56 |
data = data.rename(columns=dict_new_old_cols)
|
| 57 |
feature_engineering(data) # create new features
|
| 58 |
transformed_data = transformer.transform(data) # transform the data using the transformer
|
| 59 |
combine_cats_nums(transformed_data, transformer)# create a dataframe from the transformed data
|
| 60 |
# make prediction
|
| 61 |
label = model.predict(transformed_data) # make a prediction
|
| 62 |
-
probs = model.predict_proba(transformed_data)
|
| 63 |
return label, probs.max()
|
| 64 |
|
| 65 |
|
|
@@ -70,6 +65,7 @@ def process_label(row):
|
|
| 70 |
return 'Sepsis status is Positive'
|
| 71 |
elif row['Predicted Label'] == 0:
|
| 72 |
return 'Sepsis status is Negative'
|
|
|
|
| 73 |
|
| 74 |
def return_columns():
|
| 75 |
# create new columns
|
|
@@ -84,29 +80,25 @@ def process_json_csv(contents, file_type, valid_formats):
|
|
| 84 |
# Read the file contents as a byte string
|
| 85 |
contents = contents.decode() # Decode the byte string to a regular string
|
| 86 |
new_columns = return_columns() # return new_columns
|
| 87 |
-
if file_type == valid_formats[0]:
|
| 88 |
-
data = pd.read_csv(StringIO(contents))
|
| 89 |
# Process the uploaded file
|
|
|
|
|
|
|
| 90 |
elif file_type == valid_formats[1]:
|
| 91 |
-
data = pd.read_json(contents)
|
| 92 |
-
data = data.drop(columns=['ID'])
|
| 93 |
dict_new_old_cols = dict(zip(data.columns, new_columns)) # get dict of new and old cols
|
| 94 |
-
data = data.rename(columns=dict_new_old_cols)
|
| 95 |
return data
|
| 96 |
|
| 97 |
|
| 98 |
-
def output_batch(data1,
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
#
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
for row1, row2 in zip(data1.itertuples(index=False), data2.itertuples(index=False)):
|
| 107 |
-
dictionary_from_dataframe1 = row1._asdict()
|
| 108 |
-
dictionary_from_dataframe2 = row2._asdict()
|
| 109 |
-
results_list.append({'input': dictionary_from_dataframe1, 'output': dictionary_from_dataframe2})
|
| 110 |
|
| 111 |
final_dict = {'results': results_list}
|
| 112 |
return final_dict
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import pickle
|
| 4 |
from io import StringIO
|
| 5 |
+
from functools import lru_cache
|
|
|
|
| 6 |
|
| 7 |
+
@lru_cache(maxsize=100, )
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def load_pickle(filename):
|
| 9 |
+
with open(filename, 'rb') as file: # read file
|
| 10 |
+
contents = pickle.load(file) # load contents of file
|
| 11 |
return contents
|
| 12 |
|
| 13 |
|
|
|
|
| 47 |
|
| 48 |
def make_prediction(data, transformer, model):
|
| 49 |
new_columns = return_columns()
|
| 50 |
+
dict_new_old_cols = dict(zip(data.columns, new_columns)) # create a dict of original columns and new columns
|
| 51 |
data = data.rename(columns=dict_new_old_cols)
|
| 52 |
feature_engineering(data) # create new features
|
| 53 |
transformed_data = transformer.transform(data) # transform the data using the transformer
|
| 54 |
combine_cats_nums(transformed_data, transformer)# create a dataframe from the transformed data
|
| 55 |
# make prediction
|
| 56 |
label = model.predict(transformed_data) # make a prediction
|
| 57 |
+
probs = model.predict_proba(transformed_data) # predict sepsis status probabilities for inputs
|
| 58 |
return label, probs.max()
|
| 59 |
|
| 60 |
|
|
|
|
| 65 |
return 'Sepsis status is Positive'
|
| 66 |
elif row['Predicted Label'] == 0:
|
| 67 |
return 'Sepsis status is Negative'
|
| 68 |
+
|
| 69 |
|
| 70 |
def return_columns():
|
| 71 |
# create new columns
|
|
|
|
| 80 |
# Read the file contents as a byte string
|
| 81 |
contents = contents.decode() # Decode the byte string to a regular string
|
| 82 |
new_columns = return_columns() # return new_columns
|
|
|
|
|
|
|
| 83 |
# Process the uploaded file
|
| 84 |
+
if file_type == valid_formats[0]:
|
| 85 |
+
data = pd.read_csv(StringIO(contents)) # read csv files
|
| 86 |
elif file_type == valid_formats[1]:
|
| 87 |
+
data = pd.read_json(contents) # read json file
|
| 88 |
+
data = data.drop(columns=['ID']) # drop ID column
|
| 89 |
dict_new_old_cols = dict(zip(data.columns, new_columns)) # get dict of new and old cols
|
| 90 |
+
data = data.rename(columns=dict_new_old_cols) # rename columns to the expected column names
|
| 91 |
return data
|
| 92 |
|
| 93 |
|
| 94 |
+
def output_batch(data1, labels):
|
| 95 |
+
data_labels = pd.DataFrame(labels, columns=['Predicted Label']) # convert label into a dataframe
|
| 96 |
+
data_labels['Predicted Label'] = data_labels.apply(process_label, axis=1) # change labels to human-readable strings
|
| 97 |
+
results_list = [] # create an empty list
|
| 98 |
+
x = data1.to_dict('index') # convert dataframe into a dictionary keyed by row index
|
| 99 |
+
y = data_labels.to_dict('index') # convert dataframe into a dictionary keyed by row index
|
| 100 |
+
for i in range(len(y)):
|
| 101 |
+
results_list.append({i:{'inputs': x[i], 'output':y[i]}}) # append input and labels
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
final_dict = {'results': results_list}
|
| 104 |
return final_dict
|