llepogam committed on
Commit
fb94ccc
·
1 Parent(s): 08bd7b5

addition of preprocessing endpoint

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py CHANGED
@@ -44,6 +44,10 @@ app = FastAPI(
44
  class PredictionFeatures(BaseModel):
45
  Text: str
46
 
 
 
 
 
47
  @app.get("/", tags=["Introduction Endpoints"])
48
  async def index():
49
  """
@@ -117,3 +121,37 @@ async def predict(predictionFeatures: PredictionFeatures):
117
 
118
  # Format and return the response
119
  return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  class PredictionFeatures(BaseModel):
45
  Text: str
46
 
47
+ class PreprocessingFeatures(BaseModel):
48
+ tweet: str
49
+
50
+
51
  @app.get("/", tags=["Introduction Endpoints"])
52
  async def index():
53
  """
 
121
 
122
  # Format and return the response
123
  return result
124
+
125
+
126
+
127
+ @app.post("/preprocess", tags=["Machine Learning"])
128
+ async def predict(preprocessingFeatures: PreprocessingFeatures):
129
+ """
130
+ This method preprocesses a raw tweet. This intermediate method is needed because the preprocessing cannot simply be included in the prediction model.
131
+
132
+ ### Input
133
+ - `preprocessingFeatures` (PreprocessingFeatures): An object containing the tweet to be preprocessed.
134
+ - `preprocessingFeatures` is a dictionary with 'tweet' as its only key
135
+ - The input text is provided as a string, as the value of the 'tweet' key
136
+
137
+ ### Output
138
+ Returns a dictionary with the following keys:
139
+ - `tweet` (str): Initial tweet.
140
+ - `text_clean` (str): Preprocessed tweet after removal of punctuation and stop words, and lemmatization of the text.
141
+
142
+ """
143
+
144
+ # Wrap the input tweet in a list so it can be turned into a DataFrame
145
+ list_text = [preprocessingFeatures.tweet]
146
+
147
+ # Load model from MLflow
148
+ logged_model = 'runs:/08b34287ab0248f6b199121bff56fc86/text_preprocessor'
149
+ loaded_model = mlflow.pyfunc.load_model(logged_model)
150
+
151
+ df = pd.DataFrame(list_text,columns=['tweet'])
152
+
153
+ # Run the preprocessing model on the tweet
154
+ preprocessed_result = loaded_model.predict(pd.DataFrame(df))
155
+
156
+ # Format and return the response
157
+ return preprocessed_result