Spaces:
Build error
Build error
File size: 5,463 Bytes
6035a21 2131148 6035a21 b9cb6e4 6035a21 1ab42f8 6035a21 e36af4b 6035a21 fb94ccc 6035a21 f2cf38f 6035a21 f2cf38f 08bd7b5 6035a21 f2cf38f b9cb6e4 f2cf38f b9cb6e4 f2cf38f 6035a21 9b6bc2c bcf10bc 9b6bc2c 06a6509 e9c60fb 22ee632 e9c60fb 6035a21 b9cb6e4 6035a21 fb94ccc 17356d1 fb94ccc 2131148 fb94ccc 021c6ba fb94ccc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import mlflow
import uvicorn
import pandas as pd
from pydantic import BaseModel
from typing import Literal, List, Union
from fastapi import FastAPI, File, UploadFile
import joblib
import requests
# Markdown text passed to FastAPI as the API description.
description = """
Welcome to this offensive speech detection API.
It uses the model developped to detect if a tweet has hate speech or not
## Machine Learning
This is a Machine Learning endpoint that predict if a text is a hate speech or not with a certain degree of certainity. Here is the endpoint:
* `/predict` that accepts string value
* `/preprocess` that accepts string value
Check out documentation below 👇 for more information on each endpoint.
"""

# Metadata for the tags used to group endpoints in the generated docs.
tags_metadata = [
    {
        "name": "Machine Learning",
        "description": "Prediction Endpoint.",
    }
]

# The ASGI application; the endpoint decorators below register on it.
app = FastAPI(
    title="Offensive speech detection",
    description=description,
    version="0.1",
    contact={
        "name": "Louis Le Pogam",
        # The OpenAPI contact object defines `name`, `url` and `email`.
        # The previous `mail` key is not one of them, so the address was
        # not published as the contact e-mail.
        "email": "l.lepogam@gmail.com",
    },
    openapi_tags=tags_metadata,
)
# Request body of the `/predict` endpoint.
class PredictionFeatures(BaseModel):
    # Raw text to classify; `predict` reads it as `predictionFeatures.Text`.
    Text: str
# Request body of the `/preprocess` endpoint.
class PreprocessingFeatures(BaseModel):
    # Raw tweet to clean; `preprocess_text` reads it as
    # `preprocessingFeatures.tweet`.
    tweet: str
@app.get("/", tags=["Introduction Endpoints"])
async def index():
    """
    Return a short welcome message that points to the API documentation.
    """
    return "If you want to learn more, check out documentation of the api at `/docs`"
# Process-wide cache of the MLflow models used by `/predict`, keyed by URI.
# `runs:/<run_id>/...` URIs point at artifacts of a finished run, so a model
# loaded once can be reused for every later request.
_PREDICT_MODELS = {}


def _get_predict_model(model_uri):
    """Return the pyfunc model for `model_uri`, loading it only on first use."""
    model = _PREDICT_MODELS.get(model_uri)
    if model is None:
        model = mlflow.pyfunc.load_model(model_uri)
        _PREDICT_MODELS[model_uri] = model
    return model


@app.post("/predict", tags=["Machine Learning"])
async def predict(predictionFeatures: PredictionFeatures):
    """
    Predict whether the provided text contains hate speech.

    ### Input
    - `predictionFeatures` (PredictionFeatures): An object containing the text to be analyzed.
    - `predictionFeatures` is a dictionary with 'Text' as only key
    - The input text is provided as a string as a value of the 'Text' key

    ### Output
    Returns a dictionary with the following keys:
    - `prediction` (str): Indicates whether the text is "offensive" or "not offensive".
    - `probability` (float): A value between 0 and 1, representing the likelihood of hate speech.
    - Texts with a probability strictly greater than 0.5 are classified as "offensive".

    ### Example Usage
    To use this endpoint, send a POST request as follows:
    ```python
    import requests
    url = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "Text": "your text here"
    }
    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    # Step 1: clean the raw text. The preprocessing is repeated here instead
    # of calling the `/preprocess` handler (the original code notes this was
    # done "to avoid issue").
    preprocessor = _get_predict_model(
        'runs:/89b183556cf34460858d94125f8df98d/text_preprocessor'
    )
    raw_frame = pd.DataFrame([predictionFeatures.Text], columns=['tweet'])
    preprocessed = pd.DataFrame(preprocessor.predict(raw_frame))
    clean_text = preprocessed.loc[0, "text_clean"]

    # Step 2: score the cleaned text with the classifier.
    classifier = _get_predict_model(
        'runs:/227d2f8e431d40d6b5231add3a00d048/hate_speech_detection'
    )
    scores = classifier.predict(pd.DataFrame([clean_text]))

    # One text in, so one score out, read at [0][0]. The label is derived
    # from that same scalar so it can never disagree with the probability
    # that is returned next to it.
    probability = float(scores[0][0])
    return {
        "prediction": "offensive" if probability > 0.5 else "not offensive",
        "probability": probability,
    }
# Process-wide cache of the MLflow model used by `/preprocess`, keyed by URI.
# `runs:/<run_id>/...` URIs point at artifacts of a finished run, so the
# model loaded once can be reused for every later request.
_PREPROCESS_MODELS = {}


def _get_preprocess_model(model_uri):
    """Return the pyfunc model for `model_uri`, loading it only on first use."""
    model = _PREPROCESS_MODELS.get(model_uri)
    if model is None:
        model = mlflow.pyfunc.load_model(model_uri)
        _PREPROCESS_MODELS[model_uri] = model
    return model


@app.post("/preprocess", tags=["Machine Learning"])
async def preprocess_text(preprocessingFeatures: PreprocessingFeatures):
    """
    Preprocess a raw tweet. This intermediate method is used as the preprocessing cannot be simply included in the prediction model.

    ### Input
    - `preprocessingFeatures` (PreprocessingFeatures): An object containing the tweet to be preprocessed.
    - `preprocessingFeatures` is a dictionary with 'tweet' as only key
    - The input text is provided as a string as a value of the 'tweet' key

    ### Output
    Returns the output of the preprocessing model, with the following keys:
    - `tweet` (str): Initial tweet.
    - `text_clean` (str): Preprocessed tweet after removal of punctuation and stop words and text lemmatization.

    ### Example Usage
    To use this endpoint, send a POST request as follows:
    ```python
    import requests
    url = "https://llepogam-hate-speech-detection-api.hf.space/preprocess"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "tweet": "@user this is the tweet which i want to preprocess ! #machinelearning #prediction"
    }
    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    preprocessor = _get_preprocess_model(
        'runs:/89b183556cf34460858d94125f8df98d/text_preprocessor'
    )
    # The model is fed a one-row frame with a single 'tweet' column.
    tweet_frame = pd.DataFrame([preprocessingFeatures.tweet], columns=['tweet'])
    # The model output is returned unchanged; FastAPI serializes it.
    return preprocessor.predict(tweet_frame)
|