import functools
from typing import Literal, List, Union

import joblib
import mlflow
import pandas as pd
import requests
import uvicorn
from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel

description = """
Welcome to this offensive speech detection API. It uses the model developped to detect if a tweet has hate speech or not

## Machine Learning

This is a Machine Learning endpoint that predict if a text is a hate speech or not with a certain degree of certainity. Here is the endpoint:

* `/predict` that accepts string value
* `/preprocess` that accepts string value

Check out documentation below 👇 for more information on each endpoint.
"""

tags_metadata = [
    {
        "name": "Machine Learning",
        "description": "Prediction Endpoint.",
    }
]

app = FastAPI(
    title="Offensive speech detection",
    description=description,
    version="0.1",
    contact={
        "name": "Louis Le Pogam",
        # The OpenAPI contact object uses the key "email" (not "mail").
        "email": "l.lepogam@gmail.com",
    },
    openapi_tags=tags_metadata,
)

# MLflow run URIs of the two logged models used by the endpoints.
PREPROCESSOR_MODEL_URI = "runs:/89b183556cf34460858d94125f8df98d/text_preprocessor"
CLASSIFIER_MODEL_URI = "runs:/227d2f8e431d40d6b5231add3a00d048/hate_speech_detection"

# Scores strictly above this value are labelled "offensive".
OFFENSIVE_THRESHOLD = 0.5


class PredictionFeatures(BaseModel):
    # Raw text to classify.
    Text: str


class PreprocessingFeatures(BaseModel):
    # Raw tweet to preprocess.
    tweet: str


@functools.lru_cache(maxsize=None)
def _load_model(model_uri: str):
    """Load an MLflow pyfunc model once and reuse it for later requests.

    The model is loaded lazily on first use. A failed load raises and is not
    cached, so the next request retries.
    """
    return mlflow.pyfunc.load_model(model_uri)


def _run_preprocessor(text: str):
    """Run the text preprocessor model on a single raw text.

    The text is wrapped in a one-row DataFrame with a single `tweet` column,
    which is the input both endpoints feed to the preprocessor. The model
    output is returned unchanged.
    """
    frame = pd.DataFrame([text], columns=["tweet"])
    return _load_model(PREPROCESSOR_MODEL_URI).predict(frame)


@app.get("/", tags=["Introduction Endpoints"])
async def index():
    """
    Simply returns a welcome message!
    """
    message = "If you want to learn more, check out documentation of the api at `/docs`"
    return message


@app.post("/predict", tags=["Machine Learning"])
async def predict(predictionFeatures: PredictionFeatures):
    """
    Predict whether the provided text contains hate speech.

    ### Input
    - `predictionFeatures` (PredictionFeatures): An object containing the text to be analyzed.
    - `predictionFeatures` is a dictionary with 'Text' as only key
    - The input text is provided as a string as a value of the 'Text' key

    ### Output
    Returns a dictionary with the following keys:
    - `prediction` (str): Indicates whether the text is "offensive" or "not offensive".
    - `probability` (float): The score returned by the model, representing the likelihood of hate speech.
    - Texts with a probability > 0.5 are classified as "offensive".

    ### Example Usage
    To use this endpoint, send a POST request as follows:

    ```python
    import requests

    url = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "Text": "your text here"
    }

    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    # Clean the raw text with the same preprocessor the /preprocess endpoint uses.
    preprocessed = pd.DataFrame(_run_preprocessor(predictionFeatures.Text))
    clean_text = preprocessed.loc[0, "text_clean"]

    # Score the cleaned text; the first value of the first row is the score.
    scores = _load_model(CLASSIFIER_MODEL_URI).predict(pd.DataFrame([clean_text]))
    probability = float(scores[0][0])

    # Derive the label from the same scalar that is returned to the caller.
    label = "offensive" if probability > OFFENSIVE_THRESHOLD else "not offensive"

    return {
        "prediction": label,
        "probability": probability,
    }


@app.post("/preprocess", tags=["Machine Learning"])
async def preprocess_text(preprocessingFeatures: PreprocessingFeatures):
    """
    This method will preprocess a raw tweet. This intermediate method is used as the
    preprocessing cannot be simply included in the prediction model.

    ### Input
    - `preprocessingFeatures` (PreprocessingFeatures): An object containing the tweet to be preprocessed.
    - `preprocessingFeatures` is a dictionary with 'tweet' as only key
    - The input text is provided as a string as a value of the 'tweet' key

    ### Output
    Returns the output of the preprocessing model, with the following keys:
    - `tweet` (str): Initial tweet.
    - `text_clean` (str): Preprocessed tweet after removal of punctuation and stop words and text lemmatization.

    ### Example Usage
    To use this endpoint, send a POST request as follows:

    ```python
    import requests

    url = "https://llepogam-hate-speech-detection-api.hf.space/preprocess"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "tweet": "@user this is the tweet which i want to preprocess ! #machinelearning #prediction"
    }

    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    # The preprocessor output is returned as-is; FastAPI serializes it.
    return _run_preprocessor(preprocessingFeatures.tweet)