# NOTE: page-scrape residue, not part of the program: "Spaces: Build error / Build error".
import functools
from typing import Literal, List, Union

import joblib
import mlflow
import pandas as pd
import requests
import uvicorn
from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel
# Markdown text passed to FastAPI as the API description (see `app` below).
description = """
Welcome to this offensive speech detection API.
It uses the model developped to detect if a tweet has hate speech or not
## Machine Learning
This is a Machine Learning endpoint that predict if a text is a hate speech or not with a certain degree of certainity. Here is the endpoint:
* `/predict` that accepts string value
* `/preprocess` that accepts string value
Check out documentation below 👇 for more information on each endpoint.
"""

# Tag descriptions passed to FastAPI through `openapi_tags`.
tags_metadata = [
    {
        "name": "Machine Learning",
        "description": "Prediction Endpoint.",
    },
]

# The application object.
app = FastAPI(
    title="Offensive speech detection",
    description=description,
    version="0.1",
    contact={
        "name": "Louis Le Pogam",
        # The OpenAPI contact object names this field "email"; the previous
        # "mail" key is not a recognised contact field.
        "email": "l.lepogam@gmail.com",
    },
    openapi_tags=tags_metadata,
)
# Payload consumed by `predict`: a single field holding the raw text to classify.
class PredictionFeatures(BaseModel):
    Text: str
# Payload consumed by `preprocess_text`: a single field holding the raw tweet.
class PreprocessingFeatures(BaseModel):
    tweet: str
# Registered on the root path so the welcome message is actually served;
# without a route decorator the handler is never attached to `app`.
# NOTE(review): "/" is inferred from the handler's name and message — confirm.
@app.get("/")
async def index():
    """
    Simply returns a welcome message!
    """
    return "If you want to learn more, check out documentation of the api at `/docs`"
# MLflow URIs of the two logged pyfunc models used by the endpoints below.
_PREPROCESSOR_MODEL_URI = 'runs:/89b183556cf34460858d94125f8df98d/text_preprocessor'
_CLASSIFIER_MODEL_URI = 'runs:/227d2f8e431d40d6b5231add3a00d048/hate_speech_detection'


@functools.lru_cache(maxsize=None)
def _load_model(model_uri):
    """Load an MLflow pyfunc model, keeping it in memory for later requests.

    The handlers used to call `mlflow.pyfunc.load_model` on every request.
    Caching by URI means each model is fetched and deserialised only once per
    process. A failed load raises and is not cached, so the next request
    retries it.
    """
    return mlflow.pyfunc.load_model(model_uri)


def _run_preprocessor(tweet):
    """Run the logged text preprocessor on one raw tweet and return its output.

    The model is fed a one-row DataFrame with a single `tweet` column. Whatever
    the model's `predict` returns is handed back unchanged.
    """
    frame = pd.DataFrame([tweet], columns=['tweet'])
    return _load_model(_PREPROCESSOR_MODEL_URI).predict(frame)


# NOTE(review): model loading and inference below are synchronous calls made
# from `async` handlers, so they are not awaited.
@app.post("/predict", tags=["Machine Learning"])
async def predict(predictionFeatures: PredictionFeatures):
    """
    Predict whether the provided text contains hate speech.

    ### Input
    - `predictionFeatures` (PredictionFeatures): An object containing the text to be analyzed.
      - `predictionFeatures` is a dictionary with 'Text' as its only key
      - The input text is provided as a string as the value of the 'Text' key

    ### Output
    Returns a dictionary with the following keys:
    - `prediction` (str): Indicates whether the text is "offensive" or "not offensive".
    - `probability` (float): A value between 0 and 1, representing the likelihood of hate speech.
      - Texts with a probability strictly greater than 0.5 are classified as "offensive".

    ### Example Usage
    To use this endpoint, send a POST request as follows:
    ```python
    import requests
    url = "https://llepogam-hate-speech-detection-api.hf.space/predict"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "Text": "your text here"
    }
    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    # Preprocess in-process with the same helper `preprocess_text` uses,
    # instead of keeping a second copy of that code here.
    preprocessed = pd.DataFrame(_run_preprocessor(predictionFeatures.Text))
    clean_text = preprocessed.loc[0, "text_clean"]

    # The classifier receives a one-row, one-column DataFrame of cleaned text.
    probabilities = _load_model(_CLASSIFIER_MODEL_URI).predict(pd.DataFrame([clean_text]))

    # Only one text is scored, so read the single score directly rather than
    # thresholding the whole array and iterating over it.
    # NOTE(review): assumes the classifier output is indexable as [row][column] — confirm.
    probability = float(probabilities[0][0])

    return {
        "prediction": "offensive" if probability > 0.5 else "not offensive",
        "probability": probability,
    }


@app.post("/preprocess", tags=["Machine Learning"])
async def preprocess_text(preprocessingFeatures: PreprocessingFeatures):
    """
    Preprocess a raw tweet. This intermediate method exists because the preprocessing cannot simply be included in the prediction model.

    ### Input
    - `preprocessingFeatures` (PreprocessingFeatures): An object containing the tweet to be preprocessed.
      - `preprocessingFeatures` is a dictionary with 'tweet' as its only key
      - The input text is provided as a string as the value of the 'tweet' key

    ### Output
    Returns a dictionary with the following keys:
    - `tweet` (str): Initial tweet.
    - `text_clean` (str): Preprocessed tweet after removal of punctuation and stop words and text lemmatization.

    ### Example Usage
    To use this endpoint, send a POST request as follows:
    ```python
    import requests
    url = "https://llepogam-hate-speech-detection-api.hf.space/preprocess"
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "tweet": "@user this is the tweet which i want to preprocess ! #machinelearning #prediction"
    }
    response = requests.post(url, headers=headers, json=data)
    print(response.json())
    ```
    """
    # NOTE(review): the preprocessor's output is returned as-is; its exact type
    # and keys come from the logged model — confirm they match the docs above.
    return _run_preprocessor(preprocessingFeatures.tweet)