Upload folder using huggingface_hub
Files changed:

- .gitignore +0 -0
- Dockerfile +11 -6
- main/helper.py +0 -43
- main/{model_inference.py → inference.py} +36 -14
- main/{validate_schema.py → schema.py} +2 -3
- main/utils.py +84 -0
- requirements.txt +3 -1
.gitignore (DELETED; file without content changes)
Dockerfile (CHANGED)

```diff
@@ -1,15 +1,20 @@
 FROM python:3.11.11-slim-bookworm
 
-RUN apt-get update && apt-get upgrade -y
+RUN apt-get update && apt-get upgrade -y && \
+    apt-get install --no-install-recommends -y build-essential && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
 
 COPY . /app
 
-RUN
-    pip install -r model/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt -r model/requirements.txt
+
+RUN useradd -m appuser
+USER appuser
 
 EXPOSE 7860
+ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1
 
-CMD ["uvicorn", "main.model_inference:inference_api", "--
+CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.model_inference:inference_api", "--bind", "0.0.0.0:7860"]
```
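Once an image built from this Dockerfile is running with port 7860 published (for example via `docker run -p 7860:7860 ...`), the two routes defined in the API can be exercised from any HTTP client. The sketch below is a minimal smoke test, not part of the Space itself; the localhost URL and the example comment text are assumptions.

```python
# Minimal smoke test against a locally running container.
# Assumption: the container is reachable at http://localhost:7860.
import requests

BASE_URL = "http://localhost:7860"

# Root route: returns {"status": 200, "message": "Inference API active."}
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Prediction route: the body must match the InputData schema ("comment" field).
payload = {"comment": "example tweet text"}
print(requests.post(f"{BASE_URL}/get_prediction", json=payload, timeout=30).json())
```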
main/helper.py (DELETED)

```python
# Helper functions for the model inference api

import yaml
import joblib
import pandas as pd
from pathlib import Path

# load yaml files to get model meta data.
try:
    with open(Path("model/registered_model_meta"), 'r') as f:
        model_metadata = yaml.safe_load(f)
except:
    raise FileNotFoundError("Failed to load file having model metadata")


def load_model():
    """ Loads ML model from location path and returns the model. """
    try:
        with open(Path("model/python_model.pkl"), "rb") as f:
            model = joblib.load(f)
            return model

    except Exception as e:
        raise RuntimeError(f"Failed to load model from hub: {e}")


def get_model_registry():
    """ Fetches the model registry name and returns it. """
    model_registry = model_metadata['model_name']
    return model_registry


def get_model_version():
    """ Fetches the model version and returns it. """
    model_version = model_metadata['model_version']
    return model_version


def format_model_input(tweet: str) -> pd.DataFrame:
    df = pd.DataFrame({
        "comments": [tweet]
    })
    return df
```
main/{model_inference.py → inference.py} (RENAMED)

```diff
@@ -1,37 +1,57 @@
 from fastapi import FastAPI
 from fastapi.responses import JSONResponse
-from main.
+from hf_serve_api.main.schema import InputData, APIResponse
 from datetime import datetime
-from main.
+from main.utils import *
 import uuid, time
 
+load_model()
 
-# Initializing fastapi
 inference_api = FastAPI()
 
 @inference_api.get("/")
-def
+def status():
+    """
+    Status endpoint for the model inference API.
+
+    Returns a JSON response with a status of 200 and a message indicating
+    that the API is active.
+    """
     return JSONResponse(content={
         "status": 200,
-        "message": "Inference API
+        "message": "Inference API active."
     })
 
 
 @inference_api.post('/get_prediction', response_model=APIResponse)
-def
+def api_response(payload: InputData):
+    """
+    Inference endpoint for getting prediction from the model.
+
+    This endpoint accepts a POST request with a JSON payload containing the text to be classified.
+    The response is a JSON object with the model prediction, confidence score, and other metadata.
+
+    :param payload: InputData object containing the text to be classified.
+    :return: APIResponse object containing the model prediction, confidence score,
+             and other metadata.
+    """
     timestamp = datetime.now().astimezone().isoformat()
     request_id = str(uuid.uuid4())
     start_time = time.perf_counter()
 
     tweet = payload.comment
+    explainer = LimeExplainer()
+    explaination = explainer.explain(tweet)
+    prediction = explainer.prediction
 
+    if prediction is not None:
+        label = int(prediction["class_label"][0])
+        probability_scores = prediction["class_probability_scores"][0]
+        proba_class0 = float(probability_scores[0])
+        proba_class1 = float(probability_scores[1])
+    else:
+        raise ValueError("Model prediction could not be made.")
 
     end_time = time.perf_counter()
 
@@ -53,6 +73,7 @@ def api(payload: InputData):
                 "0": round(proba_class0, 4),
                 "1": round(proba_class1, 4)
             },
+            "explaination": explaination
         },
         "metadata": {
             "request_id": request_id,
@@ -67,7 +88,8 @@ def api(payload: InputData):
             "model_version": get_model_version(),
             "vectorizer": type(model.vectorizer).__name__,
             "model_registry": f"Mlflow {get_model_registry()}",
-            "type": "
+            "type": "Production",
+            "explainer_varient": "LimeTextExplainer",
             "streamable": False,
             "api_version": "v-1.0",
             "developer": "Subinoy Bera"
```
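The endpoint unpacks the dictionary that `LimeExplainer` stores in `self.prediction` after calling `model.predict`. Below is a minimal sketch of that unpacking run against a dummy prediction dict; the key names come from the diff, while the values are illustrative stand-ins rather than real model output.

```python
# Dummy stand-in for explainer.prediction (shape inferred from the diff).
prediction = {
    "class_label": [1],
    "class_probability_scores": [[0.2, 0.8]],
}

# Same branch as in api_response(): pull the label and per-class probabilities.
if prediction is not None:
    label = int(prediction["class_label"][0])
    probability_scores = prediction["class_probability_scores"][0]
    proba_class0 = float(probability_scores[0])
    proba_class1 = float(probability_scores[1])
else:
    raise ValueError("Model prediction could not be made.")

print(label, round(proba_class0, 4), round(proba_class1, 4))  # 1 0.2 0.8
```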
main/{validate_schema.py → schema.py} (RENAMED)

```diff
@@ -1,11 +1,11 @@
+# Schema validation for the API response
+
 from pydantic import BaseModel, Field
 from typing import Annotated, Dict
 
-
 class InputData(BaseModel):
     comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]
 
-
 class Prediction(BaseModel):
     class_label: int
     confidence: float
@@ -26,7 +26,6 @@ class MetaData(BaseModel):
     api_version: str
     developer: str
 
-
 class APIResponse(BaseModel):
     response: Prediction
     metadata: MetaData
```
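FastAPI validates the request body against `InputData` before `api_response()` runs. The sketch below reproduces that validation in isolation; it copies the field definition from schema.py and assumes the Pydantic v2 API that FastAPI 0.116 pulls in (the example text is illustrative).

```python
# Standalone validation sketch mirroring InputData from main/schema.py.
from typing import Annotated
from pydantic import BaseModel, Field, ValidationError

class InputData(BaseModel):
    comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]

# A well-formed payload passes validation.
print(InputData(comment="some tweet").model_dump())  # {'comment': 'some tweet'}

# A payload missing "comment" is rejected, which FastAPI turns into a 422.
try:
    InputData()
except ValidationError as e:
    print(e.errors()[0]["type"])  # 'missing' under Pydantic v2
```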
main/utils.py (ADDED)

```python
# Utility functions for the model inference api

import yaml
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from lime.lime_text import LimeTextExplainer

# load yaml files to get model meta data.
try:
    with open(Path("model/registered_model_meta"), 'r') as f:
        model_metadata = yaml.safe_load(f)
except:
    raise FileNotFoundError("Failed to load file having model metadata")


class LimeExplainer:
    def __init__(self):
        """
        Initializes an instance of LimeExplainer.

        Sets the class names for the explainer and initializes the LimeTextExplainer.
        Also initializes the model prediction attribute to None.
        """
        class_names = ["hate", "non-hate"]
        self.explainer = LimeTextExplainer(class_names=class_names)
        self.prediction = None

    def _get_prediction_explaination(self, tweet) -> np.ndarray:
        """
        Internal function to get prediction from the model and class probability scores
        for lime explainer.
        """
        input_df = pd.DataFrame({
            "comments": tweet
        })
        self.prediction = model.predict(context=None, model_input=input_df)
        return np.array(self.prediction["class_probability_scores"])

    def explain(self, tweet) -> dict:
        """
        Generate lime explanation for a given tweet.

        Parameters
            tweet: str : Input tweet or comment to be classified.

        Returns
            dict : A dictionary with words as keys and their corresponding weightage.
        """
        explanation = self.explainer.explain_instance(
            tweet,
            self._get_prediction_explaination,
            num_features=5
        )
        return round_dict_values(dic = dict(explanation.as_list()))


def load_model():
    """Loads ML model from location path and returns the model."""
    try:
        with open(Path("model/python_model.pkl"), "rb") as f:
            global model
            model = joblib.load(f)

    except Exception as e:
        raise RuntimeError(f"Failed to load model from hub: {e}")


def get_model_registry() -> str:
    """Fetches the model registry name and returns it."""
    model_registry = model_metadata['model_name']
    return model_registry


def get_model_version() -> str:
    """Fetches the model version and returns it."""
    model_version = model_metadata['model_version']
    return model_version


def round_dict_values(dic) -> dict:
    """Rounds all values in a dictionary to 4 decimal places."""
    return {str(k): round(v, 4) for k, v in dic.items()}
```
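`LimeExplainer.explain()` works by handing `LimeTextExplainer.explain_instance` a function that maps a list of perturbed texts to per-class probabilities, then flattening `explanation.as_list()` into a rounded word-to-weight dict. The sketch below shows that flow with a toy probability function standing in for the real MLflow model; `fake_predict_proba`, the keyword trigger, and the example comment are assumptions for illustration only.

```python
# LIME flow used by main/utils.py, with a stand-in classifier.
import numpy as np
from lime.lime_text import LimeTextExplainer

def fake_predict_proba(texts):
    # Toy scorer: texts containing "awful" lean toward the "hate" class,
    # playing the role of _get_prediction_explaination().
    probs = []
    for t in texts:
        p_hate = 0.8 if "awful" in t.lower() else 0.2
        probs.append([p_hate, 1.0 - p_hate])
    return np.array(probs)

explainer = LimeTextExplainer(class_names=["hate", "non-hate"])
explanation = explainer.explain_instance(
    "what an awful and hateful comment",
    fake_predict_proba,
    num_features=5,
)

# as_list() yields (word, weight) pairs; round_dict_values() turns them into
# the rounded dict that LimeExplainer.explain() returns to the endpoint.
print({str(word): round(weight, 4) for word, weight in explanation.as_list()})
```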
requirements.txt (CHANGED)

```diff
@@ -1,4 +1,6 @@
 fastapi==0.116.1
 uvicorn==0.35.0
 joblib==1.5.1
-PyYAML==6.0.2
+PyYAML==6.0.2
+lime==0.2.0.1
+gunicorn==23.0.0
```
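A quick way to confirm the pinned dependencies actually resolve inside the built image is to query installed versions at runtime. This check is not part of the Space; the package list simply mirrors requirements.txt.

```python
# Print the installed version of each pinned dependency.
from importlib.metadata import version

for pkg in ["fastapi", "uvicorn", "joblib", "PyYAML", "lime", "gunicorn"]:
    print(f"{pkg}=={version(pkg)}")
```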