Subi003 committed on
Commit
6acd343
·
verified ·
1 Parent(s): 171f324

Upload folder using huggingface_hub

Browse files
.dockerignore CHANGED
@@ -1,42 +1,42 @@
1
- # Ignore Python cache
2
- __pycache__/
3
- *.py[cod]
4
- *.so
5
-
6
- # Ignore Jupyter notebooks (if not used)
7
- *.ipynb
8
- .ipynb_checkpoints/
9
-
10
- # Ignore logs and temp files
11
- *.log
12
- *.tmp
13
- *.DS_Store
14
-
15
- # Ignore version control and dev files
16
- .git/
17
- .github/
18
- .vscode/
19
- *.env
20
- .env*
21
- .gitignore
22
-
23
- # MLflow & DVC metadata (keep only if you need them at runtime)
24
- .mlflow/
25
- .dvc/
26
- .dvcignore
27
-
28
- # CI/CD config files
29
- tox.ini
30
- pytest.ini
31
- setup.cfg
32
- setup.py
33
- requirements-dev.txt
34
-
35
- # Ignore Docker build context bloat
36
- *.tar
37
- *.zip
38
- *.gz
39
- *.egg-info/
40
-
41
- # Ignore Hugging Face cache
42
- ~/.cache/huggingface/
 
1
+ # Ignore Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.so
5
+
6
+ # Ignore Jupyter notebooks (if not used)
7
+ *.ipynb
8
+ .ipynb_checkpoints/
9
+
10
+ # Ignore logs and temp files
11
+ *.log
12
+ *.tmp
13
+ *.DS_Store
14
+
15
+ # Ignore version control and dev files
16
+ .git/
17
+ .github/
18
+ .vscode/
19
+ *.env
20
+ .env*
21
+ .gitignore
22
+
23
+ # MLflow & DVC metadata (keep only if you need them at runtime)
24
+ .mlflow/
25
+ .dvc/
26
+ .dvcignore
27
+
28
+ # CI/CD config files
29
+ tox.ini
30
+ pytest.ini
31
+ setup.cfg
32
+ setup.py
33
+ requirements-dev.txt
34
+
35
+ # Ignore Docker build context bloat
36
+ *.tar
37
+ *.zip
38
+ *.gz
39
+ *.egg-info/
40
+
41
+ # Ignore Hugging Face cache
42
+ ~/.cache/huggingface/
Dockerfile CHANGED
@@ -1,20 +1,20 @@
1
- FROM python:3.11.11-slim-bookworm
2
-
3
- RUN apt-get update && apt-get upgrade -y && \
4
- apt-get install --no-install-recommends -y build-essential && \
5
- rm -rf /var/lib/apt/lists/*
6
-
7
- WORKDIR /app
8
-
9
- COPY . /app
10
-
11
- RUN pip install --no-cache-dir --upgrade pip && \
12
- pip install --no-cache-dir -r requirements.txt -r model/requirements.txt
13
-
14
- RUN useradd -m appuser
15
- USER appuser
16
-
17
- EXPOSE 7860
18
- ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1
19
-
20
  CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.inference:inference_api", "--bind", "0.0.0.0:7860"]
 
1
+ FROM python:3.11.11-slim-bookworm
2
+
3
+ RUN apt-get update && apt-get upgrade -y && \
4
+ apt-get install --no-install-recommends -y build-essential && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY . /app
10
+
11
+ RUN pip install --no-cache-dir --upgrade pip && \
12
+ pip install --no-cache-dir -r requirements.txt -r model/requirements.txt
13
+
14
+ RUN useradd -m appuser
15
+ USER appuser
16
+
17
+ EXPOSE 7860
18
+ ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1
19
+
20
  CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.inference:inference_api", "--bind", "0.0.0.0:7860"]
main/inference.py CHANGED
@@ -1,99 +1,99 @@
1
- from fastapi import FastAPI
2
- from fastapi.responses import JSONResponse
3
- from main.schema import InputData, APIResponse
4
- from datetime import datetime
5
- from main.utils import *
6
- import uuid, time
7
-
8
- model = load_model()
9
-
10
- inference_api = FastAPI()
11
-
12
- @inference_api.get("/")
13
- def status():
14
- """
15
- Status endpoint for the model inference API.
16
-
17
- Returns a JSON response with a status of 200 and a message indicating
18
- that the API is active.
19
-
20
- """
21
- return JSONResponse(content={
22
- "status": 200,
23
- "message": "Inference API active."
24
- })
25
-
26
-
27
- @inference_api.post('/get_prediction', response_model=APIResponse)
28
- def api_response(payload: InputData):
29
- """
30
- Inference endpoint for getting prediction from the model.
31
-
32
- This endpoint accepts a POST request with a JSON payload containing the text to be classified.
33
- The response is a JSON object with the model prediction, confidence score, and other metadata.
34
-
35
- :param payload: InputData object containing the text to be classified.
36
- :return: APIResponse object containing the model prediction, confidence score,
37
- and other metadata.
38
- """
39
- timestamp = datetime.now().astimezone().isoformat()
40
- request_id = str(uuid.uuid4())
41
- start_time = time.perf_counter()
42
-
43
- tweet = payload.comment
44
- explainer = LimeExplainer(model)
45
- explaination = explainer.explain(tweet)
46
- prediction = explainer.prediction
47
-
48
- if prediction is not None:
49
- label = int(prediction["class_label"][0])
50
- probability_scores = prediction["class_probability_scores"][0]
51
- proba_class0 = float(probability_scores[0])
52
- proba_class1 = float(probability_scores[1])
53
- else:
54
- raise ValueError("Model prediction could not be made.")
55
-
56
- end_time = time.perf_counter()
57
-
58
- if proba_class1 > 0.70:
59
- toxic_level = "strong"
60
- elif proba_class1 > 0.54:
61
- toxic_level = "high"
62
- elif proba_class1 > 0.46:
63
- toxic_level = "light"
64
- else:
65
- toxic_level = "none"
66
-
67
- response = {
68
- "prediction": {
69
- "class_label": label,
70
- "confidence": round(abs(proba_class0 - proba_class1), 4),
71
- "toxic_level": toxic_level,
72
- "pred_scores": {
73
- "0": round(proba_class0, 4),
74
- "1": round(proba_class1, 4)
75
- },
76
- "explaination": explaination
77
- },
78
- "metadata": {
79
- "request_id": request_id,
80
- "timestamp": timestamp,
81
- "response_time": f"{round((end_time - start_time), 4)} sec",
82
- "input": {
83
- "num_tokens": int(len(tweet.split())),
84
- "num_characters": int(len([i for i in tweet])),
85
- "language": "en (iso 639-1code)",
86
- },
87
- "model": type(model.model).__name__,
88
- "model_version": get_model_version(),
89
- "vectorizer": type(model.vectorizer).__name__,
90
- "model_registry": f"Mlflow {get_model_registry()}",
91
- "type": "Production",
92
- "explainer_varient": "LimeTextExplainer",
93
- "streamable": False,
94
- "api_version": "v-1.0",
95
- "developer": "Subinoy Bera"
96
- }
97
- }
98
-
99
  return JSONResponse(status_code=200, content=response)
 
from datetime import datetime
import time
import uuid

from fastapi import FastAPI
from fastapi.responses import JSONResponse

from main.schema import InputData, APIResponse
# Explicit imports instead of `from main.utils import *`: these are the only
# names this module uses from utils, and star-imports hide provenance.
from main.utils import (
    LimeExplainer,
    get_model_registry,
    get_model_version,
    load_model,
)

# Load the serialized model once at import time so every request reuses it.
model = load_model()

inference_api = FastAPI()
12
@inference_api.get("/")
def status():
    """
    Health-check endpoint for the inference API.

    Returns a JSON payload with status 200 and a short message confirming
    that the service is up.
    """
    body = {
        "status": 200,
        "message": "Inference API active.",
    }
    return JSONResponse(content=body)
25
+
26
+
27
@inference_api.post('/get_prediction', response_model=APIResponse)
def api_response(payload: InputData):
    """
    Inference endpoint for getting prediction from the model.

    Accepts a POST request whose JSON body carries the text to be classified
    and returns the model prediction, confidence score, a LIME explanation,
    and request metadata.

    :param payload: InputData object containing the text to be classified.
    :return: JSONResponse with prediction, confidence score, and metadata.
    :raises ValueError: if the model failed to produce a prediction.
    """
    timestamp = datetime.now().astimezone().isoformat()
    request_id = str(uuid.uuid4())
    start_time = time.perf_counter()

    tweet = payload.comment
    explainer = LimeExplainer(model)
    explaination = explainer.explain(tweet)
    prediction = explainer.prediction

    # Guard clause: LimeExplainer.explain() populates `prediction` as a side
    # effect; if it is still None the model never ran.
    if prediction is None:
        raise ValueError("Model prediction could not be made.")

    label = int(prediction["class_label"][0])
    probability_scores = prediction["class_probability_scores"][0]
    proba_class0 = float(probability_scores[0])
    proba_class1 = float(probability_scores[1])

    end_time = time.perf_counter()

    # Map the positive-class probability onto a coarse severity band.
    if proba_class1 > 0.70:
        toxic_level = "strong"
    elif proba_class1 > 0.54:
        toxic_level = "high"
    elif proba_class1 > 0.46:
        toxic_level = "light"
    else:
        toxic_level = "none"

    response = {
        "prediction": {
            "class_label": label,
            # Confidence reported as the margin between the two class scores.
            "confidence": round(abs(proba_class0 - proba_class1), 4),
            "toxic_level": toxic_level,
            "pred_scores": {
                "0": round(proba_class0, 4),
                "1": round(proba_class1, 4)
            },
            "explaination": explaination
        },
        "metadata": {
            "request_id": request_id,
            "timestamp": timestamp,
            "response_time": f"{round((end_time - start_time), 4)} sec",
            "input": {
                # len(tweet) replaces the original len([i for i in tweet]),
                # which built a throwaway list just to count characters;
                # the redundant int(...) wrappers around len() are dropped too.
                "num_tokens": len(tweet.split()),
                "num_characters": len(tweet),
                "language": "en (iso 639-1code)",
            },
            "model": type(model.model).__name__,
            "model_version": get_model_version(),
            "vectorizer": type(model.vectorizer).__name__,
            "model_registry": f"Mlflow {get_model_registry()}",
            "type": "Production",
            "explainer_varient": "LimeTextExplainer",
            "streamable": False,
            "api_version": "v-1.0",
            "developer": "Subinoy Bera"
        }
    }

    return JSONResponse(status_code=200, content=response)
main/schema.py CHANGED
@@ -1,31 +1,31 @@
1
- # Schema validation for the API response
2
-
3
- from pydantic import BaseModel, Field
4
- from typing import Annotated, Dict
5
-
6
- class InputData(BaseModel):
7
- comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]
8
-
9
- class Prediction(BaseModel):
10
- class_label: int
11
- confidence: float
12
- toxic_level: str
13
- pred_scores: Dict[int, float]
14
-
15
- class MetaData(BaseModel):
16
- request_id: str
17
- timestamp: str
18
- response_time: str
19
- input: Dict[str, int]
20
- model: str
21
- version: int
22
- vectorizer: str
23
- type: str
24
- loader_module: str
25
- streamable: bool
26
- api_version: str
27
- developer: str
28
-
29
- class APIResponse(BaseModel):
30
- response: Prediction
31
  metadata: MetaData
 
# Schema validation for the API response

from pydantic import BaseModel, Field
from typing import Annotated, Dict

class InputData(BaseModel):
    """Request body for the /get_prediction endpoint."""
    # Free-text tweet/comment to run through the classifier.
    comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]

class Prediction(BaseModel):
    """Model-output section of the API response."""
    class_label: int
    confidence: float
    toxic_level: str
    # NOTE(review): inference.py serializes these keys as strings ("0"/"1");
    # pydantic will coerce, but Dict[str, float] would match the payload —
    # confirm before changing. The payload also carries an "explaination"
    # field that has no counterpart here.
    pred_scores: Dict[int, float]

class MetaData(BaseModel):
    """Request/served-model metadata section of the API response."""
    request_id: str
    timestamp: str
    response_time: str
    # NOTE(review): the emitted "input" dict mixes ints and a str ("language");
    # Dict[str, int] would reject it if validation ever ran — verify.
    input: Dict[str, int]
    model: str
    # NOTE(review): inference.py emits "model_version" (a string), not
    # "version" (int); several fields below (loader_module) have no match in
    # the emitted payload either — schema and handler look out of sync.
    version: int
    vectorizer: str
    type: str
    loader_module: str
    streamable: bool
    api_version: str
    developer: str

class APIResponse(BaseModel):
    """Top-level response schema (declared as response_model for /get_prediction)."""
    # NOTE(review): the handler returns a dict keyed "prediction", not
    # "response", and returns a JSONResponse directly, which bypasses this
    # validation entirely — confirm intended contract.
    response: Prediction
    metadata: MetaData
main/utils.py CHANGED
@@ -1,90 +1,90 @@
1
- # Utility functions for the model inference api
2
-
3
- import yaml
4
- import joblib
5
- import numpy as np
6
- import pandas as pd
7
- from pathlib import Path
8
- from typing import Any
9
- from lime.lime_text import LimeTextExplainer
10
-
11
- # load yaml files to get model meta data.
12
- try:
13
- with open(Path("model/registered_model_meta"), 'r') as f:
14
- model_metadata = yaml.safe_load(f)
15
- except:
16
- raise FileNotFoundError("Failed to load file having model metadata")
17
-
18
-
19
- # Intialize lime explainer with class names
20
- _global_explainer = LimeTextExplainer(class_names=["hate", "non-hate"], bow=False)
21
-
22
-
23
- class LimeExplainer:
24
- def __init__(self, model: Any):
25
- """
26
- Initializes an instance of LimeExplainer.
27
-
28
- Sets the class names for the explainer and initializes the LimeTextExplainer.
29
- Also initializes the model prediction attribute to None.
30
- """
31
- self.explainer = _global_explainer
32
- self.prediction = None
33
- self.model = model
34
-
35
- def _get_prediction_explaination(self, tweet) -> np.ndarray:
36
- """
37
- Internal function to get prediction from the model and class probability scores
38
- for lime explainer.
39
- """
40
- input_df = pd.DataFrame({
41
- "comments": tweet
42
- })
43
- self.prediction = self.model.predict(context=None, model_input=input_df)
44
- return np.array(self.prediction["class_probability_scores"])
45
-
46
- def explain(self, tweet) -> dict:
47
- """
48
- Generate lime explanation for a given tweet.
49
-
50
- Parameters
51
- tweet: str : Input tweet or comment to be classified.
52
-
53
- Returns
54
- dict : A dictionary with words as keys and their corresponding weightage.
55
- """
56
- explanation = self.explainer.explain_instance(
57
- tweet,
58
- self._get_prediction_explaination,
59
- num_features=5,
60
- num_samples=20
61
- )
62
- return round_dict_values(dic = dict(explanation.as_list()))
63
-
64
-
65
- def load_model():
66
- """Loads ML model from location path and returns the model."""
67
- try:
68
- with open(Path("model/python_model.pkl"), "rb") as f:
69
- model = joblib.load(f)
70
- return model
71
-
72
- except Exception as e:
73
- raise RuntimeError(f"Failed to load model from hub: {e}")
74
-
75
-
76
- def get_model_registry() -> str:
77
- """Fetches the model registry name and returns it."""
78
- model_registry = model_metadata['model_name']
79
- return model_registry
80
-
81
-
82
- def get_model_version() -> str:
83
- """Fetches the model version and returns it."""
84
- model_version = model_metadata['model_version']
85
- return model_version
86
-
87
-
88
- def round_dict_values(dic) -> dict:
89
- """Rounds all values in a dictionary to 4 decimal places."""
90
  return {str(k): round(v, 4) for k, v in dic.items()}
 
1
+ # Utility functions for the model inference api
2
+
3
+ import yaml
4
+ import joblib
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pathlib import Path
8
+ from typing import Any
9
+ from lime.lime_text import LimeTextExplainer
10
+
11
# Load the YAML file carrying the registered model's metadata (name, version).
try:
    with open(Path("model/registered_model_meta"), 'r') as f:
        model_metadata = yaml.safe_load(f)
except (OSError, yaml.YAMLError) as e:
    # Narrowed from a bare `except:`, which would also trap SystemExit and
    # KeyboardInterrupt; chaining with `from e` preserves the real cause.
    raise FileNotFoundError("Failed to load file having model metadata") from e


# Initialize a single shared LIME text explainer with the class names.
# bow=False keeps word positions when LIME perturbs the input text.
_global_explainer = LimeTextExplainer(class_names=["hate", "non-hate"], bow=False)
21
+
22
+
23
class LimeExplainer:
    """Per-request wrapper around the shared LimeTextExplainer.

    Holds the model, exposes `explain()` for a single text, and keeps the
    raw model output in `self.prediction` for the caller to inspect.
    """

    def __init__(self, model: Any):
        """
        Initializes an instance of LimeExplainer.

        Attaches the module-level LimeTextExplainer, stores the model, and
        sets the prediction attribute to None until explain() runs.
        """
        self.model = model
        self.explainer = _global_explainer
        self.prediction = None  # filled in by _predict_for_lime

    def _predict_for_lime(self, tweet) -> np.ndarray:
        """
        Prediction function handed to LIME: runs the model and returns the
        class probability scores, caching the full prediction on self.
        """
        frame = pd.DataFrame({
            "comments": tweet
        })
        self.prediction = self.model.predict(context=None, model_input=frame)
        return np.array(self.prediction["class_probability_scores"])

    def explain(self, tweet) -> dict:
        """
        Generate lime explanation for a given tweet.

        Parameters
        tweet: str : Input tweet or comment to be classified.

        Returns
        dict : A dictionary with words as keys and their corresponding weightage.
        """
        result = self.explainer.explain_instance(
            tweet,
            self._predict_for_lime,
            num_features=5,
            num_samples=20
        )
        return round_dict_values(dic=dict(result.as_list()))
63
+
64
+
65
def load_model():
    """Loads ML model from location path and returns the model."""
    model_path = Path("model/python_model.pkl")
    try:
        with open(model_path, "rb") as f:
            return joblib.load(f)
    except Exception as e:
        raise RuntimeError(f"Failed to load model from hub: {e}")
74
+
75
+
76
def get_model_registry() -> str:
    """Return the registry (registered model) name from the loaded metadata."""
    return model_metadata['model_name']
80
+
81
+
82
def get_model_version() -> str:
    """Return the registered model version from the loaded metadata."""
    return model_metadata['model_version']
86
+
87
+
88
def round_dict_values(dic) -> dict:
    """Round every value to 4 decimal places, converting keys to strings."""
    rounded = {}
    for key, value in dic.items():
        rounded[str(key)] = round(value, 4)
    return rounded
model/MLmodel ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifact_path: XGB-v2
2
+ flavors:
3
+ python_function:
4
+ artifacts:
5
+ classifier:
6
+ path: artifacts\XGB-v2.joblib
7
+ uri: models\XGB-v2.joblib
8
+ vectorizer:
9
+ path: artifacts\Tfidf.joblib
10
+ uri: models\Tfidf.joblib
11
+ cloudpickle_version: 3.1.1
12
+ code: null
13
+ env:
14
+ conda: conda.yaml
15
+ virtualenv: python_env.yaml
16
+ loader_module: mlflow.pyfunc.model
17
+ python_model: python_model.pkl
18
+ python_version: 3.11.5
19
+ streamable: false
20
+ mlflow_version: 2.22.1
21
+ model_size_bytes: 11990188
22
+ model_uuid: 65490db310744bdf8f1c897d96f8aca8
23
+ prompts: null
24
+ run_id: cda6d2d206b34409a74cd67407bda91c
25
+ utc_time_created: '2025-07-28 10:17:07.559763'
model/artifacts/Tfidf.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b128625a5b8b778ee4d4a97f8afdfba1268a3ee14b9e3328bab3de48e685cf
3
+ size 120443
model/artifacts/XGB-v2.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4330bca1029dc4532a5c4ced95b6fa62ef196f6789fad05a1414d662967fea
3
+ size 5863647
model/conda.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ channels:
2
+ - conda-forge
3
+ dependencies:
4
+ - python=3.11.5
5
+ - pip<=25.1
6
+ - pip:
7
+ - mlflow==2.22.1
8
+ - cloudpickle==3.1.1
9
+ - numpy==2.2.6
10
+ - pandas==2.3.1
11
+ - psutil==7.0.0
12
+ - scikit-learn==1.7.0
13
+ - scipy==1.13.1
14
+ - xgboost==3.0.2
15
+ name: mlflow-env
model/python_env.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python: 3.11.5
2
+ build_dependencies:
3
+ - pip==25.1
4
+ - setuptools==78.1.1
5
+ - wheel==0.45.1
6
+ dependencies:
7
+ - -r requirements.txt
model/python_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d00d6029ae727539c833a6504499aee7c3d7da5de56b03be330806293f3954
3
+ size 6006098
model/registered_model_meta ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_name: ToxicTagger-Models
2
+ model_version: '6'
model/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ mlflow==2.22.1
2
+ cloudpickle==3.1.1
3
+ numpy==2.2.6
4
+ pandas==2.3.1
5
+ psutil==7.0.0
6
+ scikit-learn==1.7.0
7
+ scipy==1.13.1
8
+ xgboost==3.0.2
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- fastapi==0.116.1
2
- uvicorn==0.35.0
3
- joblib==1.5.1
4
- PyYAML==6.0.2
5
- lime==0.2.0.1
6
  gunicorn==23.0.0
 
1
+ fastapi==0.116.1
2
+ uvicorn==0.35.0
3
+ joblib==1.5.1
4
+ PyYAML==6.0.2
5
+ lime==0.2.0.1
6
  gunicorn==23.0.0