Subi003 committed on
Commit
6acd343
·
verified ·
1 Parent(s): 171f324

Upload folder using huggingface_hub

Browse files
.dockerignore CHANGED
@@ -1,42 +1,42 @@
1
- # Ignore Python cache
2
- __pycache__/
3
- *.py[cod]
4
- *.so
5
-
6
- # Ignore Jupyter notebooks (if not used)
7
- *.ipynb
8
- .ipynb_checkpoints/
9
-
10
- # Ignore logs and temp files
11
- *.log
12
- *.tmp
13
- *.DS_Store
14
-
15
- # Ignore version control and dev files
16
- .git/
17
- .github/
18
- .vscode/
19
- *.env
20
- .env*
21
- .gitignore
22
-
23
- # MLflow & DVC metadata (keep only if you need them at runtime)
24
- .mlflow/
25
- .dvc/
26
- .dvcignore
27
-
28
- # CI/CD config files
29
- tox.ini
30
- pytest.ini
31
- setup.cfg
32
- setup.py
33
- requirements-dev.txt
34
-
35
- # Ignore Docker build context bloat
36
- *.tar
37
- *.zip
38
- *.gz
39
- *.egg-info/
40
-
41
- # Ignore Hugging Face cache
42
- ~/.cache/huggingface/
 
1
+ # Ignore Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.so
5
+
6
+ # Ignore Jupyter notebooks (if not used)
7
+ *.ipynb
8
+ .ipynb_checkpoints/
9
+
10
+ # Ignore logs and temp files
11
+ *.log
12
+ *.tmp
13
+ *.DS_Store
14
+
15
+ # Ignore version control and dev files
16
+ .git/
17
+ .github/
18
+ .vscode/
19
+ *.env
20
+ .env*
21
+ .gitignore
22
+
23
+ # MLflow & DVC metadata (keep only if you need them at runtime)
24
+ .mlflow/
25
+ .dvc/
26
+ .dvcignore
27
+
28
+ # CI/CD config files
29
+ tox.ini
30
+ pytest.ini
31
+ setup.cfg
32
+ setup.py
33
+ requirements-dev.txt
34
+
35
+ # Ignore Docker build context bloat
36
+ *.tar
37
+ *.zip
38
+ *.gz
39
+ *.egg-info/
40
+
41
+ # Ignore Hugging Face cache
42
+ ~/.cache/huggingface/
Dockerfile CHANGED
@@ -1,20 +1,20 @@
1
- FROM python:3.11.11-slim-bookworm
2
-
3
- RUN apt-get update && apt-get upgrade -y && \
4
- apt-get install --no-install-recommends -y build-essential && \
5
- rm -rf /var/lib/apt/lists/*
6
-
7
- WORKDIR /app
8
-
9
- COPY . /app
10
-
11
- RUN pip install --no-cache-dir --upgrade pip && \
12
- pip install --no-cache-dir -r requirements.txt -r model/requirements.txt
13
-
14
- RUN useradd -m appuser
15
- USER appuser
16
-
17
- EXPOSE 7860
18
- ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1
19
-
20
  CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.inference:inference_api", "--bind", "0.0.0.0:7860"]
 
1
+ FROM python:3.11.11-slim-bookworm
2
+
3
+ RUN apt-get update && apt-get upgrade -y && \
4
+ apt-get install --no-install-recommends -y build-essential && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY . /app
10
+
11
+ RUN pip install --no-cache-dir --upgrade pip && \
12
+ pip install --no-cache-dir -r requirements.txt -r model/requirements.txt
13
+
14
+ RUN useradd -m appuser
15
+ USER appuser
16
+
17
+ EXPOSE 7860
18
+ ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1
19
+
20
  CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.inference:inference_api", "--bind", "0.0.0.0:7860"]
main/inference.py CHANGED
@@ -1,99 +1,99 @@
1
- from fastapi import FastAPI
2
- from fastapi.responses import JSONResponse
3
- from main.schema import InputData, APIResponse
4
- from datetime import datetime
5
- from main.utils import *
6
- import uuid, time
7
-
8
- model = load_model()
9
-
10
- inference_api = FastAPI()
11
-
12
- @inference_api.get("/")
13
- def status():
14
- """
15
- Status endpoint for the model inference API.
16
-
17
- Returns a JSON response with a status of 200 and a message indicating
18
- that the API is active.
19
-
20
- """
21
- return JSONResponse(content={
22
- "status": 200,
23
- "message": "Inference API active."
24
- })
25
-
26
-
27
- @inference_api.post('/get_prediction', response_model=APIResponse)
28
- def api_response(payload: InputData):
29
- """
30
- Inference endpoint for getting prediction from the model.
31
-
32
- This endpoint accepts a POST request with a JSON payload containing the text to be classified.
33
- The response is a JSON object with the model prediction, confidence score, and other metadata.
34
-
35
- :param payload: InputData object containing the text to be classified.
36
- :return: APIResponse object containing the model prediction, confidence score,
37
- and other metadata.
38
- """
39
- timestamp = datetime.now().astimezone().isoformat()
40
- request_id = str(uuid.uuid4())
41
- start_time = time.perf_counter()
42
-
43
- tweet = payload.comment
44
- explainer = LimeExplainer(model)
45
- explaination = explainer.explain(tweet)
46
- prediction = explainer.prediction
47
-
48
- if prediction is not None:
49
- label = int(prediction["class_label"][0])
50
- probability_scores = prediction["class_probability_scores"][0]
51
- proba_class0 = float(probability_scores[0])
52
- proba_class1 = float(probability_scores[1])
53
- else:
54
- raise ValueError("Model prediction could not be made.")
55
-
56
- end_time = time.perf_counter()
57
-
58
- if proba_class1 > 0.70:
59
- toxic_level = "strong"
60
- elif proba_class1 > 0.54:
61
- toxic_level = "high"
62
- elif proba_class1 > 0.46:
63
- toxic_level = "light"
64
- else:
65
- toxic_level = "none"
66
-
67
- response = {
68
- "prediction": {
69
- "class_label": label,
70
- "confidence": round(abs(proba_class0 - proba_class1), 4),
71
- "toxic_level": toxic_level,
72
- "pred_scores": {
73
- "0": round(proba_class0, 4),
74
- "1": round(proba_class1, 4)
75
- },
76
- "explaination": explaination
77
- },
78
- "metadata": {
79
- "request_id": request_id,
80
- "timestamp": timestamp,
81
- "response_time": f"{round((end_time - start_time), 4)} sec",
82
- "input": {
83
- "num_tokens": int(len(tweet.split())),
84
- "num_characters": int(len([i for i in tweet])),
85
- "language": "en (iso 639-1code)",
86
- },
87
- "model": type(model.model).__name__,
88
- "model_version": get_model_version(),
89
- "vectorizer": type(model.vectorizer).__name__,
90
- "model_registry": f"Mlflow {get_model_registry()}",
91
- "type": "Production",
92
- "explainer_varient": "LimeTextExplainer",
93
- "streamable": False,
94
- "api_version": "v-1.0",
95
- "developer": "Subinoy Bera"
96
- }
97
- }
98
-
99
  return JSONResponse(status_code=200, content=response)
 
from datetime import datetime
import time
import uuid

from fastapi import FastAPI
from fastapi.responses import JSONResponse

from main.schema import InputData, APIResponse
# Explicit imports instead of `from main.utils import *`: these are the only
# names this module uses from utils, and star-imports hide provenance.
from main.utils import (
    LimeExplainer,
    get_model_registry,
    get_model_version,
    load_model,
)

# Load the serialized model once at import time so every request reuses it.
model = load_model()

inference_api = FastAPI()
12
@inference_api.get("/")
def status():
    """
    Health-check endpoint for the inference API.

    Returns a JSON payload with status 200 and a short message confirming
    that the service is up.
    """
    body = {
        "status": 200,
        "message": "Inference API active.",
    }
    return JSONResponse(content=body)
25
+
26
+
27
@inference_api.post('/get_prediction', response_model=APIResponse)
def api_response(payload: InputData):
    """
    Inference endpoint for getting prediction from the model.

    Accepts a POST request whose JSON body carries the text to be classified
    and returns the model prediction, confidence score, a LIME explanation,
    and request metadata.

    :param payload: InputData object containing the text to be classified.
    :return: JSONResponse with prediction, confidence score, and metadata.
    :raises ValueError: if the model failed to produce a prediction.
    """
    timestamp = datetime.now().astimezone().isoformat()
    request_id = str(uuid.uuid4())
    start_time = time.perf_counter()

    tweet = payload.comment
    explainer = LimeExplainer(model)
    explaination = explainer.explain(tweet)
    prediction = explainer.prediction

    # Guard clause: LimeExplainer.explain() populates `prediction` as a side
    # effect; if it is still None the model never ran.
    if prediction is None:
        raise ValueError("Model prediction could not be made.")

    label = int(prediction["class_label"][0])
    probability_scores = prediction["class_probability_scores"][0]
    proba_class0 = float(probability_scores[0])
    proba_class1 = float(probability_scores[1])

    end_time = time.perf_counter()

    # Map the positive-class probability onto a coarse severity band.
    if proba_class1 > 0.70:
        toxic_level = "strong"
    elif proba_class1 > 0.54:
        toxic_level = "high"
    elif proba_class1 > 0.46:
        toxic_level = "light"
    else:
        toxic_level = "none"

    response = {
        "prediction": {
            "class_label": label,
            # Confidence reported as the margin between the two class scores.
            "confidence": round(abs(proba_class0 - proba_class1), 4),
            "toxic_level": toxic_level,
            "pred_scores": {
                "0": round(proba_class0, 4),
                "1": round(proba_class1, 4)
            },
            "explaination": explaination
        },
        "metadata": {
            "request_id": request_id,
            "timestamp": timestamp,
            "response_time": f"{round((end_time - start_time), 4)} sec",
            "input": {
                # len(tweet) replaces the original len([i for i in tweet]),
                # which built a throwaway list just to count characters;
                # the redundant int(...) wrappers around len() are dropped too.
                "num_tokens": len(tweet.split()),
                "num_characters": len(tweet),
                "language": "en (iso 639-1code)",
            },
            "model": type(model.model).__name__,
            "model_version": get_model_version(),
            "vectorizer": type(model.vectorizer).__name__,
            "model_registry": f"Mlflow {get_model_registry()}",
            "type": "Production",
            "explainer_varient": "LimeTextExplainer",
            "streamable": False,
            "api_version": "v-1.0",
            "developer": "Subinoy Bera"
        }
    }

    return JSONResponse(status_code=200, content=response)
main/schema.py CHANGED
@@ -1,31 +1,31 @@
1
- # Schema validation for the API response
2
-
3
- from pydantic import BaseModel, Field
4
- from typing import Annotated, Dict
5
-
6
- class InputData(BaseModel):
7
- comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]
8
-
9
- class Prediction(BaseModel):
10
- class_label: int
11
- confidence: float
12
- toxic_level: str
13
- pred_scores: Dict[int, float]
14
-
15
- class MetaData(BaseModel):
16
- request_id: str
17
- timestamp: str
18
- response_time: str
19
- input: Dict[str, int]
20
- model: str
21
- version: int
22
- vectorizer: str
23
- type: str
24
- loader_module: str
25
- streamable: bool
26
- api_version: str
27
- developer: str
28
-
29
- class APIResponse(BaseModel):
30
- response: Prediction
31
  metadata: MetaData
 
# Schema validation for the API response

from pydantic import BaseModel, Field
from typing import Annotated, Dict

class InputData(BaseModel):
    """Request body for the /get_prediction endpoint."""
    # Free-text tweet/comment to run through the classifier.
    comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]

class Prediction(BaseModel):
    """Model-output section of the API response."""
    class_label: int
    confidence: float
    toxic_level: str
    # NOTE(review): inference.py serializes these keys as strings ("0"/"1");
    # pydantic will coerce, but Dict[str, float] would match the payload —
    # confirm before changing. The payload also carries an "explaination"
    # field that has no counterpart here.
    pred_scores: Dict[int, float]

class MetaData(BaseModel):
    """Request/served-model metadata section of the API response."""
    request_id: str
    timestamp: str
    response_time: str
    # NOTE(review): the emitted "input" dict mixes ints and a str ("language");
    # Dict[str, int] would reject it if validation ever ran — verify.
    input: Dict[str, int]
    model: str
    # NOTE(review): inference.py emits "model_version" (a string), not
    # "version" (int); several fields below (loader_module) have no match in
    # the emitted payload either — schema and handler look out of sync.
    version: int
    vectorizer: str
    type: str
    loader_module: str
    streamable: bool
    api_version: str
    developer: str

class APIResponse(BaseModel):
    """Top-level response schema (declared as response_model for /get_prediction)."""
    # NOTE(review): the handler returns a dict keyed "prediction", not
    # "response", and returns a JSONResponse directly, which bypasses this
    # validation entirely — confirm intended contract.
    response: Prediction
    metadata: MetaData
main/utils.py CHANGED
@@ -1,90 +1,90 @@
1
- # Utility functions for the model inference api
2
-
3
- import yaml
4
- import joblib
5
- import numpy as np
6
- import pandas as pd
7
- from pathlib import Path
8
- from typing import Any
9
- from lime.lime_text import LimeTextExplainer
10
-
11
- # load yaml files to get model meta data.
12
- try:
13
- with open(Path("model/registered_model_meta"), 'r') as f:
14
- model_metadata = yaml.safe_load(f)
15
- except:
16
- raise FileNotFoundError("Failed to load file having model metadata")
17
-
18
-
19
- # Intialize lime explainer with class names
20
- _global_explainer = LimeTextExplainer(class_names=["hate", "non-hate"], bow=False)
21
-
22
-
23
- class LimeExplainer:
24
- def __init__(self, model: Any):
25
- """
26
- Initializes an instance of LimeExplainer.
27
-
28
- Sets the class names for the explainer and initializes the LimeTextExplainer.
29
- Also initializes the model prediction attribute to None.
30
- """
31
- self.explainer = _global_explainer
32
- self.prediction = None
33
- self.model = model
34
-
35
- def _get_prediction_explaination(self, tweet) -> np.ndarray:
36
- """
37
- Internal function to get prediction from the model and class probability scores
38
- for lime explainer.
39
- """
40
- input_df = pd.DataFrame({
41
- "comments": tweet
42
- })
43
- self.prediction = self.model.predict(context=None, model_input=input_df)
44
- return np.array(self.prediction["class_probability_scores"])
45
-
46
- def explain(self, tweet) -> dict:
47
- """
48
- Generate lime explanation for a given tweet.
49
-
50
- Parameters
51
- tweet: str : Input tweet or comment to be classified.
52
-
53
- Returns
54
- dict : A dictionary with words as keys and their corresponding weightage.
55
- """
56
- explanation = self.explainer.explain_instance(
57
- tweet,
58
- self._get_prediction_explaination,
59
- num_features=5,
60
- num_samples=20
61
- )
62
- return round_dict_values(dic = dict(explanation.as_list()))
63
-
64
-
65
- def load_model():
66
- """Loads ML model from location path and returns the model."""
67
- try:
68
- with open(Path("model/python_model.pkl"), "rb") as f:
69
- model = joblib.load(f)
70
- return model
71
-
72
- except Exception as e:
73
- raise RuntimeError(f"Failed to load model from hub: {e}")
74
-
75
-
76
- def get_model_registry() -> str:
77
- """Fetches the model registry name and returns it."""
78
- model_registry = model_metadata['model_name']
79
- return model_registry
80
-
81
-
82
- def get_model_version() -> str:
83
- """Fetches the model version and returns it."""
84
- model_version = model_metadata['model_version']
85
- return model_version
86
-
87
-
88
- def round_dict_values(dic) -> dict:
89
- """Rounds all values in a dictionary to 4 decimal places."""
90
  return {str(k): round(v, 4) for k, v in dic.items()}
 
1
+ # Utility functions for the model inference api
2
+
3
+ import yaml
4
+ import joblib
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pathlib import Path
8
+ from typing import Any
9
+ from lime.lime_text import LimeTextExplainer
10
+
11
# Load the YAML file carrying the registered model's metadata (name, version).
try:
    with open(Path("model/registered_model_meta"), 'r') as f:
        model_metadata = yaml.safe_load(f)
except (OSError, yaml.YAMLError) as e:
    # Narrowed from a bare `except:`, which would also trap SystemExit and
    # KeyboardInterrupt; chaining with `from e` preserves the real cause.
    raise FileNotFoundError("Failed to load file having model metadata") from e


# Initialize a single shared LIME text explainer with the class names.
# bow=False keeps word positions when LIME perturbs the input text.
_global_explainer = LimeTextExplainer(class_names=["hate", "non-hate"], bow=False)
21
+
22
+
23
class LimeExplainer:
    """Per-request wrapper around the shared LimeTextExplainer.

    Holds the model, exposes `explain()` for a single text, and keeps the
    raw model output in `self.prediction` for the caller to inspect.
    """

    def __init__(self, model: Any):
        """
        Initializes an instance of LimeExplainer.

        Attaches the module-level LimeTextExplainer, stores the model, and
        sets the prediction attribute to None until explain() runs.
        """
        self.model = model
        self.explainer = _global_explainer
        self.prediction = None  # filled in by _predict_for_lime

    def _predict_for_lime(self, tweet) -> np.ndarray:
        """
        Prediction function handed to LIME: runs the model and returns the
        class probability scores, caching the full prediction on self.
        """
        frame = pd.DataFrame({
            "comments": tweet
        })
        self.prediction = self.model.predict(context=None, model_input=frame)
        return np.array(self.prediction["class_probability_scores"])

    def explain(self, tweet) -> dict:
        """
        Generate lime explanation for a given tweet.

        Parameters
        tweet: str : Input tweet or comment to be classified.

        Returns
        dict : A dictionary with words as keys and their corresponding weightage.
        """
        result = self.explainer.explain_instance(
            tweet,
            self._predict_for_lime,
            num_features=5,
            num_samples=20
        )
        return round_dict_values(dic=dict(result.as_list()))
63
+
64
+
65
def load_model():
    """Loads ML model from location path and returns the model."""
    model_path = Path("model/python_model.pkl")
    try:
        with open(model_path, "rb") as f:
            return joblib.load(f)
    except Exception as e:
        raise RuntimeError(f"Failed to load model from hub: {e}")
74
+
75
+
76
def get_model_registry() -> str:
    """Return the registry (registered model) name from the loaded metadata."""
    return model_metadata['model_name']
80
+
81
+
82
def get_model_version() -> str:
    """Return the registered model version from the loaded metadata."""
    return model_metadata['model_version']
86
+
87
+
88
def round_dict_values(dic) -> dict:
    """Round every value to 4 decimal places, converting keys to strings."""
    rounded = {}
    for key, value in dic.items():
        rounded[str(key)] = round(value, 4)
    return rounded
model/MLmodel ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifact_path: XGB-v2
2
+ flavors:
3
+ python_function:
4
+ artifacts:
5
+ classifier:
6
+ path: artifacts\XGB-v2.joblib
7
+ uri: models\XGB-v2.joblib
8
+ vectorizer:
9
+ path: artifacts\Tfidf.joblib
10
+ uri: models\Tfidf.joblib
11
+ cloudpickle_version: 3.1.1
12
+ code: null
13
+ env:
14
+ conda: conda.yaml
15
+ virtualenv: python_env.yaml
16
+ loader_module: mlflow.pyfunc.model
17
+ python_model: python_model.pkl
18
+ python_version: 3.11.5
19
+ streamable: false
20
+ mlflow_version: 2.22.1
21
+ model_size_bytes: 11990188
22
+ model_uuid: 65490db310744bdf8f1c897d96f8aca8
23
+ prompts: null
24
+ run_id: cda6d2d206b34409a74cd67407bda91c
25
+ utc_time_created: '2025-07-28 10:17:07.559763'
model/artifacts/Tfidf.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b128625a5b8b778ee4d4a97f8afdfba1268a3ee14b9e3328bab3de48e685cf
3
+ size 120443
model/artifacts/XGB-v2.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4330bca1029dc4532a5c4ced95b6fa62ef196f6789fad05a1414d662967fea
3
+ size 5863647
model/conda.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ channels:
2
+ - conda-forge
3
+ dependencies:
4
+ - python=3.11.5
5
+ - pip<=25.1
6
+ - pip:
7
+ - mlflow==2.22.1
8
+ - cloudpickle==3.1.1
9
+ - numpy==2.2.6
10
+ - pandas==2.3.1
11
+ - psutil==7.0.0
12
+ - scikit-learn==1.7.0
13
+ - scipy==1.13.1
14
+ - xgboost==3.0.2
15
+ name: mlflow-env
model/python_env.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python: 3.11.5
2
+ build_dependencies:
3
+ - pip==25.1
4
+ - setuptools==78.1.1
5
+ - wheel==0.45.1
6
+ dependencies:
7
+ - -r requirements.txt
model/python_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d00d6029ae727539c833a6504499aee7c3d7da5de56b03be330806293f3954
3
+ size 6006098
model/registered_model_meta ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_name: ToxicTagger-Models
2
+ model_version: '6'
model/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ mlflow==2.22.1
2
+ cloudpickle==3.1.1
3
+ numpy==2.2.6
4
+ pandas==2.3.1
5
+ psutil==7.0.0
6
+ scikit-learn==1.7.0
7
+ scipy==1.13.1
8
+ xgboost==3.0.2
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- fastapi==0.116.1
2
- uvicorn==0.35.0
3
- joblib==1.5.1
4
- PyYAML==6.0.2
5
- lime==0.2.0.1
6
  gunicorn==23.0.0
 
1
+ fastapi==0.116.1
2
+ uvicorn==0.35.0
3
+ joblib==1.5.1
4
+ PyYAML==6.0.2
5
+ lime==0.2.0.1
6
  gunicorn==23.0.0