Subi003 committed on
Commit
2b6af04
·
verified ·
1 Parent(s): 818243f

Upload folder using huggingface_hub

Browse files
.gitignore DELETED
File without changes
Dockerfile CHANGED
@@ -1,15 +1,20 @@
1
  FROM python:3.11.11-slim-bookworm
2
 
3
- RUN apt-get update && apt-get upgrade -y
 
 
 
 
4
 
5
  COPY . /app
6
 
7
- WORKDIR /app
 
8
 
9
- RUN pip install --upgrade pip && \
10
- pip install -r requirements.txt && \
11
- pip install -r model/requirements.txt
12
 
13
  EXPOSE 7860
 
14
 
15
- CMD ["uvicorn", "main.model_inference:inference_api", "--host", "0.0.0.0", "--port", "7860"]
 
1
FROM python:3.11.11-slim-bookworm

# Build tooling is needed to compile wheels for some pinned deps; apt lists
# are removed in the same layer to keep the image small.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install --no-install-recommends -y build-essential && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY . /app

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt -r model/requirements.txt

# Run the service as an unprivileged user.
RUN useradd -m appuser
USER appuser

EXPOSE 7860
ENV HOST=0.0.0.0 PORT=7860 PYTHONUNBUFFERED=1

# FIX: this commit renames main/model_inference.py -> main/inference.py, so
# the ASGI target must be main.inference (the old main.model_inference path
# would fail to import at container start).
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main.inference:inference_api", "--bind", "0.0.0.0:7860"]
main/helper.py DELETED
@@ -1,43 +0,0 @@
1
- # Helper functions for the model inference api
2
-
3
- import yaml
4
- import joblib
5
- import pandas as pd
6
- from pathlib import Path
7
-
8
- # load yaml files to get model meta data.
9
- try:
10
- with open(Path("model/registered_model_meta"), 'r') as f:
11
- model_metadata = yaml.safe_load(f)
12
- except:
13
- raise FileNotFoundError("Failed to load file having model metadata")
14
-
15
-
16
- def load_model():
17
- """ Loads ML model from location path and returns the model. """
18
- try:
19
- with open(Path("model/python_model.pkl"), "rb") as f:
20
- model = joblib.load(f)
21
- return model
22
-
23
- except Exception as e:
24
- raise RuntimeError(f"Failed to load model from hub: {e}")
25
-
26
-
27
- def get_model_registry():
28
- """ Fetches the model registry name and returns it. """
29
- model_registry = model_metadata['model_name']
30
- return model_registry
31
-
32
-
33
- def get_model_version():
34
- """ Fetches the model version and returns it. """
35
- model_version = model_metadata['model_version']
36
- return model_version
37
-
38
-
39
- def format_model_input(tweet: str) -> pd.DataFrame:
40
- df = pd.DataFrame({
41
- "comments": [tweet]
42
- })
43
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main/{model_inference.py → inference.py} RENAMED
@@ -1,37 +1,57 @@
1
  from fastapi import FastAPI
2
  from fastapi.responses import JSONResponse
3
- from main.validate_schema import InputData, APIResponse
4
  from datetime import datetime
5
- from main.helper import *
6
  import uuid, time
7
 
8
- model = load_model()
9
 
10
- # Initializing fastapi
11
  inference_api = FastAPI()
12
 
13
  @inference_api.get("/")
14
- def root():
 
 
 
 
 
 
 
15
  return JSONResponse(content={
16
  "status": 200,
17
- "message": "Inference API is running."
18
  })
19
 
20
 
21
  @inference_api.post('/get_prediction', response_model=APIResponse)
22
- def api(payload: InputData):
 
 
 
 
 
 
 
 
 
 
23
  timestamp = datetime.now().astimezone().isoformat()
24
  request_id = str(uuid.uuid4())
25
  start_time = time.perf_counter()
26
 
27
  tweet = payload.comment
28
- model_input = format_model_input(tweet)
29
- model_response = model.predict(context=None, model_input=model_input)
 
30
 
31
- label = int(model_response["class_label"][0])
32
- probability_scores = model_response["class_probability_scores"]
33
- proba_class0 = float(probability_scores[0][0])
34
- proba_class1 = float(probability_scores[0][1])
 
 
 
35
 
36
  end_time = time.perf_counter()
37
 
@@ -53,6 +73,7 @@ def api(payload: InputData):
53
  "0": round(proba_class0, 4),
54
  "1": round(proba_class1, 4)
55
  },
 
56
  },
57
  "metadata": {
58
  "request_id": request_id,
@@ -67,7 +88,8 @@ def api(payload: InputData):
67
  "model_version": get_model_version(),
68
  "vectorizer": type(model.vectorizer).__name__,
69
  "model_registry": f"Mlflow {get_model_registry()}",
70
- "type": "production",
 
71
  "streamable": False,
72
  "api_version": "v-1.0",
73
  "developer": "Subinoy Bera"
 
1
  from fastapi import FastAPI
2
  from fastapi.responses import JSONResponse
3
+ from hf_serve_api.main.schema import InputData, APIResponse
4
  from datetime import datetime
5
+ from main.utils import *
6
  import uuid, time
7
 
8
+ load_model()
9
 
 
10
  inference_api = FastAPI()
11
 
12
@inference_api.get("/")
def status():
    """
    Status endpoint for the model inference API.

    Responds with HTTP body containing a 200 status code and a short
    message indicating that the API is active.
    """
    body = {
        "status": 200,
        "message": "Inference API active."
    }
    return JSONResponse(content=body)
25
 
26
 
27
  @inference_api.post('/get_prediction', response_model=APIResponse)
28
+ def api_response(payload: InputData):
29
+ """
30
+ Inference endpoint for getting prediction from the model.
31
+
32
+ This endpoint accepts a POST request with a JSON payload containing the text to be classified.
33
+ The response is a JSON object with the model prediction, confidence score, and other metadata.
34
+
35
+ :param payload: InputData object containing the text to be classified.
36
+ :return: APIResponse object containing the model prediction, confidence score,
37
+ and other metadata.
38
+ """
39
  timestamp = datetime.now().astimezone().isoformat()
40
  request_id = str(uuid.uuid4())
41
  start_time = time.perf_counter()
42
 
43
  tweet = payload.comment
44
+ explainer = LimeExplainer()
45
+ explaination = explainer.explain(tweet)
46
+ prediction = explainer.prediction
47
 
48
+ if prediction is not None:
49
+ label = int(prediction["class_label"][0])
50
+ probability_scores = prediction["class_probability_scores"][0]
51
+ proba_class0 = float(probability_scores[0])
52
+ proba_class1 = float(probability_scores[1])
53
+ else:
54
+ raise ValueError("Model prediction could not be made.")
55
 
56
  end_time = time.perf_counter()
57
 
 
73
  "0": round(proba_class0, 4),
74
  "1": round(proba_class1, 4)
75
  },
76
+ "explaination": explaination
77
  },
78
  "metadata": {
79
  "request_id": request_id,
 
88
  "model_version": get_model_version(),
89
  "vectorizer": type(model.vectorizer).__name__,
90
  "model_registry": f"Mlflow {get_model_registry()}",
91
+ "type": "Production",
92
+ "explainer_varient": "LimeTextExplainer",
93
  "streamable": False,
94
  "api_version": "v-1.0",
95
  "developer": "Subinoy Bera"
main/{validate_schema.py → schema.py} RENAMED
@@ -1,11 +1,11 @@
 
 
1
  from pydantic import BaseModel, Field
2
  from typing import Annotated, Dict
3
 
4
-
5
  class InputData(BaseModel):
6
  comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]
7
 
8
-
9
  class Prediction(BaseModel):
10
  class_label: int
11
  confidence: float
@@ -26,7 +26,6 @@ class MetaData(BaseModel):
26
  api_version: str
27
  developer: str
28
 
29
-
30
  class APIResponse(BaseModel):
31
  response: Prediction
32
  metadata: MetaData
 
1
+ # Schema validation for the API response
2
+
3
  from pydantic import BaseModel, Field
4
  from typing import Annotated, Dict
5
 
 
6
  class InputData(BaseModel):
7
  comment: Annotated[str, Field(..., description="User tweet or comment to be classified")]
8
 
 
9
  class Prediction(BaseModel):
10
  class_label: int
11
  confidence: float
 
26
  api_version: str
27
  developer: str
28
 
 
29
  class APIResponse(BaseModel):
30
  response: Prediction
31
  metadata: MetaData
main/utils.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utility functions for the model inference api
2
+
3
+ import yaml
4
+ import joblib
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pathlib import Path
8
+ from lime.lime_text import LimeTextExplainer
9
+
10
# Load the YAML file holding the registered-model metadata (name, version).
try:
    with open(Path("model/registered_model_meta"), 'r') as f:
        model_metadata = yaml.safe_load(f)
except OSError as e:
    # Narrowed from a bare `except:` (which also swallowed YAML parse errors
    # and misreported them as a missing file) and chained so the original
    # cause is preserved in the traceback.
    raise FileNotFoundError("Failed to load file having model metadata") from e
16
+
17
+
18
class LimeExplainer:
    """Produces LIME word-level explanations for the loaded model's predictions."""

    def __init__(self):
        """
        Initialize the LIME text explainer.

        Sets the class names used in explanations and resets the cached model
        prediction to None; it is populated on the first call to `explain`.
        """
        class_names = ["hate", "non-hate"]
        self.explainer = LimeTextExplainer(class_names=class_names)
        self.prediction = None

    def _predict_proba(self, tweet) -> np.ndarray:
        """
        Prediction function handed to LIME (renamed from the misspelled
        `_get_prediction_explaination`): runs the model on the perturbed
        texts and returns the class probability matrix. The raw model
        response is cached on `self.prediction` for the caller.
        """
        input_df = pd.DataFrame({
            "comments": tweet
        })
        self.prediction = model.predict(context=None, model_input=input_df)
        return np.array(self.prediction["class_probability_scores"])

    def explain(self, tweet) -> dict:
        """
        Generate a LIME explanation for a given tweet.

        Parameters
            tweet: str : Input tweet or comment to be classified.

        Returns
            dict : Mapping of words to their feature weights, rounded to 4 dp.
        """
        explanation = self.explainer.explain_instance(
            tweet,
            self._predict_proba,
            num_features=5
        )
        return round_dict_values(dic=dict(explanation.as_list()))
57
+
58
+
59
def load_model():
    """
    Load the pickled ML model into the module-level `model` global.

    The loaded model is also returned, so callers may use the return value
    instead of relying on the global (backward-compatible: the previous
    version returned None and only set the global).

    Raises:
        RuntimeError: if the model file cannot be opened or unpickled.
    """
    global model  # declared at function scope, not buried inside the with-block
    try:
        with open(Path("model/python_model.pkl"), "rb") as f:
            model = joblib.load(f)
        return model
    except Exception as e:
        # Chain the original cause for a complete traceback.
        raise RuntimeError(f"Failed to load model from hub: {e}") from e
68
+
69
+
70
def get_model_registry() -> str:
    """Return the registered model's name from the loaded metadata."""
    return model_metadata['model_name']
74
+
75
+
76
def get_model_version() -> str:
    """Return the registered model's version from the loaded metadata."""
    return model_metadata['model_version']
80
+
81
+
82
def round_dict_values(dic) -> dict:
    """Return a copy of *dic* with keys coerced to str and values rounded to 4 dp."""
    rounded = {}
    for key, value in dic.items():
        rounded[str(key)] = round(value, 4)
    return rounded
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  fastapi==0.116.1
2
  uvicorn==0.35.0
3
  joblib==1.5.1
4
- PyYAML==6.0.2
 
 
 
1
  fastapi==0.116.1
2
  uvicorn==0.35.0
3
  joblib==1.5.1
4
+ PyYAML==6.0.2
5
+ lime==0.2.0.1
6
+ gunicorn==23.0.0