Ezhil committed on
Commit
e9a2c4c
·
1 Parent(s): 781c355

modified code

Browse files
Files changed (5) hide show
  1. Dockerfile +9 -17
  2. main.py +33 -15
  3. model.py +65 -0
  4. requirements.txt +3 -2
  5. service.py +9 -0
Dockerfile CHANGED
@@ -1,26 +1,18 @@
1
- # Use an official Python base image
2
- FROM python:3.10
3
 
4
- # Set the working directory
5
  WORKDIR /app
6
 
7
- # Set environment variables for HF cache
8
- ENV HF_HOME="/app/cache"
9
- ENV TRANSFORMERS_CACHE="/app/cache"
10
- ENV SENTENCE_TRANSFORMERS_HOME="/app/cache"
11
-
12
- # Create the cache directory with appropriate permissions
13
- RUN mkdir -p /app/cache && chmod -R 777 /app/cache
14
-
15
- # Copy the requirements file and install dependencies
16
  COPY requirements.txt .
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
- # Copy the application code
20
  COPY . .
21
 
22
- # Expose FastAPI default port
23
- EXPOSE 7860
24
 
25
- # Run FastAPI with Uvicorn
26
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # Use official Python image as the base
2
+ FROM python:3.9-slim
3
 
4
+ # Set working directory
5
  WORKDIR /app
6
 
7
+ # Copy requirements file and install dependencies
 
 
 
 
 
 
 
 
8
  COPY requirements.txt .
9
  RUN pip install --no-cache-dir -r requirements.txt
10
 
11
+ # Copy the rest of the application
12
  COPY . .
13
 
14
+ # Expose the FastAPI app port
15
+ EXPOSE 8000
16
 
17
+ # Run the application with Uvicorn
18
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
main.py CHANGED
@@ -1,32 +1,50 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from typing import List, Tuple
4
- import numpy as np
5
- from sentence_transformers import SentenceTransformer
6
 
7
- # Load the pre-trained model
8
- model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
9
 
10
- # Define request model
11
  class MessageRequest(BaseModel):
12
  messages: List[str]
13
 
14
- # Define response model
 
 
 
 
 
 
15
  class EmbeddingResponse(BaseModel):
16
- dimensions: int # Only return embedding size
17
  numeric_values: List[List[float]]
18
 
19
- # Initialize FastAPI app
20
- app = FastAPI()
 
 
 
21
 
22
  @app.get("/")
23
- def home ():
24
- return {"Message":"Welcome to homepage, kindly proceed by giving /docs in the URL" }
25
 
26
  @app.post("/embed", response_model=EmbeddingResponse)
27
  def embed(request: MessageRequest):
28
- new_embeddings = model.encode(request.messages, convert_to_tensor=True)
29
  return EmbeddingResponse(
30
- dimensions=new_embeddings.shape[1], # Return only the embedding dimension
31
- numeric_values=new_embeddings.tolist()
32
  )
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ from typing import List
4
+ from model import get_embeddings, predict_sms_category
5
+ from service import calculate_cosine_similarity
6
 
7
+ # FastAPI app
8
+ app = FastAPI()
9
 
 
10
  class MessageRequest(BaseModel):
11
  messages: List[str]
12
 
13
+ class CosineSimilarityRequest(BaseModel):
14
+ message1: str
15
+ message2: str
16
+
17
+ class PredictionRequest(BaseModel):
18
+ message: str
19
+
20
  class EmbeddingResponse(BaseModel):
21
+ dimensions: int
22
  numeric_values: List[List[float]]
23
 
24
+ class CosineSimilarityResponse(BaseModel):
25
+ similarity: float
26
+
27
+ class PredictionResponse(BaseModel):
28
+ label: str
29
 
30
  @app.get("/")
31
+ def home():
32
+ return {"Message": "Welcome to the SMS classifier API. Use /docs for documentation."}
33
 
34
  @app.post("/embed", response_model=EmbeddingResponse)
35
  def embed(request: MessageRequest):
36
+ embeddings = get_embeddings(request.messages)
37
  return EmbeddingResponse(
38
+ dimensions=embeddings.shape[1], # Number of embedding dimensions
39
+ numeric_values=embeddings.tolist()
40
  )
41
+
42
+ @app.post("/cosine_similarity", response_model=CosineSimilarityResponse)
43
+ def cosine_similarity(request: CosineSimilarityRequest):
44
+ similarity = calculate_cosine_similarity(request.message1, request.message2)
45
+ return CosineSimilarityResponse(similarity=similarity)
46
+
47
+ @app.post("/predict", response_model=PredictionResponse)
48
+ def predict(request: PredictionRequest):
49
+ label = predict_sms_category(request.message)
50
+ return PredictionResponse(label=label)
model.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ from sentence_transformers import SentenceTransformer
3
+ import numpy as np
4
+ from sklearn.linear_model import LogisticRegression
5
+ from sklearn.preprocessing import LabelEncoder
6
+ from sklearn.model_selection import train_test_split
7
+ import pandas as pd
8
+
9
+ # Load pre-trained Sentence Transformer model
10
+ model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
11
+
12
+ # Load and preprocess SMS data (from an Excel file)
13
+ def load_sms_data(file_path="data/sms_process_data_main.xlsx"):
14
+ data = pd.read_excel(file_path)
15
+ texts = data['MessageText'].tolist()
16
+ labels = data['label'].tolist()
17
+
18
+ embeddings = model.encode(texts, convert_to_tensor=True)
19
+ embeddings = embeddings.detach().numpy()
20
+
21
+ label_encoder = LabelEncoder()
22
+ encoded_labels = label_encoder.fit_transform(labels)
23
+
24
+ return embeddings, encoded_labels, label_encoder
25
+
26
+ # Train and save the Logistic Regression model
27
+ def train_sms_classifier():
28
+ embeddings, labels, label_encoder = load_sms_data()
29
+ X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.3, random_state=42)
30
+
31
+ # Train Logistic Regression
32
+ lr_model = LogisticRegression()
33
+ lr_model.fit(X_train, y_train)
34
+
35
+ accuracy = lr_model.score(X_test, y_test)
36
+ print(f"Model Accuracy: {accuracy * 100:.2f}%")
37
+
38
+ # Save the trained model and label encoder
39
+ joblib.dump(lr_model, 'model/sms_classifier_model.pkl')
40
+ joblib.dump(label_encoder, 'model/label_encoder.pkl')
41
+
42
+ return lr_model, label_encoder
43
+
44
+ # Load the saved model and label encoder
45
+ def load_saved_model():
46
+ lr_model = joblib.load('model/sms_classifier_model.pkl')
47
+ label_encoder = joblib.load('model/label_encoder.pkl')
48
+ return lr_model, label_encoder
49
+
50
+ # Generate embeddings for the messages
51
+ def get_embeddings(messages):
52
+ embeddings = model.encode(messages, convert_to_tensor=True)
53
+ return embeddings.detach().numpy()
54
+
55
+ # Predict the label of an SMS message
56
+ def predict_sms_category(message):
57
+ # Load the saved model and label encoder
58
+ lr_model, label_encoder = load_saved_model()
59
+
60
+ embedding = model.encode([message], convert_to_tensor=True)
61
+ embedding = embedding.detach().numpy()
62
+
63
+ prediction = lr_model.predict(embedding)
64
+ label = label_encoder.inverse_transform(prediction)[0]
65
+ return label
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  fastapi
2
  uvicorn
3
  pandas
4
- scikit-learn
5
  sentence-transformers
6
- numpy
 
 
 
1
  fastapi
2
  uvicorn
3
  pandas
 
4
  sentence-transformers
5
+ scikit-learn
6
+ openpyxl
7
+ joblib
service.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.metrics.pairwise import cosine_similarity
2
+ from model import model
3
+
4
+ # Calculate cosine similarity between two messages
5
+ def calculate_cosine_similarity(message1, message2):
6
+ embeddings = model.encode([message1, message2], convert_to_tensor=True)
7
+ embeddings = embeddings.detach().numpy()
8
+ similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
9
+ return similarity