Ezhil committed on
Commit
1aa4489
·
0 Parent(s):

Initial commit

Browse files
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Base image: slim official Python 3.9.
FROM python:3.9-slim

# All subsequent paths are relative to /app inside the container.
WORKDIR /app

# Copy and install dependencies first so this layer is cached
# across source-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source into the image.
COPY . .

# Port the app listens on (Hugging Face Spaces default).
EXPOSE 7860

# Run the FastAPI app with Uvicorn.
# Bug fix: main.py sits at the repository root (there is no app/ package
# in this commit), so the import path is "main:app", not "app.main:app".
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Embedding Restapi
3
+ emoji: 😻
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ ---
data/sms_process_data_main.xlsx ADDED
Binary file (42.2 kB). View file
 
main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from services.sms_service import predict_label, compute_cosine_similarity, compute_embeddings
3
+ from schemas.input_schemas import CosineSimilarityInput, MessageInput, EmbeddingInput
4
+
app = FastAPI()


# Root endpoint: static welcome payload, doubles as a liveness probe.
@app.get("/")
async def home():
    """Return a fixed welcome message for the API root."""
    return {"message": "Welcome to SMS Classification API"}
11
+
# Cosine-similarity endpoint: compares two texts via the service layer.
@app.post("/cosine_similarity")
async def get_cosine_similarity(input_data: CosineSimilarityInput):
    """Compute the cosine similarity between ``text1`` and ``text2``.

    Returns the service-layer response; unexpected failures map to HTTP 500.

    Raises:
        HTTPException: propagated from the service layer, or 500 on any
            other error.
    """
    try:
        return await compute_cosine_similarity(input_data.text1, input_data.text2)
    except HTTPException:
        # Bug fix: the service layer already raises HTTPException; the old
        # broad handler re-wrapped it, nesting its detail in a new 500 body.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error computing similarity: {str(e)}")
19
+
# SMS classification endpoint: predicts the label for one message.
@app.post("/predict_label")
async def classify_message(input_data: MessageInput):
    """Classify a single SMS message via the service layer.

    Raises:
        HTTPException: propagated from the service layer, or 500 on any
            other error.
    """
    try:
        return await predict_label(input_data.message)
    except HTTPException:
        # Bug fix: let service-layer HTTPExceptions propagate unchanged
        # instead of re-wrapping them in a nested 500 message.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error predicting label: {str(e)}")
27
+
# Embedding endpoint: returns the vector representation of a message.
@app.post("/compute_embeddings")
async def get_embeddings(input_data: EmbeddingInput):
    """Compute the embedding for one message via the service layer.

    Raises:
        HTTPException: propagated from the service layer, or 500 on any
            other error.
    """
    try:
        return await compute_embeddings(input_data.message)
    except HTTPException:
        # Bug fix: let service-layer HTTPExceptions propagate unchanged
        # instead of re-wrapping them in a nested 500 message.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error computing embeddings: {str(e)}")
models/sms_classifier_model.pkl ADDED
Binary file (21.3 kB). View file
 
models/tfidf_vectorizer.pkl ADDED
Binary file (93.6 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ sentence-transformers
4
+ scikit-learn
5
+ pandas
6
+ numpy
7
+ openpyxl # Needed for reading Excel files
8
+ gradio
schemas/input_schemas.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
class CosineSimilarityInput(BaseModel):
    """Request body for the /cosine_similarity endpoint."""

    # The two texts whose cosine similarity will be computed.
    text1: str
    text2: str
8
+
class MessageInput(BaseModel):
    """Request body for the /predict_label (SMS classification) endpoint."""

    # Raw SMS text to classify.
    message: str
12
+
class EmbeddingInput(BaseModel):
    """Request body for the /compute_embeddings endpoint."""

    # Raw SMS text to embed.
    message: str
16
+
class CosineSimilarityResponse(BaseModel):
    """Response body carrying a single cosine-similarity score."""

    # Scalar similarity between the two input texts.
    cosine_similarity: float
20
+
class EmbeddingResponse(BaseModel):
    """Response body carrying embedding vectors (one row per input)."""

    # 2-D list: outer = inputs, inner = vector components.
    embeddings: List[List[float]]
services/sms_service.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from fastapi import HTTPException
6
+ from schemas.input_schemas import CosineSimilarityResponse, EmbeddingResponse
7
+
# Paths to the artifacts produced by services/train_model.py.
_MODEL_PATH = "models/sms_classifier_model.pkl"
_VECTORIZER_PATH = "models/tfidf_vectorizer.pkl"

# Process-wide cache of (classifier, vectorizer) so the pickles are read once.
_model_cache = None


def load_model():
    """Load (and cache) the trained classifier and TF-IDF vectorizer.

    Returns:
        tuple: ``(classifier, vectorizer)`` unpickled from disk.

    Raises:
        HTTPException: 500 if either artifact cannot be loaded.
    """
    global _model_cache
    if _model_cache is not None:
        # Performance fix: the original re-read both pickle files on every
        # request; serve the cached pair instead.
        return _model_cache
    try:
        # NOTE(review): pickle.load is only safe because these are our own
        # artifacts; never point these paths at untrusted files.
        with open(_MODEL_PATH, 'rb') as f:
            classifier = pickle.load(f)
        with open(_VECTORIZER_PATH, 'rb') as f:
            vectorizer = pickle.load(f)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error loading model: {str(e)}")
    _model_cache = (classifier, vectorizer)
    return _model_cache
23
+
async def predict_label(message: str):
    """Classify a single SMS message with the persisted model.

    Args:
        message: Raw SMS text.

    Returns:
        dict: ``{"label": <predicted label>}``.

    Raises:
        HTTPException: propagated from ``load_model``, or 500 on prediction
            failure.
    """
    try:
        classifier, vectorizer = load_model()
        # TF-IDF encode the message, then take the single prediction.
        message_vec = vectorizer.transform([message])
        label = classifier.predict(message_vec)[0]
        return {"label": label}
    except HTTPException:
        # Bug fix: the broad handler below used to catch load_model's
        # HTTPException and nest its detail inside a new 500 message.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error predicting label: {str(e)}")
35
+
async def compute_cosine_similarity(text1: str, text2: str):
    """Compute cosine similarity between two texts in TF-IDF space.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        CosineSimilarityResponse: similarity in [-1, 1] (0.0 when either
        text has no in-vocabulary terms).

    Raises:
        HTTPException: propagated from ``load_model``, or 500 on any other
            failure.
    """
    try:
        _, vectorizer = load_model()  # classifier is not needed here

        # Dense TF-IDF vectors for both texts.
        vec1 = vectorizer.transform([text1]).toarray()
        vec2 = vectorizer.transform([text2]).toarray()

        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        # Bug fix: a text with only out-of-vocabulary terms produces a zero
        # vector; the original divided by zero and returned NaN. Define the
        # similarity as 0.0 in that case.
        if norm1 == 0.0 or norm2 == 0.0:
            return CosineSimilarityResponse(cosine_similarity=0.0)

        cosine_sim = np.dot(vec1, vec2.T) / (norm1 * norm2)
        return CosineSimilarityResponse(cosine_similarity=cosine_sim[0][0])
    except HTTPException:
        # Bug fix: don't re-wrap HTTPExceptions from load_model.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error computing similarity: {str(e)}")
49
+
async def compute_embeddings(message: str):
    """Return the TF-IDF vector of a message as a nested list.

    Note: despite the endpoint name, this uses the TF-IDF vectorizer, not a
    sentence-transformer model.

    Args:
        message: Raw SMS text.

    Returns:
        EmbeddingResponse: one dense TF-IDF row per input message.

    Raises:
        HTTPException: propagated from ``load_model``, or 500 on any other
            failure.
    """
    try:
        _, vectorizer = load_model()  # classifier is not needed here
        # Dense TF-IDF vector, converted for JSON serialization.
        embedding = vectorizer.transform([message]).toarray().tolist()
        return EmbeddingResponse(embeddings=embedding)
    except HTTPException:
        # Bug fix: don't re-wrap HTTPExceptions from load_model.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error computing embeddings: {str(e)}")
services/train_model.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train the SMS classifier (TF-IDF + logistic regression) and persist it."""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pickle
import os

# Load the labelled SMS dataset from the bundled spreadsheet.
file_path = "data/sms_process_data_main.xlsx"
df = pd.read_excel(file_path)

# Features are the raw messages; labels are 'Transaction' or 'Offer'.
X = df['MessageText']
y = df['label']

# Hold out 20% (fixed seed for reproducibility); only the train split is
# consumed below, the test split is kept for parity with the original run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the TF-IDF vectorizer on the training messages and encode them.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# Train the logistic-regression classifier on the encoded messages.
classifier = LogisticRegression()
classifier.fit(X_train_vec, y_train)

# Persist both artifacts under models/ (created if missing).
models_dir = "models"
os.makedirs(models_dir, exist_ok=True)

with open(os.path.join(models_dir, 'sms_classifier_model.pkl'), 'wb') as model_file:
    pickle.dump(classifier, model_file)

with open(os.path.join(models_dir, 'tfidf_vectorizer.pkl'), 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("Model and vectorizer saved successfully!")