Ezhil committed on
Commit
e1ad655
·
1 Parent(s): 17c8f2e

folder structure is added

Browse files
Dockerfile CHANGED
@@ -19,9 +19,8 @@ RUN pip install --no-cache-dir -r requirements.txt
19
  # Copy the application code
20
  COPY . .
21
 
22
- # Expose the FastAPI default port
23
- EXPOSE 8000
24
 
25
  # Run FastAPI with Uvicorn
26
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
27
-
 
19
  # Copy the application code
20
  COPY . .
21
 
22
+ # Expose the port Uvicorn listens on (7860)
23
+ EXPOSE 7860
24
 
25
  # Run FastAPI with Uvicorn
26
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
README.md CHANGED
@@ -1,8 +1,11 @@
1
- ---
2
- title: Embedding Fastapi
3
- emoji: 🏆
4
- colorFrom: pink
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
8
- ---
 
 
 
 
1
+ # SMS Classification API
2
+
3
+ This project is a FastAPI-based web service for classifying SMS messages into categories like "Offer" and "Transaction."
4
+
5
+ ## 🚀 Features
6
+ - Uses Alibaba-NLP's `gte-base-en-v1.5` to generate embeddings.
7
+ - Trained with `Logistic Regression` on labeled SMS data.
8
+ - Supports API routes for embedding generation and classification.
9
+ - Built using `FastAPI`, `Scikit-Learn`, and `SentenceTransformers`.
10
+
11
+ ## 📁 Project Structure
Routes/classify_sms.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from schemas.sms_schema import SMSRequest, SMSResponse
3
+ from services.sms_service import classify_sms
4
+
5
+ classify_sms_router = APIRouter()
6
+
7
+ @classify_sms_router.post("/classify_sms", response_model=SMSResponse)
8
+ def classify(request: SMSRequest):
9
+ return classify_sms(request.text)
data/sms_process_data_main.xlsx ADDED
Binary file (42.2 kB). View file
 
main.py CHANGED
@@ -1,114 +1,16 @@
1
- # from fastapi import FastAPI
2
- # from pydantic import BaseModel
3
- # from typing import List
4
- # import numpy as np
5
- # from sentence_transformers import SentenceTransformer
6
-
7
- # # Load the pre-trained model
8
- # model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
9
-
10
- # # Define request models
11
- # class MessageRequest(BaseModel):
12
- # messages: List[str]
13
-
14
- # class CosineSimilarityRequest(BaseModel):
15
- # text1: str
16
- # text2: str
17
-
18
- # # Define response models
19
- # class EmbeddingResponse(BaseModel):
20
- # dimensions: int # Only return embedding size
21
- # numeric_values: List[List[float]]
22
-
23
- # class CosineSimilarityResponse(BaseModel):
24
- # similarity: float
25
-
26
- # # Initialize FastAPI app
27
- # app = FastAPI()
28
-
29
- # @app.get("/")
30
- # def home():
31
- # return {"Message": "Welcome to homepage, kindly proceed by giving /docs in the URL"}
32
-
33
- # @app.post("/embed", response_model=EmbeddingResponse)
34
- # def embed(request: MessageRequest):
35
- # new_embeddings = model.encode(request.messages, convert_to_tensor=True)
36
- # return EmbeddingResponse(
37
- # dimensions=new_embeddings.shape[1], # Return only the embedding dimension
38
- # numeric_values=new_embeddings.tolist()
39
- # )
40
-
41
- # @app.post("/cosine_similarity", response_model=CosineSimilarityResponse)
42
- # def cosine_similarity(request: CosineSimilarityRequest):
43
- # embeddings = model.encode([request.text1, request.text2], convert_to_tensor=True)
44
- # cos_sim = np.dot(embeddings[0], embeddings[1]) / (np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1]))
45
- # return CosineSimilarityResponse(similarity=cos_sim)
46
-
47
-
48
  from fastapi import FastAPI
49
- from pydantic import BaseModel
50
- from typing import List
51
- import numpy as np
52
- from sentence_transformers import SentenceTransformer
53
-
54
- # Load the pre-trained model
55
- model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
56
-
57
- # Define request models
58
- class MessageRequest(BaseModel):
59
- messages: List[str]
60
-
61
- class CosineSimilarityRequest(BaseModel):
62
- text1: str
63
- text2: str
64
-
65
- class SMSClassificationRequest(BaseModel):
66
- text: str
67
-
68
- # Define response models
69
- class EmbeddingResponse(BaseModel):
70
- dimensions: int # Only return embedding size
71
- numeric_values: List[List[float]]
72
-
73
- class CosineSimilarityResponse(BaseModel):
74
- similarity: float
75
-
76
- class SMSClassificationResponse(BaseModel):
77
- category: str
78
 
79
  # Initialize FastAPI app
80
- app = FastAPI()
81
 
82
  @app.get("/")
83
  def home():
84
- return {"Message": "Welcome to homepage, kindly proceed by giving /docs in the URL"}
85
-
86
- @app.post("/embed", response_model=EmbeddingResponse)
87
- def embed(request: MessageRequest):
88
- new_embeddings = model.encode(request.messages, convert_to_tensor=True)
89
- return EmbeddingResponse(
90
- dimensions=new_embeddings.shape[1], # Return only the embedding dimension
91
- numeric_values=new_embeddings.tolist()
92
- )
93
-
94
- @app.post("/cosine_similarity", response_model=CosineSimilarityResponse)
95
- def cosine_similarity(request: CosineSimilarityRequest):
96
- embeddings = model.encode([request.text1, request.text2], convert_to_tensor=True)
97
- cos_sim = np.dot(embeddings[0], embeddings[1]) / (np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1]))
98
- return CosineSimilarityResponse(similarity=cos_sim)
99
 
100
- @app.post("/classify_sms", response_model=SMSClassificationResponse)
101
- def classify_sms(request: SMSClassificationRequest):
102
- offer_keywords = ["discount", "offer", "sale", "deal", "promo", "free"]
103
- transaction_keywords = ["payment", "transaction", "debit", "credit", "purchase", "order"]
104
-
105
- text_lower = request.text.lower()
106
-
107
- if any(word in text_lower for word in offer_keywords):
108
- category = "offer"
109
- elif any(word in text_lower for word in transaction_keywords):
110
- category = "transaction"
111
- else:
112
- category = "unknown"
113
-
114
- return SMSClassificationResponse(category=category)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from routes.embedding import embedding_router
3
+ from routes.cosine_similarity import similarity_router
4
+ from routes.classify_sms import classify_sms_router
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Initialize FastAPI app
7
+ app = FastAPI(title="SMS Classification API", description="Classifies SMS messages into categories.")
8
 
9
  @app.get("/")
10
  def home():
11
+ return {"Message": "Welcome! Use /docs to test the API"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Include API routes
14
+ app.include_router(embedding_router)
15
+ app.include_router(similarity_router)
16
+ app.include_router(classify_sms_router)
 
 
 
 
 
 
 
 
 
 
 
models/train_models.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LogisticRegression
7
+
8
+ # Load dataset
9
+ df = pd.read_excel("data/sms_process_data_main.xlsx")
10
+
11
+ # Load SentenceTransformer model
12
+ encoder_model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)
13
+
14
+ # Generate embeddings
15
+ embeddings = encoder_model.encode(df["MessageText"].tolist(), convert_to_numpy=True)
16
+
17
+ # Encode labels
18
+ label_map = {"Offer": 0, "Transaction": 1}
19
+ df["label"] = df["label"].map(label_map)
20
+
21
+ # Split dataset
22
+ X_train, X_test, y_train, y_test = train_test_split(embeddings, df["label"], test_size=0.2, random_state=42)
23
+
24
+ # Train model
25
+ classifier = LogisticRegression()
26
+ classifier.fit(X_train, y_train)
27
+
28
+ # Save trained model
29
+ joblib.dump(classifier, "models/sms_classifier.pkl")
30
+ print("Model saved as 'sms_classifier.pkl'")
schemas/sms_schema.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+ class SMSRequest(BaseModel):
4
+ text: str
5
+
6
+ class SMSResponse(BaseModel):
7
+ category: str
services/sms_service.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ from sentence_transformers import SentenceTransformer
3
+ import numpy as np
4
+
5
+ # Load the trained model
6
+ classifier = joblib.load("models/sms_classifier.pkl")
7
+ encoder_model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)
8
+
9
+ def classify_sms(text: str):
10
+ embedding = encoder_model.encode([text], convert_to_numpy=True)
11
+ prediction = classifier.predict(embedding)
12
+
13
+ category = "Offer" if prediction[0] == 0 else "Transaction"
14
+ return {"category": category}