tharu22 committed on
Commit
1a979ff
·
1 Parent(s): 0a040f4

created test project

Browse files
Files changed (8) hide show
  1. .gitignore +1 -0
  2. Dockerfile +48 -0
  3. log_reg_model.pkl +3 -0
  4. main.py +35 -0
  5. models.py +37 -0
  6. requirements.txt +5 -0
  7. schema.py +5 -0
  8. sms_process_data_main.xlsx +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
Dockerfile ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python 3.9 image to keep the final image small.
FROM python:3.9-slim

# All subsequent commands run relative to /app inside the container.
WORKDIR /app

# Point the Hugging Face libraries at a cache directory inside /app.
ENV HF_HOME=/app/.cache

# Install dependencies BEFORE copying the application code so this layer is
# reused from the build cache whenever only source files change.
# The earlier version installed the requirements twice and used
# `COPY --chown=user`, although no account named "user" is created in this image.
COPY requirements.txt requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --upgrade pip && \
    pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application code, the pickled model and the training spreadsheet.
COPY . /app

# Create the cache directories and make them world-writable so models can be
# downloaded at runtime even if the container is started as a non-root user.
# `chmod -R` on /app/.cache already covers the huggingface sub-directory.
RUN mkdir -p /app/.cache/huggingface/hub && \
    chmod -R 777 /app/.cache

# The app listens on 7860 (instead of uvicorn's default 8000).
EXPOSE 7860

# Serve the FastAPI app with uvicorn; 0.0.0.0 makes it reachable from outside
# the container.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
log_reg_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21e0e1132a61d2fed963c2786120590917124684a4ed569075ba813165a8368
3
+ size 6874
main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import models
3
+ from schema import Prediction
4
+ from sentence_transformers import util
5
+
6
# ASGI application object; route handlers below register themselves on it.
app = FastAPI()


@app.get("/")
def home_page():
    """Return the static welcome payload served at the API root."""
    welcome = {"Home": "Welcome to prediction hub"}
    return welcome
11
+
12
@app.get("/embeddings")
def display_embedding(message: str = "Hello guys enter a text to get embeddings"):
    """Return the embedding of ``message`` together with its dimension.

    Args:
        message: Text to embed, taken from the query string.

    Returns:
        ``{"Dimension": {<length of the vector>: <vector as a list>}}``.

    Raises:
        HTTPException: Status 500 when the embedding cannot be computed.
            Previously the handler returned a one-element *set* containing
            the error text, which was sent back as a successful response.
    """
    # Imported locally so the handler does not depend on the file's import list.
    from fastapi import HTTPException

    try:
        embedding = models.get_embedding(message)
        dimension = len(embedding)
        return {"Dimension": {dimension: embedding.tolist()}}
    except Exception as e:
        # Top-level request boundary: report the failure as a real HTTP error.
        raise HTTPException(
            status_code=500,
            detail=f"Unable to fetch the embeddings. Error :{e}",
        ) from e
20
+
21
@app.post("/prediction")
def display_prediction(prediction: Prediction):
    """Classify ``prediction.message`` with the pickled model.

    The message is embedded, the model stored in ``log_reg_model.pkl`` is
    loaded, and its prediction is reported inside a human-readable sentence.
    """
    text = prediction.message
    classifier = models.load_model('log_reg_model.pkl')
    vectors = models.get_embedding([text])
    # NOTE(review): ``outcome`` is the list produced by ``.tolist()``, so the
    # sentence below contains the list's repr (presumably one label in
    # brackets) — confirm whether that is the intended output format.
    outcome = classifier.predict(vectors).tolist()
    return {"Prediction": f"{text} is a {outcome}"}
28
+
29
@app.post("/cosine_similarity")
def display_cosine_similarity(prediction: Prediction):
    """Report the cosine similarity between the two messages in the request.

    Both texts are embedded in a single call; the similarity is rounded to
    four decimals and returned under a key that spells out both messages.
    """
    first_text, second_text = prediction.message, prediction.message_1
    vectors = models.get_embedding([first_text, second_text])
    score = util.cos_sim(vectors[0], vectors[1]).item()
    label = f"Cosine Similarity between {first_text} and {second_text} is"
    return {label: round(score, 4)}
models.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from sklearn.linear_model import LogisticRegression
3
+ import pickle
4
+ from sklearn.model_selection import train_test_split
5
+ import joblib
6
+
7
+ import pandas as pd
8
+
9
# Encoders already constructed in this process, keyed by model name, so the
# model is built once instead of on every call.
_ENCODER_CACHE = {}


def get_embedding(text):
    """Encode ``text`` with the 'Alibaba-NLP/gte-base-en-v1.5' sentence encoder.

    Args:
        text: A single string or a list of strings (callers in this project
            pass both forms).

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``text``; callers
        here use ``len()`` and ``.tolist()`` on it.
    """
    model_name = 'Alibaba-NLP/gte-base-en-v1.5'
    model_encode = _ENCODER_CACHE.get(model_name)
    if model_encode is None:
        # NOTE: trust_remote_code=True lets the model repository supply custom
        # code that is executed locally; keep the model name pinned to a
        # source you trust.
        model_encode = SentenceTransformer(model_name, trust_remote_code=True)
        _ENCODER_CACHE[model_name] = model_encode
    return model_encode.encode(text)
13
+
14
def train_model():
    """Train a logistic-regression classifier on the SMS spreadsheet and save it.

    Reads ``sms_process_data_main.xlsx``, drops rows missing ``MessageText``
    or ``label``, embeds the training portion of an 80/20 split, fits the
    classifier, pickles it to ``log_reg_model.pkl`` and returns it.
    """
    frame = pd.read_excel("sms_process_data_main.xlsx")
    frame.dropna(subset=['MessageText', 'label'], inplace=True)
    texts = frame['MessageText']
    labels = frame['label']

    # Fixed seed keeps the split reproducible; the held-out part is not used
    # anywhere in this function.
    train_texts, _held_out_texts, train_labels, _held_out_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    train_vectors = get_embedding(train_texts.tolist())
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(train_vectors, train_labels)

    save_model(classifier, 'log_reg_model.pkl')
    return classifier
25
+
26
def save_model(model, filename):
    """Pickle ``model`` into the file at ``filename`` and print a confirmation."""
    with open(filename, 'wb') as sink:
        pickle.dump(model, sink)
    print(f"Model saved to {filename}")
30
+
31
+
32
def load_model(filename):
    """Load and return the object pickled in ``filename``.

    Args:
        filename: Path of a pickle file, e.g. one written by ``save_model``.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load files that
    # this project produced itself; never pass a user-supplied path.
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    print(f"Model loaded from {filename}")
    return loaded_model
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi[standard]
2
+ pandas
3
+ scikit-learn
4
+ sentence_transformers
5
+ openpyxl
schema.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class Prediction(BaseModel):
    """Request body carrying up to two text messages, both with placeholder defaults."""

    # Primary text.
    message: str = "Enter a text message"
    # Second text.
    message_1: str = "Enter a text message"
sms_process_data_main.xlsx ADDED
Binary file (42.2 kB). View file