Spaces:

tharu22
/

test

Sleeping

App Files Files Community

tharu22 committed on Feb 18, 2025

Commit

1a979ff

1 Parent(s): 0a040f4

created test project

Browse files

Files changed (8) hide show

.gitignore +1 -0
Dockerfile +48 -0
log_reg_model.pkl +3 -0
main.py +35 -0
models.py +37 -0
requirements.txt +5 -0
schema.py +5 -0
sms_process_data_main.xlsx +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv

Dockerfile ADDED Viewed

	@@ -0,0 +1,48 @@

+# Use the official slim Python 3.9 image as the base to keep the image small
+FROM python:3.9-slim
+# All subsequent commands run relative to /app inside the container
+WORKDIR /app
+# Point the Hugging Face libraries at a cache directory inside /app
+ENV HF_HOME=/app/.cache
+# Copy only requirements.txt first so the dependency layer below is reused from
+# the build cache when application code changes but the requirements do not
+COPY requirements.txt requirements.txt
+# Upgrade pip, then install the dependencies exactly once
+# --no-cache-dir keeps pip's download cache out of the image layers
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the application code, the pickled model and the training workbook into /app
+COPY . /app
+# Create the Hugging Face cache directories and open up their permissions
+# (presumably so model downloads still work if the container runs as a non-root user)
+# chmod -R on /app/.cache already covers every directory beneath it
+RUN mkdir -p /app/.cache/huggingface/hub && \
+    chmod -R 777 /app/.cache
+# Document the port the app listens on (7860 rather than uvicorn's default 8000)
+EXPOSE 7860
+# Start the FastAPI app defined as `app` in main.py under uvicorn
+# --host 0.0.0.0 makes it reachable from outside the container
+# --port 7860 matches the EXPOSE line above
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

log_reg_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d21e0e1132a61d2fed963c2786120590917124684a4ed569075ba813165a8368
+size 6874

main.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from fastapi import FastAPI
+import models
+from schema import Prediction
+from sentence_transformers import  util
+app = FastAPI()
+@app.get("/")
+def home_page():
+    return {"Home": "Welcome to prediction hub"}
+@app.get("/embeddings")
+def display_embedding(message : str = "Hello guys enter a text to get embeddings"):
+    try:
+        embedding = models.get_embedding(message)
+        dimension = len(embedding)
+        return {"Dimension" : {dimension : embedding.tolist()}}
+    except Exception as e:
+        return {f"Unable to fetch the embeddings. Error :{e}" }
+@app.post("/prediction")
+def display_prediction(prediction : Prediction):
+    message = prediction.message
+    embedding = models.get_embedding([message])
+    loaded_model = models.load_model('log_reg_model.pkl')
+    result = loaded_model.predict(embedding).tolist()
+    return {"Prediction": f"{message} is a {result}"}
+@app.post("/cosine_similarity")
+def display_cosine_similarity(prediction : Prediction):
+    message = prediction.message
+    message_1 = prediction.message_1
+    embendding = models.get_embedding([message,message_1])
+    similarity = util.cos_sim(embendding[0], embendding[1]).item()
+    return {f"Cosine Similarity between {message}  and {message_1} is" : round(similarity, 4)}

models.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from sentence_transformers import SentenceTransformer
+from sklearn.linear_model import LogisticRegression
+import pickle
+from sklearn.model_selection import train_test_split
+import joblib
+import pandas as pd
+def get_embedding(text):
+    model_encode = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
+    embedding = model_encode.encode(text)
+    return embedding
+def train_model():
+    sample_data_df = pd.read_excel("sms_process_data_main.xlsx")
+    sample_data_df.dropna(subset=['MessageText', 'label'], inplace=True)
+    input = sample_data_df['MessageText']
+    label = sample_data_df['label']
+    X_train, X_test, y_train, y_test = train_test_split(input, label, test_size=0.2, random_state=42)
+    X_train_embeddings = get_embedding(X_train.tolist())
+    log_reg_model = LogisticRegression( max_iter = 1000)
+    log_reg_model.fit(X_train_embeddings, y_train)
+    save_model(log_reg_model,'log_reg_model.pkl')
+    return log_reg_model
+def save_model(model, filename):
+    with open(filename, 'wb') as model_file:
+        pickle.dump(model, model_file)
+    print(f"Model saved to {filename}")
+def load_model(filename):
+    # loaded_model = joblib.load('log_reg_model.pkl')
+    with open(filename, 'rb') as model_file:
+        loaded_model = pickle.load(model_file)
+    print(f"Model loaded from {filename}")
+    return loaded_model

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi[standard]
+pandas
+scikit-learn
+sentence_transformers
+openpyxl

schema.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from pydantic import BaseModel
+# Request body shared by the /prediction and /cosine_similarity endpoints in main.py.
+class Prediction(BaseModel):
+    # Text to classify, or the first text of the pair being compared.
+    message : str = "Enter a text message"
+    # Second text of the pair; read only by the cosine-similarity handler.
+    message_1 : str = "Enter a text message"

sms_process_data_main.xlsx ADDED Viewed

Binary file (42.2 kB). View file