logeswari committed on
Commit
27199b6
·
1 Parent(s): 41d9d67
Files changed (2) hide show
  1. Dockerfile +15 -28
  2. main.py +35 -67
Dockerfile CHANGED
@@ -1,35 +1,22 @@
1
- # Use an official Python image as the base image
2
- FROM python:3.9-slim
3
-
4
- # Set environment variables to prevent Python from buffering output
5
- ENV PYTHONDONTWRITEBYTECODE=1
6
- ENV PYTHONUNBUFFERED=1
7
-
8
- # Install necessary system dependencies
9
- RUN apt-get update && apt-get install -y \
10
- build-essential \
11
- libopenblas-dev \
12
- liblapack-dev \
13
- libglib2.0-0 \
14
- libgl1-mesa-glx \
15
- libstdc++6 \
16
- wget \
17
- && rm -rf /var/lib/apt/lists/*
18
-
19
- # Create a directory for the app
20
  WORKDIR /app
 
 
 
 
 
 
 
 
21
 
22
- # Copy the requirements file
23
- COPY requirements.txt /app/
24
 
25
- # Install Python dependencies
26
  RUN pip install --no-cache-dir -r requirements.txt
27
 
28
- # Copy the application code
29
- COPY . /app/
30
 
31
- # Expose the FastAPI application port
32
- EXPOSE 8000
33
 
34
- # Command to run the application
35
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
 
1
+ FROM python:3.9
2
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  WORKDIR /app
4
+ COPY . /app
5
+
6
+ ENV HF_HOME=/app/.cache
7
+
8
+ RUN mkdir -p /app/.cache/huggingface/hub && \
9
+ chmod -R 777 /app/.cache && \
10
+ chmod -R 777 /app/.cache/huggingface
11
+
12
 
 
 
13
 
14
+ RUN pip install --upgrade pip
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
+ COPY --chown=user ./requirements.txt requirements.txt
18
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
19
 
20
+ EXPOSE 7860
 
21
 
22
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
main.py CHANGED
@@ -1,83 +1,51 @@
1
  from fastapi import FastAPI, HTTPException
2
- from fastapi.responses import FileResponse
3
- import pandas as pd
4
- from sklearn.model_selection import train_test_split
5
- from sentence_transformers import SentenceTransformer
6
- from sklearn.linear_model import LogisticRegression
7
- from sklearn.metrics import accuracy_score
8
  from pydantic import BaseModel
 
9
  import numpy as np
10
- import uvicorn
11
- import logging
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
 
 
17
  app = FastAPI()
18
 
19
- # Load and preprocess dataset
20
- file_name = r"D:/new/sms_process_data_main.xlsx"
21
- sheet = "Sheet1"
22
- df = pd.read_excel(file_name, sheet_name=sheet)
23
-
24
- # Split data
25
- X_train, X_test, y_train, y_test = train_test_split(
26
- df['MessageText'], df['label'], test_size=0.2, random_state=42
27
- )
28
-
29
- # Load sentence embedding model
30
- embedding_model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
31
-
32
- # Generate embeddings
33
- X_train_embeddings = embedding_model.encode(X_train.tolist(), convert_to_tensor=True).cpu().numpy()
34
- X_test_embeddings = embedding_model.encode(X_test.tolist(), convert_to_tensor=True).cpu().numpy()
35
 
36
- # Train logistic regression model
37
- logistic_model = LogisticRegression(max_iter=1000)
38
- logistic_model.fit(X_train_embeddings, y_train)
39
 
40
- # Evaluate model
41
- y_pred = logistic_model.predict(X_test_embeddings)
42
- accuracy = accuracy_score(y_test, y_pred)
43
- logger.info(f"Model trained with accuracy: {accuracy:.4f}")
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # API Input Model
46
- class MessageInput(BaseModel):
47
- messages: list[str]
48
 
49
- # Root endpoint
50
- @app.get("/")
51
- def read_root():
52
- return {"message": "Welcome to the SMS Classification API!"}
53
 
54
- # Predict endpoint
55
- @app.post("/predict")
56
- def predict_sms(data: MessageInput):
57
- try:
58
- # Generate embeddings for new messages
59
- new_embeddings = embedding_model.encode(data.messages, convert_to_tensor=True).cpu().numpy()
60
-
61
- # Predict labels
62
- predictions = logistic_model.predict(new_embeddings).tolist()
63
-
64
-
65
- # Prepare the response with embeddings and dimensions
66
- response = {
67
- "dimensions": new_embeddings.shape[1], # Number of dimensions in the embeddings
68
- "embeddings": new_embeddings.tolist(), # Convert embeddings to a list
69
- "predictions": predictions # Include predictions
70
  }
71
- return response
72
-
73
  except Exception as e:
74
- logger.error(f"Error during prediction: {e}")
75
  raise HTTPException(status_code=500, detail=str(e))
76
 
77
- # Favicon endpoint (optional)
78
- @app.get("/favicon.ico")
79
- def favicon():
80
- return FileResponse("path/to/favicon.ico")
81
-
82
  if __name__ == "__main__":
83
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
  from fastapi import FastAPI, HTTPException
 
 
 
 
 
 
2
  from pydantic import BaseModel
3
+ from sentence_transformers import SentenceTransformer
4
  import numpy as np
 
 
 
 
 
 
5
 
6
+ # Initialize the FastAPI app
7
  app = FastAPI()
8
 
9
+ # Load the pre-trained SentenceTransformer model from Hugging Face
10
+ #model = SentenceTransformer("//huggingface.co/spaces/Kabila22/Kabilan_embedding_1", trust_remote_code=True)
11
+ model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Define the request body schema
14
+ class TextInput(BaseModel):
15
+ text: str
16
 
17
+ # Home route
18
+ @app.get("/")
19
+ async def home():
20
+ return {"message": "Welcome to embedding SMS API, use /docs to post SMS text and get dimensions"}
21
+
22
+ # Define the API endpoint
23
+ @app.post("/embed")
24
+ async def generate_embedding(text_input: TextInput):
25
+ """
26
+ Generate a 768-dimensional embedding for the input text.
27
+ Returns the embedding in a structured format with rounded values.
28
+ """
29
+ try:
30
+ # Generate the embedding
31
+ embedding = model.encode(text_input.text, convert_to_tensor=True).cpu().numpy()
32
 
33
+ # Round embedding values to 2 decimal places
34
+ rounded_embedding = np.round(embedding, decimals=2).tolist()
 
35
 
36
+ # Get the number of dimensions
37
+ dimensions = len(rounded_embedding)
 
 
38
 
39
+ # Return structured response
40
+ return {
41
+ "dimensions": dimensions,
42
+ "embeddings": [rounded_embedding] # Wrap the embedding inside a list
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
 
 
44
  except Exception as e:
45
+ # Handle any errors
46
  raise HTTPException(status_code=500, detail=str(e))
47
 
48
+ # Run the FastAPI app
 
 
 
 
49
  if __name__ == "__main__":
50
+ import uvicorn
51
+ uvicorn.run(app, host="0.0.0.0", port=7860)