Spaces:

logeswari
/

embeded

Sleeping

App Files Files Community

logeswari committed on Feb 14, 2025

Commit

8425d1d

1 Parent(s): a1738a7

message

Browse files

Files changed (3) hide show

Dockerfile +22 -0
app.py +83 -0
requirements.txt +13 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# Container image for the SMS classification API (FastAPI served by uvicorn).
+FROM python:3.9
+WORKDIR /app
+# Keep Hugging Face downloads under the application directory.
+ENV HF_HOME=/app/.cache
+# Install dependencies before copying the sources so this layer stays cached
+# until requirements.txt changes. No --chown here: this image never creates a
+# "user" account, so `COPY --chown=user` cannot resolve the name and the build aborts.
+COPY ./requirements.txt requirements.txt
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir --upgrade -r requirements.txt
+COPY . /app
+# World-writable cache, presumably so a non-root runtime user can store model
+# files. The recursive chmod on /app/.cache already covers the huggingface subtree.
+RUN mkdir -p /app/.cache/huggingface/hub && \
+    chmod -R 777 /app/.cache
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import logging
+import os
+import numpy as np
+import pandas as pd
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI()
+# Load and preprocess dataset.
+# The default is the original developer-machine path; set SMS_DATASET_PATH to
+# point at the workbook on other hosts (a D:/ drive path cannot exist inside
+# the Linux container built by the Dockerfile).
+file_name = os.environ.get("SMS_DATASET_PATH", r"D:/new/sms_process_data_main.xlsx")
+sheet = "Sheet1"
+df = pd.read_excel(file_name, sheet_name=sheet)
+# Empty Excel cells load as NaN and numeric cells as numbers; neither is valid
+# text for the encoder, so drop incomplete rows and force the text to str.
+df = df.dropna(subset=['MessageText', 'label'])
+df['MessageText'] = df['MessageText'].astype(str)
+# Split data: 80% train / 20% test, fixed seed for a reproducible split
+X_train, X_test, y_train, y_test = train_test_split(
+    df['MessageText'], df['label'], test_size=0.2, random_state=42
+)
+# Load sentence embedding model (trust_remote_code lets the model repo's own code run)
+embedding_model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
+# Generate embeddings as NumPy arrays for scikit-learn
+X_train_embeddings = embedding_model.encode(X_train.tolist(), convert_to_tensor=True).cpu().numpy()
+X_test_embeddings = embedding_model.encode(X_test.tolist(), convert_to_tensor=True).cpu().numpy()
+# Train logistic regression model on the embeddings
+logistic_model = LogisticRegression(max_iter=1000)
+logistic_model.fit(X_train_embeddings, y_train)
+# Evaluate model on the held-out split; the score is only logged
+y_pred = logistic_model.predict(X_test_embeddings)
+accuracy = accuracy_score(y_test, y_pred)
+logger.info("Model trained with accuracy: %.4f", accuracy)
+# API Input Model
+# Request body for POST /predict: a JSON object with a "messages" array of strings.
+class MessageInput(BaseModel):
+    # Texts to embed and classify in one request.
+    messages: list[str]
+# Root endpoint: GET / answers with a fixed greeting payload.
+@app.get("/")
+def read_root():
+    greeting = "Welcome to the SMS Classification API!"
+    return dict(message=greeting)
+# Predict endpoint
+@app.post("/predict")
+def predict_sms(data: MessageInput):
+    """Embed the submitted messages and classify each one.
+
+    Args:
+        data: Request body whose ``messages`` list holds the texts to classify.
+
+    Returns:
+        A dict with ``dimensions`` (width of each embedding vector),
+        ``embeddings`` (the vectors as nested lists) and ``predictions``
+        (the labels predicted by the logistic regression model).
+
+    Raises:
+        HTTPException: 400 when ``messages`` is empty; 500 when embedding or
+            prediction fails.
+    """
+    # Reject empty input before the try block: there is nothing to embed, and
+    # raising inside the try would be re-wrapped as a generic 500 below.
+    if not data.messages:
+        raise HTTPException(
+            status_code=400,
+            detail="'messages' must contain at least one message",
+        )
+    try:
+        # Generate embeddings for new messages
+        new_embeddings = embedding_model.encode(data.messages, convert_to_tensor=True).cpu().numpy()
+        # Predict labels
+        predictions = logistic_model.predict(new_embeddings).tolist()
+        # Prepare the response with embeddings and dimensions
+        response = {
+            "dimensions": new_embeddings.shape[1],  # Number of dimensions in the embeddings
+            "embeddings": new_embeddings.tolist(),   # Convert embeddings to a list
+            "predictions": predictions               # Include predictions
+        }
+        return response
+    except Exception as e:
+        # logger.exception keeps the traceback alongside the message.
+        logger.exception("Error during prediction: %s", e)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# Favicon endpoint (optional)
+@app.get("/favicon.ico")
+def favicon():
+    """Serve the favicon file, or answer 404 when it is not present.
+
+    Returns:
+        A ``FileResponse`` streaming the icon file.
+
+    Raises:
+        HTTPException: 404 when the icon file does not exist.
+    """
+    favicon_path = "path/to/favicon.ico"
+    # FileResponse fails while sending when the file is missing, which turns
+    # every favicon request into a 500; answer with a clean 404 instead.
+    if not os.path.isfile(favicon_path):
+        raise HTTPException(status_code=404, detail="favicon not found")
+    return FileResponse(favicon_path)
+# Script entry point: when this module is executed directly, serve the app on
+# all interfaces at port 8000. The Dockerfile's CMD launches uvicorn itself on
+# port 7860 and does not go through this guard.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+annotated-types==0.7.0
+anyio==4.8.0
+fastapi==0.115.8
+idna==3.10
+pydantic==2.10.6
+pydantic_core==2.27.2
+sniffio==1.3.1
+starlette==0.45.3
+typing_extensions==4.12.2
+# ASGI server: imported by app.py and launched by the Dockerfile CMD.
+uvicorn==0.34.0
+# app.py passes trust_remote_code=True, which SentenceTransformer only accepts
+# from release 2.3.0 onwards.
+sentence-transformers>=2.3.0
+scikit-learn==1.3.2
+numpy==1.26.4
+pandas==2.1.4
+# Engine pandas.read_excel needs to open .xlsx workbooks.
+openpyxl==3.1.5