Spaces:
Sleeping
Sleeping
ibraheem15
committed on
Commit
·
a1099ee
1
Parent(s):
3fa282f
Refactor Dockerfile for multi-stage builds and enhance main.py with CORS middleware and updated model
Browse files- Dockerfile +28 -16
- main.py +12 -2
Dockerfile
CHANGED
|
@@ -1,24 +1,36 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
# Use a lean official Python image
|
| 4 |
-
FROM python:3.10-slim
|
| 5 |
-
|
| 6 |
-
# Set working directory
|
| 7 |
WORKDIR /app
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
|
| 13 |
-
|
| 14 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
|
| 16 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
COPY main.py /app/
|
|
|
|
|
|
|
| 18 |
COPY tests/ /app/tests/
|
| 19 |
-
# The port is often fixed by the cloud host (e.g., 7860 for HF Spaces)
|
| 20 |
-
# We use 8000 as a standard, and the cloud host will map its port to this one.
|
| 21 |
-
EXPOSE 8000
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# STAGE 1: Base (Heavy Dependencies)
|
| 2 |
+
FROM python:3.10-slim as base
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
+
# Install system deps if needed (e.g., usually needed for cv2 or weird numpy versions)
|
| 5 |
+
# RUN apt-get update && apt-get install -y --no-install-recommends gcc libgomp1
|
| 6 |
|
| 7 |
+
COPY requirements.txt .
|
| 8 |
+
# 1. Install Torch (Heavy - Cached in this layer)
|
|
|
|
| 9 |
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
|
| 10 |
+
# 2. Install other Prod deps
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
+
# STAGE 2: Test (Needs App Code + Tests)
|
| 14 |
+
FROM base as test
|
| 15 |
+
# 1. Install Test Tools
|
| 16 |
+
RUN pip install pytest flake8 httpx
|
| 17 |
+
|
| 18 |
+
# 2. COPY THE APP CODE (Crucial Step!)
|
| 19 |
+
# The tests need to import 'main', so main.py must be here.
|
| 20 |
COPY main.py /app/
|
| 21 |
+
|
| 22 |
+
# 3. Copy Tests
|
| 23 |
COPY tests/ /app/tests/
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
ENV PYTHONPATH=/app
|
| 26 |
+
# 4. Run Linting and Tests
|
| 27 |
+
RUN flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
| 28 |
+
RUN pytest tests/
|
| 29 |
+
|
| 30 |
+
# STAGE 3: Production (Final Image)
|
| 31 |
+
FROM base as prod
|
| 32 |
+
# We start fresh from 'base'. The junk from 'test' stage is gone.
|
| 33 |
+
COPY main.py /app/
|
| 34 |
+
# Note: We do NOT copy tests/ here to keep prod image smaller
|
| 35 |
+
EXPOSE 8000
|
| 36 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
main.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
# main.py
|
| 2 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 3 |
from pydantic import BaseModel
|
| 4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 5 |
from contextlib import asynccontextmanager
|
| 6 |
import torch
|
| 7 |
|
| 8 |
# --- MLOps Configuration ---
|
| 9 |
-
HF_MODEL_NAME = "
|
| 10 |
CLASSIFIER_PIPELINE = None
|
| 11 |
|
| 12 |
# Pydantic model for request body
|
|
@@ -45,6 +46,14 @@ app = FastAPI(
|
|
| 45 |
lifespan=lifespan
|
| 46 |
)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
@app.get("/")
|
| 49 |
def health_check():
|
| 50 |
return {"status": "ok", "model_loaded": CLASSIFIER_PIPELINE is not None}
|
|
@@ -55,7 +64,8 @@ def predict_spam(item: Message):
|
|
| 55 |
raise HTTPException(status_code=503, detail="Model is not ready.")
|
| 56 |
|
| 57 |
try:
|
| 58 |
-
|
|
|
|
| 59 |
label = results[0]['label']
|
| 60 |
score = results[0]['score']
|
| 61 |
output_label = "spam" if label == 'LABEL_1' else "ham"
|
|
|
|
| 1 |
# main.py
|
| 2 |
from fastapi import FastAPI, HTTPException
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
from pydantic import BaseModel
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
import torch
|
| 8 |
|
| 9 |
# --- MLOps Configuration ---
|
| 10 |
+
HF_MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"
|
| 11 |
CLASSIFIER_PIPELINE = None
|
| 12 |
|
| 13 |
# Pydantic model for request body
|
|
|
|
| 46 |
lifespan=lifespan
|
| 47 |
)
|
| 48 |
|
| 49 |
+
app.add_middleware(
|
| 50 |
+
CORSMiddleware,
|
| 51 |
+
allow_origins=["*"],
|
| 52 |
+
allow_credentials=True,
|
| 53 |
+
allow_methods=["*"],
|
| 54 |
+
allow_headers=["*"],
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
@app.get("/")
|
| 58 |
def health_check():
|
| 59 |
return {"status": "ok", "model_loaded": CLASSIFIER_PIPELINE is not None}
|
|
|
|
| 64 |
raise HTTPException(status_code=503, detail="Model is not ready.")
|
| 65 |
|
| 66 |
try:
|
| 67 |
+
print(f"Received text for prediction: {item.text}")
|
| 68 |
+
results = CLASSIFIER_PIPELINE(item.text, truncation=True, max_length=512)
|
| 69 |
label = results[0]['label']
|
| 70 |
score = results[0]['score']
|
| 71 |
output_label = "spam" if label == 'LABEL_1' else "ham"
|