Spaces:

codeby-hp
/

sentiment-classification

Sleeping

App Files Files Community

codeby-hp committed on Dec 16, 2025

Commit

28a0fff

verified ·

1 Parent(s): e415a87

adding files

Browse files

Files changed (5) hide show

Dockerfile +41 -0
app.py +192 -0
models/vectorizer.pkl +3 -0
requirements.txt +11 -0
templates/index.html +113 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,41 @@

+# Use Python 3.10 slim image for smaller size
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Install system dependencies.
+# software-properties-common is deliberately left out: nothing in this image
+# calls add-apt-repository, and the package is absent from newer Debian
+# releases, where requesting it makes apt-get fail the whole build.
+# --no-install-recommends keeps optional extras out of the layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better layer caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Download NLTK data into a system-wide directory that NLTK searches by
+# default, so the corpora are found whichever user the container runs as
+# (the default download target is the build user's home directory).
+RUN python -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet omw-1.4
+# Copy application files
+COPY app.py .
+COPY templates/ templates/
+COPY models/ models/
+# Expose port 7860 (Hugging Face Spaces default)
+EXPOSE 7860
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV PORT=7860
+# Health check (curl is installed above for this purpose)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,192 @@

+from fastapi import FastAPI, Request, Form
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from fastapi.staticfiles import StaticFiles
+import mlflow
+import pickle
+import os
+import pandas as pd
+import numpy as np
+from nltk.stem import WordNetLemmatizer
+from nltk.corpus import stopwords
+import string
+import re
+import dagshub
+import nltk
+import warnings
+warnings.simplefilter("ignore", UserWarning)
+warnings.filterwarnings("ignore")
+from dotenv import load_dotenv
+load_dotenv()
+# Download required NLTK data
+try:
+    nltk.download('stopwords', quiet=True)
+    nltk.download('wordnet', quiet=True)
+    nltk.download('omw-1.4', quiet=True)
+except:
+    pass
+def lemmatization(text):
+    """Lemmatize the text."""
+    lemmatizer = WordNetLemmatizer()
+    text = text.split()
+    text = [lemmatizer.lemmatize(word) for word in text]
+    return " ".join(text)
+def remove_stop_words(text):
+    """Remove stop words from the text."""
+    stop_words = set(stopwords.words("english"))
+    text = [word for word in str(text).split() if word not in stop_words]
+    return " ".join(text)
+def removing_numbers(text):
+    """Remove numbers from the text."""
+    text = ''.join([char for char in text if not char.isdigit()])
+    return text
+def lower_case(text):
+    """Convert text to lower case."""
+    text = text.split()
+    text = [word.lower() for word in text]
+    return " ".join(text)
+def removing_punctuations(text):
+    """Remove punctuations from the text."""
+    text = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
+    text = text.replace('؛', "")
+    text = re.sub('\s+', ' ', text).strip()
+    return text
+def removing_urls(text):
+    """Remove URLs from the text."""
+    url_pattern = re.compile(r'https?://\S+|www\.\S+')
+    return url_pattern.sub(r'', text)
+def remove_small_sentences(df):
+    """Remove sentences with less than 3 words."""
+    for i in range(len(df)):
+        if len(df.text.iloc[i].split()) < 3:
+            df.text.iloc[i] = np.nan
+def normalize_text(text):
+    text = lower_case(text)
+    text = remove_stop_words(text)
+    text = removing_numbers(text)
+    text = removing_punctuations(text)
+    text = removing_urls(text)
+    text = lemmatization(text)
+    return text
+# Below code block is for local use
+# -------------------------------------------------------------------------------------
+# mlflow.set_tracking_uri('https://dagshub.com/CodeBy-HP/Sentiment-Classification-Mlflow-DVC.mlflow')
+# dagshub.init(repo_owner='CodeBy-HP', repo_name='Sentiment-Classification-Mlflow-DVC', mlflow=True)
+# -------------------------------------------------------------------------------------
+# Below code block is for production use
+# -------------------------------------------------------------------------------------
+# Set up DagsHub credentials for MLflow tracking
+dagshub_token = os.getenv("CAPSTONE_TEST")
+if not dagshub_token:
+    raise EnvironmentError("CAPSTONE_TEST environment variable is not set")
+os.environ["MLFLOW_TRACKING_USERNAME"] = dagshub_token
+os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
+dagshub_url = "https://dagshub.com"
+repo_owner = "CodeBy-HP"
+repo_name = "Sentiment-Classification-Mlflow-DVC"
+# Set up MLflow tracking URI
+mlflow.set_tracking_uri(f'{dagshub_url}/{repo_owner}/{repo_name}.mlflow')
+# -------------------------------------------------------------------------------------
+# Initialize FastAPI app
+app = FastAPI(title="Sentiment Analysis API", version="1.0.0")
+# Set up Jinja2 templates
+current_file_dir = os.path.dirname(os.path.abspath(__file__))
+templates_dir = os.path.join(current_file_dir, "templates")
+templates = Jinja2Templates(directory=templates_dir)
+# ------------------------------------------------------------------------------------------
+# Model and vectorizer setup
+model_name = "my_model"
+# Get the path to the vectorizer file
+current_dir = os.path.dirname(os.path.abspath(__file__))
+vectorizer_path = os.path.join(current_dir, 'models', 'vectorizer.pkl')
+if not os.path.exists(vectorizer_path):
+    # Try alternative paths
+    alt_paths = [
+        os.path.join(os.getcwd(), 'models', 'vectorizer.pkl'),
+        os.path.join(current_dir, '..', 'models', 'vectorizer.pkl'),
+        '/app/models/vectorizer.pkl'  # Docker path
+    ]
+    for path in alt_paths:
+        if os.path.exists(path):
+            vectorizer_path = path
+            break
+def get_latest_model_version(model_name):
+    client = mlflow.MlflowClient()
+    latest_version = client.get_latest_versions(model_name, stages=["Production"])
+    if not latest_version:
+        latest_version = client.get_latest_versions(model_name, stages=["None"])
+    return latest_version[0].version if latest_version else None
+model_version = get_latest_model_version(model_name)
+model_uri = f'models:/{model_name}/{model_version}'
+print(f"Fetching model from: {model_uri}")
+model = mlflow.sklearn.load_model(model_uri)
+vectorizer = pickle.load(open(vectorizer_path, 'rb'))
+# Routes
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    """Render the home page."""
+    return templates.TemplateResponse(
+        request=request,
+        name="index.html",
+        context={"result": None}
+    )
+@app.post("/predict", response_class=HTMLResponse)
+async def predict(request: Request, text: str = Form(...)):
+    """Handle sentiment prediction."""
+    # Clean text
+    cleaned_text = normalize_text(text)
+    # Convert to features
+    features = vectorizer.transform([cleaned_text])
+    # Convert to array without column names to avoid sklearn warning
+    features_array = features.toarray()
+    # Predict
+    result = model.predict(features_array)
+    prediction = int(result[0])
+    # Get probability scores for confidence
+    # Note: predict_proba returns [prob_negative, prob_positive]
+    probabilities = model.predict_proba(features_array)[0]
+    confidence = float(probabilities[prediction]) * 100  # Convert to percentage
+    return templates.TemplateResponse(
+        request=request,
+        name="index.html",
+        context={"result": prediction, "confidence": confidence}
+    )
+@app.get("/health")
+async def health_check():
+    """Health check endpoint for monitoring."""
+    return {"status": "healthy", "model_version": model_version}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

models/vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61990dedece0a2f2cd0da34d59751c9d0c0bf9865fa6318b5a1d4d5e330b4d01
+size 250077

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+dagshub==0.4.2
+fastapi==0.115.5
+uvicorn[standard]==0.32.1
+jinja2==3.1.4
+python-multipart==0.0.18
+mlflow==2.19.0
+mlflow_skinny==2.19.0
+nltk==3.9.1
+numpy==2.2.1
+pandas==2.2.3
+scikit-learn==1.5.2

templates/index.html ADDED Viewed

	@@ -0,0 +1,113 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Sentiment Analysis</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <style>
+        @keyframes fadeIn {
+            from { opacity: 0; transform: translateY(10px); }
+            to { opacity: 1; transform: translateY(0); }
+        }
+        .fade-in {
+            animation: fadeIn 0.5s ease-out forwards;
+        }
+        body {
+            font-family: 'Inter', sans-serif;
+        }
+        .glass-card {
+            background: rgba(255, 255, 255, 0.05);
+            backdrop-filter: blur(10px);
+            border: 1px solid rgba(255, 255, 255, 0.1);
+        }
+        .btn-gradient {
+            background-image: linear-gradient(to right, #4f46e5, #c026d3);
+        }
+    </style>
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
+</head>
+<body class="bg-gray-900 text-white">
+    <div class="min-h-screen flex flex-col items-center justify-center p-4">
+        <div class="w-full max-w-2xl">
+            <div class="text-center mb-8 fade-in">
+                <h1 class="text-4xl md:text-5xl font-bold tracking-tight">Sentiment Analyzer</h1>
+                <p class="text-gray-400 mt-3 text-lg">Instantly analyze the sentiment of your text.</p>
+            </div>
+            <div class="glass-card rounded-2xl shadow-2xl p-8 fade-in" style="animation-delay: 0.2s;">
+                <form action="/predict" method="POST">
+                    <div class="mb-6">
+                        <label for="text" class="sr-only">Your text</label>
+                        <textarea
+                            id="text"
+                            name="text"
+                            rows="6"
+                            required
+                            class="w-full bg-gray-800 border-2 border-gray-700 rounded-lg px-4 py-3 text-base text-white placeholder-gray-500 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition-all duration-300 resize-none"
+                            placeholder="Enter text to analyze..."
+                        ></textarea>
+                    </div>
+                    <button
+                        type="submit"
+                        class="w-full btn-gradient text-white font-semibold py-3 rounded-lg transition-transform transform hover:scale-105 focus:outline-none focus:ring-4 focus:ring-purple-500 focus:ring-opacity-50"
+                    >
+                        Analyze Sentiment
+                    </button>
+                </form>
+                {% if result is not none %}
+                {# Plain link styled as a button: going back to the empty form needs no JavaScript #}
+                <a
+                    href="/"
+                    class="block w-full mt-4 text-center bg-gray-700 hover:bg-gray-600 text-white font-semibold py-3 rounded-lg transition-all duration-300"
+                >
+                    New Analysis
+                </a>
+                {% endif %}
+                {% if result is not none %}
+                <div class="mt-8 pt-6 border-t border-gray-700 fade-in" style="animation-delay: 0.4s;">
+                    {% if result == 1 %}
+                    <div class="flex items-start space-x-4 bg-green-500/10 border border-green-500/30 text-green-300 px-5 py-4 rounded-lg">
+                        <svg class="w-6 h-6 flex-shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.828 14.828a4 4 0 01-5.656 0M9 10h.01M15 10h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
+                        <div class="flex-1">
+                            <h4 class="text-lg font-semibold">Positive Sentiment</h4>
+                            <p class="text-sm text-green-300/80">The analysis indicates a positive tone.</p>
+                            {% if confidence %}
+                            <div class="mt-2">
+                                <span class="text-xs font-medium text-green-400">Confidence: {{ "%.2f"|format(confidence) }}%</span>
+                                <div class="w-full bg-gray-700 rounded-full h-2 mt-1">
+                                    <div class="bg-green-500 h-2 rounded-full" style="width: {{ confidence }}%"></div>
+                                </div>
+                            </div>
+                            {% endif %}
+                        </div>
+                    </div>
+                    {% else %}
+                    <div class="flex items-start space-x-4 bg-red-500/10 border border-red-500/30 text-red-300 px-5 py-4 rounded-lg">
+                        <svg class="w-6 h-6 flex-shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.172 16.172a4 4 0 015.656 0M9 10h.01M15 10h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
+                        <div class="flex-1">
+                            <h4 class="text-lg font-semibold">Negative Sentiment</h4>
+                            <p class="text-sm text-red-300/80">The analysis indicates a negative tone.</p>
+                            {% if confidence %}
+                            <div class="mt-2">
+                                <span class="text-xs font-medium text-red-400">Confidence: {{ "%.2f"|format(confidence) }}%</span>
+                                <div class="w-full bg-gray-700 rounded-full h-2 mt-1">
+                                    <div class="bg-red-500 h-2 rounded-full" style="width: {{ confidence }}%"></div>
+                                </div>
+                            </div>
+                            {% endif %}
+                        </div>
+                    </div>
+                    {% endif %}
+                </div>
+                {% endif %}
+            </div>
+        </div>
+    </div>
+</body>
+</html>