codeby-hp committed on
Commit
28a0fff
·
verified ·
1 Parent(s): e415a87

adding files

Browse files
Files changed (5) hide show
  1. Dockerfile +41 -0
  2. app.py +192 -0
  3. models/vectorizer.pkl +3 -0
  4. requirements.txt +11 -0
  5. templates/index.html +113 -0
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.10 slim image for smaller size
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
#   - curl is needed by the HEALTHCHECK below.
#   - software-properties-common was removed: nothing in this image calls
#     add-apt-repository, and the package is not shipped by newer Debian
#     releases, which makes the build fail when the base image moves forward.
#   - --no-install-recommends keeps the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Download NLTK data into a system-wide directory that is on NLTK's default
# search path, so the corpora are readable no matter which user the container
# runs as (the default download location is the build user's home directory).
RUN python -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet omw-1.4

# Copy application files
COPY app.py .
COPY templates/ templates/
COPY models/ models/

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PORT=7860

# Health check. app.py fetches the model from the remote MLflow registry at
# import time, so allow a longer start period before failures count.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Form
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.templating import Jinja2Templates
4
+ from fastapi.staticfiles import StaticFiles
5
+ import mlflow
6
+ import pickle
7
+ import os
8
+ import pandas as pd
9
+ import numpy as np
10
+ from nltk.stem import WordNetLemmatizer
11
+ from nltk.corpus import stopwords
12
+ import string
13
+ import re
14
+ import dagshub
15
+ import nltk
16
+
17
+ import warnings
18
+ warnings.simplefilter("ignore", UserWarning)
19
+ warnings.filterwarnings("ignore")
20
+
21
+ from dotenv import load_dotenv
22
+
23
+ load_dotenv()
24
+
25
# Download required NLTK data. This is best effort: a failure for one package
# is reported and the remaining packages are still attempted.
for _nltk_package in ("stopwords", "wordnet", "omw-1.4"):
    try:
        nltk.download(_nltk_package, quiet=True)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print(f"Warning: could not download NLTK package '{_nltk_package}': {exc}")
32
+
33
def lemmatization(text):
    """Lemmatize every whitespace-separated token of *text*.

    The tokens are re-joined with single spaces, so runs of whitespace in the
    input are collapsed.
    """
    wordnet = WordNetLemmatizer()
    return " ".join(map(wordnet.lemmatize, text.split()))
39
+
40
def remove_stop_words(text):
    """Drop English stop words from *text*.

    *text* is coerced with ``str()`` first; the surviving tokens are joined
    with single spaces. Matching is case-sensitive against NLTK's English
    stop-word list.
    """
    english_stop_words = set(stopwords.words("english"))
    kept = filter(lambda token: token not in english_stop_words, str(text).split())
    return " ".join(kept)
45
+
46
def removing_numbers(text):
    """Return *text* with every digit character (per ``str.isdigit``) removed."""
    return "".join(filter(lambda ch: not ch.isdigit(), text))
50
+
51
def lower_case(text):
    """Lower-case each whitespace-separated token of *text*.

    Tokens are re-joined with single spaces, so leading/trailing whitespace is
    dropped and inner runs of whitespace are collapsed.
    """
    return " ".join(map(str.lower, text.split()))
56
+
57
def removing_punctuations(text):
    """Replace punctuation in *text* with spaces and normalise whitespace.

    Every character in ``string.punctuation`` becomes a space, the Arabic
    semicolon is deleted outright, and the result has whitespace runs
    collapsed to a single space and is stripped at both ends.
    """
    punctuation_class = '[%s]' % re.escape(string.punctuation)
    text = re.sub(punctuation_class, ' ', text)
    text = text.replace('؛', "")
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python versions.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
63
+
64
def removing_urls(text):
    """Delete ``http(s)://...`` and ``www....`` tokens from *text*.

    A URL extends up to the next whitespace character; surrounding whitespace
    is left untouched.
    """
    return re.sub(r'https?://\S+|www\.\S+', r'', text)
68
+
69
def remove_small_sentences(df):
    """Blank out, in place, rows of ``df['text']`` with fewer than 3 words.

    Cells whose text splits into fewer than three whitespace-separated tokens
    are set to NaN. Cells that are not strings (for example values that are
    already NaN) are left unchanged. Nothing is returned; *df* is mutated.
    """
    too_short = (
        df["text"]
        .map(lambda cell: isinstance(cell, str) and len(cell.split()) < 3)
        .astype(bool)
    )
    # A single .loc assignment writes into df itself. The previous chained
    # form (df.text.iloc[i] = ...) assigns to an intermediate object and is
    # not guaranteed to update the frame.
    df.loc[too_short.to_numpy(), "text"] = np.nan
74
+
75
def normalize_text(text):
    """Run *text* through the full cleaning pipeline and return the result.

    Steps, in order: lower-casing, stop-word removal, digit removal,
    punctuation removal, URL removal, lemmatization.
    """
    # NOTE(review): URL removal runs after punctuation stripping, so URLs have
    # already been broken apart by then. The order is kept as-is, presumably
    # to mirror the preprocessing used at training time - confirm.
    pipeline = (
        lower_case,
        remove_stop_words,
        removing_numbers,
        removing_punctuations,
        removing_urls,
        lemmatization,
    )
    for step in pipeline:
        text = step(text)
    return text
84
+
85
+ # Below code block is for local use
86
+ # -------------------------------------------------------------------------------------
87
+ # mlflow.set_tracking_uri('https://dagshub.com/CodeBy-HP/Sentiment-Classification-Mlflow-DVC.mlflow')
88
+ # dagshub.init(repo_owner='CodeBy-HP', repo_name='Sentiment-Classification-Mlflow-DVC', mlflow=True)
89
+ # -------------------------------------------------------------------------------------
90
+
91
# Below code block is for production use
# -------------------------------------------------------------------------------------
# The DagsHub token is read from the CAPSTONE_TEST environment variable;
# startup is aborted if it is missing or empty.
dagshub_token = os.getenv("CAPSTONE_TEST")
if not dagshub_token:
    raise EnvironmentError("CAPSTONE_TEST environment variable is not set")

# The same token is exported as both the MLflow tracking username and password.
os.environ.update(
    MLFLOW_TRACKING_USERNAME=dagshub_token,
    MLFLOW_TRACKING_PASSWORD=dagshub_token,
)

dagshub_url = "https://dagshub.com"
repo_owner = "CodeBy-HP"
repo_name = "Sentiment-Classification-Mlflow-DVC"
# Point MLflow at <dagshub_url>/<repo_owner>/<repo_name>.mlflow
mlflow.set_tracking_uri(f'{dagshub_url}/{repo_owner}/{repo_name}.mlflow')
# -------------------------------------------------------------------------------------
107
+
108
+
109
# Initialize FastAPI app
app = FastAPI(title="Sentiment Analysis API", version="1.0.0")

# Set up Jinja2 templates; the directory is resolved next to this file so it
# does not depend on the process working directory.
current_file_dir = os.path.dirname(os.path.abspath(__file__))
templates_dir = os.path.join(current_file_dir, "templates")
templates = Jinja2Templates(directory=templates_dir)

# ------------------------------------------------------------------------------------------
# Model and vectorizer setup
model_name = "my_model"

# Locate models/vectorizer.pkl: prefer the copy next to this file, otherwise
# take the first existing fallback. If none exists the primary path is kept,
# and opening it later will fail.
current_dir = os.path.dirname(os.path.abspath(__file__))
vectorizer_path = os.path.join(current_dir, 'models', 'vectorizer.pkl')
if not os.path.exists(vectorizer_path):
    alt_paths = [
        os.path.join(os.getcwd(), 'models', 'vectorizer.pkl'),
        os.path.join(current_dir, '..', 'models', 'vectorizer.pkl'),
        '/app/models/vectorizer.pkl'  # Docker path
    ]
    vectorizer_path = next(filter(os.path.exists, alt_paths), vectorizer_path)
135
+
136
def get_latest_model_version(model_name):
    """Return the newest registered version of *model_name*, or ``None``.

    The "Production" stage is queried first; only if it yields nothing is the
    "None" stage queried. ``None`` is returned when both lookups are empty.
    """
    client = mlflow.MlflowClient()
    for stage in ("Production", "None"):
        versions = client.get_latest_versions(model_name, stages=[stage])
        if versions:
            return versions[0].version
    return None
142
+
143
# Resolve and load the model once at import time; every request reuses it.
model_version = get_latest_model_version(model_name)
if model_version is None:
    # Fail with a clear message instead of requesting 'models:/<name>/None'.
    raise RuntimeError(f"No registered versions found for model '{model_name}'")
model_uri = f'models:/{model_name}/{model_version}'
print(f"Fetching model from: {model_uri}")
model = mlflow.sklearn.load_model(model_uri)
# NOTE: pickle can execute arbitrary code while loading; this file must only
# ever come from a trusted source (here, the repository's own models/ folder).
with open(vectorizer_path, 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
148
+
149
+ # Routes
150
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Serve index.html with no prediction result to display."""
    empty_context = {"result": None}
    return templates.TemplateResponse(
        request=request, name="index.html", context=empty_context
    )
158
+
159
@app.post("/predict", response_class=HTMLResponse)
async def predict(request: Request, text: str = Form(...)):
    """Classify the submitted text and render index.html with the outcome.

    The form field ``text`` is cleaned with ``normalize_text``, vectorized,
    and passed to the model. The template receives ``result`` (the predicted
    label as an int) and ``confidence`` (the predicted label's probability as
    a percentage).
    """
    # Clean text
    cleaned_text = normalize_text(text)

    # Convert to a dense feature array (plain array, no column names, to
    # avoid the sklearn feature-name warning).
    features_array = vectorizer.transform([cleaned_text]).toarray()

    # Predict
    predicted_label = model.predict(features_array)[0]
    prediction = int(predicted_label)

    # predict_proba columns follow model.classes_, so look up the column of
    # the predicted label instead of assuming label value == column index.
    probabilities = model.predict_proba(features_array)[0]
    classes = getattr(model, "classes_", None)
    column = list(classes).index(predicted_label) if classes is not None else prediction
    confidence = float(probabilities[column]) * 100  # Convert to percentage

    return templates.TemplateResponse(
        request=request,
        name="index.html",
        context={"result": prediction, "confidence": confidence}
    )
184
+
185
@app.get("/health")
async def health_check():
    """Report liveness together with the model version loaded at startup."""
    payload = {"status": "healthy", "model_version": model_version}
    return payload
189
+
190
if __name__ == "__main__":
    import uvicorn

    # Honour the PORT environment variable when it is set, so a direct
    # `python app.py` run listens on the same port the deployment expects;
    # fall back to 8000 as before.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000")))
models/vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61990dedece0a2f2cd0da34d59751c9d0c0bf9865fa6318b5a1d4d5e330b4d01
3
+ size 250077
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dagshub==0.4.2
2
+ fastapi==0.115.5
3
+ uvicorn[standard]==0.32.1
4
+ jinja2==3.1.4
5
+ python-multipart==0.0.18
6
+ mlflow==2.19.0
7
+ mlflow_skinny==2.19.0
8
+ nltk==3.9.1
9
+ numpy==2.2.1
10
+ pandas==2.2.3
11
+ scikit-learn==1.5.2
templates/index.html ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Sentiment Analysis</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <style>
9
+ @keyframes fadeIn {
10
+ from { opacity: 0; transform: translateY(10px); }
11
+ to { opacity: 1; transform: translateY(0); }
12
+ }
13
+ .fade-in {
14
+ animation: fadeIn 0.5s ease-out forwards;
15
+ }
16
+ body {
17
+ font-family: 'Inter', sans-serif;
18
+ }
19
+ .glass-card {
20
+ background: rgba(255, 255, 255, 0.05);
21
+ backdrop-filter: blur(10px);
22
+ border: 1px solid rgba(255, 255, 255, 0.1);
23
+ }
24
+ .btn-gradient {
25
+ background-image: linear-gradient(to right, #4f46e5, #c026d3);
26
+ }
27
+ </style>
28
+ <link rel="preconnect" href="https://fonts.googleapis.com">
29
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
30
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
31
+ </head>
32
+ <body class="bg-gray-900 text-white">
33
+ <div class="min-h-screen flex flex-col items-center justify-center p-4">
34
+ <div class="w-full max-w-2xl">
35
+ <div class="text-center mb-8 fade-in">
36
+ <h1 class="text-4xl md:text-5xl font-bold tracking-tight">Sentiment Analyzer</h1>
37
+ <p class="text-gray-400 mt-3 text-lg">Instantly analyze the sentiment of your text.</p>
38
+ </div>
39
+
40
+ <div class="glass-card rounded-2xl shadow-2xl p-8 fade-in" style="animation-delay: 0.2s;">
41
+ <form action="/predict" method="POST">
42
+ <div class="mb-6">
43
+ <label for="text" class="sr-only">Your text</label>
44
+ <textarea
45
+ id="text"
46
+ name="text"
47
+ rows="6"
48
+ required
49
+ class="w-full bg-gray-800 border-2 border-gray-700 rounded-lg px-4 py-3 text-base text-white placeholder-gray-500 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition-all duration-300 resize-none"
50
+ placeholder="Enter text to analyze..."
51
+ ></textarea>
52
+ </div>
53
+
54
+ <button
55
+ type="submit"
56
+ class="w-full btn-gradient text-white font-semibold py-3 rounded-lg transition-transform transform hover:scale-105 focus:outline-none focus:ring-4 focus:ring-purple-500 focus:ring-opacity-50"
57
+ >
58
+ Analyze Sentiment
59
+ </button>
60
+ </form>
61
+
62
+ {% if result is not none %}
63
+ <button
64
+ onclick="window.location.href='/'"
65
+ class="w-full mt-4 bg-gray-700 hover:bg-gray-600 text-white font-semibold py-3 rounded-lg transition-all duration-300"
66
+ >
67
+ New Analysis
68
+ </button>
69
+ {% endif %}
70
+
71
+ {% if result is not none %}
72
+ <div class="mt-8 pt-6 border-t border-gray-700 fade-in" style="animation-delay: 0.4s;">
73
+ {% if result == 1 %}
74
+ <div class="flex items-start space-x-4 bg-green-500/10 border border-green-500/30 text-green-300 px-5 py-4 rounded-lg">
75
+ <svg class="w-6 h-6 flex-shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.828 14.828a4 4 0 01-5.656 0M9 10h.01M15 10h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
76
+ <div class="flex-1">
77
+ <h4 class="text-lg font-semibold">Positive Sentiment</h4>
78
+ <p class="text-sm text-green-300/80">The analysis indicates a positive tone.</p>
79
+ {% if confidence %}
80
+ <div class="mt-2">
81
+ <span class="text-xs font-medium text-green-400">Confidence: {{ "%.2f"|format(confidence) }}%</span>
82
+ <div class="w-full bg-gray-700 rounded-full h-2 mt-1">
83
+ <div class="bg-green-500 h-2 rounded-full" style="width: {{ confidence }}%"></div>
84
+ </div>
85
+ </div>
86
+ {% endif %}
87
+ </div>
88
+ </div>
89
+ {% else %}
90
+ <div class="flex items-start space-x-4 bg-red-500/10 border border-red-500/30 text-red-300 px-5 py-4 rounded-lg">
91
+ <svg class="w-6 h-6 flex-shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.172 16.172a4 4 0 015.656 0M9 10h.01M15 10h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
92
+ <div class="flex-1">
93
+ <h4 class="text-lg font-semibold">Negative Sentiment</h4>
94
+ <p class="text-sm text-red-300/80">The analysis indicates a negative tone.</p>
95
+ {% if confidence %}
96
+ <div class="mt-2">
97
+ <span class="text-xs font-medium text-red-400">Confidence: {{ "%.2f"|format(confidence) }}%</span>
98
+ <div class="w-full bg-gray-700 rounded-full h-2 mt-1">
99
+ <div class="bg-red-500 h-2 rounded-full" style="width: {{ confidence }}%"></div>
100
+ </div>
101
+ </div>
102
+ {% endif %}
103
+ </div>
104
+ </div>
105
+ {% endif %}
106
+ </div>
107
+ {% endif %}
108
+ </div>
109
+
110
+ </div>
111
+ </div>
112
+ </body>
113
+ </html>