Spaces:
Configuration error
Configuration error
Upload 6 files
Browse files
- Dockerfile.py +20 -0
- Readme.md +18 -0
- app.py +77 -0
- requirements.txt +5 -0
- sentiment_model.pkl +3 -0
- tfidf_vectorizer.pkl +3 -0
Dockerfile.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a lightweight Python base image
FROM python:3.9-slim

# Set the working directory inside the container
WORKDIR /app

# Install dependencies from requirements.txt instead of a hardcoded,
# duplicated package list. Copying only requirements.txt first means
# code edits don't invalidate the (slow) pip-install layer cache.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Download NLTK data inside the image so the app doesn't fail at runtime
RUN python -m nltk.downloader stopwords wordnet punkt punkt_tab

# Copy the application code into the container
COPY . /app

# Expose port 5000 so we can access the app
EXPOSE 5000

# Command to run the app when the container starts
CMD ["python", "app.py"]
|
Readme.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sentiment Analysis API 🚀
|
| 2 |
+
|
| 3 |
+
## Project Overview
|
| 4 |
+
This is a containerized Flask API that predicts the sentiment (Positive/Negative) of consumer reviews using a Logistic Regression model trained on scraped e-commerce data.
|
| 5 |
+
|
| 6 |
+
## 📂 Project Structure
|
| 7 |
+
- `app.py`: Main Flask application code.
|
| 8 |
+
- `Dockerfile.py`: Configuration to build the Docker image (note: Docker expects this file to be named `Dockerfile`, with no extension).
|
| 9 |
+
- `requirements.txt`: List of dependencies.
|
| 10 |
+
- `sentiment_model.pkl`: The trained ML model.
|
| 11 |
+
- `tfidf_vectorizer.pkl`: The TF-IDF vectorizer.
|
| 12 |
+
|
| 13 |
+
## 🛠️ How to Run (Docker)
|
| 14 |
+
|
| 15 |
+
### 1. Build the Image
|
| 16 |
+
Run this command in the terminal inside the project folder:
|
| 17 |
+
```bash
|
| 18 |
+
docker build -t sentiment-app .
```
|
app.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
import joblib
import string
import nltk
import os

# Initialize App
app = Flask(__name__)

# --- ROBUST NLTK SETUP ---
# Keep NLTK data in a project-local folder to avoid permission issues.
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
nltk.data.path.append(nltk_data_dir)

def download_nltk_resources():
    """Ensure the NLTK resources this app needs are present locally.

    Each resource is looked up under 'corpora/' first, then under
    'tokenizers/'; only when both lookups fail is it downloaded into
    the local nltk_data directory.
    """
    for resource in ['stopwords', 'wordnet', 'punkt', 'punkt_tab']:
        try:
            nltk.data.find(f'corpora/{resource}')
            continue  # already installed as a corpus
        except LookupError:
            pass
        try:
            nltk.data.find(f'tokenizers/{resource}')
        except LookupError:
            print(f"Downloading {resource}...")
            nltk.download(resource, download_dir=nltk_data_dir, quiet=True)

download_nltk_resources()
# Imported after the download so the corpora are guaranteed to exist.
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# -------------------------
|
| 31 |
+
|
| 32 |
+
# Load the serialized model and vectorizer once at startup.
print("Loading model...")
try:
    model = joblib.load('sentiment_model.pkl')
    vectorizer = joblib.load('tfidf_vectorizer.pkl')
    print("Model loaded successfully.")
except Exception as e:
    # Keep the module importable even without the artifact files;
    # /predict returns a 500 while model is None.
    print(f"CRITICAL ERROR: Could not load model files. {e}")
    model = None
    # Also reset vectorizer: if the first load succeeded but the second
    # failed, it would otherwise be left undefined or half-initialized.
    vectorizer = None

# Text-normalization helpers shared by preprocess_text().
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
|
| 44 |
+
|
| 45 |
+
def preprocess_text(text):
    """Normalize a review: lowercase, strip punctuation, tokenize,
    drop English stopwords, and lemmatize each remaining token.

    Non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ""
    lowered = text.lower()
    stripped = lowered.translate(str.maketrans('', '', string.punctuation))
    kept = []
    for token in nltk.word_tokenize(stripped):
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)
|
| 52 |
+
|
| 53 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Predict sentiment for a JSON payload of the form
    {"review_text": "..."}.

    Responses:
        500 -- model artifacts failed to load at startup, or an
               unexpected error occurred during prediction.
        400 -- payload missing or lacking 'review_text'.
        200 -- {'review': <original text>, 'sentiment': 'Positive'|'Negative'}.
    """
    if model is None:
        return jsonify({'error': 'Model not loaded properly.'}), 500

    try:
        payload = request.get_json()
        if not payload or 'review_text' not in payload:
            return jsonify({'error': 'No review_text provided'}), 400

        review = payload['review_text']
        features = vectorizer.transform([preprocess_text(review)])
        label = model.predict(features)[0]

        sentiment = "Positive" if label == 1 else "Negative"
        return jsonify({
            'review': review,
            'sentiment': sentiment
        })
    except Exception as e:
        print(f"Prediction Error: {e}")
        return jsonify({'error': str(e)}), 500
|
| 75 |
+
|
| 76 |
+
if __name__ == '__main__':
    # Bind to 0.0.0.0 so the server is reachable from outside the
    # container; port 5000 matches the Dockerfile's EXPOSE directive.
    app.run(host='0.0.0.0', port=5000)
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask
|
| 2 |
+
pandas
|
| 3 |
+
scikit-learn
|
| 4 |
+
nltk
|
| 5 |
+
joblib
|
sentiment_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:205d99a64d43df3e22cf9f02a2f9afe65c05ba67f7ebc87ef0b8fbdf406414ef
|
| 3 |
+
size 80731
|
tfidf_vectorizer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac38e1855742e25a57a1a19aa77356ec818c797263e863330f87b3ff9e0ed0d0
|
| 3 |
+
size 415129
|