Upload 7 files
Browse files- Dockerfile +18 -0
- README.md +41 -11
- app.py +56 -0
- requirements.txt +4 -0
- static/style.css +24 -0
- templates/index.html +31 -0
- utils/model_loader.py +8 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Use official slim Python image.
# NOTE: bumped from 3.8 (EOL) — the pinned transformers==4.57.0 requires Python >= 3.9.
FROM python:3.10-slim

WORKDIR /app

# Copy only the dependency manifest first so the pip layer is cached
# and not invalidated by every source-code change.
COPY requirements.txt /app/

# Upgrade pip and install dependencies in one layer; --no-cache-dir keeps the image small.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project files
COPY . /app

# Expose port used by HF Spaces Docker (default 7860)
EXPOSE 7860

# Start the Flask app
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,41 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# RoBERTa IMDb Sentiment Analyzer — Pro (Docker-ready)
|
| 3 |
+
|
| 4 |
+
This project provides a professional-grade sentiment analysis API & UI using a RoBERTa model fine-tuned on the IMDb dataset.
|
| 5 |
+
|
| 6 |
+
## Features
|
| 7 |
+
- Model: `textattack/roberta-base-imdb` (RoBERTa fine-tuned on IMDb)
|
| 8 |
+
- Flask web UI + /api/predict JSON endpoint
|
| 9 |
+
- Dockerfile for Hugging Face Spaces (Docker SDK)
|
| 10 |
+
- Production-ready (port binding, HF cache fix)
|
| 11 |
+
|
| 12 |
+
## Local Testing (Optional)
|
| 13 |
+
1. Create & activate venv:
|
| 14 |
+
```
|
| 15 |
+
python -m venv venv
|
| 16 |
+
venv\Scripts\activate   # Windows; on macOS/Linux use: source venv/bin/activate
|
| 17 |
+
```
|
| 18 |
+
2. Install dependencies (recommended to install torch wheel suitable for your system first):
|
| 19 |
+
```
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
3. Run:
|
| 23 |
+
```
|
| 24 |
+
python app.py
|
| 25 |
+
```
|
| 26 |
+
4. Open: http://127.0.0.1:7860/
|
| 27 |
+
|
| 28 |
+
## Deploy to Hugging Face Spaces (Docker)
|
| 29 |
+
1. Create a new Space on Hugging Face:
|
| 30 |
+
- SDK: Docker
|
| 31 |
+
- Hardware: CPU (Free)
|
| 32 |
+
2. Upload all files from this repository (you can upload the ZIP contents).
|
| 33 |
+
3. Wait for the Space to build. The first run may take time as the model downloads.
|
| 34 |
+
|
| 35 |
+
## API
|
| 36 |
+
- `POST /api/predict` accepts JSON: `{ "text": "your text" }`
|
| 37 |
+
- Response: `{ "label": "Positive", "confidence": 0.987 }` — `label` is either `"Positive"` or `"Negative"`
|
| 38 |
+
|
| 39 |
+
## Notes
|
| 40 |
+
- The app sets Hugging Face cache to `/tmp` to avoid container permission issues.
|
| 41 |
+
- If you want to include a local pre-downloaded model, place it under `model/` and change `MODEL_ID` to the path.
|
app.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os

# Fix Hugging Face cache permission issues on hosted runtimes:
# HF Spaces containers may not allow writes to the default ~/.cache,
# so redirect the cache to /tmp unless the env already overrides it.
# NOTE(review): these must be set BEFORE `transformers` is imported below,
# since the library reads them at import time — keep this ordering.
os.environ["TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
os.environ["HF_HOME"] = os.environ.get("HF_HOME", "/tmp/huggingface")

from flask import Flask, request, render_template, jsonify
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

app = Flask(__name__)

# Use a RoBERTa model fine-tuned on IMDb (public Hugging Face model).
# Model choice: textattack/roberta-base-imdb (widely used fine-tuned checkpoint)
MODEL_ID = "textattack/roberta-base-imdb"

# Load tokenizer & model once at startup (first run downloads the weights).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode: disables dropout etc.
|
| 21 |
+
|
| 22 |
+
def predict(text: str):
    """Classify *text* sentiment with the module-level RoBERTa model.

    Returns a dict: {"label": "Positive"|"Negative", "confidence": float rounded to 3dp}.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
    probs = torch.nn.functional.softmax(logits, dim=1)
    idx = int(probs.argmax(dim=1).item())
    score = float(probs[0][idx].item())
    # IMDb fine-tuned label mapping: 1 => Positive, 0 => Negative
    names = {0: "Negative", 1: "Positive"}
    return {"label": names.get(idx, "Neutral"), "confidence": round(score, 3)}
|
| 32 |
+
|
| 33 |
+
@app.route("/", methods=["GET"])
def index():
    """Serve the single-page sentiment-analysis UI."""
    return render_template("index.html")
|
| 36 |
+
|
| 37 |
+
@app.route("/predict", methods=["POST"])
def predict_route():
    """Handle the HTML form submission and re-render the page with the result."""
    text = request.form.get("text", "").strip()
    if not text:
        return render_template("index.html", result="Please enter text to analyze.", input_text="")
    outcome = predict(text)
    summary = f"{outcome['label']} (conf: {outcome['confidence']})"
    return render_template("index.html", result=summary, input_text=text)
|
| 44 |
+
|
| 45 |
+
@app.route("/api/predict", methods=["POST"])
def api_predict():
    """JSON endpoint.

    Request body: {"text": "..."}; response: {"label": ..., "confidence": ...}.
    Returns 400 with {"error": ...} when no usable text is supplied.
    """
    # silent=True makes get_json return None instead of raising on an empty or
    # malformed body; without it (the original force=True only), a bad body made
    # `data.get` blow up with AttributeError and the client saw a 500.
    data = request.get_json(force=True, silent=True) or {}
    # Strip whitespace so "   " is rejected too, matching the form route's behavior.
    text = (data.get("text") or "").strip()
    if not text:
        return jsonify({"error": "No text provided"}), 400
    return jsonify(predict(text))
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
    # HF Spaces can inject PORT; fall back to 7860 for local runs.
    listen_port = int(os.environ.get("PORT", 7860))
    # Bind to all interfaces so the app is reachable from outside the container.
    app.run(host="0.0.0.0", port=listen_port)
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==2.3.3
|
| 2 |
+
transformers==4.57.0
|
| 3 |
+
torch==2.3.0
|
| 4 |
+
gunicorn==23.0.0
|
static/style.css
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
/* Dark-theme palette shared by all rules below. */
:root{
--bg:#0f1724;
--card:#0b1220;
--accent:#f97316;
--muted:#94a3b8;
--white:#e6eef8;
}
/* Global reset: predictable sizing everywhere. */
*{box-sizing:border-box}
/* Full-viewport flex centering of the single card. */
body{
margin:0;font-family:Inter,Segoe UI,Roboto,Arial,sans-serif;background:var(--bg);color:var(--white);
display:flex;align-items:center;justify-content:center;min-height:100vh;padding:24px;
}
/* Content column, capped width on wide screens. */
.wrap{width:100%;max-width:880px;}
/* Main panel: subtle gradient + deep shadow for elevation. */
.card{background:linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));padding:36px;border-radius:12px;box-shadow:0 10px 40px rgba(2,6,23,0.6);}
h1{margin:0;color:var(--accent);letter-spacing:0.3px}
.subtitle{margin:8px 0 20px;color:var(--muted)}
/* Review input: transparent field that blends into the card. */
textarea{width:100%;min-height:140px;padding:14px;border-radius:8px;border:1px solid rgba(255,255,255,0.04);background:transparent;color:var(--white);resize:vertical;font-size:15px}
.btn-row{display:flex;justify-content:flex-end;margin-top:12px}
.btn{background:var(--accent);color:#041017;border:none;padding:10px 18px;border-radius:8px;font-weight:700;cursor:pointer}
/* Result area shown only after a prediction. */
.result{margin-top:18px;display:flex;flex-direction:column;gap:8px}
.pill{display:inline-block;padding:12px 18px;border-radius:999px;background:rgba(255,255,255,0.03);color:var(--white);font-weight:800}
.footer{margin-top:18px;color:var(--muted);font-size:13px}
code{background:rgba(255,255,255,0.02);padding:2px 6px;border-radius:4px;font-size:13px}
|
templates/index.html
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>RoBERTa IMDb Sentiment — Professional</title>
  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
  <div class="wrap">
    <div class="card">
      <h1>RoBERTa IMDb Sentiment Analyzer</h1>
      <p class="subtitle">Fine-tuned on IMDb reviews — Professional demo</p>
      <!-- Posts the review text to the Flask /predict route (form endpoint, not the JSON API). -->
      <form method="post" action="/predict">
        <!-- input_text is echoed back by the server so the review survives a round trip. -->
        <textarea name="text" placeholder="Paste movie review or any text..." required>{{ input_text if input_text else '' }}</textarea>
        <div class="btn-row">
          <button type="submit" class="btn">Analyze Sentiment</button>
        </div>
      </form>
      <!-- Rendered only when the server passed a result string. -->
      {% if result %}
      <div class="result">
        <label>Result:</label>
        <div class="pill">{{ result }}</div>
      </div>
      {% endif %}
      <div class="footer">Model: <code>textattack/roberta-base-imdb</code></div>
    </div>
  </div>
</body>
</html>
|
utils/model_loader.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
+
|
| 4 |
+
def load_model(model_id: str):
    """Fetch tokenizer and sequence-classification model for *model_id*.

    The model is switched to eval mode so it is ready for inference.
    Returns a (tokenizer, model) tuple.
    """
    tok = AutoTokenizer.from_pretrained(model_id)
    clf = AutoModelForSequenceClassification.from_pretrained(model_id)
    clf.eval()
    return tok, clf
|