devil2810 committed on
Commit
8205a80
·
verified ·
1 Parent(s): cd479b0

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +18 -0
  2. README.md +41 -11
  3. app.py +56 -0
  4. requirements.txt +4 -0
  5. static/style.css +24 -0
  6. templates/index.html +31 -0
  7. utils/model_loader.py +8 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Use official slim Python image
3
+ FROM python:3.8-slim
4
+
5
+ WORKDIR /app
6
+
7
+ # Copy project files
8
+ COPY . /app
9
+
10
+ # Upgrade pip and install dependencies
11
+ RUN pip install --upgrade pip
12
+ RUN pip install -r requirements.txt
13
+
14
+ # Expose port used by HF Spaces Docker (default 7860)
15
+ EXPOSE 7860
16
+
17
+ # Start the Flask app
18
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,41 @@
1
- ---
2
- title: Roberta Sentiment Analyzer
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # RoBERTa IMDb Sentiment Analyzer — Pro (Docker-ready)
3
+
4
+ This project provides a professional-grade sentiment analysis API & UI using a RoBERTa model fine-tuned on the IMDb dataset.
5
+
6
+ ## Features
7
+ - Model: `textattack/roberta-base-imdb` (RoBERTa fine-tuned on IMDb)
8
+ - Flask web UI + /api/predict JSON endpoint
9
+ - Dockerfile for Hugging Face Spaces (Docker SDK)
10
+ - Production-ready (port binding, HF cache fix)
11
+
12
+ ## Local Testing (Optional)
13
1. Create & activate venv:
```
python -m venv venv
venv\Scripts\activate      # Windows
source venv/bin/activate   # macOS/Linux
```
18
+ 2. Install dependencies (recommended to install torch wheel suitable for your system first):
19
+ ```
20
+ pip install -r requirements.txt
21
+ ```
22
+ 3. Run:
23
+ ```
24
+ python app.py
25
+ ```
26
+ 4. Open: http://127.0.0.1:7860/
27
+
28
+ ## Deploy to Hugging Face Spaces (Docker)
29
+ 1. Create a new Space on Hugging Face:
30
+ - SDK: Docker
31
+ - Hardware: CPU (Free)
32
+ 2. Upload all files from this repository (you can upload the ZIP contents).
33
+ 3. Wait for the Space to build. The first run may take time as the model downloads.
34
+
35
+ ## API
36
+ - `POST /api/predict` accepts JSON: `{ "text": "your text" }`
37
+ - Response: `{ "label": "Positive"/"Negative", "confidence": 0.987 }`
38
+
39
+ ## Notes
40
+ - The app sets Hugging Face cache to `/tmp` to avoid container permission issues.
41
+ - If you want to include a local pre-downloaded model, place it under `model/` and change `MODEL_ID` to the path.
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ # Fix Hugging Face cache permission issues on hosted runtimes
4
+ os.environ["TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
5
+ os.environ["HF_HOME"] = os.environ.get("HF_HOME", "/tmp/huggingface")
6
+
7
+ from flask import Flask, request, render_template, jsonify
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ import torch
10
+
11
+ app = Flask(__name__)
12
+
13
+ # Use a RoBERTa model fine-tuned on IMDb (public Hugging Face model).
14
+ # Model choice: textattack/roberta-base-imdb (widely used fine-tuned checkpoint)
15
+ MODEL_ID = "textattack/roberta-base-imdb"
16
+
17
+ # Load tokenizer & model once at startup
18
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
19
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
20
+ model.eval()
21
+
22
+ def predict(text: str):
23
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
24
+ with torch.no_grad():
25
+ outputs = model(**inputs)
26
+ probs = torch.nn.functional.softmax(outputs.logits, dim=1)
27
+ label = int(torch.argmax(probs, dim=1).item())
28
+ confidence = float(probs[0][label].item())
29
+ # IMDb fine-tuned label mapping: 1 => Positive, 0 => Negative
30
+ label_map = {0: "Negative", 1: "Positive"}
31
+ return {"label": label_map.get(label, "Neutral"), "confidence": round(confidence, 3)}
32
+
33
+ @app.route("/", methods=["GET"])
34
+ def index():
35
+ return render_template("index.html")
36
+
37
+ @app.route("/predict", methods=["POST"])
38
+ def predict_route():
39
+ text = request.form.get("text", "").strip()
40
+ if not text:
41
+ return render_template("index.html", result="Please enter text to analyze.", input_text="")
42
+ result = predict(text)
43
+ return render_template("index.html", result=f"{result['label']} (conf: {result['confidence']})", input_text=text)
44
+
45
+ @app.route("/api/predict", methods=["POST"])
46
+ def api_predict():
47
+ data = request.get_json(force=True)
48
+ text = data.get("text", "")
49
+ if not text:
50
+ return jsonify({"error":"No text provided"}), 400
51
+ result = predict(text)
52
+ return jsonify(result)
53
+
54
+ if __name__ == "__main__":
55
+ port = int(os.environ.get("PORT", 7860))
56
+ app.run(host="0.0.0.0", port=port)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ flask==2.3.3
2
+ transformers==4.57.0
3
+ torch==2.3.0
4
+ gunicorn==23.0.0
static/style.css ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ :root{
3
+ --bg:#0f1724;
4
+ --card:#0b1220;
5
+ --accent:#f97316;
6
+ --muted:#94a3b8;
7
+ --white:#e6eef8;
8
+ }
9
+ *{box-sizing:border-box}
10
+ body{
11
+ margin:0;font-family:Inter,Segoe UI,Roboto,Arial,sans-serif;background:var(--bg);color:var(--white);
12
+ display:flex;align-items:center;justify-content:center;min-height:100vh;padding:24px;
13
+ }
14
+ .wrap{width:100%;max-width:880px;}
15
+ .card{background:linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));padding:36px;border-radius:12px;box-shadow:0 10px 40px rgba(2,6,23,0.6);}
16
+ h1{margin:0;color:var(--accent);letter-spacing:0.3px}
17
+ .subtitle{margin:8px 0 20px;color:var(--muted)}
18
+ textarea{width:100%;min-height:140px;padding:14px;border-radius:8px;border:1px solid rgba(255,255,255,0.04);background:transparent;color:var(--white);resize:vertical;font-size:15px}
19
+ .btn-row{display:flex;justify-content:flex-end;margin-top:12px}
20
+ .btn{background:var(--accent);color:#041017;border:none;padding:10px 18px;border-radius:8px;font-weight:700;cursor:pointer}
21
+ .result{margin-top:18px;display:flex;flex-direction:column;gap:8px}
22
+ .pill{display:inline-block;padding:12px 18px;border-radius:999px;background:rgba(255,255,255,0.03);color:var(--white);font-weight:800}
23
+ .footer{margin-top:18px;color:var(--muted);font-size:13px}
24
+ code{background:rgba(255,255,255,0.02);padding:2px 6px;border-radius:4px;font-size:13px}
templates/index.html ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!doctype html>
3
+ <html lang="en">
4
+ <head>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="width=device-width,initial-scale=1">
7
+ <title>RoBERTa IMDb Sentiment — Professional</title>
8
+ <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
9
+ </head>
10
+ <body>
11
+ <div class="wrap">
12
+ <div class="card">
13
+ <h1>RoBERTa IMDb Sentiment Analyzer</h1>
14
+ <p class="subtitle">Fine-tuned on IMDb reviews — Professional demo</p>
15
+ <form method="post" action="/predict">
16
+ <textarea name="text" placeholder="Paste movie review or any text..." required>{{ input_text if input_text else '' }}</textarea>
17
+ <div class="btn-row">
18
+ <button type="submit" class="btn">Analyze Sentiment</button>
19
+ </div>
20
+ </form>
21
+ {% if result %}
22
+ <div class="result">
23
+ <label>Result:</label>
24
+ <div class="pill">{{ result }}</div>
25
+ </div>
26
+ {% endif %}
27
+ <div class="footer">Model: <code>textattack/roberta-base-imdb</code></div>
28
+ </div>
29
+ </div>
30
+ </body>
31
+ </html>
utils/model_loader.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+
4
+ def load_model(model_id: str):
5
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
6
+ model = AutoModelForSequenceClassification.from_pretrained(model_id)
7
+ model.eval()
8
+ return tokenizer, model