Upload 7 files
Browse files- Dockerfile +18 -0
- README.md +41 -11
- app.py +56 -0
- requirements.txt +4 -0
- static/style.css +24 -0
- templates/index.html +31 -0
- utils/model_loader.py +8 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Use official slim Python image.
# NOTE: bumped from 3.8 (EOL) — the pinned transformers==4.57.0 requires Python >= 3.9.
FROM python:3.10-slim

WORKDIR /app

# Copy only the dependency manifest first so the pip layer is cached
# and not invalidated by every source-code change.
COPY requirements.txt /app/

# Upgrade pip and install dependencies in one layer; --no-cache-dir keeps the image small.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project files
COPY . /app

# Expose port used by HF Spaces Docker (default 7860)
EXPOSE 7860

# Start the Flask app
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,41 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# RoBERTa IMDb Sentiment Analyzer — Pro (Docker-ready)
|
| 3 |
+
|
| 4 |
+
This project provides a professional-grade sentiment analysis API & UI using a RoBERTa model fine-tuned on the IMDb dataset.
|
| 5 |
+
|
| 6 |
+
## Features
|
| 7 |
+
- Model: `textattack/roberta-base-imdb` (RoBERTa fine-tuned on IMDb)
|
| 8 |
+
- Flask web UI + /api/predict JSON endpoint
|
| 9 |
+
- Dockerfile for Hugging Face Spaces (Docker SDK)
|
| 10 |
+
- Production-ready (port binding, HF cache fix)
|
| 11 |
+
|
| 12 |
+
## Local Testing (Optional)
|
| 13 |
+
1. Create & activate venv:
|
| 14 |
+
```
|
| 15 |
+
python -m venv venv
|
| 16 |
+
venv\Scripts\activate   # Windows; on macOS/Linux use: source venv/bin/activate
|
| 17 |
+
```
|
| 18 |
+
2. Install dependencies (recommended to install torch wheel suitable for your system first):
|
| 19 |
+
```
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
3. Run:
|
| 23 |
+
```
|
| 24 |
+
python app.py
|
| 25 |
+
```
|
| 26 |
+
4. Open: http://127.0.0.1:7860/
|
| 27 |
+
|
| 28 |
+
## Deploy to Hugging Face Spaces (Docker)
|
| 29 |
+
1. Create a new Space on Hugging Face:
|
| 30 |
+
- SDK: Docker
|
| 31 |
+
- Hardware: CPU (Free)
|
| 32 |
+
2. Upload all files from this repository (you can upload the ZIP contents).
|
| 33 |
+
3. Wait for the Space to build. The first run may take time as the model downloads.
|
| 34 |
+
|
| 35 |
+
## API
|
| 36 |
+
- `POST /api/predict` accepts JSON: `{ "text": "your text" }`
|
| 37 |
+
- Response: `{ "label": "Positive", "confidence": 0.987 }` — `label` is either `"Positive"` or `"Negative"`
|
| 38 |
+
|
| 39 |
+
## Notes
|
| 40 |
+
- The app sets Hugging Face cache to `/tmp` to avoid container permission issues.
|
| 41 |
+
- If you want to include a local pre-downloaded model, place it under `model/` and change `MODEL_ID` to the path.
|
app.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os

# Fix Hugging Face cache permission issues on hosted runtimes:
# HF Spaces containers may not allow writes to the default ~/.cache,
# so redirect the cache to /tmp unless the env already overrides it.
# NOTE(review): these must be set BEFORE `transformers` is imported below,
# since the library reads them at import time — keep this ordering.
os.environ["TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
os.environ["HF_HOME"] = os.environ.get("HF_HOME", "/tmp/huggingface")

from flask import Flask, request, render_template, jsonify
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

app = Flask(__name__)

# Use a RoBERTa model fine-tuned on IMDb (public Hugging Face model).
# Model choice: textattack/roberta-base-imdb (widely used fine-tuned checkpoint)
MODEL_ID = "textattack/roberta-base-imdb"

# Load tokenizer & model once at startup (first run downloads the weights).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode: disables dropout etc.
|
| 21 |
+
|
| 22 |
+
def predict(text: str):
    """Classify *text* sentiment with the module-level RoBERTa model.

    Returns a dict: {"label": "Positive"|"Negative", "confidence": float rounded to 3dp}.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
    probs = torch.nn.functional.softmax(logits, dim=1)
    idx = int(probs.argmax(dim=1).item())
    score = float(probs[0][idx].item())
    # IMDb fine-tuned label mapping: 1 => Positive, 0 => Negative
    names = {0: "Negative", 1: "Positive"}
    return {"label": names.get(idx, "Neutral"), "confidence": round(score, 3)}
|
| 32 |
+
|
| 33 |
+
@app.route("/", methods=["GET"])
def index():
    """Serve the single-page sentiment-analysis UI."""
    return render_template("index.html")
|
| 36 |
+
|
| 37 |
+
@app.route("/predict", methods=["POST"])
def predict_route():
    """Handle the HTML form submission and re-render the page with the result."""
    text = request.form.get("text", "").strip()
    if not text:
        return render_template("index.html", result="Please enter text to analyze.", input_text="")
    outcome = predict(text)
    summary = f"{outcome['label']} (conf: {outcome['confidence']})"
    return render_template("index.html", result=summary, input_text=text)
|
| 44 |
+
|
| 45 |
+
@app.route("/api/predict", methods=["POST"])
def api_predict():
    """JSON endpoint.

    Request body: {"text": "..."}; response: {"label": ..., "confidence": ...}.
    Returns 400 with {"error": ...} when no usable text is supplied.
    """
    # silent=True makes get_json return None instead of raising on an empty or
    # malformed body; without it (the original force=True only), a bad body made
    # `data.get` blow up with AttributeError and the client saw a 500.
    data = request.get_json(force=True, silent=True) or {}
    # Strip whitespace so "   " is rejected too, matching the form route's behavior.
    text = (data.get("text") or "").strip()
    if not text:
        return jsonify({"error": "No text provided"}), 400
    return jsonify(predict(text))
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
    # HF Spaces can inject PORT; fall back to 7860 for local runs.
    listen_port = int(os.environ.get("PORT", 7860))
    # Bind to all interfaces so the app is reachable from outside the container.
    app.run(host="0.0.0.0", port=listen_port)
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==2.3.3
|
| 2 |
+
transformers==4.57.0
|
| 3 |
+
torch==2.3.0
|
| 4 |
+
gunicorn==23.0.0
|
static/style.css
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
/* Dark-theme palette shared by all rules below. */
:root{
--bg:#0f1724;
--card:#0b1220;
--accent:#f97316;
--muted:#94a3b8;
--white:#e6eef8;
}
/* Global reset: predictable sizing everywhere. */
*{box-sizing:border-box}
/* Full-viewport flex centering of the single card. */
body{
margin:0;font-family:Inter,Segoe UI,Roboto,Arial,sans-serif;background:var(--bg);color:var(--white);
display:flex;align-items:center;justify-content:center;min-height:100vh;padding:24px;
}
/* Content column, capped width on wide screens. */
.wrap{width:100%;max-width:880px;}
/* Main panel: subtle gradient + deep shadow for elevation. */
.card{background:linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));padding:36px;border-radius:12px;box-shadow:0 10px 40px rgba(2,6,23,0.6);}
h1{margin:0;color:var(--accent);letter-spacing:0.3px}
.subtitle{margin:8px 0 20px;color:var(--muted)}
/* Review input: transparent field that blends into the card. */
textarea{width:100%;min-height:140px;padding:14px;border-radius:8px;border:1px solid rgba(255,255,255,0.04);background:transparent;color:var(--white);resize:vertical;font-size:15px}
.btn-row{display:flex;justify-content:flex-end;margin-top:12px}
.btn{background:var(--accent);color:#041017;border:none;padding:10px 18px;border-radius:8px;font-weight:700;cursor:pointer}
/* Result area shown only after a prediction. */
.result{margin-top:18px;display:flex;flex-direction:column;gap:8px}
.pill{display:inline-block;padding:12px 18px;border-radius:999px;background:rgba(255,255,255,0.03);color:var(--white);font-weight:800}
.footer{margin-top:18px;color:var(--muted);font-size:13px}
code{background:rgba(255,255,255,0.02);padding:2px 6px;border-radius:4px;font-size:13px}
|
templates/index.html
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>RoBERTa IMDb Sentiment — Professional</title>
  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
  <div class="wrap">
    <div class="card">
      <h1>RoBERTa IMDb Sentiment Analyzer</h1>
      <p class="subtitle">Fine-tuned on IMDb reviews — Professional demo</p>
      <!-- Posts the review text to the Flask /predict route (form endpoint, not the JSON API). -->
      <form method="post" action="/predict">
        <!-- input_text is echoed back by the server so the review survives a round trip. -->
        <textarea name="text" placeholder="Paste movie review or any text..." required>{{ input_text if input_text else '' }}</textarea>
        <div class="btn-row">
          <button type="submit" class="btn">Analyze Sentiment</button>
        </div>
      </form>
      <!-- Rendered only when the server passed a result string. -->
      {% if result %}
      <div class="result">
        <label>Result:</label>
        <div class="pill">{{ result }}</div>
      </div>
      {% endif %}
      <div class="footer">Model: <code>textattack/roberta-base-imdb</code></div>
    </div>
  </div>
</body>
</html>
|
utils/model_loader.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
+
|
| 4 |
+
def load_model(model_id: str):
    """Fetch tokenizer and sequence-classification model for *model_id*.

    The model is switched to eval mode so it is ready for inference.
    Returns a (tokenizer, model) tuple.
    """
    tok = AutoTokenizer.from_pretrained(model_id)
    clf = AutoModelForSequenceClassification.from_pretrained(model_id)
    clf.eval()
    return tok, clf
|