Spaces:
Sleeping
Sleeping
import os

# Point the Hugging Face caches at /tmp. These assignments are deliberately
# kept ahead of the transformers import below; presumably the library reads
# them at import time — confirm before reordering.
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/tmp/hf-cache",
        "HF_HOME": "/tmp/hf-home",
    }
)

from fastapi import FastAPI, Request
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
from scipy.special import softmax
import numpy as np

# The application object has to exist before any route decorator refers to it.
app = FastAPI()

# One checkpoint name shared by the tokenizer, the config and the model.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
def preprocess(text):
    """Mask user mentions and links in *text* before tokenization.

    The text is split on whitespace. Any word that starts with "@" and is
    longer than one character becomes "@user"; any other word that starts
    with "http" becomes "http". The words are then re-joined with single
    spaces, so runs of whitespace collapse and leading/trailing whitespace
    is dropped.
    """
    masked_words = (
        "@user" if word.startswith("@") and len(word) > 1
        else "http" if word.startswith("http")
        else word
        for word in text.split()
    )
    return " ".join(masked_words)
# Upper bound on tokens per model call, special tokens included.
_MAX_TOKENS = 512

# Order in which labels are reported, independent of the model's own id order.
_LABEL_ORDER = ("negative", "neutral", "positive")


def _window_probabilities(input_ids, attention_mask):
    """Return the softmax class probabilities for one encoded window (batch of 1)."""
    output = model(input_ids=input_ids, attention_mask=attention_mask)
    return softmax(output[0][0].detach().numpy())


async def analyze(request: Request):
    """Score the sentiment of the ``text`` field of a JSON request body.

    The text is run through ``preprocess`` and tokenized once. Inputs that
    fit in ``_MAX_TOKENS`` tokens are scored in a single model call. Longer
    inputs are split by the tokenizer into consecutive windows of at most
    ``_MAX_TOKENS`` tokens; every window is scored and the per-window
    probabilities are averaged (unweighted). Windowing is done on tokens
    rather than on words so that no part of the input is silently truncated.

    Returns:
        ``{"result": [{"label": ..., "score": ...}, ...]}`` with one entry per
        label in the fixed order negative, neutral, positive, scores rounded
        to four decimals; or ``{"error": ...}`` when the body is not valid
        JSON, is not an object with a string ``text``, or the text is empty.

    NOTE(review): no route decorator is visible on this handler in the
    reviewed source — confirm it is registered on ``app``.
    NOTE(review): the model calls are synchronous and run inside this
    coroutine, so a long input occupies the event loop while it is scored.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError, raised for a malformed body, is a ValueError.
        return {"error": "Invalid JSON"}

    raw_text = data.get("text", "") if isinstance(data, dict) else None
    if not isinstance(raw_text, str):
        return {"error": "Invalid input"}

    text = preprocess(raw_text)
    if not text.strip():
        return {"error": "Empty input"}

    # A single tokenizer call covers both the short and the long case: with
    # return_overflowing_tokens=True the overflow beyond max_length comes back
    # as additional rows instead of being discarded.
    # NOTE(review): this relies on a fast (Rust-backed) tokenizer, which is
    # what AutoTokenizer is expected to return here — confirm.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
        padding=True,
        return_overflowing_tokens=True,
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # Score one window at a time so peak memory does not grow with the number
    # of windows. Only input_ids and attention_mask are forwarded; the extra
    # overflow bookkeeping returned by the tokenizer is not a model input.
    window_probs = [
        _window_probabilities(input_ids[i:i + 1], attention_mask[i:i + 1])
        for i in range(input_ids.shape[0])
    ]
    probs = np.mean(window_probs, axis=0)

    # Look each label up through config.label2id so the output order stays
    # fixed whatever index the model assigns to each class.
    result = [
        {"label": label, "score": round(float(probs[config.label2id[label]]), 4)}
        for label in _LABEL_ORDER
    ]
    return {"result": result}