MakPr016 committed on
Commit
63f5626
·
0 Parent(s):
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ model/*.safetensors filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ sdg/
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Unbuffered stdout/stderr so application logs show up in `docker logs`
# in real time; skip .pyc generation to keep layers slim.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# gcc is needed to build any wheels that ship without prebuilt binaries.
# --no-install-recommends and clearing the apt cache in the same layer
# keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the code so this expensive layer
# is cached across source-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ ./app/
COPY model/ ./model/
COPY run.py .

# 7860 matches the port run.py binds (Hugging Face Spaces convention).
EXPOSE 7860

CMD ["python", "run.py"]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (149 Bytes). View file
 
app/__pycache__/limiter.cpython-310.pyc ADDED
Binary file (275 Bytes). View file
 
app/__pycache__/main.cpython-310.pyc ADDED
Binary file (3.24 kB). View file
 
app/__pycache__/model.cpython-310.pyc ADDED
Binary file (2.66 kB). View file
 
app/limiter.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Shared slowapi rate limiter for the API, keyed by client IP address."""
from slowapi import Limiter
from slowapi.util import get_remote_address

# Single Limiter instance imported by app.main and attached to app.state.
limiter = Limiter(key_func=get_remote_address)
app/main.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
# NOTE(review): JSONResponse appears unused in this module — confirm before removing.
from fastapi.responses import JSONResponse
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from pydantic import BaseModel, field_validator
from app.limiter import limiter
from app.model import classifier
import time

# FastAPI application; the metadata below is surfaced in the generated
# OpenAPI docs (/docs).
app = FastAPI(
    title="SDG Classifier API",
    description="Classifies text into UN Sustainable Development Goals",
    version="1.0.0"
)

# Open CORS policy: any origin, method, and header may call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Attach the shared IP-keyed limiter and convert rate-limit overruns
# into HTTP 429 responses via slowapi's stock handler.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
26
+
27
+
28
class ClassifyRequest(BaseModel):
    """Request body for /classify.

    Attributes:
        text: the passage to classify; must be non-empty and at most
            2000 characters after surrounding whitespace is trimmed.
        top_k: number of SDG predictions to return (1-5).
    """

    text: str
    top_k: int = 3

    @field_validator("text")
    @classmethod
    def text_must_not_be_empty(cls, v):
        # Strip first, then validate: the original checked length on the
        # raw string but returned the stripped one, so whitespace padding
        # counted against the 2000-character limit.
        v = v.strip()
        if not v:
            raise ValueError("text must not be empty")
        if len(v) > 2000:
            raise ValueError("text must be under 2000 characters")
        return v

    @field_validator("top_k")
    @classmethod
    def top_k_must_be_valid(cls, v):
        # The API caps responses at 5 predictions per request.
        if not 1 <= v <= 5:
            raise ValueError("top_k must be between 1 and 5")
        return v
47
+
48
+
49
class SDGResult(BaseModel):
    """One SDG prediction: label key, human-readable name, confidence in %."""

    sdg: str
    name: str
    confidence: float
53
+
54
+
55
class ClassifyResponse(BaseModel):
    """Response body for /classify."""

    text: str                      # validated input text echoed back
    predictions: list[SDGResult]   # top-k predictions, best-first
    latency_ms: float              # inference wall-clock time in milliseconds
    warning: str | None = None     # set when the prediction distribution looks degenerate
60
+
61
+
62
@app.get("/")
def root():
    """Liveness banner for the API root."""
    payload = {"status": "ok", "message": "SDG Classifier API is running"}
    return payload
65
+
66
+
67
@app.get("/health")
def health():
    """Report service health."""
    return dict(status="healthy")
70
+
71
+
72
@app.post("/classify", response_model=ClassifyResponse, summary="Classify text into SDGs")
@limiter.limit("20/minute")
async def classify(request: Request, body: ClassifyRequest):
    """Classify `body.text` into its top-k UN SDGs.

    Rate-limited to 20 requests/minute per client IP. Returns the
    predictions plus the measured inference latency; attaches a warning
    when the prediction distribution looks degenerate.
    """
    # perf_counter is monotonic — interval timing cannot go negative if
    # the wall clock is adjusted mid-request (time.time can).
    start = time.perf_counter()

    try:
        predictions = classifier.predict(body.text, body.top_k)
    except Exception as e:
        # Surface inference failures as a 500 with the original cause chained.
        raise HTTPException(status_code=500, detail=f"Inference error: {str(e)}") from e

    latency = round((time.perf_counter() - start) * 1000, 2)

    warning = None
    # Guard on length: with top_k=1 there is only one prediction and the
    # original predictions[1] access raised IndexError.
    if (
        len(predictions) > 1
        and predictions[0]["confidence"] > 85
        and predictions[1]["confidence"] < 5
    ):
        warning = "Low prediction diversity — input may not be SDG-related text."

    return ClassifyResponse(
        text=body.text,
        predictions=predictions,
        latency_ms=latency,
        warning=warning
    )
app/model.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from transformers import AutoTokenizer, BertForSequenceClassification
4
+ from pathlib import Path
5
+
6
# Checkpoint directory: <repo root>/model, alongside the app/ package.
MODEL_PATH = Path(__file__).parent.parent / "model"

# Official short names of the 17 UN Sustainable Development Goals, in
# goal order; keyed below by the "SDG <n>" label strings the classifier
# head emits.
_SDG_NAMES = (
    "No Poverty",
    "Zero Hunger",
    "Good Health and Well-being",
    "Quality Education",
    "Gender Equality",
    "Clean Water and Sanitation",
    "Affordable and Clean Energy",
    "Decent Work and Economic Growth",
    "Industry, Innovation and Infrastructure",
    "Reduced Inequalities",
    "Sustainable Cities and Communities",
    "Responsible Consumption and Production",
    "Climate Action",
    "Life Below Water",
    "Life on Land",
    "Peace, Justice and Strong Institutions",
    "Partnerships for the Goals",
)
SDG_METADATA = {f"SDG {i}": name for i, name in enumerate(_SDG_NAMES, start=1)}
27
+
28
class SDGClassifier:
    """Inference wrapper around the fine-tuned BERT checkpoint in MODEL_PATH."""

    def __init__(self):
        # Prefer GPU when available; model and inputs both move to this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Loading model on {self.device}...")
        self.tokenizer = AutoTokenizer.from_pretrained(str(MODEL_PATH))
        self.model = BertForSequenceClassification.from_pretrained(str(MODEL_PATH))
        self.model.to(self.device)
        self.model.eval()  # disable dropout for deterministic inference
        print("Model loaded successfully!")

    def predict(self, text: str, top_k: int = 3) -> list:
        """Return the top_k SDG predictions for `text`, best-first.

        Each entry is a dict with keys "sdg" (label string), "name"
        (human-readable goal name), and "confidence" (percentage rounded
        to two decimals).
        """
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,  # API texts are short; 128 tokens keeps latency low
            padding=True
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = self.model(**inputs).logits

        # squeeze(0) drops exactly the batch dimension -> shape (num_labels,).
        probs = F.softmax(logits, dim=-1).squeeze(0)
        # Clamp k so a top_k larger than the label count cannot make
        # topk raise a RuntimeError.
        k = max(1, min(top_k, probs.size(-1)))
        top = probs.topk(k)

        results = []
        for idx, score in zip(top.indices, top.values):
            # Label index i maps to "SDG i+1" (matches id2label in the
            # checkpoint's config.json).
            sdg_key = f"SDG {idx.item() + 1}"
            results.append({
                "sdg": sdg_key,
                "name": SDG_METADATA[sdg_key],
                "confidence": round(score.item() * 100, 2)
            })

        return results

# Singleton — loaded once when the app starts (at import of app.model).
classifier = SDGClassifier()
model/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "SDG 1",
17
+ "1": "SDG 2",
18
+ "2": "SDG 3",
19
+ "3": "SDG 4",
20
+ "4": "SDG 5",
21
+ "5": "SDG 6",
22
+ "6": "SDG 7",
23
+ "7": "SDG 8",
24
+ "8": "SDG 9",
25
+ "9": "SDG 10",
26
+ "10": "SDG 11",
27
+ "11": "SDG 12",
28
+ "12": "SDG 13",
29
+ "13": "SDG 14",
30
+ "14": "SDG 15",
31
+ "15": "SDG 16",
32
+ "16": "SDG 17"
33
+ },
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 3072,
36
+ "is_decoder": false,
37
+ "label2id": {
38
+ "SDG 1": 0,
39
+ "SDG 10": 9,
40
+ "SDG 11": 10,
41
+ "SDG 12": 11,
42
+ "SDG 13": 12,
43
+ "SDG 14": 13,
44
+ "SDG 15": 14,
45
+ "SDG 16": 15,
46
+ "SDG 17": 16,
47
+ "SDG 2": 1,
48
+ "SDG 3": 2,
49
+ "SDG 4": 3,
50
+ "SDG 5": 4,
51
+ "SDG 6": 5,
52
+ "SDG 7": 6,
53
+ "SDG 8": 7,
54
+ "SDG 9": 8
55
+ },
56
+ "layer_norm_eps": 1e-12,
57
+ "max_position_embeddings": 512,
58
+ "model_type": "bert",
59
+ "num_attention_heads": 12,
60
+ "num_hidden_layers": 12,
61
+ "pad_token_id": 0,
62
+ "position_embedding_type": "absolute",
63
+ "problem_type": "single_label_classification",
64
+ "tie_word_embeddings": true,
65
+ "transformers_version": "5.0.0",
66
+ "type_vocab_size": 2,
67
+ "use_cache": false,
68
+ "vocab_size": 30522
69
+ }
model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65bfdd2b0083217dee9ebd9861cea316d212c88c0579a20aef56906a323948a9
3
+ size 438004764
model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
model/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn==0.30.0
3
+ transformers==4.47.0  # NOTE(review): model/config.json records transformers_version 5.0.0 — confirm this checkpoint loads under 4.47.0
4
+ torch==2.5.1
5
+ slowapi==0.1.9
6
+ python-dotenv==1.0.0
run.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
"""Entry point: launch the SDG Classifier API with uvicorn."""
import uvicorn

if __name__ == "__main__":
    # Listen on all interfaces at port 7860 — the port the Dockerfile EXPOSEs.
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860, reload=False)