File size: 2,095 Bytes
372f608
 
b7fd58f
 
372f608
 
b7fd58f
 
 
 
372f608
 
 
b7fd58f
 
 
 
 
 
372f608
 
 
 
 
 
 
 
 
b7fd58f
372f608
 
 
b7fd58f
 
 
 
 
 
 
 
 
 
 
372f608
 
 
 
b7fd58f
 
 
 
372f608
 
 
b7fd58f
 
 
 
 
 
 
 
 
 
 
 
 
372f608
 
b7fd58f
 
 
 
372f608
 
 
b7fd58f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import uvicorn

# FastAPI application instance; metadata shows up in the auto-generated /docs UI.
app = FastAPI(
    title="MAKINI API",
    description="Gender bias detection for Swahili and African French",
    version="1.0.0"
)

# Languages accepted by /predict (ISO 639-1 codes: Swahili, French).
SUPPORTED_LANGUAGES = {"sw", "fr"}
# Class labels in the model's output order — index i maps to logit/probability i.
LABELS = ["neutral", "stereotype", "counter-stereotype", "derogation"]
# Hugging Face Hub model id served by this API.
MODEL_ID = "Daudipdg/makini-v1"

# Load tokenizer and model once at import time so every request reuses them.
# NOTE(review): this downloads/loads the model before the server can start —
# intentional for a single-model service, but it blocks startup.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode: disables dropout etc.

class InferenceRequest(BaseModel):
    """Request body for POST /predict."""
    text: str      # raw input text to classify; must be non-blank
    language: str  # language code; must be one of SUPPORTED_LANGUAGES ("sw" or "fr")

class InferenceResponse(BaseModel):
    """Response body for POST /predict."""
    label: str         # top-scoring entry from LABELS
    confidence: float  # softmax probability of `label`, rounded to 4 decimals
    scores: dict       # every label in LABELS -> its rounded probability
    language: str      # echo of the request's language code

@app.get("/")
def root():
    """Landing endpoint: static service metadata plus the label set."""
    info = dict(
        model="MAKINI v1",
        company="Iroh Intelligence Labs",
        contact="david@makini.tech",
        supported_languages=["sw", "fr"],
        labels=LABELS,
    )
    return info

@app.get("/health")
def health():
    """Liveness probe: report service status and the served model id."""
    payload = dict(status="ok", model=MODEL_ID)
    return payload

@app.post("/predict", response_model=InferenceResponse)
def predict(request: InferenceRequest):
    """Classify ``request.text`` into one of the bias LABELS.

    Returns an InferenceResponse with the top label, its confidence, and
    the full per-label probability distribution.

    Raises:
        HTTPException(400): if the language is unsupported or the text is
            empty/whitespace-only.
    """
    if request.language not in SUPPORTED_LANGUAGES:
        # sorted() makes the message deterministic — rendering a set directly
        # varies with hash randomization across processes.
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language. Supported: {sorted(SUPPORTED_LANGUAGES)}"
        )
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    inputs = tokenizer(
        request.text,
        return_tensors="pt",
        truncation=True,   # clip over-long inputs instead of failing
        max_length=128
    )

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        outputs = model(**inputs)
        # [0]: single-example batch -> 1-D probability vector over LABELS.
        probs = torch.softmax(outputs.logits, dim=-1)[0]

    scores = {label: round(probs[i].item(), 4) for i, label in enumerate(LABELS)}
    top_idx = probs.argmax().item()

    return InferenceResponse(
        label=LABELS[top_idx],
        confidence=round(probs[top_idx].item(), 4),
        scores=scores,
        language=request.language
    )

if __name__ == "__main__":
    # Run the ASGI app directly; 0.0.0.0 exposes it on all interfaces.
    # NOTE(review): port 7860 suggests a Hugging Face Spaces deployment — confirm.
    uvicorn.run(app, host="0.0.0.0", port=7860)