ganeshkonapalli committed on
Commit
0e73d34
·
verified ·
1 Parent(s): c8a11a5

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +12 -0
  2. README.md +2 -9
  3. docker-compose.yml +6 -0
  4. main.py +34 -0
  5. requirements.txt +6 -0
  6. train.py +56 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the DeBERTa FastAPI service.
FROM python:3.11-slim

WORKDIR /code

# Install dependencies before copying the rest of the app so this layer is
# reused from the build cache when only application code changes.
# --no-cache-dir keeps pip's download cache out of the image.
COPY ./app/requirements.txt /code/app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r app/requirements.txt

# Application code and the pickled artifacts that main.py loads from app/.
COPY ./app /code/app

# Port the uvicorn server listens on (see CMD below).
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,3 @@
1
- ---
2
- title: Deberta.space
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # DeBERTa HuggingFace Space
 
 
 
 
 
 
 
2
 
3
+ A FastAPI service that serves a DeBERTa model, deployed via Docker.
docker-compose.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
# Compose definition for building and running the API container.
version: "3.8"
services:
  deberta-api:
    # Build the image from the Dockerfile in this directory.
    build: .
    ports:
      # host:container
      - "7860:7860"
main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import torch
4
+ import pickle
5
+
# Request body accepted by the POST /predict endpoint.
class InputText(BaseModel):
    # Raw text to classify.
    text: str
8
+
app = FastAPI()


def _load_pickle(path):
    """Return the object unpickled from the file at *path*.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserializing, so *path* must only ever point at trusted files.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Load the model, tokenizer and label encoder once at import time so every
# request reuses the same objects. Paths are relative to the process
# working directory.
model = _load_pickle("app/model.pkl")
tokenizer = _load_pickle("app/tokenizer.pkl")
label_encoder = _load_pickle("app/label_encoder.pkl")

# Put the model in evaluation mode; this process only runs inference.
model.eval()
22
+
@app.get("/")
def read_root():
    """Return a fixed status message for GET /."""
    status = {"message": "DeBERTa Model is live!"}
    return status
26
+
@app.post("/predict")
def predict(input: InputText):
    """Classify ``input.text`` and return the predicted label.

    Returns a dict ``{"prediction": <label>}`` where the label is obtained
    by passing the arg-max class index through
    ``label_encoder.inverse_transform``.
    """
    # NOTE: the parameter name ``input`` shadows the builtin; it is kept
    # because it is part of the endpoint's existing signature.
    encoded = tokenizer(
        input.text, return_tensors="pt", truncation=True, padding=True
    )
    with torch.no_grad():  # inference only; gradients are not needed
        outputs = model(**encoded)
    pred = torch.argmax(outputs.logits, dim=1).item()
    label = label_encoder.inverse_transform([pred])[0]
    # inverse_transform yields NumPy scalars; convert to the equivalent
    # built-in Python value so the response is always JSON-serializable
    # (e.g. numpy.int64 labels are not).
    if hasattr(label, "item"):
        label = label.item()
    return {"prediction": label}
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ scikit-learn
5
+ pandas
6
+ torch
train.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import torch
3
+ from transformers import DebertaTokenizer, DebertaForSequenceClassification
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.preprocessing import LabelEncoder
6
+ import pickle
7
+
# Sample data: a tiny in-memory dataset used only to exercise the pipeline.
data = pd.DataFrame({
    'text': [
        'This is a positive message',
        'This is negative',
        'I am neutral',
        'Absolutely wonderful',
        'Terrible and bad',
    ],
    'label': ['positive', 'negative', 'neutral', 'positive', 'negative'],
})

# Encode string labels as integer class ids.
le = LabelEncoder()
data['label_enc'] = le.fit_transform(data['label'])

# Train-test split. A fixed random_state makes the split reproducible
# between runs. The held-out part is not used further in this script.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'], data['label_enc'], test_size=0.2, random_state=42
)

# Tokenization
tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")
train_encodings = tokenizer(
    list(X_train), truncation=True, padding=True, return_tensors="pt"
)

# Model: one output logit per distinct label seen by the encoder.
model = DebertaForSequenceClassification.from_pretrained(
    "microsoft/deberta-base", num_labels=len(le.classes_)
)
inputs = train_encodings['input_ids']
attention_mask = train_encodings['attention_mask']
# The classification loss requires int64 targets; be explicit rather than
# relying on the platform's default integer width.
labels = torch.tensor(y_train.values, dtype=torch.long)

# Training (single epoch for demo): one full-batch gradient step.
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

for epoch in range(1):
    optimizer.zero_grad()
    outputs = model(inputs, attention_mask=attention_mask, labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Save model, tokenizer and label encoder under app/.
import os  # only needed here, to make sure the output directory exists

# open(..., "wb") raises FileNotFoundError if the directory is missing.
os.makedirs("app", exist_ok=True)

with open("app/model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("app/tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

with open("app/label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)