Spaces:
Configuration error
Configuration error
| import pandas as pd | |
| import torch | |
| from transformers import DebertaTokenizer, DebertaForSequenceClassification | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder | |
| import pickle | |
# Sample data: five short texts with three-way sentiment labels.
data = pd.DataFrame({
    'text': [
        'This is a positive message',
        'This is negative',
        'I am neutral',
        'Absolutely wonderful',
        'Terrible and bad'
    ],
    'label': ['positive', 'negative', 'neutral', 'positive', 'negative']
})

# Encode string labels to integer ids (LabelEncoder sorts classes
# alphabetically: negative=0, neutral=1, positive=2).
le = LabelEncoder()
data['label_enc'] = le.fit_transform(data['label'])

# Train-test split.
# Fix: pin random_state so the split — and everything trained on it — is
# reproducible; the original call produced a different split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'], data['label_enc'], test_size=0.2, random_state=42
)
# Tokenization: encode the training texts into padded/truncated PyTorch tensors.
tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")
train_texts = list(X_train)
train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors="pt")

# Model: DeBERTa with a classification head sized to the number of label classes.
num_classes = len(le.classes_)
model = DebertaForSequenceClassification.from_pretrained(
    "microsoft/deberta-base", num_labels=num_classes
)

# Pull out the tensors the training loop consumes.
inputs, attention_mask = train_encodings["input_ids"], train_encodings["attention_mask"]
labels = torch.tensor(y_train.values)
# Training (single epoch for demo): one full-batch AdamW step per epoch
# over the entire toy dataset.
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
num_epochs = 1
for _ in range(num_epochs):
    # Passing labels= makes the model compute cross-entropy loss internally.
    outputs = model(inputs, attention_mask=attention_mask, labels=labels)
    outputs.loss.backward()
    optimizer.step()
    optimizer.zero_grad()
# Save model, tokenizer, and label encoder for the serving app.
# NOTE(review): pickling transformers objects is fragile across library
# versions — model.save_pretrained() / tokenizer.save_pretrained() is the
# supported format. Kept as pickle here because the app presumably loads
# these exact .pkl files; confirm before switching.
import os

# Fix: create the target directory first; the original raised
# FileNotFoundError when "app/" did not already exist.
os.makedirs("app", exist_ok=True)

with open("app/model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("app/tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)
with open("app/label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)