Spaces:
Running
Running
Update pages/21_NLP_Transformer.py
Browse files
- pages/21_NLP_Transformer.py +8 -15
pages/21_NLP_Transformer.py
CHANGED
|
@@ -1,22 +1,19 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from sklearn.model_selection import train_test_split
|
| 3 |
import torch
|
| 4 |
from torch.utils.data import DataLoader, Dataset
|
| 5 |
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
|
| 6 |
from transformers import get_linear_schedule_with_warmup
|
| 7 |
import numpy as np
|
| 8 |
-
from
|
| 9 |
import streamlit as st
|
| 10 |
|
| 11 |
-
# Load
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
train_df
|
| 17 |
-
|
| 18 |
-
train_df.to_csv('train.csv', index=False)
|
| 19 |
-
test_df.to_csv('test.csv', index=False)
|
| 20 |
|
| 21 |
class SentimentDataset(Dataset):
|
| 22 |
def __init__(self, dataframe, tokenizer, max_len):
|
|
@@ -113,10 +110,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
| 113 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 114 |
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
| 115 |
|
| 116 |
-
# Load data
|
| 117 |
-
train_df = pd.read_csv('train.csv')
|
| 118 |
-
test_df = pd.read_csv('test.csv')
|
| 119 |
-
|
| 120 |
# Create data loaders
|
| 121 |
BATCH_SIZE = 16
|
| 122 |
MAX_LEN = 128
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from torch.utils.data import DataLoader, Dataset
|
| 3 |
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
|
| 4 |
from transformers import get_linear_schedule_with_warmup
|
| 5 |
import numpy as np
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
import streamlit as st
|
| 8 |
|
| 9 |
+
# Load IMDb dataset
|
| 10 |
+
dataset = load_dataset('imdb')
|
| 11 |
+
train_df = dataset['train'].to_pandas()
|
| 12 |
+
test_df = dataset['test'].to_pandas()
|
| 13 |
|
| 14 |
+
# Preprocess the data
|
| 15 |
+
train_df = train_df[['text', 'label']]
|
| 16 |
+
test_df = test_df[['text', 'label']]
|
|
|
|
|
|
|
| 17 |
|
| 18 |
class SentimentDataset(Dataset):
|
| 19 |
def __init__(self, dataframe, tokenizer, max_len):
|
|
|
|
| 110 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 111 |
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# Create data loaders
|
| 114 |
BATCH_SIZE = 16
|
| 115 |
MAX_LEN = 128
|