# Hugging Face page header (not part of the program):
# uploaded by jchsiao2025, commit "Upload 21 files" (3118055, verified)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import to_tensor, to_pil_image
import torchvision.transforms as transforms
# from transformers import AutoModel
# from transformers import AutoTokenizer, AutoConfig
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data.distributed import DistributedSampler
import numpy as np
import random
import os
import copy
import pandas as pd
class MyDataset(Dataset):
    """Map-style dataset backed by the rows of a CSV file.

    The CSV is loaded once with pandas and kept in memory as a NumPy
    array. Samples are taken from each row by column *position*, not by
    column name.

    Args:
        ann_file: CSV location; handed straight to ``pandas.read_csv``.
        cfg: Configuration object. It is only stored on the instance here.
        mode: ``'test'`` makes items ``(row[1], row[0])``; every other
            value (default ``'tra'``) makes items ``(row[1], int(row[2]))``.
    """

    def __init__(self, ann_file, cfg, mode='tra'):
        super().__init__()
        self.data = np.array(pd.read_csv(ann_file))
        self.mode = mode
        self.cfg = cfg

    def __getitem__(self, index):
        """Return the sample stored at row ``index``.

        Returns:
            ``(context, sms_id)`` in ``'test'`` mode, where ``sms_id`` is
            column 0 of the row; otherwise ``(context, label)`` with the
            label (column 2) converted to ``int``. ``context`` is always
            column 1.
        """
        row = self.data[index]
        text = row[1]
        if self.mode == 'test':
            # Test rows are paired with their id (column 0), not a label.
            sms_id = row[0]
            return text, sms_id
        return text, int(row[2])

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)
if __name__ == '__main__':
    # Step 1: print the fraction of rows labelled 1 in the existing
    # validation CSV. The file must already exist when this runs; it is
    # only (re)written in step 2 below.
    val_path = 'C:/Users/u/Desktop/workspace/bs/myr/data/val.csv'
    val_set = MyDataset(val_path, cfg={})
    positives = sum(1 for _, lbl in val_set if lbl == 1)
    print(positives / len(val_set))

    # Step 2: merge the two sample files, shuffle the rows with a fixed
    # seed, and write a 70% / 30% train/validation split.
    np.random.seed(666)
    sample_paths = (
        'C:/Users/u/Desktop/workspace/bs/myr/data/test_samples.csv',
        'C:/Users/u/Desktop/workspace/bs/myr/data/train_samples.csv',
    )
    merged = pd.concat([pd.read_csv(path) for path in sample_paths])
    rows = np.array(merged)
    np.random.shuffle(rows)  # in-place shuffle of the row order

    cut = int(len(rows) * 0.7)
    header = ['content', 'label']
    # NOTE(review): these files are written with two columns and no index
    # column, whereas MyDataset reads row positions 1 and 2. Reading the
    # freshly written val.csv through MyDataset looks like it would fail
    # on position 2 -- confirm the intended CSV layout.
    outputs = {
        'C:/Users/u/Desktop/workspace/bs/myr/data/tra.csv':
            pd.DataFrame(rows[:cut], columns=header),
        'C:/Users/u/Desktop/workspace/bs/myr/data/val.csv':
            pd.DataFrame(rows[cut:], columns=header),
    }
    for out_path, frame in outputs.items():
        frame.to_csv(out_path, index=False)