noequal committed on
Commit
098f3a5
·
1 Parent(s): 2349074

Fix indentation

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -7,7 +7,7 @@ tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
7
  import torch
8
  from torch.utils.data import Dataset
9
 
10
- class ClinicalDataset(Dataset):
11
  def __init__(self, texts, labels, tokenizer):
12
  self.texts = texts
13
  self.labels = labels
@@ -22,11 +22,11 @@ from torch.utils.data import Dataset
22
  encoding = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
23
  return {"input_ids": encoding["input_ids"].squeeze(), "attention_mask": encoding["attention_mask"].squeeze(), "labels": torch.tensor(label)}
24
 
25
- dataset = ClinicalDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
26
  # Fine-tune the pre-trained model on your clinical dataset
27
  from transformers import Trainer, TrainingArguments
28
 
29
- training_args = TrainingArguments(
30
  output_dir='./results', # output directory
31
  num_train_epochs=3, # total number of training epochs
32
  per_device_train_batch_size=16, # batch size per device during training
@@ -36,7 +36,7 @@ from transformers import Trainer, TrainingArguments
36
  logging_dir='./logs', # directory for storing logs
37
  logging_steps=10, )
38
 
39
- trainer = Trainer(
40
  model=model,
41
  args=training_args,
42
  train_dataset=dataset,
@@ -44,4 +44,4 @@ from transformers import Trainer, TrainingArguments
44
  data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
45
  'attention_mask': torch.stack([f['attention_mask'] for f in data]),
46
  'labels': torch.stack([f['labels'] for f in data])}, )
47
- trainer.train()
 
7
  import torch
8
  from torch.utils.data import Dataset
9
 
10
+ class ClinicalDataset(Dataset):
11
  def __init__(self, texts, labels, tokenizer):
12
  self.texts = texts
13
  self.labels = labels
 
22
  encoding = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
23
  return {"input_ids": encoding["input_ids"].squeeze(), "attention_mask": encoding["attention_mask"].squeeze(), "labels": torch.tensor(label)}
24
 
25
+ dataset = ClinicalDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
26
  # Fine-tune the pre-trained model on your clinical dataset
27
  from transformers import Trainer, TrainingArguments
28
 
29
+ training_args = TrainingArguments(
30
  output_dir='./results', # output directory
31
  num_train_epochs=3, # total number of training epochs
32
  per_device_train_batch_size=16, # batch size per device during training
 
36
  logging_dir='./logs', # directory for storing logs
37
  logging_steps=10, )
38
 
39
+ trainer = Trainer(
40
  model=model,
41
  args=training_args,
42
  train_dataset=dataset,
 
44
  data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
45
  'attention_mask': torch.stack([f['attention_mask'] for f in data]),
46
  'labels': torch.stack([f['labels'] for f in data])}, )
47
+ trainer.train()