Hamses committed on
Commit
7b8aacd
·
verified ·
1 Parent(s): 68f1d05

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -0
README.md CHANGED
@@ -10,3 +10,18 @@ tags:
10
  - legal
11
  ---
12
  pip install transformers datasets torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  - legal
11
  ---
12
  pip install transformers datasets torch
13
+
14
+ from datasets import load_dataset
15
+
16
+ # Load the dataset (using an example dataset here; replace with your dataset)
17
+ dataset = load_dataset('Hamses/EU_Regulation_261_2004')
18
+
19
+ # Preprocess the dataset
20
+ from transformers import AutoTokenizer
21
+
22
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
23
+
24
+ def preprocess_function(examples):
25
+ return tokenizer(examples['text'], padding='max_length', truncation=True)
26
+
27
+ encoded_dataset = dataset.map(preprocess_function, batched=True)