File size: 368 Bytes
492754f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from datasets import load_dataset
import pandas as pd
import os

# Load the "SetFit/bbc-news" dataset and turn its "train" and "test" splits
# into DataFrames (the generator keeps the loop variable out of module scope).
dataset = load_dataset("SetFit/bbc-news")
train_df, test_df = (
    pd.DataFrame(dataset[split_name]) for split_name in ("train", "test")
)

# Stack the two splits into one table with a fresh 0..n-1 index.
df = pd.concat((train_df, test_df), ignore_index=True)

# Write the combined table as CSV, creating the target directory first;
# the directory is derived from the output path so the two cannot drift apart.
output_csv = "data/raw/bbc-text.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)

# Quick sanity check: dimensions on one line, then the first rows.
print(df.shape, df.head(), sep="\n")