capstone_backend_v2 / load_datasets_hf.py
dongchan21
Fixed LFS tracking for index file and removed unnecessary excels
c9ace58
raw
history blame contribute delete
447 Bytes
from datasets import load_dataset
# Default load: fetch the dataset from the Hugging Face Hub. If the dataset
# defines splits, they are picked up automatically.
ds = load_dataset("BCCard/BCCard-Finance-Kor-QnA")

# Overall summary of everything that was loaded.
print(ds)

# Look the "train" split up once and reuse it for both the sanity check and
# the export below.
train_split = ds["train"]

# Inspect the first sample to confirm the records look as expected.
first_sample = train_split[0]
print(first_sample)

# Optional export: save the train split as CSV (Parquet alternative below).
# index=False keeps the pandas row index out of the written file.
csv_path = "BCCard-Finance-Kor-QnA_train.csv"
df = train_split.to_pandas()
df.to_csv(csv_path, index=False)
# Or: ds["train"].to_parquet("BCCard-Finance-Kor-QnA_train.parquet")