File size: 1,492 Bytes
773c256 c429a2d 773c256 c429a2d 773c256 c429a2d 773c256 c429a2d 773c256 c429a2d 773c256 c429a2d 773c256 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from pathlib import Path
import os
def _load_snapshot_download():
    """Return ``huggingface_hub.snapshot_download``, installing the package if missing.

    Raises:
        ImportError: If the package is absent and cannot be installed or
            imported after the installation attempt.
    """
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        import importlib
        import subprocess
        import sys

        print("huggingface_hub not installed. Installing...")
        # Use the running interpreter's pip (no shell) so the package lands in
        # the same environment this script imports from.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "huggingface_hub"],
            check=False,
        )
        if result.returncode != 0:
            raise ImportError(
                "Could not install huggingface_hub automatically "
                f"(pip exited with code {result.returncode})."
            ) from None
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
        from huggingface_hub import snapshot_download
    return snapshot_download


def download_data():
    """Download the dataset repository from HuggingFace into ``data/``.

    The repo id is read from the ``HF_DATA_REPO`` environment variable and
    falls back to a built-in default when the variable is unset or empty.
    If ``data/`` already exists and contains at least one entry, nothing is
    downloaded and ``huggingface_hub`` is neither imported nor installed.

    Returns:
        None.

    Raises:
        ImportError: If ``huggingface_hub`` is missing and cannot be installed.
        Exception: Any error raised by ``snapshot_download`` is re-raised
            after troubleshooting tips are printed.
    """
    # Configuration - CHANGE THIS to your HuggingFace repo
    HF_REPO_ID = os.getenv("HF_DATA_REPO") or "hungnha/do_an_tot_nghiep"
    data_path = Path("data")

    # is_dir() (rather than exists()) avoids NotADirectoryError from iterdir()
    # when a regular file named "data" is present.
    if data_path.is_dir() and any(data_path.iterdir()):
        print("Data folder already exists. Skipping download.")
        print("To re-download, delete the 'data/' folder first.")
        return

    # Resolve the dependency only once we know a download is really needed.
    snapshot_download = _load_snapshot_download()

    print(f"Downloading data from HuggingFace: {HF_REPO_ID}")
    print("This may take a few minutes...")
    try:
        snapshot_download(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            local_dir=str(data_path),
            # Download actual files, not symlinks.
            # NOTE(review): recent huggingface_hub releases appear to deprecate
            # this flag - confirm against the installed version.
            local_dir_use_symlinks=False,
        )
    except Exception as e:
        print(f"Error downloading data: {e}")
        print("\nTips:")
        print(" 1. Make sure the HF_DATA_REPO environment variable is set correctly")
        print(" 2. Or update HF_REPO_ID in this script")
        print(" 3. If repo is private, run: huggingface-cli login")
        raise
    else:
        print("Download complete!")
        print(f"Data saved to: {data_path.absolute()}")
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    download_data()
|