Spaces:
Running
Running
File size: 1,055 Bytes
"""
Download Indian Supreme Court judgments from Kaggle.
Uses kagglehub to download directly - no manual zip extraction needed.
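Output: the dataset files in kagglehub's local cache; their paths are printed.
NOTE: this script does not itself write data/raw_judgments.jsonl.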
WHY kagglehub? Programmatic download - reproducible, no manual steps.
Anyone cloning this repo can run this script and get the same data.
"""
import kagglehub
import json
import os
import glob
def download_judgments(
    dataset_handle: str = "adarshsingh0903/legal-dataset-sc-judgments-india-19502024",
):
    """Download a Kaggle dataset via kagglehub and list every file in it.

    Args:
        dataset_handle: Kaggle dataset handle passed straight to
            ``kagglehub.dataset_download``. Defaults to the Indian Supreme
            Court judgments dataset this script was written for.

    Returns:
        A ``(path, all_files)`` tuple. ``path`` is the value returned by
        ``kagglehub.dataset_download``; ``all_files`` is a list with the
        full path of every file found under ``path``, in sorted traversal
        order.
    """
    print("Downloading SC Judgments dataset from Kaggle...")

    # Downloads to a local cache folder and returns the path.
    path = kagglehub.dataset_download(dataset_handle)
    print(f"Dataset downloaded to: {path}")

    # Walk the download location and record every file we received.
    all_files = []
    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place steers os.walk's (top-down) traversal,
        # and sorting ``files`` fixes the order within a directory, so the
        # listing is the same on every run instead of following the
        # filesystem's arbitrary directory order.
        dirs.sort()
        for file_name in sorted(files):
            full_path = os.path.join(root, file_name)
            all_files.append(full_path)
            print(f" Found: {full_path}")

    print(f"\nTotal files found: {len(all_files)}")
    return path, all_files
if __name__ == "__main__":
    # Script entry point: download the dataset and print the file listing.
    path, files = download_judgments()