# Spaces: CaffeinatedCoding / nyayasetu (Running) - File size: 1,055 Bytes
"""
Download Indian Supreme Court judgments from Kaggle.
Uses kagglehub to download directly - no manual zip extraction needed.
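Intended output: data/raw_judgments.jsonl
NOTE: the code in this script only downloads the dataset and lists the
downloaded files; it does not itself write data/raw_judgments.jsonl.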

WHY kagglehub? Programmatic download - reproducible, no manual steps.
Anyone cloning this repo can run this script and get the same data.
"""

import kagglehub
import json
import os
import glob

def download_judgments():
    """Download the SC judgments dataset via kagglehub and list its files.

    Progress, every discovered file path, and the total file count are
    printed to stdout.

    Returns:
        A ``(path, all_files)`` tuple: ``path`` is the value returned by
        ``kagglehub.dataset_download`` and ``all_files`` is a list with the
        full path of every file found by walking ``path`` recursively.
    """
    print("Downloading SC Judgments dataset from Kaggle...")

    # kagglehub stores the dataset in a local cache folder and hands back
    # the location of that folder.
    dataset_dir = kagglehub.dataset_download(
        "adarshsingh0903/legal-dataset-sc-judgments-india-19502024"
    )
    print(f"Dataset downloaded to: {dataset_dir}")

    # Collect every file below the download folder, in os.walk order.
    discovered = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dataset_dir)
        for name in names
    ]
    for found in discovered:
        print(f"  Found: {found}")

    print(f"\nTotal files found: {len(discovered)}")
    return dataset_dir, discovered

if __name__ == "__main__":
    # Script entry point: run the download and keep the returned dataset
    # path and file list bound at module level.
    path, files = download_judgments()