File size: 1,055 Bytes
0214972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
Download Indian Supreme Court judgments from Kaggle.
Uses kagglehub to download directly - no manual zip extraction needed.
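Intended output: data/raw_judgments.jsonl
NOTE: as written here, this script only downloads the dataset and lists the
files it contains; it does not itself write data/raw_judgments.jsonl.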

WHY kagglehub? Programmatic download - reproducible, no manual steps.
Anyone cloning this repo can run this script and get the same data.
"""

import kagglehub
import json
import os
import glob

def download_judgments():
    """Download the SC judgments dataset from Kaggle and list its files.

    The dataset is fetched with ``kagglehub.dataset_download`` and the
    returned location is then walked recursively, printing every file found.

    Returns:
        A ``(path, all_files)`` tuple. ``path`` is the value returned by
        ``kagglehub.dataset_download``; ``all_files`` is a list holding the
        full path of every file found under it, in sorted traversal order.
    """
    print("Downloading SC Judgments dataset from Kaggle...")

    # Downloads to a local cache folder, returns the path
    path = kagglehub.dataset_download("adarshsingh0903/legal-dataset-sc-judgments-india-19502024")
    print(f"Dataset downloaded to: {path}")

    # See what files we got. os.walk yields entries in arbitrary
    # (filesystem-dependent) order, so sort the directory names in place and
    # sort the file names: the listing is then the same on every run.
    all_files = []
    for root, dirs, files in os.walk(path):
        dirs.sort()  # in-place edit controls the order os.walk descends in
        for name in sorted(files):
            full_path = os.path.join(root, name)
            all_files.append(full_path)
            print(f"  Found: {full_path}")

    print(f"\nTotal files found: {len(all_files)}")
    return path, all_files

if __name__ == "__main__":
    # Script entry point: the download runs only when this file is executed
    # directly, not when it is imported.
    downloaded = download_judgments()
    path, files = downloaded