Spaces:
Running
Running
"""
Download Indian Supreme Court judgments from Kaggle.

Uses kagglehub to download the dataset directly, so no manual zip
extraction is needed.

Output: data/raw_judgments.jsonl

WHY kagglehub? Programmatic download: reproducible, no manual steps.
Anyone cloning this repo can run this script and get the same data.
"""
| import kagglehub | |
| import json | |
| import os | |
| import glob | |
def download_judgments():
    """Download the SC judgments dataset from Kaggle and list its files.

    Fetches the dataset with ``kagglehub.dataset_download`` and walks the
    returned directory, printing the full path of every file found and a
    final total.

    Returns:
        A ``(path, all_files)`` tuple. ``path`` is the value returned by
        ``kagglehub.dataset_download``; ``all_files`` is the list of full
        paths of every file under ``path``, in sorted traversal order.
    """
    print("Downloading SC Judgments dataset from Kaggle...")
    # Downloads to a local cache folder, returns the path
    path = kagglehub.dataset_download("adarshsingh0903/legal-dataset-sc-judgments-india-19502024")
    print(f"Dataset downloaded to: {path}")

    # See what files we got
    all_files = []
    for root, dirs, files in os.walk(path):
        # os.walk yields entries in arbitrary, filesystem-dependent order.
        # Sorting ``dirs`` in place fixes the order in which subdirectories
        # are visited (top-down walk), and sorting ``files`` fixes the order
        # within each directory, so the listing is the same on every machine.
        dirs.sort()
        for file in sorted(files):
            full_path = os.path.join(root, file)
            all_files.append(full_path)
            print(f" Found: {full_path}")

    print(f"\nTotal files found: {len(all_files)}")
    return path, all_files
# Script entry point: fetch the dataset only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    path, files = download_judgments()