| import gradio as gr |
| import os, json, pathlib, tempfile, datetime, shutil, io |
| from typing import List, Dict, Optional |
| from contextlib import redirect_stdout, redirect_stderr |
| from dotenv import load_dotenv |
| load_dotenv() |
|
|
def search_datasets(query: str, max_results: int = 20) -> List[Dict]:
    """
    Return brief metadata for up to `max_results` public datasets
    whose title or description matches `query`.

    Each entry carries the dataset's title, slug (ref), size in MB,
    download count, and vote count.
    """
    matches = api.dataset_list(search=query, max_size=None)
    return [
        {
            "title": ds.title,
            "slug": ds.ref,
            "size_mb": round(ds.total_bytes / 1e6, 2),
            "downloads": ds.download_count,
            "votes": ds.vote_count,
        }
        for ds in matches[:max_results]
    ]
|
|
def list_files(dataset_slug: str) -> List[Dict]:
    """Return the name and size (in MB) of every file in the given dataset."""
    listing = api.dataset_list_files(dataset_slug)
    summaries = []
    for entry in listing.files:
        summaries.append(
            {"name": entry.name, "size_mb": round(entry.total_bytes / 1e6, 2)}
        )
    return summaries
|
|
def download_dataset_file(dataset_slug: str, file_name: str):
    """
    Download one file from a dataset into a fresh temp directory and
    return its local path.

    The Kaggle client sometimes saves the file compressed as
    `<file_name>.zip`; when the plain name is absent, that path is
    returned instead.
    """
    dest = pathlib.Path(tempfile.mkdtemp())
    api.dataset_download_file(dataset_slug, file_name, path=str(dest), quiet=False)

    target = dest / file_name
    if not target.exists():
        target = dest / (file_name + ".zip")
    return str(target)
|
|
| def search_kernels(query: str, max_results: int = 20) -> List[Dict]: |
| |
| kernels = api.kernels_list( |
| search=query, |
| page_size=min(max_results, 20), |
| sort_by="voteCount", |
| ) |
|
|
| out = [] |
| for k in kernels[:max_results]: |
| last_run_raw = getattr(k, "lastRunTime", None) or getattr(k, "updated", None) |
| try: |
| last_run = ( |
| datetime.datetime.fromisoformat(last_run_raw.rstrip("z")) |
| .strftime("%Y-%m-%d %H:%M") if last_run_raw else None |
| ) |
| except Exception: |
| last_run = last_run_raw |
| out.append( |
| { |
| "title": k.title, |
| "ref": k.ref, |
| "language": getattr(k, "language", None), |
| "kernel_type": getattr(k, "kernelType", None), |
| "votes": k.total_votes, |
| "last_run": last_run, |
| } |
| ) |
| return out |
| |
| def download_kernel_notebook(kernel_ref: str) -> str: |
| tmp_dir = tempfile.mkdtemp() |
| api.kernels_pull(kernel_ref, path=tmp_dir, metadata=True, quiet=False) |
|
|
| zip_path = shutil.make_archive( |
| base_name=os.path.join(tmp_dir, "kernel"), |
| format = "zip", |
| root_dir=tmp_dir, |
| ) |
| return zip_path |
|
|
# Tab: keyword search over public Kaggle datasets.
search_iface = gr.Interface(
    fn=search_datasets,
    inputs=[
        gr.Textbox(label="Search term", placeholder="e.g. fashion mnist"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="Datasets"),
    title="Search kaggle Datasets",
    # Fixed user-visible typo: "Resturns" -> "Returns".
    description="Returns a JSON array of dataset metadata.",
)
|
|
# Tab: pull a kernel by ref and hand back the zipped files.
download_kernel_iface = gr.Interface(
    fn=download_kernel_notebook,
    inputs=gr.Textbox(
        label="kernel reference",
        placeholder="e.g. username/notebook-name",
    ),
    # Fixed user-visible typos: "Downlaod" -> "Download", "Downlaods" -> "Downloads".
    outputs=gr.File(label="Download .zip"),
    title="pull kaggle kernel",
    description="Downloads the notebook or script kernel and returns a ZIP archive.",
)
|
|
# Tab: enumerate the files inside one dataset.
list_files_iface = gr.Interface(
    fn=list_files,
    title="List Dataset Files",
    description="Given a dataset slug, returns its file list.",
    inputs=gr.Textbox(
        label="Dataset slug", placeholder="zalando-research/fashionmnist"
    ),
    outputs=gr.JSON(label="Files"),
)
|
|
# Tab: fetch a single dataset file and serve it for download.
download_dataset_iface = gr.Interface(
    fn=download_dataset_file,
    title="Download a File",
    description="Downloads one file from the dataset and returns it.",
    inputs=[
        gr.Textbox(
            label="Dataset slug", placeholder="zalando-research/fashionmnist"
        ),
        gr.Textbox(label="File name", placeholder="fashion-mnist_test.csv"),
    ],
    outputs=gr.File(label="Download file"),
)
|
|
# Tab: keyword search over public Kaggle kernels.
search_kernels_iface = gr.Interface(
    fn=search_kernels,
    title="Search kaggle kernels",
    description="Find notebook or script kernels by keyword.",
    inputs=[
        gr.Textbox(label="search term", placeholder="e.g. computer vision"),
        gr.Slider(1, 50, step=1, value=20, label="Max results"),
    ],
    outputs=gr.JSON(label="kernels"),
)
|
|
# Top-level app: one tab per interface. BUG FIX: tab_names must match the
# interface list one-to-one — the original listed a sixth name
# ("Upload kernel zip") with no corresponding interface.
demo = gr.TabbedInterface(
    [search_iface, list_files_iface, download_dataset_iface,
     search_kernels_iface, download_kernel_iface],
    tab_names=["Search Datasets", "Files", "Download dataset",
               "Search Kernels", "Download kernels"],
)
|
|
| def _bootstrap_kaggle_credentials(): |
| user = os.getenv("KAGGLE_USERNAME") |
| key = os.getenv("KAGGLE_KEY") |
| if not (user and key): |
| raise RuntimeError( |
| "Kaggle credentials not found." |
| "Set KAGGLE_USERNAME and KAGGLE_KEY as env vars or in .env" |
| ) |
| cred_path = pathlib.Path.home() / ".kaggle" / "kaggle.json" |
| if not cred_path.exists(): |
| cred_path.parent.mkdir(exist_ok=True) |
| cred_path.write_text(json.dumps({"username": user, "key": key})) |
| cred_path.chmod(0o600) |
|
|
# Write credentials to ~/.kaggle/kaggle.json BEFORE importing the kaggle
# package — presumably the import below reads credentials at import time,
# which would explain why it is deferred to here rather than placed at the
# top of the file. TODO confirm against the installed kaggle version.
_bootstrap_kaggle_credentials()


from kaggle.api.kaggle_api_extended import KaggleApi

# Module-level client shared by every tool function above.
api = KaggleApi()
api.authenticate()
|
|
if __name__ == "__main__":
    # Listen on all interfaces, port 7860. mcp_server=True additionally
    # exposes the wrapped functions as MCP tools — NOTE(review): requires a
    # Gradio build with MCP support (gradio[mcp]); confirm the pinned version.
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)