Spaces:
Sleeping
Sleeping
import asyncio
import json
import os

import requests
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
# FastAPI application instance for this module.
app = FastAPI()
# Cookie jars that passed validation, keyed by account id (the numeric suffix
# of the KAGGLE_COOKIES_<n> environment variable each one was read from).
ALL_COOKIES = {}
print("Loading Kaggle account cookies from environment...")

# An account is only registered when every one of these cookies is present.
REQUIRED_COOKIES = [
    'ka_sessionid',
    '__Host-KAGGLEID',
    'CLIENT-TOKEN',
    'XSRF-TOKEN',
    'CSRF-TOKEN',
    'build-hash',
]

# Netscape cookie files mark HttpOnly cookies by prefixing the record with
# this token; such lines are real cookie records, not comments.
_HTTPONLY_PREFIX = "#HttpOnly_"
_ENV_PREFIX = "KAGGLE_COOKIES_"


def parse_netscape_cookies(content, wanted):
    """Extract the cookies named in *wanted* from Netscape cookie-file text.

    Args:
        content: Full text of a Netscape-format cookie file (seven
            tab-separated fields per record).
        wanted: Container of cookie names to keep; other cookies are ignored.

    Returns:
        Dict mapping each wanted cookie name found to its value. When a name
        occurs more than once the last occurrence wins.
    """
    found = {}
    for line in content.splitlines():
        if line.startswith(_HTTPONLY_PREFIX):
            # HttpOnly record: drop the marker and parse it like any other.
            line = line[len(_HTTPONLY_PREFIX):]
        elif not line.strip() or line.startswith('#'):
            continue  # blank line or genuine comment
        # Split the unstripped line so an empty trailing value is preserved.
        fields = line.split('\t')
        if len(fields) != 7:
            continue
        name, cookie_value = fields[5], fields[6]
        if name in wanted:
            found[name] = cookie_value
    return found


def load_account_cookies(environ):
    """Build the account-id -> cookies mapping from an environment mapping.

    Only variables whose name starts with ``KAGGLE_COOKIES_`` and ends in
    ``_<digits>`` are considered; the digits become the account id. Accounts
    missing any cookie from ``REQUIRED_COOKIES`` are reported and skipped.

    Args:
        environ: Mapping of variable names to values (e.g. ``os.environ``).

    Returns:
        Dict of account id (str) to a dict of the required cookies.
    """
    accounts = {}
    for key, value in environ.items():
        account_id = key.rsplit('_', 1)[-1]
        if not (key.startswith(_ENV_PREFIX) and account_id.isdigit()):
            continue
        cookies = parse_netscape_cookies(value, REQUIRED_COOKIES)
        missing = [c for c in REQUIRED_COOKIES if c not in cookies]
        if missing:
            print(f"Account '{account_id}' skipped; missing required cookies: {missing}")
            continue
        accounts[account_id] = cookies
        print(f"Account '{account_id}': all required cookies loaded.")
    return accounts


ALL_COOKIES.update(load_account_cookies(os.environ))
if not ALL_COOKIES:
    print("Warning: no usable KAGGLE_COOKIES_<n> environment variable found; no accounts are available.")
async def get_file_list(request_body: dict):
    """List the video files of a Kaggle dataset or model-instance version.

    Args:
        request_body: Mapping with ``accountId`` (looked up in
            ``ALL_COOKIES``), ``resourceType`` (``'dataset'``; any other value
            is sent as a model instance) and ``metadata`` (``versionId`` plus
            ``id`` for datasets or ``instanceVersionId`` otherwise).

    Returns:
        ``{"files": [{"name": ..., "firestorePath": ...}, ...]}`` restricted
        to names ending in .mp4, .mkv, .mov or .avi (case-insensitive).

    Raises:
        HTTPException: 404 for an unknown account, 400 for missing metadata,
            504 when Kaggle does not answer in time, Kaggle's own status code
            when it answers with an HTTP error, and 500 for anything else.
    """
    account_id = str(request_body.get("accountId"))
    resource_type = request_body.get("resourceType")
    metadata = request_body.get("metadata")

    # Validation happens outside the try block below so these HTTPExceptions
    # are not caught by its generic handler and rewritten as 500s.
    cookies = ALL_COOKIES.get(account_id)
    if not cookies:
        raise HTTPException(status_code=404, detail=f"Account ID '{account_id}' not found or cookies not set.")

    resource_key, metadata_key = (
        ("datasetId", "id") if resource_type == 'dataset'
        else ("modelInstanceVersionId", "instanceVersionId")
    )
    if not isinstance(metadata, dict):
        raise HTTPException(status_code=400, detail="'metadata' must be an object.")
    missing = [k for k in ("versionId", metadata_key) if k not in metadata]
    if missing:
        raise HTTPException(status_code=400, detail=f"'metadata' is missing required fields: {missing}")

    api_url = "https://www.kaggle.com/api/i/datasets.databundles.DatabundleService/GetDatabundleExternal"
    body = {
        "verificationInfo": {
            "databundleVersionId": metadata["versionId"],
            resource_key: metadata[metadata_key],
        }
    }
    headers = {
        "accept": "application/json", "content-type": "application/json",
        "origin": "https://www.kaggle.com", "referer": "https://www.kaggle.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "x-kaggle-build-version": cookies["build-hash"], "x-xsrf-token": cookies["XSRF-TOKEN"],
    }
    video_suffixes = ('.mp4', '.mkv', '.mov', '.avi')
    timeout_seconds = 30

    def send():
        return requests.post(api_url, headers=headers, cookies=cookies, json=body, timeout=timeout_seconds)

    try:
        # requests is blocking; run it in the default executor so the event
        # loop stays free to serve other requests in the meantime.
        response = await asyncio.get_running_loop().run_in_executor(None, send)
        response.raise_for_status()
        data = response.json()
        files = data.get('dataSource', {}).get('databundleVersion', {}).get('filesetInfo', {}).get('files', {}).get('children', [])
        file_list = [
            {"name": f['name'], "firestorePath": f['path']}
            for f in files
            if f['name'].lower().endswith(video_suffixes)
        ]
    except requests.exceptions.HTTPError as err:
        error_detail = f"{str(err)} - Response: {err.response.text}"
        raise HTTPException(status_code=err.response.status_code, detail=error_detail) from err
    except requests.exceptions.Timeout as err:
        raise HTTPException(status_code=504, detail=f"Kaggle did not respond within {timeout_seconds} seconds.") from err
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"files": file_list}
async def get_download_url(request_body: dict):
    """Resolve a direct download URL for one file of a Kaggle resource.

    Args:
        request_body: Mapping with ``accountId`` (looked up in
            ``ALL_COOKIES``), ``resourceType`` (``'dataset'``; any other value
            is sent as a model instance), ``metadata`` (``versionId`` plus
            ``id`` for datasets or ``instanceVersionId`` otherwise) and
            ``file`` (an object carrying ``firestorePath``).

    Returns:
        ``{"direct_url": <url>}`` taken from ``dataView.dataUrl.url`` of the
        Kaggle response.

    Raises:
        HTTPException: 404 for an unknown account, 400 for missing request
            fields, 504 when Kaggle does not answer in time, Kaggle's own
            status code when it answers with an HTTP error, 502 when the
            response carries no download URL, and 500 for anything else.
    """
    account_id = str(request_body.get("accountId"))
    resource_type = request_body.get("resourceType")
    metadata = request_body.get("metadata")
    file_info = request_body.get("file")

    # Validation happens outside the try block below so these HTTPExceptions
    # are not caught by its generic handler and rewritten as 500s.
    cookies = ALL_COOKIES.get(account_id)
    if not cookies:
        raise HTTPException(status_code=404, detail=f"Account ID '{account_id}' not found or cookies not set.")

    resource_key, metadata_key = (
        ("datasetId", "id") if resource_type == 'dataset'
        else ("modelInstanceVersionId", "instanceVersionId")
    )
    if not isinstance(metadata, dict):
        raise HTTPException(status_code=400, detail="'metadata' must be an object.")
    missing = [k for k in ("versionId", metadata_key) if k not in metadata]
    if missing:
        raise HTTPException(status_code=400, detail=f"'metadata' is missing required fields: {missing}")
    if not isinstance(file_info, dict) or "firestorePath" not in file_info:
        raise HTTPException(status_code=400, detail="'file' must be an object with a 'firestorePath' field.")

    api_url = "https://www.kaggle.com/api/i/datasets.DatasetService/GetDataViewExternal"
    body = {
        "verificationInfo": {
            resource_key: metadata[metadata_key],
            "databundleVersionId": metadata["versionId"],
        },
        "firestorePath": file_info["firestorePath"],
    }
    headers = {
        "accept": "application/json", "content-type": "application/json",
        "origin": "https://www.kaggle.com", "referer": "https://www.kaggle.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "x-kaggle-build-version": cookies["build-hash"], "x-xsrf-token": cookies["XSRF-TOKEN"],
    }
    timeout_seconds = 30

    def send():
        return requests.post(api_url, headers=headers, cookies=cookies, json=body, timeout=timeout_seconds)

    try:
        # requests is blocking; run it in the default executor so the event
        # loop stays free to serve other requests in the meantime.
        response = await asyncio.get_running_loop().run_in_executor(None, send)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as err:
        error_detail = f"{str(err)} - Response: {err.response.text}"
        raise HTTPException(status_code=err.response.status_code, detail=error_detail) from err
    except requests.exceptions.Timeout as err:
        raise HTTPException(status_code=504, detail=f"Kaggle did not respond within {timeout_seconds} seconds.") from err
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    try:
        direct_url = data['dataView']['dataUrl']['url']
    except (KeyError, TypeError) as err:
        # Kaggle answered 2xx but without the expected payload shape.
        raise HTTPException(status_code=502, detail="Kaggle response did not include a download URL.") from err
    return {"direct_url": direct_url}
async def read_index():
    """Return the ``index.html`` file as a ``FileResponse``.

    The path is given as a bare relative name, so it is presumably resolved
    against the process working directory (confirm for the deployment).
    """
    index_page = 'index.html'
    return FileResponse(index_page)