Spaces:
Runtime error
Runtime error
| # backend/scraper.py | |
| import os | |
def setup_kaggle_api():
    """Build an authenticated Kaggle API client, or return None without credentials.

    Credentials are resolved in this order:

    1. ``$HOME/.config/kaggle/kaggle.json`` (``/root`` is used when ``HOME``
       is unset). Its ``username`` and ``key`` entries are exported as
       ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``.
    2. ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` already present in the environment.

    Returns:
        An authenticated ``KaggleApi`` instance, or ``None`` when no
        credentials are available or the credentials file cannot be read.
    """
    # Lazy imports: nothing is loaded until this function is actually called.
    import json

    kaggle_json_path = os.path.join(
        os.environ.get('HOME', '/root'), '.config/kaggle/kaggle.json'
    )

    if os.path.exists(kaggle_json_path):
        try:
            with open(kaggle_json_path, encoding="utf-8") as f:
                kaggle_credentials = json.load(f)
            username = kaggle_credentials['username']
            key = kaggle_credentials['key']
            os.environ['KAGGLE_USERNAME'] = username
            os.environ['KAGGLE_KEY'] = key
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # Unreadable, malformed, or incomplete file: report it through the
            # same "no client" signal callers already handle.
            print(f"⚠️ Could not load credentials from {kaggle_json_path}: {exc!r}")
            return None
    elif not (os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY')):
        print(f"⚠️ kaggle.json not found at {kaggle_json_path}")
        print("⚠️ Upload kaggle.json to HF Space or set KAGGLE_USERNAME/KAGGLE_KEY env vars.")
        return None

    # Import only after credentials are in the environment.
    # NOTE(review): the kaggle package is understood to authenticate at import
    # time, so importing it earlier can fail when no credentials exist - confirm
    # against the installed kaggle version.
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    # The client must be authenticated explicitly before any API call.
    api.authenticate()
    return api
def download_dataset():
    """Fetch the e-commerce products dataset from Kaggle into ``data/``.

    Returns:
        ``True`` after the download call (with ``unzip=True``) completes;
        ``False`` when ``setup_kaggle_api()`` yields no client.
    """
    client = setup_kaggle_api()
    if client is None:
        print("❌ Kaggle API not available. Skipping download.")
        return False

    slug = "jackdaug/ecommerce-products-dataset"
    target_dir = "data"

    # Make sure the destination directory exists before the API writes into it.
    os.makedirs(target_dir, exist_ok=True)

    print(f"📥 Downloading dataset: {slug}")
    client.dataset_download_files(slug, path=target_dir, unzip=True)
    print("✅ Dataset downloaded!")
    return True
def run_scraper():
    """Run the scraping step and report whether it succeeded.

    Returns:
        ``True`` when ``download_dataset()`` reports success, ``False``
        otherwise.
    """
    print("🚀 Running scraper...")

    succeeded = download_dataset()
    if not succeeded:
        print("❌ Scraper failed!")
        return False

    print("✅ Scraper completed successfully!")
    return True
# ✅ Do NOT run the scraper at startup.
# NOTE: this branch executes only when the file is run directly as a script;
# importing the module prints nothing and triggers no download.
if __name__ == "__main__":
    for notice in (
        "📦 scraper.py imported (not running automatically)",
        "📦 Call run_scraper() manually or via API",
    ):
        print(notice)