# backend/scraper.py
"""Download the e-commerce products dataset from Kaggle on demand.

Nothing in this module runs at import time; call ``run_scraper()`` (or
``download_dataset()``) explicitly, e.g. from an API endpoint.
"""
import os


def _read_kaggle_json(kaggle_json_path):
    """Return ``(username, key)`` from *kaggle_json_path*, or ``None``.

    ``None`` is returned when the file is missing, unreadable, not valid
    JSON, or lacks a non-empty ``username``/``key`` entry.
    """
    import json

    try:
        with open(kaggle_json_path, encoding="utf-8") as f:
            credentials = json.load(f)
        username = credentials["username"]
        key = credentials["key"]
    except FileNotFoundError:
        return None
    except (OSError, ValueError, KeyError, TypeError) as err:
        # ValueError covers json.JSONDecodeError; TypeError covers a JSON
        # document that is not an object (e.g. a list or a bare string).
        print(f"⚠️ Could not read credentials from {kaggle_json_path}: {err!r}")
        return None
    if not username or not key:
        print(f"⚠️ Empty username/key in {kaggle_json_path}")
        return None
    return username, key


def setup_kaggle_api():
    """Create an authenticated Kaggle API client (lazy import).

    Credentials are resolved in this order:

    1. ``$HOME/.config/kaggle/kaggle.json`` -- when present and valid, its
       values are exported to ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``.
    2. ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` already set in the environment.

    Returns:
        An authenticated ``KaggleApi`` instance, or ``None`` when no usable
        credentials were found or the ``kaggle`` package is not installed.
    """
    kaggle_json_path = os.path.join(
        os.environ.get('HOME', '/root'), '.config/kaggle/kaggle.json'
    )

    file_credentials = _read_kaggle_json(kaggle_json_path)
    if file_credentials is not None:
        os.environ['KAGGLE_USERNAME'], os.environ['KAGGLE_KEY'] = file_credentials
    elif not (os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY')):
        print(f"⚠️ kaggle.json not found at {kaggle_json_path}")
        print("⚠️ Upload kaggle.json to HF Space or set KAGGLE_USERNAME/KAGGLE_KEY env vars.")
        return None

    # Import kaggle ONLY once credentials are in the environment: the package
    # may try to authenticate as a side effect of being imported.
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
    except ImportError:
        print("⚠️ kaggle package is not installed.")
        return None

    api = KaggleApi()
    # A freshly constructed client has no credentials loaded until
    # authenticate() is called.
    api.authenticate()
    return api


def download_dataset(dataset_name="jackdaug/ecommerce-products-dataset", path="data"):
    """Download and unzip a Kaggle dataset.

    Args:
        dataset_name: Kaggle dataset slug in ``owner/name`` form.
        path: Directory that receives the unzipped files; created if missing.

    Returns:
        ``True`` once the download call has finished, ``False`` when no
        Kaggle API client could be created. Errors raised by the download
        itself are not caught here and propagate to the caller.
    """
    api = setup_kaggle_api()
    if api is None:
        print("❌ Kaggle API not available. Skipping download.")
        return False

    os.makedirs(path, exist_ok=True)

    print(f"📥 Downloading dataset: {dataset_name}")
    api.dataset_download_files(dataset_name, path=path, unzip=True)
    print("✅ Dataset downloaded!")
    return True


def run_scraper():
    """Run the scraper and report the outcome.

    Returns:
        ``True`` when the dataset was downloaded, ``False`` otherwise.
    """
    print("🚀 Running scraper...")
    if not download_dataset():
        print("❌ Scraper failed!")
        return False
    print("✅ Scraper completed successfully!")
    return True


# Do NOT run the scraper on startup; it must be triggered explicitly.
if __name__ == "__main__":
    print("📦 scraper.py imported (not running automatically)")
    print("📦 Call run_scraper() manually or via API")