Vincentran's picture
Upload E-Commerce Product Intelligence Dashboard (frontend + backend)
aee0892
# backend/scraper.py
import os
def setup_kaggle_api():
    """Build an authenticated Kaggle API client, importing ``kaggle`` lazily.

    Credentials are resolved in this order:

    1. ``$HOME/.config/kaggle/kaggle.json`` (``HOME`` defaults to ``/root``).
       When the file exists, its ``username`` and ``key`` values are exported
       as ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``, overriding any existing values.
    2. Otherwise, ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` already present in the
       environment.

    Returns:
        An authenticated ``KaggleApi`` instance, or ``None`` when no usable
        credentials were found (a warning is printed in that case).

    Raises:
        ImportError: if credentials were found but the ``kaggle`` package is
            not installed.
    """
    import json

    kaggle_json_path = os.path.join(
        os.environ.get('HOME', '/root'), '.config/kaggle/kaggle.json'
    )

    if os.path.exists(kaggle_json_path):
        try:
            with open(kaggle_json_path, encoding="utf-8") as f:
                kaggle_credentials = json.load(f)
            username = kaggle_credentials['username']
            key = kaggle_credentials['key']
            os.environ['KAGGLE_USERNAME'] = username
            os.environ['KAGGLE_KEY'] = key
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # Unreadable file, invalid JSON, missing keys or non-string values:
            # report it and take the same "not available" path callers handle.
            print(f"⚠️ Could not load Kaggle credentials from {kaggle_json_path}: {exc!r}")
            return None
    elif not (os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY')):
        print(f"⚠️ kaggle.json not found at {kaggle_json_path}")
        print("⚠️ Upload kaggle.json to HF Space or set KAGGLE_USERNAME/KAGGLE_KEY env vars.")
        return None

    # Import kaggle ONLY when this function is called, and only after the
    # credentials are in the environment: importing the package may itself
    # attempt authentication and fail when none are configured.
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    # A freshly constructed client carries no credentials until authenticate()
    # loads them from the environment / config file.
    api.authenticate()
    return api
def download_dataset():
    """Fetch the e-commerce products dataset from Kaggle into ``data/``.

    Obtains a client from ``setup_kaggle_api()``; when that yields ``None``
    the download is skipped.

    Returns:
        ``True`` after the download call has completed, ``False`` when no
        Kaggle client was available.
    """
    client = setup_kaggle_api()
    if client is not None:
        slug = "jackdaug/ecommerce-products-dataset"
        # Make sure the target directory exists before Kaggle writes into it.
        os.makedirs("data", exist_ok=True)
        print(f"📥 Downloading dataset: {slug}")
        client.dataset_download_files(slug, path="data", unzip=True)
        print("✅ Dataset downloaded!")
        return True

    print("❌ Kaggle API not available. Skipping download.")
    return False
def run_scraper():
    """Run the scraper by downloading the dataset and reporting the outcome.

    Returns:
        ``True`` when ``download_dataset()`` reported success, ``False``
        otherwise.
    """
    print("🚀 Running scraper...")
    succeeded = download_dataset()
    if not succeeded:
        print("❌ Scraper failed!")
        return False

    print("✅ Scraper completed successfully!")
    return True
# ✅ Do NOT run the scraper at startup: executing this file directly only
# prints a hint; run_scraper() has to be called explicitly.
if __name__ == "__main__":
    for notice in (
        "📦 scraper.py imported (not running automatically)",
        "📦 Call run_scraper() manually or via API",
    ):
        print(notice)