Spaces:
Runtime error
Runtime error
File size: 1,712 Bytes
# backend/scraper.py
import os
def setup_kaggle_api():
    """Build an authenticated Kaggle API client, importing ``kaggle`` lazily.

    Credentials are resolved in this order:

    1. ``$HOME/.config/kaggle/kaggle.json`` (``HOME`` falls back to
       ``/root``). Its ``username`` and ``key`` entries are exported as
       ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``, overriding values already set.
    2. ``KAGGLE_USERNAME`` and ``KAGGLE_KEY`` already present (and non-empty)
       in the environment.

    Returns:
        An authenticated ``KaggleApi`` instance, or ``None`` when neither
        credential source is available (two warnings are printed).

    Raises:
        ImportError: If the ``kaggle`` package is not installed (only reached
            once credentials have been found).
        KeyError: If kaggle.json lacks the ``username`` or ``key`` entry.
    """
    import json

    kaggle_json_path = os.path.join(os.environ.get('HOME', '/root'), '.config/kaggle/kaggle.json')

    if os.path.exists(kaggle_json_path):
        with open(kaggle_json_path, encoding="utf-8") as f:
            kaggle_credentials = json.load(f)
        os.environ['KAGGLE_USERNAME'] = kaggle_credentials['username']
        os.environ['KAGGLE_KEY'] = kaggle_credentials['key']
    elif not (os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY')):
        # Neither the credentials file nor the env vars named in the warning
        # below are available, so there is nothing to authenticate with.
        print(f"⚠️ kaggle.json not found at {kaggle_json_path}")
        print("⚠️ Upload kaggle.json to HF Space or set KAGGLE_USERNAME/KAGGLE_KEY env vars.")
        return None

    # Import kaggle ONLY when the function is called, and only after the
    # credentials are in the environment: the package may try to authenticate
    # as soon as it is imported (version-dependent).
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    # A freshly constructed client carries no credentials until authenticate()
    # loads them (here from the env vars set or verified above).
    api.authenticate()
    return api
def download_dataset(dataset_name="jackdaug/ecommerce-products-dataset", path="data"):
    """Download a Kaggle dataset and unzip it into a local directory.

    Args:
        dataset_name: Kaggle dataset slug in ``owner/dataset`` form. Defaults
            to the e-commerce products dataset this module was written for.
        path: Directory that receives the extracted files; created if it does
            not exist. Defaults to ``"data"`` (relative to the working dir).

    Returns:
        ``True`` once the download call returns, ``False`` when
        ``setup_kaggle_api()`` could not provide a client (download skipped).

    Raises:
        Any exception raised by ``api.dataset_download_files`` is propagated
        unchanged; it is not converted into a ``False`` return.
    """
    api = setup_kaggle_api()
    if api is None:
        print("❌ Kaggle API not available. Skipping download.")
        return False

    # Ensure the target directory exists before handing it to the client.
    os.makedirs(path, exist_ok=True)

    print(f"📥 Downloading dataset: {dataset_name}")
    api.dataset_download_files(dataset_name, path=path, unzip=True)
    print("✅ Dataset downloaded!")
    return True
def run_scraper():
    """Run the scraper by downloading the dataset and report the outcome.

    Prints a start banner, delegates to ``download_dataset()``, then prints a
    success or failure line depending on its result.

    Returns:
        ``True`` if ``download_dataset()`` returned a truthy value,
        otherwise ``False``.
    """
    print("🚀 Running scraper...")

    downloaded = download_dataset()
    if not downloaded:
        print("❌ Scraper failed!")
        return False

    print("✅ Scraper completed successfully!")
    return True
# Do NOT run the scraper at startup: executing this file directly only prints
# the two hints below; a download starts solely when run_scraper() is called.
# NOTE(review): the first message says "imported", yet this branch is reached
# only when the file is executed as a script — confirm the intended wording.
if __name__ == "__main__":
    print(
        "📦 scraper.py imported (not running automatically)",
        "📦 Call run_scraper() manually or via API",
        sep="\n",
    )