File size: 1,712 Bytes
d3157ae
019d08d
d3157ae
ce1c6ab
9058528
d3157ae
aee0892
 
 
 
 
d3157ae
019d08d
d3157ae
 
aee0892
d3157ae
019d08d
d3157ae
 
e48c3a5
d3157ae
 
019d08d
d3157ae
c5afea2
 
3accba3
d3157ae
3accba3
 
d3157ae
 
 
019d08d
d3157ae
 
e48c3a5
d3157ae
 
 
019d08d
d3157ae
019d08d
e48c3a5
019d08d
d3157ae
 
 
 
 
 
 
 
 
019d08d
 
aee0892
019d08d
d3157ae
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# backend/scraper.py

import os


def setup_kaggle_api():
    """Build an authenticated Kaggle API client (lazy import).

    Credentials are resolved in this order:

    1. ``$HOME/.config/kaggle/kaggle.json`` (``HOME`` defaults to ``/root``).
       When the file exists, its ``username`` and ``key`` values are exported
       as ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``, overwriting any existing
       values.
    2. Otherwise, ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` already present in the
       environment, as the warning message below advertises.

    Returns:
        An authenticated ``KaggleApi`` instance, or ``None`` when no usable
        credentials are available (file missing and env vars unset, or the
        file is unreadable / malformed / lacks ``username`` or ``key``).

    Raises:
        ImportError: if the ``kaggle`` package is not installed.
        Whatever ``KaggleApi.authenticate()`` raises when it rejects the
        resolved credentials.
    """
    import json

    kaggle_json_path = os.path.join(os.environ.get('HOME', '/root'), '.config/kaggle/kaggle.json')

    if os.path.exists(kaggle_json_path):
        try:
            with open(kaggle_json_path, encoding="utf-8") as f:
                kaggle_credentials = json.load(f)
            # Look up both values before touching the environment so a
            # half-filled file does not leave a partial configuration behind.
            username = kaggle_credentials['username']
            key = kaggle_credentials['key']
            os.environ['KAGGLE_USERNAME'] = username
            os.environ['KAGGLE_KEY'] = key
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # ValueError covers invalid JSON and bad encodings; TypeError
            # covers a non-object JSON document or non-string values.
            print(f"⚠️  Could not load credentials from {kaggle_json_path}: {exc!r}")
            return None
    elif not (os.environ.get('KAGGLE_USERNAME') and os.environ.get('KAGGLE_KEY')):
        print(f"⚠️  kaggle.json not found at {kaggle_json_path}")
        print("⚠️  Upload kaggle.json to HF Space or set KAGGLE_USERNAME/KAGGLE_KEY env vars.")
        return None

    # Import kaggle ONLY when the function is called, and only after the
    # credentials are in place.
    # NOTE(review): the kaggle package is understood to attempt authentication
    # at import time, so importing earlier can fail - confirm against the
    # installed version.
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    # A freshly constructed client has not loaded any credentials yet.
    api.authenticate()
    return api


def download_dataset(dataset_name="jackdaug/ecommerce-products-dataset", output_dir="data"):
    """Download a Kaggle dataset and unzip it into a local directory.

    Args:
        dataset_name: Dataset identifier passed to
            ``dataset_download_files``. Defaults to the e-commerce products
            dataset this module was written for.
        output_dir: Directory that receives the unzipped files; created if
            it does not exist. Defaults to ``"data"``.

    Returns:
        ``True`` once the download call returns, ``False`` when
        ``setup_kaggle_api()`` could not provide a client.

    Errors raised by the Kaggle client during the download are not caught
    here and propagate to the caller.
    """
    api = setup_kaggle_api()

    if api is None:
        print("❌ Kaggle API not available. Skipping download.")
        return False

    # Create the target directory up front, before the download call.
    os.makedirs(output_dir, exist_ok=True)

    print(f"📥 Downloading dataset: {dataset_name}")
    api.dataset_download_files(dataset_name, path=output_dir, unzip=True)
    print("✅ Dataset downloaded!")

    return True


def run_scraper():
    """Run the scraping step and report the outcome on stdout.

    Delegates the work to ``download_dataset()`` and prints a status line
    before and after it.

    Returns:
        ``True`` when ``download_dataset()`` reports success, else ``False``.
    """
    print("🚀 Running scraper...")

    succeeded = download_dataset()
    if not succeeded:
        print("❌ Scraper failed!")
        return False

    print("✅ Scraper completed successfully!")
    return True


# Do NOT run the scraper at startup: executing this file directly only prints
# the notices below; nothing is downloaded until run_scraper() is called.
if __name__ == "__main__":
    for notice in (
        "📦 scraper.py imported (not running automatically)",
        "📦 Call run_scraper() manually or via API",
    ):
        print(notice)