Spaces:
Runtime error
Runtime error
Commit ·
c5afea2
1
Parent(s): 9058528
Upload E-Commerce Product Intelligence Dashboard
Browse files- backend/scraper.py +12 -15
backend/scraper.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
import logging
|
| 3 |
import pandas as pd
|
| 4 |
from pathlib import Path
|
| 5 |
-
|
| 6 |
import shutil
|
| 7 |
|
| 8 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -15,24 +15,21 @@ OUTPUT_PARQUET = Path("data/ecommerce_products.parquet")
|
|
| 15 |
os.makedirs("data", exist_ok=True)
|
| 16 |
|
| 17 |
|
| 18 |
-
def setup_kaggle_api():
|
| 19 |
-
"""Auth Kaggle API."""
|
| 20 |
-
token = os.getenv("KAGGLE_API_TOKEN")
|
| 21 |
-
if not token:
|
| 22 |
-
raise ValueError("KAGGLE_API_TOKEN environment variable not set!")
|
| 23 |
-
|
| 24 |
-
api = KaggleApi()
|
| 25 |
-
api.api_token = token
|
| 26 |
-
return api
|
| 27 |
-
|
| 28 |
-
|
| 29 |
def download_dataset():
|
| 30 |
-
"""Download dataset từ Kaggle."""
|
| 31 |
-
api = setup_kaggle_api()
|
| 32 |
TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
| 33 |
|
| 34 |
logger.info(f"Downloading dataset: {DATASET_SLUG}")
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
logger.info("Download complete.")
|
| 37 |
return TEMP_DIR
|
| 38 |
|
|
|
|
| 2 |
import logging
|
| 3 |
import pandas as pd
|
| 4 |
from pathlib import Path
|
| 5 |
+
import subprocess
|
| 6 |
import shutil
|
| 7 |
|
| 8 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 15 |
os.makedirs("data", exist_ok=True)
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def download_dataset():
|
| 19 |
+
"""Download dataset từ Kaggle bằng CLI."""
|
|
|
|
| 20 |
TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
| 21 |
|
| 22 |
logger.info(f"Downloading dataset: {DATASET_SLUG}")
|
| 23 |
+
|
| 24 |
+
result = subprocess.run(
|
| 25 |
+
["kaggle", "datasets", "download", "-d", DATASET_SLUG, "-p", str(TEMP_DIR), "-u"],
|
| 26 |
+
capture_output=True, text=True
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
if result.returncode != 0:
|
| 30 |
+
logger.error(f"Download failed: {result.stderr}")
|
| 31 |
+
raise RuntimeError(f"Kaggle download failed: {result.stderr}")
|
| 32 |
+
|
| 33 |
logger.info("Download complete.")
|
| 34 |
return TEMP_DIR
|
| 35 |
|