Spaces:
Runtime error
Commit · d939d66
1
Parent(s): ce1c6ab
Upload E-Commerce Product Intelligence Dashboard
Browse files
- app.py +7 -7
- backend/scraper.py +7 -7
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -11,17 +11,17 @@ logger = logging.getLogger(__name__)
|
|
| 11 |
|
| 12 |
app = FastAPI(title="E-Commerce Product Intelligence Platform")
|
| 13 |
|
| 14 |
-
# ==================== Load data (local
|
| 15 |
-
|
| 16 |
|
| 17 |
|
| 18 |
def load_data():
|
| 19 |
-
"""Load
|
| 20 |
-
if not
|
| 21 |
-
raise FileNotFoundError(f"
|
| 22 |
|
| 23 |
-
logger.info(f"Loading
|
| 24 |
-
return pd.
|
| 25 |
|
| 26 |
|
| 27 |
# ==================== API Routes ====================
|
|
|
|
| 11 |
|
| 12 |
app = FastAPI(title="E-Commerce Product Intelligence Platform")
|
| 13 |
|
| 14 |
+
# ==================== Load data (local CSV) ====================
|
| 15 |
+
LOCAL_CSV_PATH = Path("data/ecommerce_products.csv")
|
| 16 |
|
| 17 |
|
| 18 |
def load_data():
|
| 19 |
+
"""Load CSV từ local."""
|
| 20 |
+
if not LOCAL_CSV_PATH.exists():
|
| 21 |
+
raise FileNotFoundError(f"CSV not found: {LOCAL_CSV_PATH}")
|
| 22 |
|
| 23 |
+
logger.info(f"Loading CSV from: {LOCAL_CSV_PATH}")
|
| 24 |
+
return pd.read_csv(LOCAL_CSV_PATH)
|
| 25 |
|
| 26 |
|
| 27 |
# ==================== API Routes ====================
|
backend/scraper.py
CHANGED
|
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
|
| 16 |
|
| 17 |
DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
|
| 18 |
TEMP_DIR = Path("data/temp_kaggle")
|
| 19 |
-
|
| 20 |
|
| 21 |
os.makedirs("data", exist_ok=True)
|
| 22 |
|
|
@@ -59,19 +59,19 @@ def load_and_concatenate(csv_files):
|
|
| 59 |
return pd.concat(dfs, ignore_index=True)
|
| 60 |
|
| 61 |
|
| 62 |
-
def
|
| 63 |
-
"""Save to
|
| 64 |
-
df.
|
| 65 |
-
logger.info(f"Saved to: {
|
| 66 |
|
| 67 |
|
| 68 |
def run_scraper():
|
| 69 |
-
"""Full pipeline: download Kaggle → save
|
| 70 |
try:
|
| 71 |
download_dataset()
|
| 72 |
csv_files = find_csv_files(TEMP_DIR)
|
| 73 |
df = load_and_concatenate(csv_files)
|
| 74 |
-
|
| 75 |
finally:
|
| 76 |
shutil.rmtree(TEMP_DIR, ignore_errors=True)
|
| 77 |
|
|
|
|
| 16 |
|
| 17 |
DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
|
| 18 |
TEMP_DIR = Path("data/temp_kaggle")
|
| 19 |
+
OUTPUT_CSV = Path("data/ecommerce_products.csv")
|
| 20 |
|
| 21 |
os.makedirs("data", exist_ok=True)
|
| 22 |
|
|
|
|
| 59 |
return pd.concat(dfs, ignore_index=True)
|
| 60 |
|
| 61 |
|
| 62 |
+
def save_csv(df: pd.DataFrame):
|
| 63 |
+
"""Save to CSV."""
|
| 64 |
+
df.to_csv(OUTPUT_CSV, index=False)
|
| 65 |
+
logger.info(f"Saved to: {OUTPUT_CSV}")
|
| 66 |
|
| 67 |
|
| 68 |
def run_scraper():
|
| 69 |
+
"""Full pipeline: download Kaggle → save CSV."""
|
| 70 |
try:
|
| 71 |
download_dataset()
|
| 72 |
csv_files = find_csv_files(TEMP_DIR)
|
| 73 |
df = load_and_concatenate(csv_files)
|
| 74 |
+
save_csv(df)
|
| 75 |
finally:
|
| 76 |
shutil.rmtree(TEMP_DIR, ignore_errors=True)
|
| 77 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
fastapi==0.109.2
|
| 2 |
uvicorn
|
| 3 |
pandas
|
| 4 |
-
kaggle
|
|
|
|
|
|
| 1 |
fastapi==0.109.2
|
| 2 |
uvicorn
|
| 3 |
pandas
|
| 4 |
+
kaggle
|
| 5 |
+
pyarrow
|