Vincentran committed on
Commit
d939d66
·
1 Parent(s): ce1c6ab

Upload E-Commerce Product Intelligence Dashboard

Browse files
Files changed (3) hide show
  1. app.py +7 -7
  2. backend/scraper.py +7 -7
  3. requirements.txt +2 -1
app.py CHANGED
@@ -11,17 +11,17 @@ logger = logging.getLogger(__name__)
11
 
12
  app = FastAPI(title="E-Commerce Product Intelligence Platform")
13
 
14
- # ==================== Load data (local parquet) ====================
15
- LOCAL_PARQUET_PATH = Path("data/ecommerce_products.parquet")
16
 
17
 
18
  def load_data():
19
- """Load parquet từ local."""
20
- if not LOCAL_PARQUET_PATH.exists():
21
- raise FileNotFoundError(f"Parquet not found: {LOCAL_PARQUET_PATH}")
22
 
23
- logger.info(f"Loading parquet from: {LOCAL_PARQUET_PATH}")
24
- return pd.read_parquet(LOCAL_PARQUET_PATH)
25
 
26
 
27
  # ==================== API Routes ====================
 
11
 
12
  app = FastAPI(title="E-Commerce Product Intelligence Platform")
13
 
14
+ # ==================== Load data (local CSV) ====================
15
+ LOCAL_CSV_PATH = Path("data/ecommerce_products.csv")
16
 
17
 
18
  def load_data():
19
+ """Load CSV từ local."""
20
+ if not LOCAL_CSV_PATH.exists():
21
+ raise FileNotFoundError(f"CSV not found: {LOCAL_CSV_PATH}")
22
 
23
+ logger.info(f"Loading CSV from: {LOCAL_CSV_PATH}")
24
+ return pd.read_csv(LOCAL_CSV_PATH)
25
 
26
 
27
  # ==================== API Routes ====================
backend/scraper.py CHANGED
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
16
 
17
  DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
18
  TEMP_DIR = Path("data/temp_kaggle")
19
- OUTPUT_PARQUET = Path("data/ecommerce_products.parquet")
20
 
21
  os.makedirs("data", exist_ok=True)
22
 
@@ -59,19 +59,19 @@ def load_and_concatenate(csv_files):
59
  return pd.concat(dfs, ignore_index=True)
60
 
61
 
62
- def save_parquet(df: pd.DataFrame):
63
- """Save to Parquet."""
64
- df.to_parquet(OUTPUT_PARQUET, index=False)
65
- logger.info(f"Saved to: {OUTPUT_PARQUET}")
66
 
67
 
68
  def run_scraper():
69
- """Full pipeline: download Kaggle → save parquet."""
70
  try:
71
  download_dataset()
72
  csv_files = find_csv_files(TEMP_DIR)
73
  df = load_and_concatenate(csv_files)
74
- save_parquet(df)
75
  finally:
76
  shutil.rmtree(TEMP_DIR, ignore_errors=True)
77
 
 
16
 
17
  DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
18
  TEMP_DIR = Path("data/temp_kaggle")
19
+ OUTPUT_CSV = Path("data/ecommerce_products.csv")
20
 
21
  os.makedirs("data", exist_ok=True)
22
 
 
59
  return pd.concat(dfs, ignore_index=True)
60
 
61
 
62
+ def save_csv(df: pd.DataFrame):
63
+ """Save to CSV."""
64
+ df.to_csv(OUTPUT_CSV, index=False)
65
+ logger.info(f"Saved to: {OUTPUT_CSV}")
66
 
67
 
68
  def run_scraper():
69
+ """Full pipeline: download Kaggle → save CSV."""
70
  try:
71
  download_dataset()
72
  csv_files = find_csv_files(TEMP_DIR)
73
  df = load_and_concatenate(csv_files)
74
+ save_csv(df)
75
  finally:
76
  shutil.rmtree(TEMP_DIR, ignore_errors=True)
77
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  fastapi==0.109.2
2
  uvicorn
3
  pandas
4
- kaggle
 
 
1
  fastapi==0.109.2
2
  uvicorn
3
  pandas
4
+ kaggle
5
+ pyarrow