Vincentran committed on
Commit
2c7b3a2
·
1 Parent(s): 95adcbf

Upload E-Commerce Product Intelligence Dashboard

Browse files
Files changed (5) hide show
  1. app.py +67 -13
  2. backend/__init__.py +0 -0
  3. backend/api.py +0 -50
  4. backend/scheduler.py +0 -35
  5. backend/scraper.py +0 -7
app.py CHANGED
@@ -1,27 +1,81 @@
1
  import logging
2
- from contextlib import asynccontextmanager
3
  from fastapi import FastAPI
4
  from fastapi.staticfiles import StaticFiles
5
- from fastapi.responses import HTMLResponse
6
  from pathlib import Path
7
- from backend.api import app as api_app
8
- from backend.scheduler import init_scheduler, shutdown_scheduler
9
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- @asynccontextmanager
14
- async def lifespan(app: FastAPI):
15
- logger.info("Starting app...")
16
- init_scheduler()
17
- yield
18
- logger.info("Shutting down app...")
19
- shutdown_scheduler()
20
 
21
- app = FastAPI(title="E-Commerce Product Intelligence Platform", lifespan=lifespan)
 
 
 
 
 
 
22
 
23
- app.include_router(api_app)
 
 
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  frontend_dir = Path("frontend")
26
  if frontend_dir.exists():
27
  app.mount("/", StaticFiles(directory=str(frontend), html=True), name="frontend")
 
1
  import logging
2
+ import pandas as pd
3
  from fastapi import FastAPI
4
  from fastapi.staticfiles import StaticFiles
5
+ from fastapi.responses import HTMLResponse, JSONResponse
6
  from pathlib import Path
 
 
7
 
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
+ app = FastAPI(title="E-Commerce Product Intelligence Platform")
 
 
 
 
 
 
12
 
13
+ # ==================== Load data ====================
14
+ def load_data():
15
+ """Load parquet."""
16
+ parquet_path = Path("data/ecommerce_products.parquet")
17
+ if not parquet_path.exists():
18
+ raise FileNotFoundError(f"Parquet not found: {parquet_path}")
19
+ return pd.read_parquet(parquet_path)
20
 
21
+ # ==================== API Routes ====================
22
+ @app.get("/")
23
+ def root():
24
+ return {"status": "E-Commerce Product Intelligence API is running"}
25
 
26
+ @app.get("/data")
27
+ def get_data():
28
+ df = load_data()
29
+ return df.head(200).to_dict("records")
30
+
31
+ @app.get("/stats/categories")
32
+ def stats_categories():
33
+ df = load_data()
34
+ return df["category"].value_counts().head(10).to_dict()
35
+
36
+ @app.get("/stats/brands")
37
+ def stats_brands():
38
+ df = load_data()
39
+ return df["brand"].value_counts().head(10).to_dict()
40
+
41
+ @app.get("/stats/price")
42
+ def stats_price():
43
+ df = load_data()
44
+ return df.groupby("category")["price"].agg(["mean", "median", "min", "max", "count"]).reset_index().to_dict("records")
45
+
46
+ @app.get("/stats/rating")
47
+ def stats_rating():
48
+ df = load_data()
49
+ return df.groupby("category")["rating"].agg(["mean", "median", "min", "max", "count"]).reset_index().to_dict("records")
50
+
51
+ @app.get("/insights")
52
+ def insights():
53
+ df = load_data()
54
+ return JSONResponse(content={
55
+ "total_products": len(df),
56
+ "categories": df["category"].nunique(),
57
+ "brands": df["brand"].nunique(),
58
+ "avg_price": df["price"].mean(),
59
+ "avg_rating": df["rating"].mean(),
60
+ })
61
+
62
+ @app.get("/search")
63
+ def search(query: str):
64
+ df = load_data()
65
+ q = query.lower()
66
+ mask = (
67
+ df["title"].str.contains(q, case=False, na=False) |
68
+ df["description"].str.contains(q, case=False, na=False)
69
+ )
70
+ return df[mask].head(100).to_dict("records")
71
+
72
+ @app.get("/recommend")
73
+ def recommend(category: str):
74
+ df = load_data()
75
+ subset = df[df["category"] == category]
76
+ return subset.sort_values("rating", ascending=False).head(10).to_dict("records")
77
+
78
+ # ==================== Frontend ====================
79
  frontend_dir = Path("frontend")
80
  if frontend_dir.exists():
81
  app.mount("/", StaticFiles(directory=str(frontend), html=True), name="frontend")
backend/__init__.py ADDED
File without changes
backend/api.py DELETED
@@ -1,50 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.responses import JSONResponse
3
- from backend.services import load_data, get_top_categories, get_top_brands, get_price_stats, get_rating_stats
4
- from backend.agent import generate_insights, semantic_search, recommend_by_category
5
-
6
- app = FastAPI(title="E-Commerce Product Intelligence API")
7
-
8
- @app.get("/")
9
- def root():
10
- return {"status": "E-Commerce Product Intelligence API is running"}
11
-
12
- @app.get("/data")
13
- def get_data():
14
- df = load_data()
15
- return df.head(200).to_dict("records")
16
-
17
- @app.get("/stats/categories")
18
- def stats_categories():
19
- df = load_data()
20
- return get_top_categories(df, n=10).to_dict()
21
-
22
- @app.get("/stats/brands")
23
- def stats_brands():
24
- df = load_data()
25
- return get_top_brands(df, n=10).to_dict()
26
-
27
- @app.get("/stats/price")
28
- def stats_price():
29
- df = load_data()
30
- return get_price_stats(df).to_dict("records")
31
-
32
- @app.get("/stats/rating")
33
- def stats_rating():
34
- df = load_data()
35
- return get_rating_stats(df).to_dict("records")
36
-
37
- @app.get("/insights")
38
- def insights():
39
- df = load_data()
40
- return JSONResponse(content=generate_insights(df))
41
-
42
- @app.get("/search")
43
- def search(query: str):
44
- df = load_data()
45
- return semantic_search(query, df).head(100).to_dict("records")
46
-
47
- @app.get("/recommend")
48
- def recommend(category: str):
49
- df = load_data()
50
- return recommend_by_category(df, category).to_dict("records")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/scheduler.py DELETED
@@ -1,35 +0,0 @@
1
- from apscheduler.schedulers.background import BackgroundScheduler
2
- from apscheduler.triggers.cron import CronTrigger
3
- import logging
4
-
5
- logger = logging.getLogger("scheduler")
6
-
7
- scheduler = BackgroundScheduler()
8
-
9
-
10
- def scheduled_scraper_job():
11
- """Chạy scraper trong job."""
12
- logger.info("Running scheduled scraper job...")
13
- try:
14
- # Import dynamic khi cần
15
- from backend.scraper import run_scraper
16
- run_scraper()
17
- logger.info("Scheduled scraper job completed.")
18
- except Exception as e:
19
- logger.error(f"Scheduled scraper job failed: {e}")
20
-
21
-
22
- def init_scheduler():
23
- """Schedule scraper to run daily at 02:00 AM."""
24
- scheduler.add_job(
25
- scheduled_scraper_job,
26
- CronTrigger(hour=2, minute=0),
27
- id="daily_scraper",
28
- replace_existing=True
29
- )
30
- logger.info("Scheduled scraper job added: daily at 02:00 AM")
31
- scheduler.start()
32
-
33
-
34
- def shutdown_scheduler():
35
- scheduler.shutdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/scraper.py CHANGED
@@ -4,9 +4,6 @@ from pathlib import Path
4
  import pandas as pd
5
  import shutil
6
 
7
- # Không import KaggleApi ở đây!
8
- # Sẽ import và authenticate khi cần
9
-
10
  DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
11
  TEMP_DIR = Path("data/temp_kaggle")
12
  OUTPUT_PARQUET = Path("data/ecommerce_products.parquet")
@@ -23,10 +20,6 @@ def setup_kaggle_api():
23
  raise ValueError("KAGGLE_API_TOKEN environment variable not set!")
24
 
25
  api = KaggleApi()
26
-
27
- # Tự authenticate từ token
28
- # Token format: KGAT_xxxxx
29
- # Kaggle cần: username + key
30
  api.api_token = token
31
 
32
  return api
 
4
  import pandas as pd
5
  import shutil
6
 
 
 
 
7
  DATASET_SLUG = "anujsaha0123456789/e-commerce-product-intelligence-dataset"
8
  TEMP_DIR = Path("data/temp_kaggle")
9
  OUTPUT_PARQUET = Path("data/ecommerce_products.parquet")
 
20
  raise ValueError("KAGGLE_API_TOKEN environment variable not set!")
21
 
22
  api = KaggleApi()
 
 
 
 
23
  api.api_token = token
24
 
25
  return api