# Spaces: Sleeping
from datetime import datetime, timezone

import pandas as pd
import requests
# Shopee API URL the ratings are requested from.
base_url = "https://shopee.co.id/api/v2/item/get_ratings"

# Query parameters sent with every request. "offset" is advanced by "limit"
# after each full page (on a private copy, see fetch_reviews).
params = {
    "exclude_filter": 1,
    "filter": 0,
    "filter_size": 0,
    "flag": 1,
    "fold_filter": 0,
    "itemid": 5283031042,  # Replace with the item ID you want to scrape
    "limit": 20,  # Number of reviews per request
    "offset": 0,  # Pagination start
    "relevant_reviews": "false",
    "request_source": 2,
    "shopid": 52733860,  # Related shop ID
    "tag_filter": "",
    "type": 0,
    "variation_filters": "",
}

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

# File the collected reviews are written to.
OUTPUT_CSV = "shopee_reviews.csv"

# Column names of the resulting table, in output order.
COLUMNS = ("username", "rating", "comment", "date", "images")


def _value_or(mapping, key, default):
    """Return ``mapping[key]``, or *default* when the key is missing or None."""
    value = mapping.get(key)
    return default if value is None else value


def extract_ratings(payload):
    """Return the ``payload["data"]["ratings"]`` list, or None if unavailable.

    None is returned when the payload is not a dict, when "data" is missing
    or not a dict, or when "ratings" is missing or not a list (for example a
    JSON null). An empty list is returned unchanged so the caller can tell
    "no ratings field" apart from "empty page".
    """
    if not isinstance(payload, dict):
        return None
    body = payload.get("data")
    if not isinstance(body, dict):
        return None
    ratings = body.get("ratings")
    return ratings if isinstance(ratings, list) else None


def parse_rating(rating):
    """Flatten one rating entry into a dict keyed by ``COLUMNS``.

    Missing *and* null fields fall back to a placeholder: "Unknown" for the
    username, "N/A" for the star rating, "No comment" for the comment, the
    epoch for the creation time and an empty string for the images.
    """
    # "ctime" is converted as a Unix timestamp in seconds and rendered in
    # UTC. The timezone-aware call yields the same text as the deprecated
    # datetime.utcfromtimestamp().
    created = datetime.fromtimestamp(
        _value_or(rating, "ctime", 0), tz=timezone.utc
    )
    # A null "images" value would make str.join raise TypeError.
    images = _value_or(rating, "images", [])
    return {
        "username": _value_or(rating, "author_username", "Unknown"),
        "rating": _value_or(rating, "rating_star", "N/A"),
        "comment": _value_or(rating, "comment", "No comment"),
        "date": created.strftime("%Y-%m-%d %H:%M"),
        "images": ", ".join(map(str, images)),
    }


def fetch_reviews(url, query, timeout=REQUEST_TIMEOUT):
    """Page through the ratings endpoint and collect every review.

    Args:
        url: Endpoint to request.
        query: Query parameters; must contain integer "limit" and "offset".
            The mapping is copied, so the caller's dict is not modified.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A dict mapping each name in ``COLUMNS`` to a list of values, one
        entry per review. On a network error, a non-200 status or an
        unparsable body, a message is printed and whatever was collected so
        far is returned.
    """
    query = dict(query)  # advance the offset on a private copy
    page_size = query["limit"]
    columns = {name: [] for name in COLUMNS}

    # Loop over the pages.
    while True:
        try:
            response = requests.get(url, params=query, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error: Failed to fetch data. ({exc})")
            break
        if response.status_code != 200:
            print("Error: Failed to fetch data.")
            break

        try:
            payload = response.json()
        except ValueError:  # body is not valid JSON
            print("Error: Failed to fetch data.")
            break

        # Check whether the response carries any "ratings" data.
        ratings = extract_ratings(payload)
        if ratings is None:
            print("No more ratings found.")
            break

        for entry in ratings:
            if not isinstance(entry, dict):
                continue  # skip malformed entries instead of crashing
            row = parse_rating(entry)
            for name in COLUMNS:
                columns[name].append(row[name])

        # Fewer entries than the limit means this was the last page.
        if not ratings or len(ratings) < page_size:
            break

        # Advance the offset to request the next page.
        query["offset"] += page_size

    return columns


def main():
    """Scrape all reviews, print them and save them to ``OUTPUT_CSV``."""
    df = pd.DataFrame(fetch_reviews(base_url, params))
    print(df)
    # Save the data to a CSV file.
    df.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()