| """Parse and normalize SerpApi shopping results.""" |
| import pandas as pd |
| import re |
|
|
|
|
def parse_results(raw_response):
    """
    Extract and normalize shopping results from a SerpApi response.

    Each entry of ``raw_response["shopping_results"]`` becomes one row. Missing
    fields fall back to a default: empty string for text fields, ``None`` for
    ``rating`` and ``0`` for ``reviews``. The ``price`` field is converted by
    ``_parse_price``; ``link`` is read from ``product_link`` and
    ``immersive_token`` from ``immersive_product_page_token``.

    Args:
        raw_response: dict from SerpApi. ``None`` or a response without
            (or with a null) ``shopping_results`` is treated as "no results".

    Returns:
        pd.DataFrame: Normalized product data. The column set is always the
        same, even when there are no rows, so callers can reference columns
        without checking for emptiness first.
    """
    columns = [
        "title",
        "price",
        "source",
        "link",
        "thumbnail",
        "rating",
        "reviews",
        "product_id",
        "immersive_token",
    ]

    # `or` guards cover both a missing key and an explicit null value.
    products = (raw_response or {}).get("shopping_results") or []

    rows = [
        {
            "title": product.get("title", ""),
            "price": _parse_price(product.get("price", "")),
            "source": product.get("source", ""),
            "link": product.get("product_link", ""),
            "thumbnail": product.get("thumbnail", ""),
            "rating": product.get("rating", None),
            "reviews": product.get("reviews", 0),
            "product_id": product.get("product_id", ""),
            "immersive_token": product.get("immersive_product_page_token", ""),
        }
        for product in products
    ]

    # Passing `columns` keeps the schema stable for the zero-row case.
    return pd.DataFrame(rows, columns=columns)
|
|
|
|
def _parse_price(price_str):
    """
    Convert a price string to a float.

    The first numeric token in the text is used, with thousands-separator
    commas removed. Taking only the first token prevents separate numbers
    from being glued together (e.g. a range such as "$10 - $20" yields 10.0
    rather than 1020.0).

    Args:
        price_str: Price as string (e.g. "$249.99", "$1,299.00").
            Non-string values are converted with ``str()`` first.

    Returns:
        float or None: The parsed price, or None when the input is falsy or
        contains no parseable number.
    """
    if not price_str:
        return None

    # Candidate tokens are runs of digits, dots and commas; runs that are not
    # valid numbers (e.g. the dots in "U.S.") are skipped.
    for token in re.findall(r'[\d.,]+', str(price_str)):
        try:
            return float(token.replace(',', ''))
        except ValueError:
            continue

    return None
|
|
|
|
def attach_details(df, details_list):
    """
    Merge product detail data into the DataFrame.

    Adds a ``description`` column (default "") and a ``specs`` column
    (default ``{}``) to ``df`` in place, filling them from the detail dict
    whose token equals the row's ``immersive_token``. Entries with a falsy
    detail are ignored; if a token appears more than once, the last
    non-empty detail wins.

    Args:
        df: DataFrame from parse_results
        details_list: List of tuples (immersive_token, detail_dict) from
            get_product_details. ``None`` is treated as an empty list.

    Returns:
        pd.DataFrame: The same DataFrame object, with description and specs
        columns added (also when it has no rows).
    """
    if df.empty:
        # Nothing to match, but still add the documented columns so the
        # schema does not depend on whether there were results.
        df["description"] = ""
        df["specs"] = [{} for _ in range(len(df))]
        return df

    # One lookup table instead of a full-column scan per detail entry.
    details_by_token = {
        token: detail for token, detail in (details_list or []) if detail
    }
    matched = [details_by_token.get(token) for token in df["immersive_token"].tolist()]

    df["description"] = [
        detail.get("description", "") if detail else "" for detail in matched
    ]
    # A fresh dict per unmatched row: a shared default would make a mutation
    # of one row's specs show up in every other row.
    df["specs"] = [
        detail.get("specs", {}) if detail else {} for detail in matched
    ]

    return df
|
|
|
|
def apply_filters(df, min_price=None, max_price=None, sort_by="relevance"):
    """
    Filter and sort the DataFrame.

    Args:
        df: DataFrame to filter; must have ``price`` and ``rating`` columns
            when it is non-empty.
        min_price: Minimum price filter (inclusive), or None for no bound
        max_price: Maximum price filter (inclusive), or None for no bound
        sort_by: Sort order. "price_low" (or "price") sorts by price
            ascending, "price_high" by price descending, "rating" by rating
            descending. "relevance" or any other value keeps the incoming
            order.

    Returns:
        pd.DataFrame: Filtered and sorted copy with a fresh 0..n-1 index.
        Rows without a usable price are dropped whenever a price bound is
        given; when sorting, missing values are placed last.
    """
    if df.empty:
        return df

    filtered = df.copy()

    if min_price is not None or max_price is not None:
        # Coerce to numeric first: when no price could be parsed the column
        # is object dtype holding None, and comparing it raises TypeError.
        prices = pd.to_numeric(filtered["price"], errors="coerce")
        keep = prices.notna()
        if min_price is not None:
            keep &= prices >= min_price
        if max_price is not None:
            keep &= prices <= max_price
        filtered = filtered[keep]

    # Stable sort so rows with equal keys keep their relevance order.
    if sort_by in ("price_low", "price"):
        filtered = filtered.sort_values("price", ascending=True, kind="mergesort")
    elif sort_by == "price_high":
        filtered = filtered.sort_values("price", ascending=False, kind="mergesort")
    elif sort_by == "rating":
        filtered = filtered.sort_values(
            "rating", ascending=False, na_position="last", kind="mergesort"
        )

    return filtered.reset_index(drop=True)
|
|