# shopper / product_parser.py
# anly656's picture
# Upload 3 files
# 9ff9499 verified
"""Parse and normalize SerpApi shopping results."""
import pandas as pd
import re
def parse_results(raw_response):
    """
    Extract and normalize shopping results from a SerpApi response.

    Args:
        raw_response: dict from SerpApi. ``None``, or a response whose
            "shopping_results" entry is missing or empty, is treated as
            "no results".

    Returns:
        pd.DataFrame: One row per product with the columns title, price,
        source, link, thumbnail, rating, reviews, product_id and
        immersive_token. An empty, column-less DataFrame is returned when
        there are no results.
    """
    # Treat a missing response (None) like an empty one instead of raising
    # AttributeError on ``.get``.
    products = (raw_response or {}).get("shopping_results") or []
    if not products:
        return pd.DataFrame()

    # Extract key fields
    return pd.DataFrame([
        {
            "title": product.get("title", ""),
            "price": _parse_price(product.get("price", "")),
            "source": product.get("source", ""),
            # Use product_link for clickable URLs
            "link": product.get("product_link", ""),
            "thumbnail": product.get("thumbnail", ""),
            "rating": product.get("rating", None),
            "reviews": product.get("reviews", 0),
            "product_id": product.get("product_id", ""),
            # For detail lookups
            "immersive_token": product.get("immersive_product_page_token", ""),
        }
        for product in products
    ])
def _parse_price(price_str):
    """
    Convert a price string to a float.

    Only the first number found in the text is parsed, so a range such as
    "$10 - $20" yields 10.0 instead of the digits of both numbers being
    fused into a single bogus value.

    Args:
        price_str: Price as string (e.g. "$249.99" or "$1,299.00")

    Returns:
        float or None: None when the input is empty or contains no number
    """
    if not price_str:
        return None

    # First numeric token: digits with optional thousands commas and an
    # optional decimal part, or a bare decimal such as ".99".
    match = re.search(r"\d[\d,]*(?:\.\d+)?|\.\d+", str(price_str))
    if match is None:
        return None

    try:
        return float(match.group().replace(",", ""))
    except ValueError:
        return None
def attach_details(df, details_list):
    """
    Merge product detail data into the DataFrame.

    The "description" and "specs" columns are added to ``df`` itself (the
    input is modified in place) and the same DataFrame is returned.

    Args:
        df: DataFrame from parse_results
        details_list: List of tuples (immersive_token, detail_dict) from
            get_product_details. Entries with an empty/None detail are
            skipped; when a token appears more than once the last
            non-empty detail wins.

    Returns:
        pd.DataFrame: DataFrame with description and specs columns added.
        An empty DataFrame is returned unchanged.
    """
    if df.empty:
        return df

    # Map details by immersive_token once, rather than scanning the whole
    # token column for every detail.
    details_by_token = {}
    for token, detail in details_list:
        if detail:
            details_by_token[token] = detail

    if details_by_token:
        row_details = [
            details_by_token.get(token) for token in df["immersive_token"]
        ]
    else:
        row_details = [None] * len(df)

    df["description"] = [
        detail.get("description", "") if detail else ""
        for detail in row_details
    ]
    # Rows without details each get their own fresh dict; a single shared
    # ``{}`` would leak mutations from one row into every other row.
    df["specs"] = [
        detail.get("specs", {}) if detail else {}
        for detail in row_details
    ]
    return df
def apply_filters(df, min_price=None, max_price=None, sort_by="relevance"):
    """
    Filter and sort the DataFrame.

    Args:
        df: DataFrame to filter
        min_price: Minimum price filter (inclusive); None disables it
        max_price: Maximum price filter (inclusive); None disables it
        sort_by: Sort order. "price_low" (or "price") sorts by ascending
            price, "price_high" by descending price, "rating" by
            descending rating. Any other value, including "relevance",
            keeps the incoming row order.

    Returns:
        pd.DataFrame: Filtered and sorted copy with a fresh 0..n-1 index.
        An empty input is returned as-is.
    """
    if df.empty:
        return df

    filtered = df.copy()

    # Price filters (exclude products with missing prices)
    if min_price is not None or max_price is not None:
        # Coerce to numeric so a column holding only None (object dtype)
        # compares as NaN instead of raising TypeError.
        prices = pd.to_numeric(filtered["price"], errors="coerce")
        keep = prices.notna()
        if min_price is not None:
            keep &= prices >= min_price
        if max_price is not None:
            keep &= prices <= max_price
        filtered = filtered[keep]

    # Sort: (column, ascending) per supported option
    sort_options = {
        "price": ("price", True),
        "price_low": ("price", True),
        "price_high": ("price", False),
        "rating": ("rating", False),
    }
    if sort_by in sort_options:
        column, ascending = sort_options[sort_by]
        # A stable sort keeps the original (relevance) order among ties.
        filtered = filtered.sort_values(
            column, ascending=ascending, na_position="last", kind="mergesort"
        )

    return filtered.reset_index(drop=True)