| """ |
Shopping Assistant — Main Gradio Application
| ============================================= |
| |
| Architecture overview (for ANLY 656 students): |
| |
| This file orchestrates a **two-pass LLM pipeline** backed by real |
| product data from Google Shopping. |
| |
+--------------------------------------------------------------------+
|  User types a plain-English shopping request                       |
|                               |                                    |
|                               v                                    |
|  PASS 1 - NLP Parser (nlp_parser.py)                               |
|    - Sends the request to an LLM (Qwen 2.5, via HF API).           |
|    - Returns structured JSON: search queries + requirements.       |
|                               |                                    |
|                               v                                    |
|  SerpApi - Google Shopping (search_engine.py)                      |
|    - Calls the Google Shopping API through SerpApi.                |
|    - Returns product titles, prices, thumbnails, links.            |
|                               |                                    |
|                               v                                    |
|  SerpApi - Immersive Product Detail (search_engine.py)             |
|    - For the top-N candidates, fetches full descriptions           |
|      and specification sheets via a second SerpApi call.           |
|                               |                                    |
|                               v                                    |
|  PASS 2 - Product Evaluator (product_evaluator.py)                 |
|    - Sends each product's description + specs to the LLM.          |
|    - LLM checks every user requirement: met / not_met / unknown.   |
|                               |                                    |
|                               v                                    |
|  Gradio UI (ui_components.py + this file)                          |
|    - Renders product cards, comparison table, and a plain-         |
|      language summary of how the system interpreted the request.   |
+--------------------------------------------------------------------+
| |
| Key APIs used: |
    - Hugging Face Inference API - free-tier chat completions
    - SerpApi                    - Google Shopping + Immersive Product
| |
| Dependencies: |
| gradio, huggingface-hub, google-search-results (serpapi), |
| python-dotenv, pandas |
| """ |
|
|
| |
| import logging |
| import time |
| from typing import Any |
|
|
| |
| import gradio as gr |
| import pandas as pd |
|
|
| |
| |
| from config import MAX_DETAIL_LOOKUPS |
| from nlp_parser import parse_user_request |
| from product_evaluator import evaluate_batch |
| from product_parser import ( |
| apply_filters, |
| attach_details, |
| parse_results, |
| ) |
| from search_engine import ( |
| get_product_details, |
| search_shopping, |
| ) |
| from ui_components import ( |
| build_comparison_table, |
| build_product_cards, |
| ) |
|
|
| |
| |
| |
| |
# Configure root logging once at import time so every module in the
# pipeline (parser, search engine, evaluator) inherits the same
# handler and timestamped format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%H:%M:%S",
)
# Module-level logger; child modules create their own via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| def _build_parsed_summary( |
| searches: list[dict[str, Any]], |
| requirements: list[str], |
| ) -> str: |
| """ |
| Convert the NLP-parsed output into a human-readable Markdown summary. |
| |
| This is displayed in the "What I Understood" panel so the user can |
| verify that the LLM correctly interpreted their intent. |
| |
| Parameters |
| ---------- |
| searches : list[dict] |
| Each dict represents one product search the user requested. |
| Keys: query, category, brand, min_price, max_price, store. |
| requirements : list[str] |
| Specific product features / criteria the user mentioned |
| (e.g. "vibration pump", "made in Italy"). |
| |
| Returns |
| ------- |
| str |
| Markdown-formatted summary text. |
| """ |
| parts: list[str] = ["## π What I Understood:\n"] |
|
|
| |
| if searches: |
| parts.append("### Products to Search:") |
| for idx, search in enumerate(searches, start=1): |
| query = search.get("query", "") |
| parts.append(f"**{idx}. {query}**") |
|
|
| |
| params: list[str] = [] |
| if search.get("category"): |
| params.append(f" β’ Category: {search['category']}") |
| if search.get("brand"): |
| params.append(f" β’ Brand: {search['brand']}") |
|
|
| |
| low = search.get("min_price") |
| high = search.get("max_price") |
| if low or high: |
| low_str = f"${low}" if low else "$0" |
| high_str = f"${high}" if high else "β" |
| params.append(f" β’ Price Range: {low_str} β {high_str}") |
|
|
| if search.get("store"): |
| params.append(f" β’ Store: {search['store']}") |
|
|
| parts.extend(params) |
| else: |
| parts.append("β οΈ Could not parse any search queries") |
|
|
| |
| if requirements: |
| parts.append("\n### Specific Requirements (I'll verify these):") |
| for idx, req in enumerate(requirements, start=1): |
| parts.append(f"**{idx}.** {req}") |
| else: |
| parts.append("\n### βΉοΈ No specific requirements") |
| parts.append( |
| "_You didn't mention specific features to verify " |
| "(like 'made in Italy', 'vibration pump', etc.)_" |
| ) |
|
|
| return "\n".join(parts) |
|
|
|
|
def _fetch_details_for_candidates(
    df: pd.DataFrame,
) -> list[tuple[str, dict]]:
    """
    Look up full descriptions and spec sheets for the leading candidates.

    Kept as its own function because the SerpApi "Immersive Product"
    lookups are the priciest step of the pipeline (one SerpApi credit
    each); ``MAX_DETAIL_LOOKUPS`` caps the spend for a single query.

    Parameters
    ----------
    df : pd.DataFrame
        Search-results frame; rows are expected to carry an
        ``immersive_token`` column from the initial Shopping search.

    Returns
    -------
    list[tuple[str, dict]]
        (immersive_token, detail_dict) pairs in DataFrame order.
        Rows with an empty/falsy token are skipped.
    """
    candidates = df.head(MAX_DETAIL_LOOKUPS)
    logger.info(
        "Fetching product details for top %d candidates β¦",
        len(candidates),
    )

    # Skip rows lacking a token; every remaining lookup is a paid call.
    tokens = (
        row.get("immersive_token", "") for _, row in candidates.iterrows()
    )
    return [(tok, get_product_details(tok)) for tok in tokens if tok]
|
|
|
|
| def _evaluate_products( |
| df: pd.DataFrame, |
| requirements: list[str], |
| ) -> list[dict[str, Any]]: |
| """ |
| Run LLM Pass 2 β check each candidate against the user's |
| requirements. |
| |
| This is the "AI evaluation" step that makes the assistant smarter |
| than a keyword search. The LLM reads the product description and |
| spec sheet, then decides for **every** requirement whether it is |
| met, not met, or unknown (data missing). |
| |
| Parameters |
| ---------- |
| df : pd.DataFrame |
| Products with descriptions & specs already attached. |
| requirements : list[str] |
| The specific criteria extracted by LLM Pass 1. |
| |
| Returns |
| ------- |
| list[dict] |
| One evaluation dict per row in *df*. Keys: |
| ``verdict`` ("match" | "possible" | "no_match"), |
| ``notes``, ``requirements_met``. |
| """ |
| if not requirements: |
| |
| return [ |
| {"verdict": "match", "notes": "", "requirements_met": []} |
| ] * len(df) |
|
|
| |
| evaluated_count = min(MAX_DETAIL_LOOKUPS, len(df)) |
| products_to_eval = df.head(evaluated_count).to_dict("records") |
|
|
| logger.info( |
| "Evaluating %d products against %d requirements β¦", |
| len(products_to_eval), |
| len(requirements), |
| ) |
|
|
| evaluations_top = evaluate_batch(products_to_eval, requirements) |
|
|
| |
| |
| remaining = len(df) - len(evaluations_top) |
| evaluations_rest = [ |
| {"verdict": "possible", "notes": "", "requirements_met": []} |
| ] * remaining |
|
|
| return evaluations_top + evaluations_rest |
|
|
|
|
def _build_status_message(
    total_products: int,
    evaluations: list[dict[str, Any]],
    requirements: list[str],
    searches: list[dict[str, Any]],
    elapsed_seconds: float,
) -> str:
    """
    Compose the one-line status bar shown beneath the product grid.

    Parameters
    ----------
    total_products : int
        Number of products returned by SerpApi.
    evaluations : list[dict]
        The evaluation results (same length as the DataFrame).
        Each dict carries a ``verdict`` key:
        "match" | "possible" | "no_match".
    requirements : list[str]
        User requirements (may be empty).
    searches : list[dict]
        All parsed search objects (to note if multi-search was used).
    elapsed_seconds : float
        Wall-clock time for the entire pipeline.

    Returns
    -------
    str
        A concise, human-readable status string.
    """
    # Headline: product count + end-to-end wall-clock time.
    parts: list[str] = [
        f"β Evaluated {total_products} products in {elapsed_seconds:.1f}s"
    ]

    if requirements:
        # Tally LLM verdicts into a quick match/maybe/no breakdown.
        matches = sum(1 for e in evaluations if e["verdict"] == "match")
        possibles = sum(1 for e in evaluations if e["verdict"] == "possible")
        no_matches = sum(1 for e in evaluations if e["verdict"] == "no_match")
        parts.append(
            f"Results: {matches} match, {possibles} maybe, {no_matches} no match"
        )
    else:
        parts.append("(No requirements to check β all products shown)")

    # The pipeline only executes searches[0]; warn the user when the
    # parser extracted more than one search.
    if len(searches) > 1:
        parts.append("(Showing results for first search only)")

    return "\n".join(parts)
|
|
|
|
| |
| |
| |
|
|
|
|
def search_products(
    user_text: str,
) -> tuple[str, str, str, pd.DataFrame]:
    """
    Full pipeline for one shopping request:
    Parse -> Search -> Detail -> Evaluate -> Render.

    This is the single callback wired to Gradio's "Shop" button.
    Gradio invokes it with the text-box contents and expects four
    return values, one per UI output component.

    Parameters
    ----------
    user_text : str
        The shopper's free-form request from the Gradio text box.

    Returns
    -------
    tuple
        parsed_summary : str          - Markdown for "What I Understood"
        cards_html : str              - HTML product cards
        status_msg : str              - one-line status bar
        comparison_df : pd.DataFrame  - comparison table
    """
    started = time.time()

    # Guard: blank input -> friendly prompt, zero API spend.
    if not user_text.strip():
        return (
            "No request entered.",
            "",
            "Please enter what you're looking for.",
            pd.DataFrame(),
        )

    # ---- LLM Pass 1: structured search specs + requirements -------
    parsed: dict[str, Any] = parse_user_request(user_text)
    search_specs: list[dict[str, Any]] = parsed.get("searches", [])
    criteria: list[str] = parsed.get("requirements", [])

    summary_md = _build_parsed_summary(search_specs, criteria)

    if not search_specs:
        return (
            summary_md,
            "",
            "No valid searches found. Try rephrasing.",
            pd.DataFrame(),
        )

    # ---- SerpApi: Google Shopping search (first spec only) --------
    primary = search_specs[0]
    query: str = primary.get("query", "")
    if not query:
        return summary_md, "", "Empty search query.", pd.DataFrame()

    raw_payload = search_shopping(query)
    products: pd.DataFrame = parse_results(raw_payload)

    if products.empty:
        return (
            summary_md,
            "",
            f"No products found for '{query}'. Try a different search.",
            pd.DataFrame(),
        )

    products = apply_filters(
        products,
        min_price=primary.get("min_price"),
        max_price=primary.get("max_price"),
        sort_by=primary.get("sort_by", "relevance"),
    )

    # ---- Detail fetch (paid SerpApi calls) only when there is
    # something to verify against product descriptions ---------------
    if criteria:
        detail_pairs = _fetch_details_for_candidates(products)
        products = attach_details(products, detail_pairs)

    # ---- LLM Pass 2: verdict per candidate -------------------------
    verdicts = _evaluate_products(products, criteria)

    # ---- Render all four UI outputs --------------------------------
    cards_html: str = build_product_cards(products, verdicts)
    comparison_df: pd.DataFrame = build_comparison_table(products, verdicts)

    status_msg = _build_status_message(
        total_products=len(products),
        evaluations=verdicts,
        requirements=criteria,
        searches=search_specs,
        elapsed_seconds=time.time() - started,
    )

    return summary_md, cards_html, status_msg, comparison_df
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# ---------------------------------------------------------------------
# Gradio UI layout. Components render top-to-bottom in creation order;
# `demo` is launched from the __main__ guard at the bottom of the file.
# ---------------------------------------------------------------------
with gr.Blocks(title="Shopping Assistant") as demo:
    # Header / instructions.
    gr.Markdown("# π Shopping Assistant")
    gr.Markdown(
        "Tell me what you're looking for in plain English. "
        "I'll understand your requirements and search for matching products."
    )

    # Free-form request input.
    with gr.Row():
        request_box = gr.Textbox(
            label="What are you looking for?",
            placeholder=(
                'Example: "I want a prosumer espresso machine from '
                "Italy or Spain. I also want a grinder for the beans. "
                "The machine should use a vibration pump and have a "
                'water reservoir that holds at least one liter."'
            ),
            lines=5,
        )

    shop_btn = gr.Button("Shop", variant="primary", size="lg")

    # Output panels, in page order.
    cards_panel = gr.HTML(label="Product Results")
    status_box = gr.Textbox(label="Status", interactive=False)
    comparison_grid = gr.Dataframe(
        label="Comparison Table",
        wrap=True,
        interactive=False,
    )
    understood_panel = gr.Markdown(label="What I Understood", value="")

    # Single callback drives all four outputs.
    shop_btn.click(
        fn=search_products,
        inputs=[request_box],
        outputs=[understood_panel, cards_panel, status_box, comparison_grid],
    )
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, so importing
    # this module (e.g. in tests) has no side effects.
    # NOTE(review): no host/port/share arguments are passed, so Gradio
    # defaults apply -- confirm before deploying.
    demo.launch()
|
|