| """NLP Parser - Extract structured search parameters from natural language.""" |
| import json |
| from huggingface_hub import InferenceClient |
| from config import HF_TOKEN, LLM_MODEL |
|
|
|
|
def parse_user_request(text):
    """
    Parse a natural language shopping request into structured parameters.

    The request is sent to the configured chat model together with a system
    prompt describing the expected JSON layout; the reply is then run through
    ``_extract_json`` and ``_validate_response``.

    Args:
        text: User's plain-English request. ``None``, non-string values and
            blank strings are treated as "no request".

    Returns:
        dict: {
            "searches": [{"query": str, "max_price": float, ...}, ...],
            "requirements": [str, str, ...]
        }
        For blank input both lists are empty. If anything goes wrong while
        calling the model or processing its reply, a single search using the
        raw request text as the query is returned instead (best effort).
    """
    # Guard before touching str methods: the original called text.strip()
    # unconditionally, so None (or any non-string) raised AttributeError
    # outside the try block instead of yielding the empty result.
    if not isinstance(text, str) or not text.strip():
        return {"searches": [], "requirements": []}

    system_prompt = """You are an expert shopping assistant parser. Given the user's natural language request, return JSON with two keys:

"searches": a list of objects, one per distinct product the user wants. Each object has:
- query: search keywords (str, required)
- category: one of [Electronics, Clothing & Apparel, Home & Garden, Health & Beauty, Sports & Outdoors, Toys & Games, Books & Media, Office & School, Food & Grocery, Auto & Parts] or null
- min_price: number or null
- max_price: number or null
- sort_by: "relevance"|"price_low"|"price_high"|"rating"|null
- brand: str or null
- store: str or null

"requirements": a list of strings — specific criteria the user mentioned that go BEYOND standard filters. These are things you would need to read a product description or spec sheet to verify. Examples:
- "espresso only — not drip or pour-over"
- "manufactured in USA or Italy"
- "burr grinder, not blade"
- "BPA-free materials"
- "compatible with K-cups"
- "must have HDMI 2.1 port"
- "vibration pump"
- "water reservoir at least 1 liter"

Do NOT include price or brand here (those are already in the search object). Only include requirements that need spec-sheet verification.

Return ONLY valid JSON, no commentary."""

    user_message = f"User request: {text}"

    # Deliberately broad boundary: any failure (network, auth, malformed
    # reply) degrades to a plain keyword search rather than crashing.
    try:
        client = InferenceClient(token=HF_TOKEN)

        response = client.chat_completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            max_tokens=1000,
            temperature=0.3,
        )

        response_text = response.choices[0].message.content.strip()

        # The reply may wrap the JSON in extra text; pull the object out
        # first, then normalise its fields.
        parsed = _extract_json(response_text)
        validated = _validate_response(parsed)

        print(f"NLP Parser extracted: {json.dumps(validated, indent=2)}")
        return validated

    except Exception as e:
        print(f"Error in NLP parsing: {e}")
        import traceback
        traceback.print_exc()

        # Fallback: search for the raw request text with no filters.
        return {
            "searches": [{"query": text, "category": None, "min_price": None,
                          "max_price": None, "sort_by": None, "brand": None,
                          "store": None}],
            "requirements": [],
        }
|
|
|
|
| def _extract_json(text): |
| """Extract JSON from LLM response that might have extra text.""" |
| |
| start = text.find('{') |
| end = text.rfind('}') + 1 |
| |
| if start != -1 and end > start: |
| json_str = text[start:end] |
| try: |
| return json.loads(json_str) |
| except json.JSONDecodeError: |
| pass |
| |
| |
| try: |
| return json.loads(text) |
| except json.JSONDecodeError: |
| return {} |
|
|
|
|
| def _validate_response(data): |
| """Validate and fill in missing fields.""" |
| if not isinstance(data, dict): |
| return {"searches": [], "requirements": []} |
| |
| |
| searches = data.get("searches", []) |
| if not isinstance(searches, list): |
| searches = [] |
| |
| |
| validated_searches = [] |
| for search in searches: |
| if not isinstance(search, dict): |
| continue |
| |
| validated_searches.append({ |
| "query": search.get("query", ""), |
| "category": search.get("category"), |
| "min_price": search.get("min_price"), |
| "max_price": search.get("max_price"), |
| "sort_by": search.get("sort_by"), |
| "brand": search.get("brand"), |
| "store": search.get("store"), |
| }) |
| |
| |
| requirements = data.get("requirements", []) |
| if not isinstance(requirements, list): |
| requirements = [] |
| |
| requirements = [str(r) for r in requirements if r] |
| |
| return { |
| "searches": validated_searches, |
| "requirements": requirements |
| } |
|
|