"""NLP Parser - Extract structured search parameters from natural language.""" import json from huggingface_hub import InferenceClient from config import HF_TOKEN, LLM_MODEL def parse_user_request(text): """ Parse natural language shopping request into structured parameters. Args: text: User's plain-English request Returns: dict: { "searches": [{"query": str, "max_price": float, ...}, ...], "requirements": [str, str, ...] } """ if not text.strip(): return {"searches": [], "requirements": []} system_prompt = """You are an expert shopping assistant parser. Given the user's natural language request, return JSON with two keys: "searches": a list of objects, one per distinct product the user wants. Each object has: - query: search keywords (str, required) - category: one of [Electronics, Clothing & Apparel, Home & Garden, Health & Beauty, Sports & Outdoors, Toys & Games, Books & Media, Office & School, Food & Grocery, Auto & Parts] or null - min_price: number or null - max_price: number or null - sort_by: "relevance"|"price_low"|"price_high"|"rating"|null - brand: str or null - store: str or null "requirements": a list of strings — specific criteria the user mentioned that go BEYOND standard filters. These are things you would need to read a product description or spec sheet to verify. Examples: - "espresso only — not drip or pour-over" - "manufactured in USA or Italy" - "burr grinder, not blade" - "BPA-free materials" - "compatible with K-cups" - "must have HDMI 2.1 port" - "vibration pump" - "water reservoir at least 1 liter" Do NOT include price or brand here (those are already in the search object). Only include requirements that need spec-sheet verification. Return ONLY valid JSON, no commentary.""" user_message = f"User request: {text}" try: client = InferenceClient(token=HF_TOKEN) # Call the LLM response = client.chat_completion( model=LLM_MODEL, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message} ], max_tokens=1000, temperature=0.3, ) # Extract the response text response_text = response.choices[0].message.content.strip() # Try to extract JSON from the response parsed = _extract_json(response_text) # Validate and fill in missing fields validated = _validate_response(parsed) print(f"NLP Parser extracted: {json.dumps(validated, indent=2)}") return validated except Exception as e: print(f"Error in NLP parsing: {e}") import traceback traceback.print_exc() # Fallback: treat the text as a simple search query return { "searches": [{"query": text, "category": None, "min_price": None, "max_price": None, "sort_by": None, "brand": None, "store": None}], "requirements": [] } def _extract_json(text): """Extract JSON from LLM response that might have extra text.""" # Try to find JSON block start = text.find('{') end = text.rfind('}') + 1 if start != -1 and end > start: json_str = text[start:end] try: return json.loads(json_str) except json.JSONDecodeError: pass # If that fails, try the whole text try: return json.loads(text) except json.JSONDecodeError: return {} def _validate_response(data): """Validate and fill in missing fields.""" if not isinstance(data, dict): return {"searches": [], "requirements": []} # Ensure searches is a list searches = data.get("searches", []) if not isinstance(searches, list): searches = [] # Validate each search object validated_searches = [] for search in searches: if not isinstance(search, dict): continue validated_searches.append({ "query": search.get("query", ""), "category": search.get("category"), "min_price": search.get("min_price"), "max_price": search.get("max_price"), "sort_by": search.get("sort_by"), "brand": search.get("brand"), "store": search.get("store"), }) # Ensure requirements is a list of strings requirements = data.get("requirements", []) if not isinstance(requirements, list): requirements = [] requirements = [str(r) for r in requirements if r] return { "searches": validated_searches, "requirements": requirements }