# shopper/nlp_parser.py
# NOTE(review): the following Hugging Face Hub page header was pasted into the
# source and is preserved here as a comment so the file parses:
#   anly656's picture / Upload 5 files / 45ecbbd verified
"""NLP Parser - Extract structured search parameters from natural language."""
import json
from huggingface_hub import InferenceClient
from config import HF_TOKEN, LLM_MODEL
def parse_user_request(text):
    """Parse a natural-language shopping request into structured parameters.

    Args:
        text: User's plain-English request.

    Returns:
        dict: {
            "searches": [{"query": str, "max_price": float, ...}, ...],
            "requirements": [str, str, ...]
        }
        On any parsing/LLM failure, falls back to a single search whose
        query is the raw input text.
    """
    # Nothing to parse: return an empty, well-formed result.
    if not text.strip():
        return {"searches": [], "requirements": []}

    parser_prompt = """You are an expert shopping assistant parser. Given the user's natural language request, return JSON with two keys:
"searches": a list of objects, one per distinct product the user wants. Each object has:
- query: search keywords (str, required)
- category: one of [Electronics, Clothing & Apparel, Home & Garden, Health & Beauty, Sports & Outdoors, Toys & Games, Books & Media, Office & School, Food & Grocery, Auto & Parts] or null
- min_price: number or null
- max_price: number or null
- sort_by: "relevance"|"price_low"|"price_high"|"rating"|null
- brand: str or null
- store: str or null
"requirements": a list of strings — specific criteria the user mentioned that go BEYOND standard filters. These are things you would need to read a product description or spec sheet to verify. Examples:
- "espresso only — not drip or pour-over"
- "manufactured in USA or Italy"
- "burr grinder, not blade"
- "BPA-free materials"
- "compatible with K-cups"
- "must have HDMI 2.1 port"
- "vibration pump"
- "water reservoir at least 1 liter"
Do NOT include price or brand here (those are already in the search object). Only include requirements that need spec-sheet verification.
Return ONLY valid JSON, no commentary."""

    request_message = f"User request: {text}"

    try:
        llm = InferenceClient(token=HF_TOKEN)

        # Ask the LLM to translate the request into the JSON schema above.
        completion = llm.chat_completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": parser_prompt},
                {"role": "user", "content": request_message},
            ],
            max_tokens=1000,
            temperature=0.3,
        )

        raw_reply = completion.choices[0].message.content.strip()

        # The model may wrap the JSON in prose; extract it, then normalize
        # the structure so downstream code can rely on every key existing.
        structured = _extract_json(raw_reply)
        result = _validate_response(structured)

        print(f"NLP Parser extracted: {json.dumps(result, indent=2)}")
        return result

    except Exception as e:
        print(f"Error in NLP parsing: {e}")
        import traceback
        traceback.print_exc()
        # Fallback: treat the text as a simple search query
        return {
            "searches": [{"query": text, "category": None, "min_price": None,
                          "max_price": None, "sort_by": None, "brand": None, "store": None}],
            "requirements": []
        }
def _extract_json(text):
"""Extract JSON from LLM response that might have extra text."""
# Try to find JSON block
start = text.find('{')
end = text.rfind('}') + 1
if start != -1 and end > start:
json_str = text[start:end]
try:
return json.loads(json_str)
except json.JSONDecodeError:
pass
# If that fails, try the whole text
try:
return json.loads(text)
except json.JSONDecodeError:
return {}
def _validate_response(data):
"""Validate and fill in missing fields."""
if not isinstance(data, dict):
return {"searches": [], "requirements": []}
# Ensure searches is a list
searches = data.get("searches", [])
if not isinstance(searches, list):
searches = []
# Validate each search object
validated_searches = []
for search in searches:
if not isinstance(search, dict):
continue
validated_searches.append({
"query": search.get("query", ""),
"category": search.get("category"),
"min_price": search.get("min_price"),
"max_price": search.get("max_price"),
"sort_by": search.get("sort_by"),
"brand": search.get("brand"),
"store": search.get("store"),
})
# Ensure requirements is a list of strings
requirements = data.get("requirements", [])
if not isinstance(requirements, list):
requirements = []
requirements = [str(r) for r in requirements if r]
return {
"searches": validated_searches,
"requirements": requirements
}