"""
LLM inference engine for RetailMind.

Uses the HuggingFace Inference API (serverless, GPU-backed) so responses
arrive in ~1–2 s instead of 15–20 s on CPU.  Falls back to a structured
template if the API is unavailable.
"""

from __future__ import annotations

import logging
import os
from typing import Any

from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

_client: InferenceClient | None = None
MODEL = "Qwen/Qwen2.5-72B-Instruct"   # strong model, free on HF serverless


def _get_client() -> InferenceClient:
    """Lazily create and cache a module-level InferenceClient."""
    global _client
    if _client is None:
        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        _client = InferenceClient(token=token)
        logger.info("InferenceClient ready (model=%s)", MODEL)
    return _client


def _build_context(retrieved_items: list[dict[str, Any]]) -> str:
    """Format retrieved products as a numbered inventory block for the prompt."""
    lines = []
    for i, r in enumerate(retrieved_items, 1):
        p = r["product"]
        stars = "β˜…" * int(p.get("rating", 4)) + "β˜†" * (5 - int(p.get("rating", 4)))
        lines.append(
            f"{i}. {p['title']} β€” ${p['price']:.2f}\n"
            f"   Category: {p['category']} | Rating: {stars} ({p.get('reviews', 0)} reviews)\n"
            f"   Materials: {p.get('materials', 'N/A')}\n"
            f"   Description: {p['desc']}"
        )
    return "\n\n".join(lines)


def _fallback_response(retrieved_items: list[dict[str, Any]]) -> str:
    """Structured template used when the API is unavailable."""
    if not retrieved_items:
        return "I couldn't find matching products for your query. Try different keywords."
    lines = ["Here are my top picks for you:\n"]
    for r in retrieved_items:
        p = r["product"]
        lines.append(f"β€’ **{p['title']}** β€” ${p['price']:.2f}\n  {p['desc'][:120]}…")
    return "\n".join(lines)


def generate_response(
    system_prompt: str,
    user_query: str,
    retrieved_items: list[dict[str, Any]],
) -> str:
    """Answer the user query with the chat model, grounded in the retrieved
    items; fall back to a structured template if the API call fails."""
    context = _build_context(retrieved_items)
    messages = [
        {
            "role": "system",
            "content": (
                f"{system_prompt}\n\n"
                f"══════ Available Inventory ══════\n\n"
                f"{context}\n\n"
                f"════════════════════════════════\n"
                f"You are a helpful AI shopping assistant. "
                f"Only recommend products listed above. "
                f"Cite exact names and prices. Be concise (2–4 sentences)."
            ),
        },
        {"role": "user", "content": user_query},
    ]

    try:
        client = _get_client()
        result = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=150,
            temperature=0.3,
        )
        return result.choices[0].message.content.strip()
    except Exception as e:
        logger.warning("Inference API failed (%s), using fallback template.", e)
        return _fallback_response(retrieved_items)
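

# ---------------------------------------------------------------------------
# Minimal smoke test (illustrative sketch only; the sample product dict below
# is made up and not part of RetailMind's real catalogue). Run with HF_TOKEN
# set to exercise the API path, or unset/offline to see the fallback template.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample_items = [
        {
            "product": {
                "title": "Everyday Canvas Tote",
                "price": 24.99,
                "category": "Bags",
                "rating": 4,
                "reviews": 182,
                "materials": "Organic cotton canvas",
                "desc": "A roomy tote for groceries, gym gear, or laptops.",
            }
        }
    ]
    print(
        generate_response(
            system_prompt="You help shoppers pick products.",
            user_query="I need a sturdy bag for groceries under $30.",
            retrieved_items=sample_items,
        )
    )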