File size: 6,393 Bytes

96abbd8

# -*- coding: utf-8 -*-
"""
Lightweight helpers for categorising optimisation problems and surfacing
category-level memory.
"""

from __future__ import annotations

import json
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Set, Tuple


# The default guideline store is resolved relative to this source file (not the
# current working directory): two directories above the one containing this
# module, under ``memory_storage/category_guidelines.jsonl``.
_PKG_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _PKG_DIR.parent.parent
DEFAULT_GUIDELINE_PATH = str(_PROJECT_ROOT / "memory_storage" / "category_guidelines.jsonl")


class MemoryIntelligence:
    """
    Heuristic problem classifier + guideline loader.

    The goal is to offer fast, rule-based categorisation that can run
    offline. If the heuristics fail, downstream agents (LLMs) can still
    append tags, but we always return the heuristic view for consistency.

    Attributes:
        guideline_path: Path of the JSONL guideline file given at construction.
        guidelines: Mapping ``category -> payload`` loaded from that file; each
            payload is the decoded JSON object of one line.
    """

    # Category name -> lower-case keywords / phrases that vote for it. A keyword
    # may appear under several categories (e.g. "demand", "capacity"); each
    # category is scored independently.
    CATEGORY_KEYWORDS: Dict[str, Set[str]] = {
        "workforce_planning": {
            "worker",
            "workforce",
            "training",
            "trainee",
            "overtime",
            "hire",
            "fire",
        },
        "inventory_planning": {
            "inventory",
            "backlog",
            "stock",
            "warehouse",
            "storage",
            "holding cost",
        },
        "production_planning": {
            "production",
            "factory",
            "capacity",
            "machine",
            "batch",
            "demand",
        },
        "scheduling": {
            "schedule",
            "sequencing",
            "precedence",
            "flow shop",
            "job shop",
            "makespan",
        },
        "transportation": {
            "transport",
            "shipping",
            "vehicle",
            "route",
            "delivery",
            "supply",
            "demand",
            "shipment",
        },
        "network_flow": {
            "flow",
            "arc",
            "network",
            "node",
            "capacity",
            "supply node",
            "demand node",
        },
        "assignment": {
            "assignment",
            "allocate",
            "task",
            "agent",
            "matching",
            "job",
        },
        "facility_location": {
            "facility",
            "location",
            "plant",
            "open",
            "siting",
            "distribution center",
        },
        "traveling_salesman": {
            "tsp",
            "tour",
            "city",
            "travel",
            "route visiting",
            "cyclic",
        },
        "portfolio_optimization": {
            "portfolio",
            "investment",
            "asset",
            "return",
            "risk",
            "variance",
        },
    }

    def __init__(self, guideline_path: str = DEFAULT_GUIDELINE_PATH):
        """
        Load the guideline file once and keep the result on the instance.

        Args:
            guideline_path: Path to a JSONL file with one guideline object per
                line. A falsy or non-existent path yields no guidelines.
        """
        self.guideline_path = guideline_path
        self.guidelines = self._load_guidelines(guideline_path)

    @staticmethod
    def _load_guidelines(path: str) -> Dict[str, Dict]:
        """
        Parse a JSONL guideline file into a ``category -> payload`` mapping.

        Loading is best-effort: blank lines, lines that are not valid JSON,
        lines whose JSON value is not an object, and objects without a
        non-empty string ``"category"`` are skipped. When several lines share
        a category, the last one wins.

        Args:
            path: File to read (UTF-8). If it is falsy or does not name a
                regular file, an empty mapping is returned.

        Returns:
            Dict keyed by category name; values are the decoded JSON objects.
        """
        guidelines: Dict[str, Dict] = {}
        # isfile (rather than exists) so that a directory path is treated like
        # a missing file instead of making open() raise.
        if not path or not os.path.isfile(path):
            return guidelines
        with open(path, "r", encoding="utf-8") as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON is not necessarily an object ("[1, 2]", "3", ...);
                # only objects can carry a category.
                if not isinstance(payload, dict):
                    continue
                category = payload.get("category")
                # Must be usable as a dict key and as a heading fallback.
                if not category or not isinstance(category, str):
                    continue
                guidelines[category] = payload
        return guidelines

    @staticmethod
    def _entry_title(category: str, entry: Dict) -> str:
        """Return the entry's ``title``, or a title-cased form of the category name."""
        return entry.get("title") or category.replace("_", " ").title()

    @staticmethod
    def _entry_items(entry: Dict, max_items: int) -> List:
        """
        Return at most ``max_items`` guideline items from ``entry``.

        A single string under ``"guidelines"`` is treated as one item (slicing
        it would otherwise yield individual characters); any other non
        list/tuple value is treated as "no guidelines".
        """
        items = entry.get("guidelines") or []
        if isinstance(items, str):
            items = [items]
        elif not isinstance(items, (list, tuple)):
            return []
        return list(items[:max_items])

    def classify(self, description: str, top_k: int = 3, minimum_score: int = 1) -> List[Tuple[str, int]]:
        """
        Return a ranked list of (category, score) using keyword heuristics.

        The score of a category is the total number of whole-word,
        case-insensitive occurrences of its keywords in ``description``.
        Matching is on word boundaries, so "worker" does not match "workers".
        Categories with no hit are never returned. Ties keep the declaration
        order of ``CATEGORY_KEYWORDS``.

        Args:
            description: Free-text problem statement. Falsy input yields ``[]``.
            top_k: Maximum number of categories to return. ``0``, ``None`` or a
                negative value means "no limit".
            minimum_score: Categories scoring below this are dropped.

        Returns:
            ``(category, score)`` tuples sorted by descending score.
        """
        if not description:
            return []
        text = description.lower()
        scores: Dict[str, int] = {}
        for category, keywords in self.CATEGORY_KEYWORDS.items():
            total = sum(
                len(re.findall(r"\b" + re.escape(keyword.lower()) + r"\b", text))
                for keyword in keywords
            )
            if total:
                scores[category] = total
        # sorted() is stable, so equal scores stay in insertion order.
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        filtered = [(cat, score) for cat, score in ranked if score >= minimum_score]
        # A negative limit would slice from the tail; treat it as "no limit".
        if top_k is not None and top_k > 0:
            return filtered[:top_k]
        return filtered

    def categories_only(self, description: str, top_k: int = 3, minimum_score: int = 1) -> List[str]:
        """
        Return just the category names from :meth:`classify`, in ranked order.
        """
        return [cat for cat, _ in self.classify(description, top_k=top_k, minimum_score=minimum_score)]

    def guideline_text(
        self,
        categories: Iterable[str],
        include_header: bool = True,
        max_items_per_category: int = 4,
    ) -> str:
        """
        Render guidelines for the provided categories as a markdown string.

        Each category that has a loaded entry becomes a ``## <title>`` section
        followed by up to ``max_items_per_category`` ``- `` bullets and a blank
        line. Duplicate categories are rendered once; categories without an
        entry are skipped.

        Args:
            categories: Category names, in the order they should appear.
            include_header: Prepend a ``# Category Playbook`` heading.
            max_items_per_category: Cap on bullets per section.

        Returns:
            The markdown text, or ``""`` when no section could be rendered
            (so callers never receive a header with nothing under it).
        """
        unique_categories = list(dict.fromkeys(categories))  # deduplicate while preserving order
        sections: List[str] = []
        for category in unique_categories:
            entry = self.guidelines.get(category)
            if not entry:
                continue
            sections.append(f"## {self._entry_title(category, entry)}")
            sections.extend(
                f"- {bullet}" for bullet in self._entry_items(entry, max_items_per_category)
            )
            # Always close the section with a blank line so consecutive
            # headings stay separated even when a section has no bullets.
            sections.append("")

        if not sections:
            return ""

        header = ["# Category Playbook", ""] if include_header else []
        return "\n".join(header + sections).strip()

    def guideline_bullets(self, categories: Iterable[str], max_items_per_category: int = 4) -> List[str]:
        """
        Return guidelines as a flat list of ``"<title>: <item>"`` strings.

        Uses the same title fallback, per-category cap and de-duplication of
        ``categories`` as :meth:`guideline_text`. Categories without a loaded
        entry contribute nothing.
        """
        bullets: List[str] = []
        for category in dict.fromkeys(categories):  # deduplicate while preserving order
            entry = self.guidelines.get(category)
            if not entry:
                continue
            title = self._entry_title(category, entry)
            bullets.extend(
                f"{title}: {item}" for item in self._entry_items(entry, max_items_per_category)
            )
        return bullets


# Public API of this module: the names exported by a star-import.
__all__ = [
    "MemoryIntelligence",
    "DEFAULT_GUIDELINE_PATH",
]