# -*- coding: utf-8 -*-
"""
Lightweight helpers for categorising optimisation problems and surfacing
category-level memory.
"""
from __future__ import annotations
import json
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Set, Tuple
# The default guideline file is resolved relative to this module: it lives in
# "memory_storage/" two levels above the directory that contains this file.
_PKG_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _PKG_DIR.parent.parent
DEFAULT_GUIDELINE_PATH = str(_PROJECT_ROOT / "memory_storage" / "category_guidelines.jsonl")


class MemoryIntelligence:
    """
    Heuristic problem classifier + guideline loader.

    The goal is to offer fast, rule-based categorisation that can run
    offline. If the heuristics fail, downstream agents (LLMs) can still
    append tags, but we always return the heuristic view for consistency.

    Attributes:
        guideline_path: Path the guidelines were loaded from.
        guidelines: Mapping of category name to the JSON object read for
            that category (empty when the file is missing).
    """

    # Keywords (single words or short phrases) that vote for each category.
    # A keyword may appear under several categories (e.g. "demand").
    CATEGORY_KEYWORDS: Dict[str, Set[str]] = {
        "workforce_planning": {
            "worker",
            "workforce",
            "training",
            "trainee",
            "overtime",
            "hire",
            "fire",
        },
        "inventory_planning": {
            "inventory",
            "backlog",
            "stock",
            "warehouse",
            "storage",
            "holding cost",
        },
        "production_planning": {
            "production",
            "factory",
            "capacity",
            "machine",
            "batch",
            "demand",
        },
        "scheduling": {
            "schedule",
            "sequencing",
            "precedence",
            "flow shop",
            "job shop",
            "makespan",
        },
        "transportation": {
            "transport",
            "shipping",
            "vehicle",
            "route",
            "delivery",
            "supply",
            "demand",
            "shipment",
        },
        "network_flow": {
            "flow",
            "arc",
            "network",
            "node",
            "capacity",
            "supply node",
            "demand node",
        },
        "assignment": {
            "assignment",
            "allocate",
            "task",
            "agent",
            "matching",
            "job",
        },
        "facility_location": {
            "facility",
            "location",
            "plant",
            "open",
            "siting",
            "distribution center",
        },
        "traveling_salesman": {
            "tsp",
            "tour",
            "city",
            "travel",
            "route visiting",
            "cyclic",
        },
        "portfolio_optimization": {
            "portfolio",
            "investment",
            "asset",
            "return",
            "risk",
            "variance",
        },
    }

    def __init__(self, guideline_path: str = DEFAULT_GUIDELINE_PATH):
        """
        Load category guidelines from ``guideline_path``.

        Args:
            guideline_path: Path to a JSON-lines file with one guideline
                object per line. A falsy or non-existent path yields an
                empty guideline set rather than an error.
        """
        self.guideline_path = guideline_path
        self.guidelines = self._load_guidelines(guideline_path)

    @staticmethod
    def _load_guidelines(path: str) -> Dict[str, Dict]:
        """
        Read a JSON-lines guideline file into a ``{category: payload}`` dict.

        Loading is best-effort: blank lines, lines that are not valid JSON,
        lines whose JSON value is not an object, and objects without a
        non-empty string ``"category"`` are skipped. When a category occurs
        more than once, the last occurrence wins.

        Args:
            path: File to read. Falsy values and paths that are not regular
                files produce an empty result.

        Returns:
            Mapping of category name to the full JSON object for that line.
        """
        guidelines: Dict[str, Dict] = {}
        # isfile (not exists) so that a directory path is treated as
        # "no guidelines" instead of failing inside open().
        if not path or not os.path.isfile(path):
            return guidelines
        with open(path, "r", encoding="utf-8") as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    payload = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON can still be a list/string/number; only objects
                # can carry a category.
                if not isinstance(payload, dict):
                    continue
                category = payload.get("category")
                if not isinstance(category, str) or not category:
                    continue
                guidelines[category] = payload
        return guidelines

    @staticmethod
    def _unique_categories(categories: Iterable[str]) -> List[str]:
        """Return ``categories`` as a list without duplicates, order preserved."""
        if categories is None:
            return []
        # A bare string would otherwise be iterated character by character.
        if isinstance(categories, str):
            categories = [categories]
        return list(dict.fromkeys(categories))

    @staticmethod
    def _bullets_for(entry: Dict, max_items: int) -> List:
        """Return at most ``max_items`` guideline items stored in ``entry``."""
        raw = entry.get("guidelines") or []
        if isinstance(raw, str):
            # A lone string is one guideline, not a sequence of characters.
            raw = [raw]
        elif not isinstance(raw, (list, tuple)):
            return []
        if max_items is not None:
            # A negative limit would slice from the end; treat it as zero.
            max_items = max(max_items, 0)
        return list(raw[:max_items])

    def classify(self, description: str, top_k: int = 3, minimum_score: int = 1) -> List[Tuple[str, int]]:
        """
        Return a ranked list of (category, score) using keyword heuristics.

        The score of a category is the total number of case-insensitive,
        whole-word occurrences of its keywords in ``description``. Matching
        is exact on word boundaries, so e.g. "workers" does not count as a
        hit for the keyword "worker".

        Args:
            description: Free-text problem statement. Empty input gives ``[]``.
            top_k: Maximum number of categories to return. ``0``, ``None`` or
                a negative value disables the limit.
            minimum_score: Categories scoring below this are dropped.
                Categories with no keyword hit are never returned.

        Returns:
            ``(category, score)`` pairs sorted by descending score; ties keep
            the declaration order of ``CATEGORY_KEYWORDS``.
        """
        if not description:
            return []
        text = description.lower()
        scores: Dict[str, int] = {}
        for category, keywords in self.CATEGORY_KEYWORDS.items():
            total = sum(
                len(re.findall(r"\b" + re.escape(keyword.lower()) + r"\b", text))
                for keyword in keywords
            )
            if total:
                scores[category] = total
        # sorted() is stable, so equal scores stay in declaration order.
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        filtered = [(cat, score) for cat, score in ranked if score >= minimum_score]
        if top_k is not None and top_k > 0:
            return filtered[:top_k]
        return filtered

    def categories_only(self, description: str, top_k: int = 3, minimum_score: int = 1) -> List[str]:
        """Return only the category names from :meth:`classify`, in rank order."""
        return [cat for cat, _ in self.classify(description, top_k=top_k, minimum_score=minimum_score)]

    def guideline_text(
        self,
        categories: Iterable[str],
        include_header: bool = True,
        max_items_per_category: int = 4,
    ) -> str:
        """
        Render guidelines for the provided categories as a markdown string.

        Each category with at least one guideline becomes a ``## Title``
        section followed by ``- item`` bullets. Categories that are unknown
        or have no guidelines are omitted.

        Args:
            categories: Category names (a single string is accepted too);
                duplicates are ignored, first occurrence order is kept.
            include_header: Prepend a ``# Category Playbook`` heading.
            max_items_per_category: Maximum bullets per category.

        Returns:
            The markdown text, or ``""`` when no category contributes any
            guideline (the header is not emitted on its own).
        """
        sections: List[str] = []
        for category in self._unique_categories(categories):
            entry = self.guidelines.get(category)
            if not entry:
                continue
            bullets = self._bullets_for(entry, max_items_per_category)
            if not bullets:
                continue
            title = entry.get("title") or category.replace("_", " ").title()
            sections.append(f"## {title}")
            sections.extend(f"- {bullet}" for bullet in bullets)
            sections.append("")
        if not sections:
            return ""
        lines: List[str] = []
        if include_header:
            lines.append("# Category Playbook")
            lines.append("")
        lines.extend(sections)
        return "\n".join(lines).strip()

    def guideline_bullets(self, categories: Iterable[str], max_items_per_category: int = 4) -> List[str]:
        """
        Return guidelines as flat ``"Title: item"`` strings.

        Args:
            categories: Category names (a single string is accepted too);
                duplicates are ignored, first occurrence order is kept.
            max_items_per_category: Maximum items taken from each category.

        Returns:
            One string per guideline item, grouped by category in input order.
        """
        bullets: List[str] = []
        for category in self._unique_categories(categories):
            entry = self.guidelines.get(category)
            if not entry:
                continue
            title = entry.get("title") or category.replace("_", " ").title()
            for item in self._bullets_for(entry, max_items_per_category):
                bullets.append(f"{title}: {item}")
        return bullets
# Names exported by a star-import of this module.
__all__ = [
    "MemoryIntelligence",
    "DEFAULT_GUIDELINE_PATH",
]