strategy-sync-ai / src /alignment.py
Lahiru Munasinghe
Initial Space snapshot without binaries
a91323c
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple
import os
from sentence_transformers import SentenceTransformer
from .models import StrategicObjective, ActionTask
from .text_utils import strategy_to_text, action_to_text
from .vector_store import ActionVectorStore
@dataclass
class Thresholds:
    """Similarity cut-offs used to turn a numeric score into a label.

    A score greater than or equal to ``strong`` is labelled "Strong";
    otherwise a score greater than or equal to ``medium`` is labelled
    "Medium"; anything lower is labelled "Weak".
    """

    # Minimum score that earns the "Strong" label.
    strong: float = 0.75
    # Minimum score that earns the "Medium" label.
    medium: float = 0.55
class AlignmentEngine:
    """Compute alignment between strategies and actions using embeddings + ChromaDB.

    Actions are embedded and written to an ``ActionVectorStore``; each
    strategy is then embedded and used to query that store for its most
    similar actions. Similarities are converted to "Strong" / "Medium" /
    "Weak" labels using ``Thresholds``.
    """

    # How many of the best matches are averaged into a strategy's score.
    _TOP_N_FOR_AVERAGE = 3
    # How many "Strong" matches a strategy needs to count as covered.
    _MIN_STRONG_FOR_COVERAGE = 2

    def __init__(
        self,
        model_name: str | None = None,
        persist_directory: str = "chroma_db",
        thresholds: Thresholds | None = None,
    ) -> None:
        """Load the embedding model and open the action vector store.

        Args:
            model_name: Embedding model identifier. When falsy, the
                ``EMBEDDING_MODEL`` environment variable is used, then
                ``sentence-transformers/all-MiniLM-L6-v2``.
            persist_directory: Directory handed to ``ActionVectorStore``.
            thresholds: Label cut-offs; defaults to ``Thresholds()``.
        """
        self.model_name = (
            model_name
            or os.environ.get("EMBEDDING_MODEL")
            or "sentence-transformers/all-MiniLM-L6-v2"
        )
        self.embedder = SentenceTransformer(self.model_name)
        self.store = ActionVectorStore(persist_directory=persist_directory)
        self.thresholds = thresholds or Thresholds()

    def _embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` into normalized vectors made of plain Python floats.

        Returns an empty list without calling the model when ``texts`` is
        empty.
        """
        if not texts:
            return []
        # Ensure plain Python floats (not numpy scalar types) for ChromaDB
        vectors = self.embedder.encode(texts, normalize_embeddings=True)
        return [[float(x) for x in vec] for vec in vectors]

    def index_actions(
        self, actions: List[ActionTask]
    ) -> Tuple[List[str], List[str], List[List[float]]]:
        """Embed ``actions`` and upsert them into the vector store.

        Args:
            actions: Actions to index. An empty list is a no-op.

        Returns:
            A ``(ids, documents, embeddings)`` tuple, with one entry per
            action in input order.
        """
        if not actions:
            # Nothing to index: skip the encoder and the store call.
            # NOTE(review): ChromaDB rejects empty upserts; confirm whether
            # ActionVectorStore already guards against that.
            return [], [], []

        action_ids = [a.id for a in actions]
        action_docs = [action_to_text(a) for a in actions]
        action_embs = self._embed_texts(action_docs)
        metadatas = [
            {
                "title": a.title,
                "owner": a.owner,
                "start_date": str(a.start_date) if a.start_date else None,
                "end_date": str(a.end_date) if a.end_date else None,
            }
            for a in actions
        ]
        self.store.upsert_actions(
            ids=action_ids,
            documents=action_docs,
            embeddings=action_embs,
            metadatas=metadatas,
        )
        return action_ids, action_docs, action_embs

    def _label_for_score(self, score: float) -> str:
        """Map a similarity score to "Strong", "Medium" or "Weak"."""
        if score >= self.thresholds.strong:
            return "Strong"
        if score >= self.thresholds.medium:
            return "Medium"
        return "Weak"

    def _match_detail(self, match: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one store match into a labelled result row."""
        similarity = match["similarity"]
        # The store may return a missing or None metadata entry.
        meta = match.get("metadata", {}) or {}
        return {
            "action_id": match["id"],
            "title": meta.get("title"),
            "owner": meta.get("owner"),
            "start_date": meta.get("start_date"),
            "end_date": meta.get("end_date"),
            "similarity": similarity,
            "alignment_label": self._label_for_score(similarity),
        }

    def _top_n_average(self, similarities: List[float]) -> float:
        """Average the highest similarities; 0 when there are none."""
        best = sorted(similarities, reverse=True)[: self._TOP_N_FOR_AVERAGE]
        return sum(best) / max(1, len(best))

    def align(
        self,
        strategies: List[StrategicObjective],
        actions: List[ActionTask],
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """Score how well ``actions`` support each of ``strategies``.

        Args:
            strategies: Strategic objectives to evaluate.
            actions: Actions to index before querying.
            top_k: Number of nearest actions requested per strategy.

        Returns:
            A dict with the model name, the thresholds in use,
            ``overall_score`` (mean of the per-strategy averages, times
            100), ``coverage_percent`` (share of strategies with at least
            two "Strong" matches, times 100) and ``strategy_results`` (one
            entry per strategy with its labelled top matches). Both
            percentages are rounded to two decimals and are 0.0 when
            ``strategies`` is empty.
        """
        # Ensure index
        # NOTE(review): the store takes a persist directory, so queries may
        # also return actions indexed by earlier calls; confirm intended.
        self.index_actions(actions)

        # One batched model call instead of one call per strategy.
        strategy_embs = self._embed_texts(
            [strategy_to_text(s) for s in strategies]
        )

        strategy_results: List[Dict[str, Any]] = []
        avg_scores: List[float] = []
        covered = 0

        for s, s_emb in zip(strategies, strategy_embs):
            matches = self.store.query_by_embedding(s_emb, top_k=top_k)
            match_details = [self._match_detail(m) for m in matches]

            # Strategy-wise average: top 3 similarities
            avg = self._top_n_average([m["similarity"] for m in matches])
            avg_scores.append(avg)

            strong_count = sum(
                1 for d in match_details if d["alignment_label"] == "Strong"
            )
            if strong_count >= self._MIN_STRONG_FOR_COVERAGE:
                covered += 1

            strategy_results.append(
                {
                    "strategy_id": s.id,
                    "strategy_title": s.title,
                    "avg_top3_similarity": avg,
                    "alignment_label": self._label_for_score(avg),
                    "top_matches": match_details,
                }
            )

        # max(1, ...) keeps both ratios defined when there are no strategies.
        overall = (sum(avg_scores) / max(1, len(avg_scores))) * 100.0
        coverage = (covered / max(1, len(strategies))) * 100.0

        return {
            "model": self.model_name,
            "thresholds": {
                "strong": self.thresholds.strong,
                "medium": self.thresholds.medium,
            },
            "overall_score": round(overall, 2),
            "coverage_percent": round(coverage, 2),
            "strategy_results": strategy_results,
        }