Spaces:

hirumunasinghe
/

strategy-sync-ai

Sleeping

Lahiru Munasinghe

Initial Space snapshot without binaries

a91323c about 2 months ago

4.9 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Any, Dict, List, Tuple
	import os

	from sentence_transformers import SentenceTransformer

	from .models import StrategicObjective, ActionTask
	from .text_utils import strategy_to_text, action_to_text
	from .vector_store import ActionVectorStore


	@dataclass
	class Thresholds:
	    """Similarity cut-offs used to turn a score into an alignment label.

	    A score is compared against ``strong`` first and ``medium`` second
	    (both inclusive lower bounds); anything below ``medium`` falls into
	    the lowest label.
	    """

	    # Minimum similarity for the top label.
	    strong: float = 0.75
	    # Minimum similarity for the middle label.
	    medium: float = 0.55


	class AlignmentEngine:
	    """Compute alignment between strategies and actions using embeddings + ChromaDB.

	    Actions are embedded and upserted into an ``ActionVectorStore``; each
	    strategy is then embedded and used to query that store for its closest
	    actions. Similarities are bucketed into "Strong" / "Medium" / "Weak"
	    using the configured thresholds.
	    """

	    # Number of best matches averaged into a strategy's score.
	    _TOP_N_FOR_AVERAGE = 3
	    # A strategy counts as "covered" once it has at least this many
	    # "Strong" matches.
	    _MIN_STRONG_FOR_COVERAGE = 2

	    def __init__(
	        self,
	        model_name: str | None = None,
	        persist_directory: str = "chroma_db",
	        thresholds: Thresholds | None = None,
	    ) -> None:
	        """Load the embedding model and open the vector store.

	        Args:
	            model_name: Embedding model to load. Falls back to the
	                ``EMBEDDING_MODEL`` environment variable and then to
	                ``sentence-transformers/all-MiniLM-L6-v2``.
	            persist_directory: Directory handed to ``ActionVectorStore``.
	            thresholds: Label cut-offs; defaults to ``Thresholds()``.
	        """
	        self.model_name = (
	            model_name
	            or os.environ.get("EMBEDDING_MODEL")
	            or "sentence-transformers/all-MiniLM-L6-v2"
	        )
	        self.embedder = SentenceTransformer(self.model_name)
	        self.store = ActionVectorStore(persist_directory=persist_directory)
	        self.thresholds = thresholds or Thresholds()

	    def _embed_texts(self, texts: List[str]) -> List[List[float]]:
	        """Embed ``texts`` and return one list of plain floats per text."""
	        if not texts:
	            # Nothing to encode; avoid a pointless model call.
	            return []
	        # Ensure plain Python floats (not numpy scalar types) for ChromaDB
	        arr = self.embedder.encode(texts, normalize_embeddings=True)
	        return [[float(x) for x in vec] for vec in arr]

	    def index_actions(
	        self, actions: List[ActionTask]
	    ) -> Tuple[List[str], List[str], List[List[float]]]:
	        """Embed ``actions`` and upsert them into the vector store.

	        Returns:
	            A ``(ids, documents, embeddings)`` tuple, index-aligned with
	            ``actions``. Empty input yields three empty lists and leaves
	            the store untouched.
	        """
	        if not actions:
	            # Skip the embed/upsert round-trip for empty input.
	            # NOTE(review): ChromaDB-backed stores typically reject empty
	            # upserts - confirm how ActionVectorStore behaves.
	            return [], [], []

	        action_ids = [a.id for a in actions]
	        action_docs = [action_to_text(a) for a in actions]
	        action_embs = self._embed_texts(action_docs)
	        metadatas = [
	            {
	                "title": a.title,
	                "owner": a.owner,
	                "start_date": str(a.start_date) if a.start_date else None,
	                "end_date": str(a.end_date) if a.end_date else None,
	            }
	            for a in actions
	        ]
	        self.store.upsert_actions(
	            ids=action_ids,
	            documents=action_docs,
	            embeddings=action_embs,
	            metadatas=metadatas,
	        )
	        return action_ids, action_docs, action_embs

	    def _label_for_score(self, score: float) -> str:
	        """Map a similarity score to "Strong", "Medium" or "Weak"."""
	        if score >= self.thresholds.strong:
	            return "Strong"
	        if score >= self.thresholds.medium:
	            return "Medium"
	        return "Weak"

	    def _summarize_matches(
	        self, matches: Any
	    ) -> Tuple[List[Dict[str, Any]], float, int]:
	        """Label the matches of one strategy and aggregate their scores.

	        Args:
	            matches: Iterable of mappings with ``"id"`` and
	                ``"similarity"`` keys and an optional ``"metadata"``
	                mapping (which may be ``None``).

	        Returns:
	            ``(match_details, avg, strong_count)`` where ``avg`` is the mean
	            of the best ``_TOP_N_FOR_AVERAGE`` similarities (``0`` when
	            there are no matches) and ``strong_count`` is the number of
	            matches labelled "Strong".
	        """
	        # Materialise once: the data is walked twice below, which would
	        # silently drop results if the store handed back a one-shot iterator.
	        matches = list(matches)

	        match_details: List[Dict[str, Any]] = []
	        for m in matches:
	            meta = m.get("metadata", {}) or {}
	            match_details.append(
	                {
	                    "action_id": m["id"],
	                    "title": meta.get("title"),
	                    "owner": meta.get("owner"),
	                    "start_date": meta.get("start_date"),
	                    "end_date": meta.get("end_date"),
	                    "similarity": m["similarity"],
	                    "alignment_label": self._label_for_score(m["similarity"]),
	                }
	            )

	        # Strategy-wise average: top 3 similarities
	        best = sorted((m["similarity"] for m in matches), reverse=True)
	        best = best[: self._TOP_N_FOR_AVERAGE]
	        avg = sum(best) / max(1, len(best))

	        strong_count = sum(
	            1 for d in match_details if d["alignment_label"] == "Strong"
	        )
	        return match_details, avg, strong_count

	    def align(
	        self,
	        strategies: List[StrategicObjective],
	        actions: List[ActionTask],
	        top_k: int = 5,
	    ) -> Dict[str, Any]:
	        """Score how well ``actions`` support each of ``strategies``.

	        Args:
	            strategies: Objectives to evaluate.
	            actions: Actions to index before querying.
	            top_k: Number of nearest actions requested per strategy.

	        Returns:
	            A report dict with the model name, the thresholds in use,
	            ``overall_score`` (mean of the per-strategy averages, as a
	            percentage), ``coverage_percent`` (share of strategies with at
	            least two "Strong" matches) and the per-strategy
	            ``strategy_results``.
	        """
	        # Ensure index
	        self.index_actions(actions)

	        strategy_results: List[Dict[str, Any]] = []
	        avg_scores: List[float] = []
	        strong_counts: List[int] = []

	        for s in strategies:
	            s_emb = self._embed_texts([strategy_to_text(s)])[0]
	            matches = self.store.query_by_embedding(s_emb, top_k=top_k)
	            match_details, avg, strong_count = self._summarize_matches(matches)

	            avg_scores.append(avg)
	            strong_counts.append(strong_count)
	            strategy_results.append(
	                {
	                    "strategy_id": s.id,
	                    "strategy_title": s.title,
	                    "avg_top3_similarity": avg,
	                    "alignment_label": self._label_for_score(avg),
	                    "top_matches": match_details,
	                }
	            )

	        # max(1, ...) keeps both ratios at 0 when there are no strategies.
	        overall = (sum(avg_scores) / max(1, len(avg_scores))) * 100.0
	        covered = sum(
	            1 for c in strong_counts if c >= self._MIN_STRONG_FOR_COVERAGE
	        )
	        coverage = (covered / max(1, len(strong_counts))) * 100.0

	        return {
	            "model": self.model_name,
	            "thresholds": {
	                "strong": self.thresholds.strong,
	                "medium": self.thresholds.medium,
	            },
	            "overall_score": round(overall, 2),
	            "coverage_percent": round(coverage, 2),
	            "strategy_results": strategy_results,
	        }