Spaces:

hirumunasinghe
/

strategy-sync-ai

Sleeping

File size: 4,896 Bytes

a91323c

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Tuple
import os

from sentence_transformers import SentenceTransformer

from .models import StrategicObjective, ActionTask
from .text_utils import strategy_to_text, action_to_text
from .vector_store import ActionVectorStore


@dataclass
class Thresholds:
    """Similarity cut-offs used to bucket a score into an alignment label.

    A score at or above ``strong`` is labelled "Strong"; otherwise a score at
    or above ``medium`` is labelled "Medium"; anything lower is "Weak".
    """

    # Inclusive lower bound for the "Strong" label.
    strong: float = 0.75
    # Inclusive lower bound for the "Medium" label.
    medium: float = 0.55


class AlignmentEngine:
    """Compute alignment between strategies and actions using embeddings + ChromaDB.

    Actions are embedded with a SentenceTransformer model and upserted into an
    ``ActionVectorStore``. Each strategy is then embedded and queried against
    that store, and every similarity is bucketed into "Strong" / "Medium" /
    "Weak" using the configured ``Thresholds``.
    """

    def __init__(
        self,
        model_name: str | None = None,
        persist_directory: str = "chroma_db",
        thresholds: Thresholds | None = None,
    ) -> None:
        """Load the embedding model and open the vector store.

        Args:
            model_name: Embedding model to load. When omitted, the
                ``EMBEDDING_MODEL`` environment variable is used, and failing
                that ``sentence-transformers/all-MiniLM-L6-v2``.
            persist_directory: Forwarded to ``ActionVectorStore``.
            thresholds: Label cut-offs; defaults to ``Thresholds()``.
        """
        self.model_name = (
            model_name
            or os.environ.get("EMBEDDING_MODEL")
            or "sentence-transformers/all-MiniLM-L6-v2"
        )
        self.embedder = SentenceTransformer(self.model_name)
        self.store = ActionVectorStore(persist_directory=persist_directory)
        self.thresholds = thresholds or Thresholds()

    def _embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` and return one list of plain floats per text.

        Embeddings are requested normalized from the model. An empty input
        yields an empty list without invoking the model.
        """
        if not texts:
            return []
        # Ensure plain Python floats (not numpy scalar types) for ChromaDB
        arr = self.embedder.encode(texts, normalize_embeddings=True)
        return [[float(x) for x in vec] for vec in arr]

    def index_actions(
        self, actions: List[ActionTask]
    ) -> Tuple[List[str], List[str], List[List[float]]]:
        """Embed ``actions`` and upsert them into the vector store.

        Args:
            actions: Actions to index; may be empty.

        Returns:
            A ``(ids, documents, embeddings)`` tuple, positionally aligned with
            ``actions``. All three lists are empty when ``actions`` is empty.
        """
        if not actions:
            # Nothing to embed or store. Returning early also avoids handing
            # empty lists to the vector store, which ChromaDB-backed upserts
            # may reject.
            return [], [], []

        action_ids = [a.id for a in actions]
        action_docs = [action_to_text(a) for a in actions]
        action_embs = self._embed_texts(action_docs)
        # NOTE(review): missing dates are stored as None; confirm the vector
        # store accepts None metadata values.
        metadatas = [
            {
                "title": a.title,
                "owner": a.owner,
                "start_date": str(a.start_date) if a.start_date else None,
                "end_date": str(a.end_date) if a.end_date else None,
            }
            for a in actions
        ]
        self.store.upsert_actions(
            ids=action_ids,
            documents=action_docs,
            embeddings=action_embs,
            metadatas=metadatas,
        )
        return action_ids, action_docs, action_embs

    def _label_for_score(self, score: float) -> str:
        """Map a similarity score to "Strong", "Medium" or "Weak"."""
        if score >= self.thresholds.strong:
            return "Strong"
        if score >= self.thresholds.medium:
            return "Medium"
        return "Weak"

    def _match_details(self, matches: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert raw store matches into flat, labelled result dicts."""
        details: List[Dict[str, Any]] = []
        for m in matches:
            # A match may carry no metadata (missing key or None value).
            meta = m.get("metadata", {}) or {}
            details.append(
                {
                    "action_id": m["id"],
                    "title": meta.get("title"),
                    "owner": meta.get("owner"),
                    "start_date": meta.get("start_date"),
                    "end_date": meta.get("end_date"),
                    "similarity": m["similarity"],
                    "alignment_label": self._label_for_score(m["similarity"]),
                }
            )
        return details

    def align(
        self,
        strategies: List[StrategicObjective],
        actions: List[ActionTask],
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """Score how well ``actions`` support each of ``strategies``.

        Args:
            strategies: Objectives to evaluate.
            actions: Actions to index before matching.
            top_k: Number of nearest actions requested per strategy.

        Returns:
            A dict with:
              * ``model``: name of the embedding model.
              * ``thresholds``: the ``strong`` / ``medium`` cut-offs in use.
              * ``overall_score``: mean of the per-strategy averages, times
                100, rounded to 2 decimals (0.0 when there are no strategies).
              * ``coverage_percent``: percentage of strategies with at least
                two "Strong" matches, rounded to 2 decimals.
              * ``strategy_results``: one entry per strategy holding its id,
                title, ``avg_top3_similarity``, label and ``top_matches``.
        """
        # Ensure index
        # NOTE(review): the store is created with a persist directory, so
        # entries upserted by earlier calls are presumably still queryable;
        # confirm whether matches should be restricted to ``actions``.
        self.index_actions(actions)

        # Embed every strategy in one batched model call instead of one
        # forward pass per strategy.
        strategy_embs = self._embed_texts([strategy_to_text(s) for s in strategies])

        strategy_results: List[Dict[str, Any]] = []
        avg_scores: List[float] = []
        strong_counts: List[int] = []

        for s, s_emb in zip(strategies, strategy_embs):
            matches = self.store.query_by_embedding(s_emb, top_k=top_k)
            match_details = self._match_details(matches)

            # Strategy-wise average: top 3 similarities
            top3 = sorted((m["similarity"] for m in matches), reverse=True)[:3]
            # max(1, ...) keeps a strategy with no matches at 0 instead of
            # dividing by zero.
            avg = sum(top3) / max(1, len(top3))
            avg_scores.append(avg)

            strong_counts.append(
                sum(1 for d in match_details if d["alignment_label"] == "Strong")
            )

            strategy_results.append(
                {
                    "strategy_id": s.id,
                    "strategy_title": s.title,
                    "avg_top3_similarity": avg,
                    "alignment_label": self._label_for_score(avg),
                    "top_matches": match_details,
                }
            )

        overall = (sum(avg_scores) / max(1, len(avg_scores))) * 100.0
        # A strategy counts as covered when at least two matches are "Strong".
        coverage = (
            sum(1 for c in strong_counts if c >= 2) / max(1, len(strategies))
        ) * 100.0

        return {
            "model": self.model_name,
            "thresholds": {
                "strong": self.thresholds.strong,
                "medium": self.thresholds.medium,
            },
            "overall_score": round(overall, 2),
            "coverage_percent": round(coverage, 2),
            "strategy_results": strategy_results,
        }