JacobLinCool's picture
feat: stream advisor progress
6d9770a verified
from __future__ import annotations
from hashlib import sha256
from typing import Any
from hackathon_advisor.data import Project, ProjectIndex, SearchHit
from hackathon_advisor.scoring import ScoreCard
from hackathon_advisor.tools import Idea
def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
    """Assemble the map payload for an idea: a caption plus the dots to draw.

    Dots are produced in a fixed order -- background projects (kind ``"inked"``), then
    the first five entries of ``score.echoes``, then the idea itself -- and are passed
    through ``_dedupe_dots`` before being returned under the ``"dots"`` key.
    """
    all_echoes = list(score.echoes)
    backdrop = _background_projects(index, all_echoes)
    # Only the leading five echoes are drawn; the caption still receives the full list.
    shown_echoes = all_echoes[:5]
    placed_projects = backdrop + [hit.project for hit in shown_echoes]
    positions, idea_position = _layout(index, idea, placed_projects)

    dots = [
        *(_project_dot(project, "inked", positions) for project in backdrop),
        *(_echo_dot(hit, positions) for hit in shown_echoes),
        _idea_dot(idea, score, idea_position),
    ]
    caption = _caption(score, all_echoes)
    return {"caption": caption, "dots": _dedupe_dots(dots)}
def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[Project]:
    """Return up to 16 of the index's top projects that are not already among the echoes.

    The index is asked for 22 candidates via ``top_projects(limit=22)``; any candidate
    whose id matches an echo's project id is dropped, and the first 16 survivors are kept
    in the order the index returned them.
    """
    excluded_ids = {hit.project.id for hit in echoes}
    survivors: list[Project] = []
    for candidate in index.top_projects(limit=22):
        if candidate.id in excluded_ids:
            continue
        survivors.append(candidate)
    return survivors[:16]
def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Build the dot dictionary for one project.

    The position comes from ``layout`` when it has an entry for the project's id;
    otherwise the hash-based ``_point`` position for that id is used. Every project dot
    starts with a radius of 3.
    """
    position = layout.get(project.id)
    if not position:
        # No usable layout entry for this project: fall back to the hash scatter.
        position = _point(project.id)
    col, row = position
    return dict(
        id=project.id,
        kind=kind,
        title=project.title,
        url=project.url,
        x=col,
        y=row,
        radius=3,
    )
def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Build the dot for a search hit: a project dot of kind ``"echo"`` plus hit details.

    Adds the hit's score (rounded to three decimals), its matched terms and page number,
    and replaces the radius with one that grows with the score but stays within 5..9.
    """
    dot = _project_dot(hit.project, "echo", layout)
    dot.update(
        score=round(hit.score, 3),
        matched_terms=list(hit.matched_terms),
        page_number=hit.page_number,
    )
    # Clamp the score-derived radius into the 5..9 range.
    raw_radius = round(4 + hit.score * 14)
    capped = min(9, raw_radius)
    dot["radius"] = max(5, capped)
    return dot
def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
    """Build the dot for the idea itself at ``idea_xy``.

    The dot has kind ``"idea"``, a fixed radius of 8, and carries the score's verdict
    and overall value alongside the idea's id and title.
    """
    col, row = idea_xy
    dot: dict[str, Any] = {"id": idea.id, "kind": "idea", "title": idea.title}
    dot["x"] = col
    dot["y"] = row
    dot["radius"] = 8
    dot["verdict"] = score.verdict
    dot["overall"] = score.overall
    return dot
def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Compute a position for every project and for the idea.

    The preferred layout comes from ``_pca_project``, fed with the vectors the index
    returns for each project and the index's embedding of the idea's pitch (or its title
    when the pitch is empty). A deterministic hash scatter built from ``_point`` is
    returned instead when there are fewer than three vectors, when any vector is
    ``None``, or when embedding or projection raises.

    Returns a pair: a mapping from project id to ``(x, y)``, and the idea's ``(x, y)``.
    """
    project_ids = [project.id for project in projects]
    embeddings = [index.vector_for(project.id) for project in projects]

    hash_positions = {project_id: _point(project_id) for project_id in project_ids}
    hash_idea_position = _point(f"idea:{idea.id}:{idea.title}")
    hash_scatter = (hash_positions, hash_idea_position)

    projectable = len(embeddings) >= 3 and all(vector is not None for vector in embeddings)
    if not projectable:
        return hash_scatter

    try:
        query_vector = index.embed_query(idea.pitch or idea.title)
        positions, idea_position = _pca_project(embeddings, query_vector)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        return hash_scatter
    return dict(zip(project_ids, positions)), idea_position
def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto a shared 2D plane.

    The plane is spanned by the first two right-singular vectors of the mean-centred
    project matrix. The idea is centred with the same mean and projected onto the same
    basis, then all points are rescaled together by ``_scale_to_canvas`` and rounded to
    integers.

    Returns the projects' coordinates (in input order) and the idea's coordinate.
    """
    import numpy as np

    cloud = np.asarray(vectors, dtype=np.float64)
    idea_point = np.asarray(idea_vector, dtype=np.float64)
    centroid = cloud.mean(axis=0)
    offsets = cloud - centroid

    # Only the project cloud defines the axes; the idea is merely expressed in them, so
    # its position depends on where it sits relative to the projects.
    right_singular = np.linalg.svd(offsets, full_matrices=False)[2]
    plane = right_singular[:2].T

    # The idea goes last so it shares the same canvas scaling as the projects.
    on_plane = np.vstack([offsets @ plane, (idea_point - centroid) @ plane])
    canvas = _scale_to_canvas(on_plane)

    rounded = [(int(round(px)), int(round(py))) for px, py in canvas]
    return rounded[:-1], rounded[-1]
def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
    """Rescale each column of a 2D point array independently into ``[low, high]``.

    Args:
        points: Two-dimensional array-like of shape ``(rows, axes)``.
        low: Value the smallest entry of each column is mapped to.
        high: Value the largest entry of each column is mapped to.

    Returns:
        A new float64 array of the same shape. A column whose spread is below ``1e-9``
        has no usable range and is placed at the midpoint of ``low`` and ``high``.
        An input with zero rows yields an empty array of the same shape.
    """
    import numpy as np

    # Work in float64 regardless of the input dtype: allocating the output with the
    # input's dtype would silently truncate the scaled values for integer arrays.
    values = np.asarray(points, dtype=np.float64)
    if values.shape[0] == 0:
        # min()/max() over zero rows is undefined; there is nothing to scale.
        return values.copy()

    column_lows = values.min(axis=0)
    spans = values.max(axis=0) - column_lows
    degenerate = spans < 1e-9
    # Divide degenerate columns by 1 to avoid a zero division; they are overwritten below.
    safe_spans = np.where(degenerate, 1.0, spans)

    scaled = low + (values - column_lows) / safe_spans * (high - low)
    scaled[:, degenerate] = (low + high) / 2.0
    return scaled
def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
    """Return the one-sentence caption shown with the map.

    A verdict beginning with ``"UNWRITTEN"`` yields the "pale margin" sentence. Any other
    verdict names the titles of the first two echoes, or "nearby pages" when there are
    no echoes.
    """
    is_unwritten = score.verdict.startswith("UNWRITTEN")
    if not is_unwritten:
        nearest_titles = [hit.project.title for hit in echoes[:2]]
        names = ", ".join(nearest_titles)
        if not names:
            names = "nearby pages"
        return f"Your page is pressed close to {names}; the red dots are the strongest echoes."
    return "Your page sits in a pale margin beyond the nearest inked clusters."
def _point(key: str) -> tuple[int, int]:
    """Map a string key to a deterministic ``(x, y)`` position derived from its SHA-256.

    Each coordinate is taken from a separate four-hex-digit slice of the digest and
    folded into the range 8..91, so the same key always yields the same position.
    """
    hex_digest = sha256(key.encode("utf-8")).hexdigest()
    # Hex digits 0-3 drive x and digits 4-7 drive y.
    x, y = (8 + int(hex_digest[start : start + 4], 16) % 84 for start in (0, 4))
    return x, y
def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop dots that repeat an earlier dot's ``(kind, id)`` pair.

    Both values are compared as strings. The first dot seen for each pair is kept, and
    the survivors keep their original relative order.
    """
    first_by_key: dict[tuple[str, str], dict[str, Any]] = {}
    for dot in dots:
        identity = (str(dot.get("kind")), str(dot.get("id")))
        # setdefault leaves an existing entry untouched, so the earliest dot wins.
        first_by_key.setdefault(identity, dot)
    return list(first_by_key.values())