Spaces:
Running on Zero
Running on Zero
| from __future__ import annotations | |
| from hashlib import sha256 | |
| from typing import Any | |
| from hackathon_advisor.data import Project, ProjectIndex, SearchHit | |
| from hackathon_advisor.scoring import ScoreCard | |
| from hackathon_advisor.tools import Idea | |
def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
    """Assemble the map payload: a caption plus the list of dots to draw.

    Background projects become "inked" dots, the first five echoes of the score
    become echo dots, and the idea itself is appended last. Dots sharing the same
    kind and id are collapsed before the payload is returned.
    """
    all_echoes = list(score.echoes)
    nearest = all_echoes[:5]
    inked = _background_projects(index, all_echoes)

    # Background and echo projects are laid out together so they share one coordinate space.
    positions, idea_position = _layout(index, idea, inked + [hit.project for hit in nearest])

    dots = [
        *(_project_dot(project, "inked", positions) for project in inked),
        *(_echo_dot(hit, positions) for hit in nearest),
        _idea_dot(idea, score, idea_position),
    ]
    return {"caption": _caption(score, all_echoes), "dots": _dedupe_dots(dots)}
def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[Project]:
    """Return up to 16 of the index's top projects that are not already echoes.

    The index is asked for 22 candidates; any whose id matches an echo's project
    is skipped, and the first 16 survivors are kept in the order the index gave them.
    """
    excluded_ids = {hit.project.id for hit in echoes}
    remaining: list[Project] = []
    for candidate in index.top_projects(limit=22):
        if candidate.id in excluded_ids:
            continue
        remaining.append(candidate)
    return remaining[:16]
def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Build the dot dictionary for one project.

    The position comes from ``layout`` when the project id is present there;
    otherwise it is derived from the id via ``_point``. Every project dot starts
    with a radius of 3.
    """
    position = layout.get(project.id)
    if not position:
        # No laid-out coordinate for this id: use the hash-derived one instead.
        position = _point(project.id)
    x, y = position

    dot: dict[str, Any] = {
        "id": project.id,
        "kind": kind,
        "title": project.title,
        "url": project.url,
    }
    dot.update(x=x, y=y, radius=3)
    return dot
def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Build the dot for a search hit: a project dot of kind "echo" plus match details.

    Adds the hit's score (rounded to three decimals), its matched terms and page
    number, and replaces the radius with one that grows with the score, clamped to
    the range 5..9.
    """
    dot = _project_dot(hit.project, "echo", layout)
    dot.update(
        {
            "score": round(hit.score, 3),
            "matched_terms": list(hit.matched_terms),
            "page_number": hit.page_number,
        }
    )

    # Radius scales with the score but is kept between 5 and 9.
    radius = round(4 + hit.score * 14)
    if radius > 9:
        radius = 9
    if radius < 5:
        radius = 5
    dot["radius"] = radius
    return dot
def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
    """Build the dot for the idea itself, carrying the score's verdict and overall value.

    ``idea_xy`` must unpack into exactly two coordinates. The idea dot always has
    kind "idea" and radius 8.
    """
    horizontal, vertical = idea_xy
    identity = {"id": idea.id, "kind": "idea", "title": idea.title}
    placement = {"x": horizontal, "y": vertical, "radius": 8}
    assessment = {"verdict": score.verdict, "overall": score.overall}
    return {**identity, **placement, **assessment}
def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Place every dot by projecting the real embedding vectors into 2D with PCA.

    Projects that are semantically similar land near each other and the idea lands
    among its closest echoes. Falls back to a deterministic hash scatter only when the
    projection cannot run (missing vectors, too few points, or no embedder).

    Returns a mapping of project id to ``(x, y)`` and the idea's own ``(x, y)``.
    Projection failures never propagate: they are logged at DEBUG level and the hash
    scatter is returned instead.
    """
    import logging

    ids = [project.id for project in projects]

    def hash_scatter() -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
        # Built only when needed, so the success path does no hashing.
        scattered = {project_id: _point(project_id) for project_id in ids}
        return scattered, _point(f"idea:{idea.id}:{idea.title}")

    vectors = [index.vector_for(project_id) for project_id in ids]
    # The projection is only attempted with at least three points, all of which have a vector.
    if len(vectors) < 3 or any(vector is None for vector in vectors):
        return hash_scatter()

    try:
        idea_vector = index.embed_query(idea.pitch or idea.title)
        coords, idea_xy = _pca_project(vectors, idea_vector)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        # Still best-effort, but leave a trace so a degraded map can be diagnosed.
        logging.getLogger(__name__).debug(
            "PCA layout failed; falling back to hash scatter", exc_info=True
        )
        return hash_scatter()
    return dict(zip(ids, coords)), idea_xy
def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto a shared 2-D canvas.

    The project vectors are mean-centred and decomposed with SVD; the first two
    right-singular vectors act as the map axes. The idea vector is shifted by the same
    mean and projected onto the same axes, then every point is rescaled together by
    ``_scale_to_canvas`` and rounded to integers.

    Returns the per-project coordinates (in input order) and the idea's coordinate.
    """
    import numpy as np

    cloud = np.asarray(vectors, dtype=np.float64)
    idea_point = np.asarray(idea_vector, dtype=np.float64)
    centroid = cloud.mean(axis=0)
    offsets = cloud - centroid

    # Only the right-singular vectors are needed; their first two rows span the map.
    right_singular = np.linalg.svd(offsets, full_matrices=False)[2]
    axes = right_singular[:2].T

    # The idea rides along as the final row so it is scaled together with the projects.
    canvas = _scale_to_canvas(np.vstack([offsets @ axes, (idea_point - centroid) @ axes]))

    def to_cell(value: Any) -> int:
        return int(round(value))

    project_cells = [(to_cell(x), to_cell(y)) for x, y in canvas[:-1]]
    idea_row = canvas[-1]
    return project_cells, (to_cell(idea_row[0]), to_cell(idea_row[1]))
def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
    """Min-max rescale each column of a 2-D point array into ``[low, high]``.

    Args:
        points: Array-like of shape ``(n_points, n_axes)``. It is converted to a
            float64 array first, so integer arrays and nested sequences are
            accepted and never truncated. The input is not modified.
        low: Value the column minimum is mapped to.
        high: Value the column maximum is mapped to.

    Returns:
        A new float64 array with the same shape as ``points``. A column whose spread
        is below ``1e-9`` is set to the midpoint of ``low`` and ``high``. Input with
        no rows is returned as an empty copy.
    """
    import numpy as np

    values = np.asarray(points, dtype=np.float64)
    if values.shape[0] == 0:
        # Nothing to scale; min()/max() would raise on a zero-length axis.
        return values.copy()

    column_min = values.min(axis=0)
    span = values.max(axis=0) - column_min
    flat = span < 1e-9

    # Substitute a dummy span for flat columns to avoid dividing by ~0; they are overwritten below.
    safe_span = np.where(flat, 1.0, span)
    scaled = low + (values - column_min) / safe_span * (high - low)
    scaled[:, flat] = (low + high) / 2.0
    return scaled
def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
    """Return the one-sentence caption shown with the map.

    A verdict beginning with "UNWRITTEN" yields the "pale margin" sentence. Any other
    verdict names the first two echo projects, or "nearby pages" when there are none.
    """
    if not score.verdict.startswith("UNWRITTEN"):
        titles = [hit.project.title for hit in echoes[:2]]
        names = ", ".join(titles) or "nearby pages"
        return f"Your page is pressed close to {names}; the red dots are the strongest echoes."
    return "Your page sits in a pale margin beyond the nearest inked clusters."
def _point(key: str) -> tuple[int, int]:
    """Map ``key`` to a deterministic ``(x, y)`` with both coordinates in 8..91.

    The key is hashed with SHA-256. The first two digest bytes drive ``x`` and the
    next two drive ``y``, each read as a big-endian 16-bit integer.
    """
    raw = sha256(key.encode("utf-8")).digest()
    # Two bytes are the same 16-bit value as the first four hex characters of the digest.
    horizontal = int.from_bytes(raw[0:2], "big") % 84
    vertical = int.from_bytes(raw[2:4], "big") % 84
    return 8 + horizontal, 8 + vertical
def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop dots that repeat an earlier dot's (kind, id) pair, keeping the first.

    Kind and id are compared by their string forms. The surviving dots keep their
    original relative order and are the same dict objects that were passed in.
    """
    first_by_key: dict[tuple[str, str], dict[str, Any]] = {}
    for dot in dots:
        identity = (str(dot.get("kind")), str(dot.get("id")))
        # setdefault keeps the earliest dot; dicts preserve insertion order.
        first_by_key.setdefault(identity, dot)
    return list(first_by_key.values())