File size: 5,566 Bytes
36ed450
 
 
 
 
 
 
 
 
 
 
 
 
6d9770a
 
 
 
 
 
 
36ed450
 
 
 
 
 
 
 
 
 
 
 
6d9770a
 
36ed450
 
 
 
 
 
 
 
 
 
 
6d9770a
 
36ed450
 
902a11f
36ed450
 
 
 
6d9770a
 
36ed450
 
 
 
 
 
 
 
 
 
 
 
6d9770a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36ed450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from __future__ import annotations

from hashlib import sha256
from typing import Any

from hackathon_advisor.data import Project, ProjectIndex, SearchHit
from hackathon_advisor.scoring import ScoreCard
from hackathon_advisor.tools import Idea


def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
    """Assemble the map payload: a caption plus the list of dots to draw.

    Dots are emitted in a fixed order: the background projects (kind
    ``"inked"``), then the first five echoes from ``score.echoes``, then the
    idea itself. Project and idea positions come from one ``_layout`` call so
    they share a coordinate space. Dots repeating a (kind, id) pair are dropped
    before returning.

    Returns:
        A dict with the keys ``"caption"`` and ``"dots"``.
    """
    all_echoes = list(score.echoes)
    top_echoes = all_echoes[:5]
    backdrop = _background_projects(index, all_echoes)

    # Background and echo projects are laid out together, in that order.
    placed = backdrop + [hit.project for hit in top_echoes]
    layout, idea_xy = _layout(index, idea, placed)

    dots: list[dict[str, Any]] = []
    for project in backdrop:
        dots.append(_project_dot(project, "inked", layout))
    for hit in top_echoes:
        dots.append(_echo_dot(hit, layout))
    dots.append(_idea_dot(idea, score, idea_xy))

    # The caption sees the full echo list, not only the five that are drawn.
    caption = _caption(score, all_echoes)
    return {"caption": caption, "dots": _dedupe_dots(dots)}


def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[Project]:
    """Pick the background projects for the map.

    Asks the index for its top 22 projects, drops every project that also
    appears among ``echoes`` (matched by id), and keeps at most the first 16 of
    what remains, in the order the index returned them.
    """
    excluded = set()
    for hit in echoes:
        excluded.add(hit.project.id)

    kept: list[Project] = []
    for candidate in index.top_projects(limit=22):
        if candidate.id in excluded:
            continue
        kept.append(candidate)
    return kept[:16]


def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Describe one project as a map dot of the given ``kind``.

    The position is looked up in ``layout`` by project id. When the layout has
    no (or a falsy) entry for the project, the hash-based ``_point`` position
    is used instead. Every project dot starts with radius 3.
    """
    position = layout.get(project.id)
    if not position:
        position = _point(project.id)
    x, y = position

    dot: dict[str, Any] = {"id": project.id, "kind": kind}
    dot["title"] = project.title
    dot["url"] = project.url
    dot["x"] = x
    dot["y"] = y
    dot["radius"] = 3
    return dot


def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
    """Build the ``"echo"`` dot for a search hit.

    Starts from the plain project dot and adds the hit's score (rounded to
    three decimals), a copy of its matched terms, and its page number. The
    radius grows with the score (``4 + score * 14``, rounded) and is clamped to
    the range 5..9.
    """
    dot = _project_dot(hit.project, "echo", layout)
    dot.update(
        score=round(hit.score, 3),
        matched_terms=list(hit.matched_terms),
        page_number=hit.page_number,
    )
    grown = round(4 + hit.score * 14)
    dot["radius"] = max(5, min(9, grown))
    return dot


def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
    """Describe the idea itself as a map dot.

    The dot has kind ``"idea"``, a fixed radius of 8, sits at ``idea_xy``, and
    carries the score card's ``verdict`` and ``overall`` values.
    """
    col, row = idea_xy
    dot: dict[str, Any] = {"id": idea.id, "kind": "idea", "title": idea.title}
    dot["x"] = col
    dot["y"] = row
    dot["radius"] = 8
    dot["verdict"] = score.verdict
    dot["overall"] = score.overall
    return dot


def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Compute map positions for ``projects`` and for the idea.

    The preferred layout projects the projects' embedding vectors, and the
    embedded idea text (its pitch, or its title when the pitch is empty), into
    2D via ``_pca_project``, so similar items land near each other.

    A deterministic hash scatter built from ``_point`` is returned instead when
    there are fewer than three projects, when any project has no vector, or
    when embedding or projecting raises any exception.

    Returns:
        ``(positions_by_project_id, idea_position)``.
    """
    project_ids = [project.id for project in projects]
    embeddings = [index.vector_for(project.id) for project in projects]
    hash_scatter = (
        {pid: _point(pid) for pid in project_ids},
        _point(f"idea:{idea.id}:{idea.title}"),
    )

    projectable = len(embeddings) >= 3 and all(vec is not None for vec in embeddings)
    if not projectable:
        return hash_scatter

    try:
        query_vector = index.embed_query(idea.pitch or idea.title)
        placed, idea_xy = _pca_project(embeddings, query_vector)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        return hash_scatter
    return dict(zip(project_ids, placed)), idea_xy


def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto a shared 2D map.

    The project vectors are mean-centred and decomposed with an SVD; the first
    two right-singular vectors form the map axes. The idea vector is centred
    with the same mean and projected onto the same axes. All points, including
    the idea, are then rescaled together by ``_scale_to_canvas`` and rounded to
    integer coordinates.

    Returns:
        ``(project_coords, idea_coord)`` with ``project_coords`` in the same
        order as ``vectors``.
    """
    import numpy as np

    cloud = np.asarray(vectors, dtype=np.float64)
    idea_point = np.asarray(idea_vector, dtype=np.float64)

    centroid = cloud.mean(axis=0)
    offsets = cloud - centroid

    # Only the right-singular vectors (third SVD output) are needed; their
    # first two rows span the plane the map is drawn in.
    directions = np.linalg.svd(offsets, full_matrices=False)[2]
    axes = directions[:2].T

    # The idea goes last so it can be split off again after scaling.
    rows = np.vstack([offsets @ axes, (idea_point - centroid) @ axes])
    pixels = [(int(round(px)), int(round(py))) for px, py in _scale_to_canvas(rows)]
    return pixels[:-1], pixels[-1]


def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
    """Min-max rescale each column of a 2-D point array into ``[low, high]``.

    Every column is scaled independently so that its minimum maps to ``low``
    and its maximum to ``high``. A column whose values are all (nearly) equal
    has no usable range and is placed at the midpoint of ``low`` and ``high``.

    Args:
        points: 2-D array-like of shape ``(n_points, n_axes)``. Lists and
            integer arrays are accepted; they are converted to float64 so the
            scaled values are never truncated to the input's integer dtype.
        low: Lower bound of the target range.
        high: Upper bound of the target range.

    Returns:
        A new float64 ``numpy.ndarray`` with the same shape as ``points``. An
        input with no elements is returned as an empty array of that shape.
    """
    # Imported lazily, as elsewhere in this module, so that a missing numpy
    # only fails the code path that actually needs it.
    import numpy as np

    values = np.asarray(points, dtype=np.float64)
    if values.size == 0:
        # min()/max() are undefined on an empty axis; there is nothing to scale.
        return values.copy()

    lo = values.min(axis=0)
    span = values.max(axis=0) - lo
    flat = span < 1e-9

    # Use a dummy divisor for flat columns to avoid 0/0; they are overwritten
    # with the midpoint right after.
    divisor = np.where(flat, 1.0, span)
    scaled = low + (values - lo) / divisor * (high - low)
    scaled[:, flat] = (low + high) / 2.0
    return scaled


def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
    """Write the one-line caption shown with the map.

    A verdict starting with ``"UNWRITTEN"`` gets the "pale margin" sentence.
    Any other verdict names the first two echoes' project titles, or says
    "nearby pages" when that list of titles joins to an empty string.
    """
    if not score.verdict.startswith("UNWRITTEN"):
        nearest_titles = [hit.project.title for hit in echoes[:2]]
        names = ", ".join(nearest_titles) or "nearby pages"
        return f"Your page is pressed close to {names}; the red dots are the strongest echoes."
    return "Your page sits in a pale margin beyond the nearest inked clusters."


def _point(key: str) -> tuple[int, int]:
    """Map a string to a stable position derived from its SHA-256 digest.

    The same key always yields the same ``(x, y)`` pair, and each coordinate
    lies in the range 8..91. ``x`` comes from the first two digest bytes and
    ``y`` from the next two.
    """
    raw = sha256(key.encode("utf-8")).digest()
    # Two big-endian bytes carry the same value as four leading hex characters.
    x_seed = int.from_bytes(raw[0:2], "big")
    y_seed = int.from_bytes(raw[2:4], "big")
    return 8 + x_seed % 84, 8 + y_seed % 84


def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop dots that repeat an earlier dot's (kind, id) pair.

    Both values are compared as strings, so ids ``1`` and ``"1"`` count as the
    same. The first dot seen for each pair is kept, and the original order of
    the surviving dots is preserved.
    """
    first_by_key: dict[tuple[str, str], dict[str, Any]] = {}
    for dot in dots:
        identity = (str(dot.get("kind")), str(dot.get("id")))
        # setdefault keeps the earliest dot; dicts preserve insertion order.
        first_by_key.setdefault(identity, dot)
    return list(first_by_key.values())