Spaces:
Running
Phase 6.5 Day 2: query_id linkage (B1)
Browse files
recommendations.py:
- Generate query_id (UUID) once per GET /api/recommendations request
- Thread query_id through all 4 tiers: trending, Tier 1 (multi-interest),
Tier 2 (EWMA), Tier 3 (Qdrant recommend)
- Pass query_id kwarg to _multi_interest_recommend()
- Embed query_id + position (0-based index) in every paper dict
search.py:
- Generate query_id once per GET /search request
- Add query_id + position to each paper in results
action_buttons.html:
- Add query_id + position to ALL three hx-vals JSON blobs (save, remove, not-interested)
- Position was previously missing from not-interested buttons
This enables Phase 7 per-feed CTR analysis via:
SELECT query_id, COUNT(DISTINCT paper_id) AS shown,
COUNT(*) FILTER (WHERE event_type='save') AS saved
FROM interactions GROUP BY query_id
Tests: 203 passed, 0 failures
|
@@ -59,6 +59,9 @@ async def get_recommendations(
|
|
| 59 |
user_id = user_id or str(uuid.uuid4())
|
| 60 |
state = await us.ensure_loaded(user_id)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
def _empty_resp():
|
| 63 |
r = templates.TemplateResponse(
|
| 64 |
request,
|
|
@@ -79,12 +82,14 @@ async def get_recommendations(
|
|
| 79 |
)
|
| 80 |
if trending:
|
| 81 |
papers = []
|
| 82 |
-
for paper in trending:
|
| 83 |
paper["saved"] = False
|
| 84 |
paper["dismissed"] = False
|
| 85 |
paper["ranker_version"] = _RANKER_VERSION
|
| 86 |
paper["candidate_source"] = "trending_category_fallback"
|
| 87 |
paper["cluster_id"] = ""
|
|
|
|
|
|
|
| 88 |
papers.append(paper)
|
| 89 |
|
| 90 |
r = templates.TemplateResponse(
|
|
@@ -106,7 +111,7 @@ async def get_recommendations(
|
|
| 106 |
|
| 107 |
# ── Tier 1: Multi-interest clustering + quota fusion (≥5 saves) ──────
|
| 108 |
rec_arxiv_ids, paper_tags = await _multi_interest_recommend(
|
| 109 |
-
user_id, state, seen, REC_LIMIT,
|
| 110 |
)
|
| 111 |
|
| 112 |
# ── Tier 2: EWMA single-vector search (≥3 saves) ──────────────────────
|
|
@@ -117,6 +122,7 @@ async def get_recommendations(
|
|
| 117 |
"ranker_version": _RANKER_VERSION,
|
| 118 |
"candidate_source": "ewma_longterm",
|
| 119 |
"cluster_id": "",
|
|
|
|
| 120 |
}
|
| 121 |
|
| 122 |
# ── Tier 3: Qdrant Recommend API (≥1 save fallback) ───────────────────
|
|
@@ -132,6 +138,7 @@ async def get_recommendations(
|
|
| 132 |
"ranker_version": _RANKER_VERSION,
|
| 133 |
"candidate_source": "qdrant_recommend",
|
| 134 |
"cluster_id": "",
|
|
|
|
| 135 |
}
|
| 136 |
|
| 137 |
if not rec_arxiv_ids:
|
|
@@ -151,7 +158,7 @@ async def get_recommendations(
|
|
| 151 |
await db.cache_turso_metadata_batch(list(meta.values()))
|
| 152 |
|
| 153 |
papers = []
|
| 154 |
-
for aid in rec_arxiv_ids:
|
| 155 |
if aid not in meta:
|
| 156 |
continue
|
| 157 |
tags = paper_tags.get(aid, {})
|
|
@@ -163,6 +170,9 @@ async def get_recommendations(
|
|
| 163 |
"ranker_version": tags.get("ranker_version", _RANKER_VERSION),
|
| 164 |
"candidate_source": tags.get("candidate_source", ""),
|
| 165 |
"cluster_id": tags.get("cluster_id", ""),
|
|
|
|
|
|
|
|
|
|
| 166 |
})
|
| 167 |
|
| 168 |
resp = templates.TemplateResponse(
|
|
@@ -177,7 +187,8 @@ async def get_recommendations(
|
|
| 177 |
# ── Tier 1: Multi-interest clustering + quota fusion ─────────────────────────
|
| 178 |
|
| 179 |
async def _multi_interest_recommend(
|
| 180 |
-
user_id: str, state, seen: set[str], limit: int
|
|
|
|
| 181 |
) -> tuple[list[str], dict[str, dict]]:
|
| 182 |
"""
|
| 183 |
Full recommendation pipeline (Phase 2b + Phase 4 corrections):
|
|
@@ -458,6 +469,7 @@ async def _multi_interest_recommend(
|
|
| 458 |
"ranker_version": _RANKER_VERSION,
|
| 459 |
"candidate_source": source,
|
| 460 |
"cluster_id": str(cluster_idx) if cluster_idx is not None and cluster_idx >= 0 else "",
|
|
|
|
| 461 |
}
|
| 462 |
|
| 463 |
return final, paper_tags
|
|
|
|
| 59 |
user_id = user_id or str(uuid.uuid4())
|
| 60 |
state = await us.ensure_loaded(user_id)
|
| 61 |
|
| 62 |
+
# Phase 6.5 B1: one query_id per feed request for per-feed CTR analysis
|
| 63 |
+
query_id = str(uuid.uuid4())
|
| 64 |
+
|
| 65 |
def _empty_resp():
|
| 66 |
r = templates.TemplateResponse(
|
| 67 |
request,
|
|
|
|
| 82 |
)
|
| 83 |
if trending:
|
| 84 |
papers = []
|
| 85 |
+
for idx, paper in enumerate(trending):
|
| 86 |
paper["saved"] = False
|
| 87 |
paper["dismissed"] = False
|
| 88 |
paper["ranker_version"] = _RANKER_VERSION
|
| 89 |
paper["candidate_source"] = "trending_category_fallback"
|
| 90 |
paper["cluster_id"] = ""
|
| 91 |
+
paper["query_id"] = query_id
|
| 92 |
+
paper["position"] = idx
|
| 93 |
papers.append(paper)
|
| 94 |
|
| 95 |
r = templates.TemplateResponse(
|
|
|
|
| 111 |
|
| 112 |
# ── Tier 1: Multi-interest clustering + quota fusion (≥5 saves) ──────
|
| 113 |
rec_arxiv_ids, paper_tags = await _multi_interest_recommend(
|
| 114 |
+
user_id, state, seen, REC_LIMIT, query_id=query_id,
|
| 115 |
)
|
| 116 |
|
| 117 |
# ── Tier 2: EWMA single-vector search (≥3 saves) ──────────────────────
|
|
|
|
| 122 |
"ranker_version": _RANKER_VERSION,
|
| 123 |
"candidate_source": "ewma_longterm",
|
| 124 |
"cluster_id": "",
|
| 125 |
+
"query_id": query_id,
|
| 126 |
}
|
| 127 |
|
| 128 |
# ── Tier 3: Qdrant Recommend API (≥1 save fallback) ───────────────────
|
|
|
|
| 138 |
"ranker_version": _RANKER_VERSION,
|
| 139 |
"candidate_source": "qdrant_recommend",
|
| 140 |
"cluster_id": "",
|
| 141 |
+
"query_id": query_id,
|
| 142 |
}
|
| 143 |
|
| 144 |
if not rec_arxiv_ids:
|
|
|
|
| 158 |
await db.cache_turso_metadata_batch(list(meta.values()))
|
| 159 |
|
| 160 |
papers = []
|
| 161 |
+
for idx, aid in enumerate(rec_arxiv_ids):
|
| 162 |
if aid not in meta:
|
| 163 |
continue
|
| 164 |
tags = paper_tags.get(aid, {})
|
|
|
|
| 170 |
"ranker_version": tags.get("ranker_version", _RANKER_VERSION),
|
| 171 |
"candidate_source": tags.get("candidate_source", ""),
|
| 172 |
"cluster_id": tags.get("cluster_id", ""),
|
| 173 |
+
# Phase 6.5 B1: query_id + position for per-feed CTR
|
| 174 |
+
"query_id": tags.get("query_id", query_id),
|
| 175 |
+
"position": idx,
|
| 176 |
})
|
| 177 |
|
| 178 |
resp = templates.TemplateResponse(
|
|
|
|
| 187 |
# ── Tier 1: Multi-interest clustering + quota fusion ─────────────────────────
|
| 188 |
|
| 189 |
async def _multi_interest_recommend(
|
| 190 |
+
user_id: str, state, seen: set[str], limit: int,
|
| 191 |
+
*, query_id: str = "",
|
| 192 |
) -> tuple[list[str], dict[str, dict]]:
|
| 193 |
"""
|
| 194 |
Full recommendation pipeline (Phase 2b + Phase 4 corrections):
|
|
|
|
| 469 |
"ranker_version": _RANKER_VERSION,
|
| 470 |
"candidate_source": source,
|
| 471 |
"cluster_id": str(cluster_idx) if cluster_idx is not None and cluster_idx >= 0 else "",
|
| 472 |
+
"query_id": query_id,
|
| 473 |
}
|
| 474 |
|
| 475 |
return final, paper_tags
|
|
@@ -68,13 +68,17 @@ async def search(
|
|
| 68 |
papers = []
|
| 69 |
|
| 70 |
user_id = user_id or str(uuid.uuid4())
|
|
|
|
|
|
|
| 71 |
state = await us.ensure_loaded(user_id)
|
| 72 |
saved_ids = set(state.positive_list)
|
| 73 |
dismissed_ids = set(state.negative_list)
|
| 74 |
|
| 75 |
-
for p in papers:
|
| 76 |
p["saved"] = p["arxiv_id"] in saved_ids
|
| 77 |
p["dismissed"] = p["arxiv_id"] in dismissed_ids
|
|
|
|
|
|
|
| 78 |
|
| 79 |
if request.headers.get("HX-Request"):
|
| 80 |
resp = templates.TemplateResponse(
|
|
|
|
| 68 |
papers = []
|
| 69 |
|
| 70 |
user_id = user_id or str(uuid.uuid4())
|
| 71 |
+
# Phase 6.5 B1: one query_id per search request for per-feed CTR
|
| 72 |
+
query_id = str(uuid.uuid4())
|
| 73 |
state = await us.ensure_loaded(user_id)
|
| 74 |
saved_ids = set(state.positive_list)
|
| 75 |
dismissed_ids = set(state.negative_list)
|
| 76 |
|
| 77 |
+
for idx, p in enumerate(papers):
|
| 78 |
p["saved"] = p["arxiv_id"] in saved_ids
|
| 79 |
p["dismissed"] = p["arxiv_id"] in dismissed_ids
|
| 80 |
+
p["query_id"] = query_id
|
| 81 |
+
p["position"] = idx
|
| 82 |
|
| 83 |
if request.headers.get("HX-Request"):
|
| 84 |
resp = templates.TemplateResponse(
|
|
@@ -3,6 +3,7 @@
|
|
| 3 |
Expects: paper_id (or paper.arxiv_id), saved (bool), dismissed (bool)
|
| 4 |
Optional: source ("search" | "recommendation" | "saved"), position (int)
|
| 5 |
Phase 4.5: ranker_version, candidate_source, cluster_id (set by recommendations.py)
|
|
|
|
| 6 |
These are returned directly by the /api/papers/{id}/save endpoint
|
| 7 |
so they also work as a standalone partial.
|
| 8 |
#}
|
|
@@ -12,6 +13,8 @@
|
|
| 12 |
{% set _ranker_version = paper.ranker_version | default("") if paper is defined else "" %}
|
| 13 |
{% set _candidate_source = paper.candidate_source | default("") if paper is defined else "" %}
|
| 14 |
{% set _cluster_id = paper.cluster_id | default("") if paper is defined else "" %}
|
|
|
|
|
|
|
| 15 |
|
| 16 |
{% if is_saved %}
|
| 17 |
<!-- Already saved — show saved state, allow unsave via not-interested -->
|
|
@@ -23,7 +26,7 @@
|
|
| 23 |
hx-post="/api/papers/{{ pid }}/not-interested"
|
| 24 |
hx-target="#paper-{{ pid }}"
|
| 25 |
hx-swap="outerHTML swap:200ms"
|
| 26 |
-
hx-vals='{"source": "{{ _source }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}"}'>
|
| 27 |
Remove
|
| 28 |
</button>
|
| 29 |
</div>
|
|
@@ -34,7 +37,7 @@
|
|
| 34 |
hx-post="/api/papers/{{ pid }}/save"
|
| 35 |
hx-target="[id='actions-{{ pid }}']"
|
| 36 |
hx-swap="innerHTML"
|
| 37 |
-
hx-vals='{"source": "{{ _source }}", "position": "{{
|
| 38 |
β Save
|
| 39 |
</button>
|
| 40 |
<!-- Not interested (removes the whole card) -->
|
|
@@ -42,9 +45,8 @@
|
|
| 42 |
hx-post="/api/papers/{{ pid }}/not-interested"
|
| 43 |
hx-target="#paper-{{ pid }}"
|
| 44 |
hx-swap="outerHTML swap:200ms"
|
| 45 |
-
hx-vals='{"source": "{{ _source }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}"}'>
|
| 46 |
β Not interested
|
| 47 |
</button>
|
| 48 |
</div>
|
| 49 |
{% endif %}
|
| 50 |
-
|
|
|
|
| 3 |
Expects: paper_id (or paper.arxiv_id), saved (bool), dismissed (bool)
|
| 4 |
Optional: source ("search" | "recommendation" | "saved"), position (int)
|
| 5 |
Phase 4.5: ranker_version, candidate_source, cluster_id (set by recommendations.py)
|
| 6 |
+
Phase 6.5: query_id (per-request UUID for feed-level CTR)
|
| 7 |
These are returned directly by the /api/papers/{id}/save endpoint
|
| 8 |
so they also work as a standalone partial.
|
| 9 |
#}
|
|
|
|
| 13 |
{% set _ranker_version = paper.ranker_version | default("") if paper is defined else "" %}
|
| 14 |
{% set _candidate_source = paper.candidate_source | default("") if paper is defined else "" %}
|
| 15 |
{% set _cluster_id = paper.cluster_id | default("") if paper is defined else "" %}
|
| 16 |
+
{% set _query_id = paper.query_id | default("") if paper is defined else "" %}
|
| 17 |
+
{% set _position = paper.position | default(0) if paper is defined else 0 %}
|
| 18 |
|
| 19 |
{% if is_saved %}
|
| 20 |
<!-- Already saved — show saved state, allow unsave via not-interested -->
|
|
|
|
| 26 |
hx-post="/api/papers/{{ pid }}/not-interested"
|
| 27 |
hx-target="#paper-{{ pid }}"
|
| 28 |
hx-swap="outerHTML swap:200ms"
|
| 29 |
+
hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
|
| 30 |
Remove
|
| 31 |
</button>
|
| 32 |
</div>
|
|
|
|
| 37 |
hx-post="/api/papers/{{ pid }}/save"
|
| 38 |
hx-target="[id='actions-{{ pid }}']"
|
| 39 |
hx-swap="innerHTML"
|
| 40 |
+
hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
|
| 41 |
β Save
|
| 42 |
</button>
|
| 43 |
<!-- Not interested (removes the whole card) -->
|
|
|
|
| 45 |
hx-post="/api/papers/{{ pid }}/not-interested"
|
| 46 |
hx-target="#paper-{{ pid }}"
|
| 47 |
hx-swap="outerHTML swap:200ms"
|
| 48 |
+
hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
|
| 49 |
β Not interested
|
| 50 |
</button>
|
| 51 |
</div>
|
| 52 |
{% endif %}
|
|
|