siddhm11 committed on
Commit
4bb3d95
·
1 Parent(s): 3f58d41

Phase 6.5 Day 2: query_id linkage (B1)

Browse files

recommendations.py:
- Generate query_id (UUID) once per GET /api/recommendations request
- Thread query_id through all 4 tiers: trending, Tier 1 (multi-interest),
Tier 2 (EWMA), Tier 3 (Qdrant recommend)
- Pass query_id kwarg to _multi_interest_recommend()
- Embed query_id + position (0-based index) in every paper dict

search.py:
- Generate query_id once per GET /search request
- Add query_id + position to each paper in results

action_buttons.html:
- Add query_id + position to ALL three hx-vals JSON blobs (save, remove, not-interested)
- Position was previously missing from not-interested buttons

This enables Phase 7 per-feed CTR analysis via:
SELECT query_id, COUNT(DISTINCT paper_id) AS shown,
COUNT(*) FILTER (WHERE event_type='save') AS saved
FROM interactions GROUP BY query_id

Tests: 203 passed, 0 failures

app/routers/recommendations.py CHANGED
@@ -59,6 +59,9 @@ async def get_recommendations(
59
  user_id = user_id or str(uuid.uuid4())
60
  state = await us.ensure_loaded(user_id)
61
 
 
 
 
62
  def _empty_resp():
63
  r = templates.TemplateResponse(
64
  request,
@@ -79,12 +82,14 @@ async def get_recommendations(
79
  )
80
  if trending:
81
  papers = []
82
- for paper in trending:
83
  paper["saved"] = False
84
  paper["dismissed"] = False
85
  paper["ranker_version"] = _RANKER_VERSION
86
  paper["candidate_source"] = "trending_category_fallback"
87
  paper["cluster_id"] = ""
 
 
88
  papers.append(paper)
89
 
90
  r = templates.TemplateResponse(
@@ -106,7 +111,7 @@ async def get_recommendations(
106
 
107
  # ── Tier 1: Multi-interest clustering + quota fusion (≥5 saves) ──────
108
  rec_arxiv_ids, paper_tags = await _multi_interest_recommend(
109
- user_id, state, seen, REC_LIMIT,
110
  )
111
 
112
  # ── Tier 2: EWMA single-vector search (≥3 saves) ──────────────────────
@@ -117,6 +122,7 @@ async def get_recommendations(
117
  "ranker_version": _RANKER_VERSION,
118
  "candidate_source": "ewma_longterm",
119
  "cluster_id": "",
 
120
  }
121
 
122
  # ── Tier 3: Qdrant Recommend API (≥1 save fallback) ───────────────────
@@ -132,6 +138,7 @@ async def get_recommendations(
132
  "ranker_version": _RANKER_VERSION,
133
  "candidate_source": "qdrant_recommend",
134
  "cluster_id": "",
 
135
  }
136
 
137
  if not rec_arxiv_ids:
@@ -151,7 +158,7 @@ async def get_recommendations(
151
  await db.cache_turso_metadata_batch(list(meta.values()))
152
 
153
  papers = []
154
- for aid in rec_arxiv_ids:
155
  if aid not in meta:
156
  continue
157
  tags = paper_tags.get(aid, {})
@@ -163,6 +170,9 @@ async def get_recommendations(
163
  "ranker_version": tags.get("ranker_version", _RANKER_VERSION),
164
  "candidate_source": tags.get("candidate_source", ""),
165
  "cluster_id": tags.get("cluster_id", ""),
 
 
 
166
  })
167
 
168
  resp = templates.TemplateResponse(
@@ -177,7 +187,8 @@ async def get_recommendations(
177
  # ── Tier 1: Multi-interest clustering + quota fusion ─────────────────────────
178
 
179
  async def _multi_interest_recommend(
180
- user_id: str, state, seen: set[str], limit: int
 
181
  ) -> tuple[list[str], dict[str, dict]]:
182
  """
183
  Full recommendation pipeline (Phase 2b + Phase 4 corrections):
@@ -458,6 +469,7 @@ async def _multi_interest_recommend(
458
  "ranker_version": _RANKER_VERSION,
459
  "candidate_source": source,
460
  "cluster_id": str(cluster_idx) if cluster_idx is not None and cluster_idx >= 0 else "",
 
461
  }
462
 
463
  return final, paper_tags
 
59
  user_id = user_id or str(uuid.uuid4())
60
  state = await us.ensure_loaded(user_id)
61
 
62
+ # Phase 6.5 B1: one query_id per feed request for per-feed CTR analysis
63
+ query_id = str(uuid.uuid4())
64
+
65
  def _empty_resp():
66
  r = templates.TemplateResponse(
67
  request,
 
82
  )
83
  if trending:
84
  papers = []
85
+ for idx, paper in enumerate(trending):
86
  paper["saved"] = False
87
  paper["dismissed"] = False
88
  paper["ranker_version"] = _RANKER_VERSION
89
  paper["candidate_source"] = "trending_category_fallback"
90
  paper["cluster_id"] = ""
91
+ paper["query_id"] = query_id
92
+ paper["position"] = idx
93
  papers.append(paper)
94
 
95
  r = templates.TemplateResponse(
 
111
 
112
  # ── Tier 1: Multi-interest clustering + quota fusion (≥5 saves) ──────
113
  rec_arxiv_ids, paper_tags = await _multi_interest_recommend(
114
+ user_id, state, seen, REC_LIMIT, query_id=query_id,
115
  )
116
 
117
  # ── Tier 2: EWMA single-vector search (≥3 saves) ──────────────────────
 
122
  "ranker_version": _RANKER_VERSION,
123
  "candidate_source": "ewma_longterm",
124
  "cluster_id": "",
125
+ "query_id": query_id,
126
  }
127
 
128
  # ── Tier 3: Qdrant Recommend API (≥1 save fallback) ───────────────────
 
138
  "ranker_version": _RANKER_VERSION,
139
  "candidate_source": "qdrant_recommend",
140
  "cluster_id": "",
141
+ "query_id": query_id,
142
  }
143
 
144
  if not rec_arxiv_ids:
 
158
  await db.cache_turso_metadata_batch(list(meta.values()))
159
 
160
  papers = []
161
+ for idx, aid in enumerate(rec_arxiv_ids):
162
  if aid not in meta:
163
  continue
164
  tags = paper_tags.get(aid, {})
 
170
  "ranker_version": tags.get("ranker_version", _RANKER_VERSION),
171
  "candidate_source": tags.get("candidate_source", ""),
172
  "cluster_id": tags.get("cluster_id", ""),
173
+ # Phase 6.5 B1: query_id + position for per-feed CTR
174
+ "query_id": tags.get("query_id", query_id),
175
+ "position": idx,
176
  })
177
 
178
  resp = templates.TemplateResponse(
 
187
  # ── Tier 1: Multi-interest clustering + quota fusion ─────────────────────────
188
 
189
  async def _multi_interest_recommend(
190
+ user_id: str, state, seen: set[str], limit: int,
191
+ *, query_id: str = "",
192
  ) -> tuple[list[str], dict[str, dict]]:
193
  """
194
  Full recommendation pipeline (Phase 2b + Phase 4 corrections):
 
469
  "ranker_version": _RANKER_VERSION,
470
  "candidate_source": source,
471
  "cluster_id": str(cluster_idx) if cluster_idx is not None and cluster_idx >= 0 else "",
472
+ "query_id": query_id,
473
  }
474
 
475
  return final, paper_tags
app/routers/search.py CHANGED
@@ -68,13 +68,17 @@ async def search(
68
  papers = []
69
 
70
  user_id = user_id or str(uuid.uuid4())
 
 
71
  state = await us.ensure_loaded(user_id)
72
  saved_ids = set(state.positive_list)
73
  dismissed_ids = set(state.negative_list)
74
 
75
- for p in papers:
76
  p["saved"] = p["arxiv_id"] in saved_ids
77
  p["dismissed"] = p["arxiv_id"] in dismissed_ids
 
 
78
 
79
  if request.headers.get("HX-Request"):
80
  resp = templates.TemplateResponse(
 
68
  papers = []
69
 
70
  user_id = user_id or str(uuid.uuid4())
71
+ # Phase 6.5 B1: one query_id per search request for per-feed CTR
72
+ query_id = str(uuid.uuid4())
73
  state = await us.ensure_loaded(user_id)
74
  saved_ids = set(state.positive_list)
75
  dismissed_ids = set(state.negative_list)
76
 
77
+ for idx, p in enumerate(papers):
78
  p["saved"] = p["arxiv_id"] in saved_ids
79
  p["dismissed"] = p["arxiv_id"] in dismissed_ids
80
+ p["query_id"] = query_id
81
+ p["position"] = idx
82
 
83
  if request.headers.get("HX-Request"):
84
  resp = templates.TemplateResponse(
app/templates/partials/action_buttons.html CHANGED
@@ -3,6 +3,7 @@
3
  Expects: paper_id (or paper.arxiv_id), saved (bool), dismissed (bool)
4
  Optional: source ("search" | "recommendation" | "saved"), position (int)
5
  Phase 4.5: ranker_version, candidate_source, cluster_id (set by recommendations.py)
 
6
  These are returned directly by the /api/papers/{id}/save endpoint
7
  so they also work as a standalone partial.
8
  #}
@@ -12,6 +13,8 @@
12
  {% set _ranker_version = paper.ranker_version | default("") if paper is defined else "" %}
13
  {% set _candidate_source = paper.candidate_source | default("") if paper is defined else "" %}
14
  {% set _cluster_id = paper.cluster_id | default("") if paper is defined else "" %}
 
 
15
 
16
  {% if is_saved %}
17
  <!-- Already saved — show saved state, allow unsave via not-interested -->
@@ -23,7 +26,7 @@
23
  hx-post="/api/papers/{{ pid }}/not-interested"
24
  hx-target="#paper-{{ pid }}"
25
  hx-swap="outerHTML swap:200ms"
26
- hx-vals='{"source": "{{ _source }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}"}'>
27
  Remove
28
  </button>
29
  </div>
@@ -34,7 +37,7 @@
34
  hx-post="/api/papers/{{ pid }}/save"
35
  hx-target="[id='actions-{{ pid }}']"
36
  hx-swap="innerHTML"
37
- hx-vals='{"source": "{{ _source }}", "position": "{{ position | default(0) }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}"}'>
38
  ⭐ Save
39
  </button>
40
  <!-- Not interested (removes the whole card) -->
@@ -42,9 +45,8 @@
42
  hx-post="/api/papers/{{ pid }}/not-interested"
43
  hx-target="#paper-{{ pid }}"
44
  hx-swap="outerHTML swap:200ms"
45
- hx-vals='{"source": "{{ _source }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}"}'>
46
  βœ• Not interested
47
  </button>
48
  </div>
49
  {% endif %}
50
-
 
3
  Expects: paper_id (or paper.arxiv_id), saved (bool), dismissed (bool)
4
  Optional: source ("search" | "recommendation" | "saved"), position (int)
5
  Phase 4.5: ranker_version, candidate_source, cluster_id (set by recommendations.py)
6
+ Phase 6.5: query_id (per-request UUID for feed-level CTR)
7
  These are returned directly by the /api/papers/{id}/save endpoint
8
  so they also work as a standalone partial.
9
  #}
 
13
  {% set _ranker_version = paper.ranker_version | default("") if paper is defined else "" %}
14
  {% set _candidate_source = paper.candidate_source | default("") if paper is defined else "" %}
15
  {% set _cluster_id = paper.cluster_id | default("") if paper is defined else "" %}
16
+ {% set _query_id = paper.query_id | default("") if paper is defined else "" %}
17
+ {% set _position = paper.position | default(0) if paper is defined else 0 %}
18
 
19
  {% if is_saved %}
20
  <!-- Already saved — show saved state, allow unsave via not-interested -->
 
26
  hx-post="/api/papers/{{ pid }}/not-interested"
27
  hx-target="#paper-{{ pid }}"
28
  hx-swap="outerHTML swap:200ms"
29
+ hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
30
  Remove
31
  </button>
32
  </div>
 
37
  hx-post="/api/papers/{{ pid }}/save"
38
  hx-target="[id='actions-{{ pid }}']"
39
  hx-swap="innerHTML"
40
+ hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
41
  ⭐ Save
42
  </button>
43
  <!-- Not interested (removes the whole card) -->
 
45
  hx-post="/api/papers/{{ pid }}/not-interested"
46
  hx-target="#paper-{{ pid }}"
47
  hx-swap="outerHTML swap:200ms"
48
+ hx-vals='{"source": "{{ _source }}", "position": "{{ _position }}", "ranker_version": "{{ _ranker_version }}", "candidate_source": "{{ _candidate_source }}", "cluster_id": "{{ _cluster_id }}", "query_id": "{{ _query_id }}"}'>
49
  βœ• Not interested
50
  </button>
51
  </div>
52
  {% endif %}