josefchen committed on
Commit
e030cbd
·
verified ·
1 Parent(s): f5326c7

Add 6 features: mode-click->UMAP, food-group filter, why-explainers, recipe builder, public API, saved queries (BrowserState)

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +744 -315
  3. requirements.txt +2 -1
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,4 +1,21 @@
1
- """Epicure Explorer: chef-facing operators over the three sibling embeddings."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
@@ -6,13 +23,16 @@ import os
6
  import re
7
  import sys
8
  import json
 
 
 
 
9
  import numpy as np
10
  import gradio as gr
11
  import plotly.graph_objects as go
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
15
- from matplotlib.patches import Patch
16
 
17
  try:
18
  from epicure import Epicure
@@ -29,15 +49,12 @@ KAIKAKU_DARK = "#0F2D2F"
29
  KAIKAKU_DEEP = "#0A1F20"
30
  KAIKAKU_MID = "#1A3D3F"
31
  KAIKAKU_EDGE = "#2A4D4F"
32
- KAIKAKU_ACCENT = "#288B79" # darker teal-green - readable on white
33
  KAIKAKU_ACCENT_HOVER = "#1E6E5F"
34
- KAIKAKU_ACCENT_LIGHT = "#A8D5CA" # background tints only
35
- KAIKAKU_MINT = KAIKAKU_ACCENT # backwards-compat aliases used elsewhere
36
- KAIKAKU_MINT_BRIGHT = KAIKAKU_ACCENT_HOVER
37
  KAIKAKU_TEXT = "#0F2D2F"
38
  KAIKAKU_MUTED = "#5A7878"
39
 
40
- # Light matplotlib defaults; mint is an accent only
41
  plt.rcParams.update({
42
  "figure.facecolor": "#ffffff",
43
  "axes.facecolor": "#ffffff",
@@ -73,11 +90,28 @@ FG_COLORS = {
73
  "Other": "#cccccc",
74
  }
75
 
76
- # Sanity-check log on import so Space logs show whether assets loaded
77
  print(f"[epicure-explorer] models loaded: {list(MODELS)}", flush=True)
78
- print(f"[epicure-explorer] UMAP shapes: {{cooc:{UMAP_DATA['cooc'].shape}, core:{UMAP_DATA['core'].shape}, chem:{UMAP_DATA['chem'].shape}}}", flush=True)
79
- print(f"[epicure-explorer] food group labels: {len(FOOD_GROUPS)} ingredients, "
80
- f"{sum(1 for fg in FOOD_GROUPS if fg != 'Other')} with concrete group", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # ===== math helpers =====
83
 
@@ -123,7 +157,58 @@ def _slerp(v, d, theta_deg):
123
  th = np.deg2rad(float(theta_deg))
124
  return _unit(np.cos(th)*v + np.sin(th)*d_perp)
125
 
126
- # ===== heatmap (matplotlib, reliable) =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def _basket_heatmap(m, basket):
129
  valid = [n for n in (basket or []) if n in m.vocab]
@@ -154,7 +239,7 @@ def _basket_heatmap(m, basket):
154
  plt.tight_layout()
155
  return fig
156
 
157
- # ===== UMAP (Plotly, SINGLE TRACE, bulletproof) =====
158
 
159
  def _umap_coords(sibling, three_d):
160
  base = UMAP_DATA[sibling]
@@ -172,86 +257,65 @@ def umap_view(sibling, basket, show_neighbours, k, three_d=False):
172
  coords2, z = _umap_coords(sibling, three_d)
173
  m = MODELS[sibling]
174
  n = len(NAMES_BY_IDX)
175
-
176
- # Pre-compute marker colors and hover text per ingredient
177
- colors = [FG_COLORS.get(fg, KAIKAKU_MUTED) for fg in FOOD_GROUPS]
178
  hover_text = [f"{NAMES_BY_IDX[i]}<br>group: {FOOD_GROUPS[i]}" for i in range(n)]
179
-
180
  basket_set = set(basket or [])
181
  basket_idxs = [m.vocab[b] for b in (basket or []) if b in m.vocab]
182
-
183
  neighbour_set: set[str] = set()
184
  if show_neighbours and basket_idxs:
185
  centroid = _basket_centroid(m, basket)
186
  if centroid is not None:
187
  nb_pairs = _topk(m, centroid, k=int(k), exclude=basket)
188
  neighbour_set = {nm for nm, _ in nb_pairs}
189
-
190
- # SINGLE background trace: all 1790 points coloured by food group.
191
- # One trace beats N traces for reliability in gr.Plot.
192
- bg_x = [float(coords2[i, 0]) for i in range(n) if NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set]
193
- bg_y = [float(coords2[i, 1]) for i in range(n) if NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set]
194
- bg_z = [float(z[i]) for i in range(n) if NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set] if three_d else None
195
- bg_c = [colors[i] for i in range(n) if NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set]
196
- bg_h = [hover_text[i] for i in range(n) if NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set]
197
-
198
  fig = go.Figure()
199
-
200
  if three_d:
201
  fig.add_trace(go.Scatter3d(
202
  x=bg_x, y=bg_y, z=bg_z, mode="markers",
203
  marker=dict(size=3, color=bg_c, opacity=0.55, line=dict(width=0)),
204
- text=bg_h, hovertemplate="%{text}<extra></extra>", name="ingredients",
205
- showlegend=False,
206
  ))
207
  else:
208
  fig.add_trace(go.Scattergl(
209
  x=bg_x, y=bg_y, mode="markers",
210
  marker=dict(size=5, color=bg_c, opacity=0.65, line=dict(width=0)),
211
- text=bg_h, hovertemplate="%{text}<extra></extra>", name="ingredients",
212
- showlegend=False,
213
  ))
214
-
215
- # Neighbour highlights (amber)
216
  if neighbour_set:
217
  ni = [i for i in range(n) if NAMES_BY_IDX[i] in neighbour_set]
218
  nx = [float(coords2[i, 0]) for i in ni]
219
  ny = [float(coords2[i, 1]) for i in ni]
220
  nz = [float(z[i]) for i in ni] if three_d else None
221
  nlabels = [NAMES_BY_IDX[i] for i in ni]
222
- marker = dict(size=11 if not three_d else 6,
223
- color="#ff8800",
224
- opacity=0.95,
225
  line=dict(color="#ffffff", width=1.2))
226
  TR = go.Scatter3d if three_d else go.Scatter
227
- kwargs = dict(mode="markers+text",
228
- marker=marker, text=nlabels, textposition="top center",
229
  textfont=dict(size=10),
230
  hovertemplate="<b>%{text}</b> (neighbour)<extra></extra>",
231
  name=f"top-{k} neighbours")
232
  fig.add_trace(TR(x=nx, y=ny, z=nz, **kwargs) if three_d else TR(x=nx, y=ny, **kwargs))
233
-
234
- # Basket highlights (mint star, accent only)
235
  if basket_idxs:
236
  bx = [float(coords2[i, 0]) for i in basket_idxs]
237
  by = [float(coords2[i, 1]) for i in basket_idxs]
238
  bz = [float(z[i]) for i in basket_idxs] if three_d else None
239
  blabels = [NAMES_BY_IDX[i] for i in basket_idxs]
240
- marker = dict(size=18 if not three_d else 9,
241
- color=KAIKAKU_MINT,
242
  symbol="star" if not three_d else "diamond",
243
  line=dict(color="#111111", width=1.5))
244
  TR = go.Scatter3d if three_d else go.Scatter
245
- kwargs = dict(mode="markers+text",
246
- marker=marker, text=blabels, textposition="top center",
247
  textfont=dict(size=13, color="#111111"),
248
  hovertemplate="<b>%{text}</b> (basket)<extra></extra>", name="basket")
249
  fig.add_trace(TR(x=bx, y=by, z=bz, **kwargs) if three_d else TR(x=bx, y=by, **kwargs))
250
-
251
  title_suffix = " (3D)" if three_d else ""
252
  fig.update_layout(
253
- title=dict(text=f"UMAP of Epicure-{sibling.capitalize()}{title_suffix} - {n} ingredients",
254
- font=dict(size=15)),
255
  height=650, margin=dict(l=40, r=40, t=60, b=40),
256
  paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
257
  legend=dict(orientation="v", x=1.02, y=1, font=dict(size=11)),
@@ -260,15 +324,11 @@ def umap_view(sibling, basket, show_neighbours, k, three_d=False):
260
  fig.update_xaxes(showgrid=True, gridcolor="#eeeeee", zeroline=False, title="UMAP 1")
261
  fig.update_yaxes(showgrid=True, gridcolor="#eeeeee", zeroline=False, title="UMAP 2")
262
  else:
263
- fig.update_layout(scene=dict(
264
- xaxis=dict(title="UMAP 1"),
265
- yaxis=dict(title="UMAP 2"),
266
- zaxis=dict(title="PC1 (z)"),
267
- bgcolor="#ffffff",
268
- ))
269
  return fig
270
 
271
- # ===== tab handlers =====
272
 
273
  def basket_pairings(sibling, basket, k):
274
  m = MODELS[sibling]
@@ -288,32 +348,38 @@ def basket_pairings(sibling, basket, k):
288
  def supervised_slerp_multi(sibling, basket, directions, theta, k):
289
  m = MODELS[sibling]
290
  v = _basket_centroid(m, basket)
291
- if v is None: return []
 
292
  d = _stack_directions(m, directions, use_factor_pole=False)
293
  if d is None:
294
- return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
295
  q = _slerp(v, d, theta)
296
- return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
 
297
 
298
  def emergent_slerp_multi(sibling, basket, mode_labels, theta, k):
299
  m = MODELS[sibling]
300
  label_to_id = {f"{mode.label} ({mode.mode_id})": mode.mode_id for mode in m.modes if mode.kind == "factor"}
301
  mode_ids = [label_to_id[lab] for lab in (mode_labels or []) if lab in label_to_id]
302
  v = _basket_centroid(m, basket)
303
- if v is None: return []
 
304
  d = _stack_directions(m, mode_ids, use_factor_pole=True)
305
  if d is None:
306
- return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
307
  q = _slerp(v, d, theta)
308
- return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
 
309
 
310
  def arithmetic(sibling, positives, negatives, k):
311
  m = MODELS[sibling]
312
  pos = _basket_centroid(m, positives)
313
- if pos is None: return []
 
314
  neg = _basket_centroid(m, negatives) if negatives else None
315
  q = _unit(pos - neg) if neg is not None else pos
316
- return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, (positives or []) + (negatives or []))]
 
317
 
318
  def browse_modes(sibling, kind_filter, query):
319
  m = MODELS[sibling]
@@ -323,7 +389,8 @@ def browse_modes(sibling, kind_filter, query):
323
  continue
324
  if q and q not in mode.label.lower() and q not in mode.property.lower():
325
  continue
326
- rows.append([mode.mode_id, mode.kind, mode.property, mode.label, mode.n_members, ", ".join(mode.members[:12])])
 
327
  rows.sort(key=lambda r: (r[1], -r[4]))
328
  return rows
329
 
@@ -343,15 +410,110 @@ def compare_siblings(basket, directions, theta, k):
343
  out.append([[n, f"{s:.4f}"] for n, s in hits])
344
  return out[0], out[1], out[2]
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  # ===== fridge parser =====
347
 
348
  _LINE_SPLIT = re.compile(r"[\n;]")
349
  _BRACKET = re.compile(r"\([^)]*\)")
350
- _QTY = (r"(?:\d+(?:[\.,/]\d+)?|"
351
- r"a|an|one|two|three|four|five|six|seven|eight|nine|ten|half|quarter)")
352
- _UNIT = (r"(?:cups?|tbsp\.?|tablespoons?|tsp\.?|teaspoons?|"
353
- r"oz\.?|ounces?|lbs?\.?|pounds?|grams?|kgs?|kilos?|"
354
- r"ml|liters?|litres?|cloves?|bunches?|sprigs?|pinch(?:es)?|"
355
  r"slices?|pieces?|cans?|packets?|sticks?|leaves?|stalks?|heads?|inch(?:es)?|"
356
  r"splash(?:es)?|dash(?:es)?|drops?|handfuls?|large|small|medium)")
357
  _LEADING_QTY = re.compile(rf"^\s*{_QTY}\s+(?:{_UNIT}\b\s*)?(?:of\s+)?", re.IGNORECASE)
@@ -359,18 +521,14 @@ _LEADING_UNIT_ONLY = re.compile(rf"^\s*{_UNIT}\b\s*(?:of\s+)?", re.IGNORECASE)
359
  _JUICE_OF = re.compile(rf"^\s*(?:juice|zest)\s+(?:of\s+)?(?:{_QTY}\s+)?", re.IGNORECASE)
360
  _LEADING_PREP = re.compile(
361
  r"^\s*(?:fresh|dried|cooked|frozen|raw|ripe|firm|boneless|skinless|smoked|low[- ]fat)\s+",
362
- re.IGNORECASE,
363
- )
364
  _TRAILING_PREP = re.compile(
365
  r"\s*,\s*(?:chopped|minced|diced|sliced|grated|crushed|whole|ground|peeled|"
366
  r"to taste|optional|finely|coarsely|cubed|shredded|julienned|halved|quartered|warmed|"
367
  r"toasted|roasted|bruised|melted|softened|cooked|drained|rinsed|patted dry|trimmed|"
368
- r"deveined|seeded|stemmed|crumbled).*$", re.IGNORECASE,
369
- )
370
- _KNOWN_PLURALS = {
371
- "tortillas":"tortilla","thighs":"thigh","leaves":"leaf","onions":"onion",
372
- "potatoes":"potato","tomatoes":"tomato","cloves":"clove",
373
- }
374
 
375
  def _clean_line(line):
376
  s = line.strip().lower()
@@ -425,11 +583,146 @@ def parse_fridge(raw_text, sibling, min_score=70):
425
  if n not in seen: seen.add(n); dedup.append(n)
426
  return rows, dedup
427
 
 
428
 
429
- # ===== UI =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
 
431
- # Theme: light background, BLACK text everywhere, accent color reserved for
432
- # interactive UI (buttons, sliders, focused borders) and brand cues.
433
  THEME = gr.themes.Soft(
434
  primary_hue=gr.themes.Color(
435
  c50="#E8F4F1", c100="#C8E6DE", c200=KAIKAKU_ACCENT_LIGHT,
@@ -440,14 +733,12 @@ THEME = gr.themes.Soft(
440
  neutral_hue="slate",
441
  font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
442
  ).set(
443
- # Force readable label/title text instead of letting Soft tint them with primary
444
  block_label_text_color="#1f2937",
445
  block_label_text_weight="600",
446
  block_title_text_color="#0f172a",
447
  block_title_text_weight="700",
448
  body_text_color="#0f172a",
449
  body_text_color_subdued="#475569",
450
- # Primary button: dark accent + white text -> high contrast
451
  button_primary_background_fill=KAIKAKU_ACCENT,
452
  button_primary_background_fill_hover=KAIKAKU_ACCENT_HOVER,
453
  button_primary_text_color="#ffffff",
@@ -462,66 +753,32 @@ THEME = gr.themes.Soft(
462
  CUSTOM_CSS = f"""
463
  .gradio-container {{max-width: 1280px !important;}}
464
  footer {{visibility: hidden;}}
465
-
466
- /* Make sure NOTHING uses the faded light-mint label color */
467
- .gradio-container label,
468
- .gradio-container .label,
469
  .gradio-container [data-testid="block-label"],
470
- .gradio-container .block-label,
471
- .gradio-container .gr-block-label {{
472
- color: #0f172a !important;
473
- font-weight: 600 !important;
474
- background: transparent !important;
475
- }}
476
-
477
- /* Tab labels readable */
478
- .gradio-container button[role="tab"] {{
479
- color: #334155 !important;
480
- font-weight: 500 !important;
481
  }}
 
482
  .gradio-container button[role="tab"][aria-selected="true"] {{
483
- color: {KAIKAKU_ACCENT} !important;
484
- border-bottom-color: {KAIKAKU_ACCENT} !important;
485
- font-weight: 700 !important;
486
- }}
487
-
488
- /* Primary button: dark accent + white text */
489
- .gradio-container button.primary,
490
- .gradio-container .primary > button {{
491
- background: {KAIKAKU_ACCENT} !important;
492
- color: #ffffff !important;
493
- border-color: {KAIKAKU_ACCENT} !important;
494
- font-weight: 600 !important;
495
- }}
496
- .gradio-container button.primary:hover {{
497
- background: {KAIKAKU_ACCENT_HOVER} !important;
498
- border-color: {KAIKAKU_ACCENT_HOVER} !important;
499
  }}
500
-
501
- /* Dataframe headers: black, bold, readable */
502
- .gradio-container table thead th,
503
- .gradio-container .gr-dataframe thead th {{
504
- color: #0f172a !important;
505
- font-weight: 700 !important;
506
- background: #f8fafc !important;
507
  }}
508
- .gradio-container table tbody td {{
509
- color: #0f172a !important;
 
510
  }}
511
-
512
- /* Sibling card */
513
  .sibling-card {{
514
- border-left: 3px solid {KAIKAKU_ACCENT};
515
- padding: 10px 14px;
516
- margin: 6px 0;
517
- background: #f8fafc;
518
- border-radius: 4px;
519
  }}
520
- .sibling-name {{color: {KAIKAKU_DARK}; font-weight: 700; font-size: 1.02em;}}
521
- .sibling-desc {{color: #334155; font-size: 0.95em; line-height: 1.5;}}
522
  """
523
 
524
- # Precompute initial figures so plots are populated on first page load
525
  _INITIAL_UMAP = umap_view("chem", ["chicken","lemon","garlic"], True, 8, three_d=False)
526
  _INITIAL_HEATMAP = _basket_heatmap(MODELS["chem"], ["chicken","lemon","garlic"])
527
 
@@ -540,10 +797,24 @@ SIBLING_CARDS = """
540
  </div>
541
  """
542
 
 
 
 
 
 
 
 
 
 
 
 
 
543
  with gr.Blocks(title="Epicure Explorer", theme=THEME, css=CUSTOM_CSS) as demo:
544
 
 
 
545
  gr.Markdown(
546
- f"""# Epicure Explorer
547
  Chef-facing operators over three sibling ingredient embeddings (Cooc / Core / Chem) from
548
  [arXiv:2605.22391](https://arxiv.org/abs/2605.22391). 1,790 canonical ingredients across 7 languages,
549
  300-D Metapath2Vec, controlled chemistry-vs-recipe-context spectrum."""
@@ -555,196 +826,354 @@ Chef-facing operators over three sibling ingredient embeddings (Cooc / Core / Ch
555
 
556
  shared_basket = gr.State([])
557
 
558
- # ---------- Tab 1: Basket pairings + heatmap ----------
559
- with gr.Tab("Basket pairings"):
560
- gr.Markdown(
561
- "Pick one or more ingredients. Tool averages their unit vectors and returns nearest neighbours "
562
- "plus closest modes of that centroid. The heatmap shows whether the basket is coherent."
563
- )
564
- basket = gr.Dropdown(
565
- choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
566
- label="Ingredient basket (pick 1+)", multiselect=True, max_choices=10,
567
- )
568
- k_pair = gr.Slider(1, 15, value=8, step=1, label="K")
569
- pair_btn = gr.Button("Find pairings", variant="primary")
570
- with gr.Row():
571
- nb_table = gr.Dataframe(headers=["Neighbour","Cosine"], label="Top-K nearest neighbours", interactive=False)
572
- mode_table = gr.Dataframe(headers=["Mode id","Label","Kind","Cosine"], label="Closest modes", interactive=False)
573
- heatmap_plot = gr.Plot(value=_INITIAL_HEATMAP, label="Pairwise cosine (matplotlib)")
574
- pair_btn.click(
575
- basket_pairings, inputs=[sibling, basket, k_pair],
576
- outputs=[nb_table, mode_table, heatmap_plot],
577
- show_progress="full",
578
- )
579
- gr.Examples(
580
- examples=[
581
- ["chem", ["chicken","lemon","garlic"], 8],
582
- ["core", ["miso","ginger","sesame_oil"], 8],
583
- ["chem", ["tomato","basil","mozzarella_cheese"], 8],
584
- ["cooc", ["chocolate","strawberry","cream"], 8],
585
- ["chem", ["cumin","coriander","turmeric"], 8],
586
- ["core", ["soy_sauce","ginger","scallion"], 8],
587
- ["chem", ["red_wine","beef","rosemary"], 8],
588
- ["core", ["coconut_milk","lemongrass","fish_sauce"], 8],
589
- ],
590
- inputs=[sibling, basket, k_pair],
591
- label="Try one of these baskets",
592
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
 
594
- # ---------- Tab 2: Supervised SLERP ----------
595
- with gr.Tab("Supervised SLERP"):
596
- gr.Markdown("Rotate the seed basket toward one or more supervised direction poles. Multiple directions are summed.")
597
- sup_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["rice"], label="Seed basket (pick 1+)", multiselect=True, max_choices=10)
598
- sup_dirs = gr.Dropdown(choices=_supervised_choices("chem"), value=["cuisine:South_Asian"],
599
- label="Supervised directions (pick 1+)", multiselect=True, max_choices=5)
600
- sup_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
601
- sup_k = gr.Slider(1, 15, value=8, step=1, label="K")
602
- sup_btn = gr.Button("Rotate", variant="primary")
603
- sup_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
604
- sup_btn.click(supervised_slerp_multi, inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k],
605
- outputs=sup_table, show_progress="full")
606
- sibling.change(lambda s: gr.Dropdown(choices=_supervised_choices(s), value=[]),
607
- inputs=sibling, outputs=sup_dirs)
608
- gr.Examples(
609
- examples=[
610
- ["chem", ["rice"], ["cuisine:South_Asian"], 30, 8],
611
- ["chem", ["corn"], ["cuisine:Latin_American"], 30, 8],
612
- ["core", ["chicken"], ["cuisine:Mediterranean"], 45, 8],
613
- ["core", ["tomato","basil"], ["cuisine:Southeast_Asian"], 45, 8],
614
- ["chem", ["beef"], ["cuisine:East_Asian"], 60, 8],
615
- ["cooc", ["chocolate"], ["cuisine:Latin_American"], 30, 8],
616
- ],
617
- inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k],
618
- label="Try one of these rotations",
619
  )
 
 
 
 
 
 
620
 
621
- # ---------- Tab 3: Emergent SLERP ----------
622
- with gr.Tab("Emergent SLERP"):
623
- gr.Markdown("Rotate the seed basket toward one or more emergent FastICA factor-mode poles.")
624
- em_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["chocolate"], label="Seed basket (pick 1+)", multiselect=True, max_choices=10)
625
- factor_opts = _factor_mode_choices("chem")
626
- em_modes = gr.Dropdown(choices=[label for label, _ in factor_opts],
627
- value=[factor_opts[0][0]] if factor_opts else [],
628
- label="Factor modes (pick 1+)", multiselect=True, max_choices=5)
629
- em_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
630
- em_k = gr.Slider(1, 15, value=8, step=1, label="K")
631
- em_btn = gr.Button("Rotate", variant="primary")
632
- em_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
633
- em_btn.click(emergent_slerp_multi, inputs=[sibling, em_basket, em_modes, em_theta, em_k],
634
- outputs=em_table, show_progress="full")
635
- sibling.change(lambda s: gr.Dropdown(choices=[label for label, _ in _factor_mode_choices(s)], value=[]),
636
- inputs=sibling, outputs=em_modes)
637
-
638
- # ---------- Tab 4: Arithmetic ----------
639
- with gr.Tab("Arithmetic"):
640
- gr.Markdown("Mikolov-style vector arithmetic: `centroid(positives) - centroid(negatives)`, then top-K neighbours.")
641
- pos_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["miso"], label="Positives", multiselect=True, max_choices=10)
642
- neg_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["salt"], label="Negatives", multiselect=True, max_choices=10)
643
- ar_k = gr.Slider(1, 15, value=8, step=1, label="K")
644
- ar_btn = gr.Button("Compute", variant="primary")
645
- ar_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K nearest to result vector")
646
- ar_btn.click(arithmetic, inputs=[sibling, pos_box, neg_box, ar_k], outputs=ar_table, show_progress="full")
647
- gr.Examples(
648
- examples=[
649
- ["core", ["miso"], ["salt"], 8],
650
- ["core", ["chicken","tofu"], ["beef"], 8],
651
- ["cooc", ["basil","cumin"], ["parsley"], 8],
652
- ["chem", ["chocolate"], ["sugar"], 8],
653
- ["chem", ["wine"], ["beer"], 8],
654
- ["core", ["bread"], ["flour"], 8],
655
- ["core", ["coffee"], ["milk"], 8],
656
- ["chem", ["mozzarella_cheese"], ["milk"], 8],
657
- ],
658
- inputs=[sibling, pos_box, neg_box, ar_k],
659
- label="Try one of these arithmetic queries",
660
- )
661
 
662
- # ---------- Tab 5: Mode atlas ----------
663
- with gr.Tab("Mode atlas"):
664
- gr.Markdown("Browse the GMM mode atlas. Cooc 150 / Core 193 / Chem 200 modes.")
665
- atlas_kind = gr.Radio(choices=["all","factor","continuous","binary"], value="all", label="Mode kind")
666
- atlas_search = gr.Textbox(label="Search labels / properties", placeholder="e.g. South Asian, baking, fiber", value="")
667
- atlas_btn = gr.Button("Browse modes", variant="primary")
668
- atlas_table = gr.Dataframe(
669
- headers=["mode_id","kind","property","label","n_members","top members"],
670
- label="Modes (sorted by kind, then size descending)", wrap=True, interactive=False,
671
- )
672
- atlas_btn.click(browse_modes, inputs=[sibling, atlas_kind, atlas_search], outputs=atlas_table, show_progress="full")
673
-
674
- # ---------- Tab 6: Compare siblings ----------
675
- with gr.Tab("Compare siblings"):
676
- gr.Markdown("Same query, three siblings, side by side. The spectrum-of-models thesis in one screen.")
677
- cmp_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["chicken"], label="Seed basket", multiselect=True, max_choices=10)
678
- cmp_dirs = gr.Dropdown(choices=_supervised_choices("chem"), value=[],
679
- label="Optional directions (empty = pure pairings)", multiselect=True, max_choices=5)
680
- cmp_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
681
- cmp_k = gr.Slider(1, 15, value=8, step=1, label="K")
682
- cmp_btn = gr.Button("Compare across siblings", variant="primary")
683
- with gr.Row():
684
- cmp_cooc = gr.Dataframe(headers=["Cooc neighbour","Cosine"], label="Cooc (recipe-context)")
685
- cmp_core = gr.Dataframe(headers=["Core neighbour","Cosine"], label="Core (blended)")
686
- cmp_chem = gr.Dataframe(headers=["Chem neighbour","Cosine"], label="Chem (chemistry)")
687
- cmp_btn.click(compare_siblings, inputs=[cmp_basket, cmp_dirs, cmp_theta, cmp_k],
688
- outputs=[cmp_cooc, cmp_core, cmp_chem], show_progress="full")
689
-
690
- # ---------- Tab 7: UMAP visualisation ----------
691
- with gr.Tab("UMAP visualisation"):
692
- gr.Markdown(
693
- "2-D UMAP of the 1,790-ingredient embedding (cosine, n_neighbors=30, min_dist=0.03 -- paper Figure 1). "
694
- "Points coloured by food group. Basket members appear as mint stars; top-K neighbours as amber dots."
695
- )
696
- umap_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
697
- label="Highlight these ingredients", multiselect=True, max_choices=10)
698
- with gr.Row():
699
- umap_show_nb = gr.Checkbox(value=True, label="Show top-K neighbours of basket centroid")
700
- umap_3d = gr.Checkbox(value=False, label="3-D perspective (UMAP + PC1)")
701
- umap_k = gr.Slider(1, 20, value=10, step=1, label="K neighbours")
702
- umap_btn = gr.Button("Update plot", variant="primary")
703
- umap_plot = gr.Plot(value=_INITIAL_UMAP, label="UMAP")
704
- umap_btn.click(umap_view, inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
705
- outputs=umap_plot, show_progress="full")
706
- sibling.change(umap_view, inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
707
- outputs=umap_plot)
708
-
709
- # ---------- Tab 8: Parse my fridge ----------
710
- with gr.Tab("Parse my fridge"):
711
- gr.Markdown(
712
- "Paste a free-text ingredient list. Quantities, units, and prep notes are stripped, "
713
- "then each line is fuzzy-matched to canonical vocab. "
714
- "Click **Send matched to Basket tab** to populate the Basket Pairings input."
715
- )
716
- fridge_text = gr.Textbox(
717
- label="Free-text ingredients (one per line or semicolon-separated)",
718
- lines=8,
719
- value=("2 boneless chicken thighs\n1 cup coconut milk\n1 tbsp fish sauce (or soy sauce)\n"
720
- "fresh lemongrass, bruised\n3 cloves garlic, minced\n1 inch fresh ginger\n"
721
- "juice of one lime\nsalt to taste"),
722
- )
723
- fridge_min = gr.Slider(40, 100, value=70, step=5, label="Min match score (rapidfuzz)")
724
- with gr.Row():
725
- fridge_btn = gr.Button("Parse and match", variant="primary")
726
- fridge_send = gr.Button("Send matched to Basket tab", variant="secondary")
727
- fridge_table = gr.Dataframe(
728
- headers=["Input line", "Canonical match", "Score", "Cleaned"],
729
- label="Parsed matches", interactive=False,
730
- )
731
- fridge_matched = gr.Textbox(label="Matched ingredients", interactive=False)
732
 
733
- def _parse(txt, sib, mn):
734
- rows, matches = parse_fridge(txt, sib, int(mn))
735
- return rows, ", ".join(matches), matches
736
- fridge_btn.click(_parse, inputs=[fridge_text, sibling, fridge_min],
737
- outputs=[fridge_table, fridge_matched, shared_basket], show_progress="full")
738
 
739
- def _send_to_basket(matches):
740
- return gr.Dropdown(value=matches[:10] if matches else [])
741
- fridge_send.click(_send_to_basket, inputs=[shared_basket], outputs=[basket])
 
 
 
 
 
 
 
742
 
743
- gr.Markdown(
744
- """---
745
- **Cite:** Radzikowski and Chen, 2026, *Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings*, [arXiv:2605.22391](https://arxiv.org/abs/2605.22391).
746
 
747
- Artefacts: [epicure-cooc](https://huggingface.co/Kaikaku/epicure-cooc) | [epicure-core](https://huggingface.co/Kaikaku/epicure-core) | [epicure-chem](https://huggingface.co/Kaikaku/epicure-chem) | [corpus dataset](https://huggingface.co/datasets/Kaikaku/epicure-corpus-resources)
 
748
  """
749
  )
750
 
 
1
+ """Epicure Explorer: chef-facing operators over the three sibling embeddings.
2
+
3
+ Features:
4
+ - Basket pairings (with pairwise cosine heatmap)
5
+ - Supervised SLERP (with "why these results" explainer)
6
+ - Emergent SLERP (with explainer)
7
+ - Arithmetic (Mikolov-style, with explainer)
8
+ - Mode atlas (click row -> highlight on UMAP)
9
+ - Compare siblings (one query, three columns)
10
+ - UMAP visualisation (2D / 3D)
11
+ - Parse my fridge (free-text -> canonical vocab via rapidfuzz)
12
+ - Recipe builder (hybrid retrieval: rapidfuzz + sentence-transformers over mode labels)
13
+ - Saved queries (per-browser persistence via gr.BrowserState)
14
+ - Public developer API (gr.api endpoints for neighbours / slerp / arithmetic / embed)
15
+ - Food-group filter on every ingredient dropdown
16
+
17
+ Paper: https://arxiv.org/abs/2605.22391
18
+ """
19
 
20
  from __future__ import annotations
21
 
 
23
  import re
24
  import sys
25
  import json
26
+ import uuid
27
+ from datetime import datetime, timezone
28
+ from functools import lru_cache
29
+
30
  import numpy as np
31
  import gradio as gr
32
  import plotly.graph_objects as go
33
  import matplotlib
34
  matplotlib.use("Agg")
35
  import matplotlib.pyplot as plt
 
36
 
37
  try:
38
  from epicure import Epicure
 
49
  KAIKAKU_DEEP = "#0A1F20"
50
  KAIKAKU_MID = "#1A3D3F"
51
  KAIKAKU_EDGE = "#2A4D4F"
52
+ KAIKAKU_ACCENT = "#288B79"
53
  KAIKAKU_ACCENT_HOVER = "#1E6E5F"
54
+ KAIKAKU_ACCENT_LIGHT = "#A8D5CA"
 
 
55
  KAIKAKU_TEXT = "#0F2D2F"
56
  KAIKAKU_MUTED = "#5A7878"
57
 
 
58
  plt.rcParams.update({
59
  "figure.facecolor": "#ffffff",
60
  "axes.facecolor": "#ffffff",
 
90
  "Other": "#cccccc",
91
  }
92
 
 
93
  print(f"[epicure-explorer] models loaded: {list(MODELS)}", flush=True)
94
+ print(f"[epicure-explorer] food group labels: {len(FOOD_GROUPS)} ingredients", flush=True)
95
+
96
+ # ===== Feature 5: food-group filter helpers =====
97
+
98
+ _NAME_TO_GROUP: dict[str, str] = {NAMES_BY_IDX[i]: FOOD_GROUPS[i] for i in range(len(NAMES_BY_IDX))}
99
+ FOOD_GROUP_CHOICES = ["All", "Vegetable", "Spice", "Fruit", "Dairy", "Grain", "Pantry", "Beverage", "Other"]
100
+
101
def _choices_for_group(group: str) -> list[str]:
    """Return the ingredient names offered when the food-group filter is ``group``.

    A falsy ``group`` or the value ``"All"`` disables filtering and yields
    ``ALL_INGREDIENTS`` as-is. Otherwise the result is a sorted list of the
    names whose ``_NAME_TO_GROUP`` entry equals ``group``; names without an
    entry are treated as belonging to ``"Other"``.
    """
    filtering = bool(group) and group != "All"
    if not filtering:
        return ALL_INGREDIENTS
    matching = [
        name for name in ALL_INGREDIENTS
        if _NAME_TO_GROUP.get(name, "Other") == group
    ]
    matching.sort()
    return matching
105
+
106
def _filter_dropdown(group: str, current_value):
    """Rebuild an ingredient dropdown after the food-group filter changes.

    Args:
        group: Food-group name forwarded to ``_choices_for_group``.
        current_value: The dropdown's present selection: a single string,
            an iterable of strings, or a falsy value (treated as an empty
            list).

    Returns:
        A ``gr.Dropdown`` whose choices are limited to ``group`` and whose
        value keeps only the selections still among those choices. A string
        selection that is no longer available becomes ``None``; a list
        selection is filtered element by element.
    """
    options = _choices_for_group(group)
    permitted = set(options)
    selection = current_value or []
    if not isinstance(selection, str):
        surviving = [item for item in selection if item in permitted]
    elif selection in permitted:
        surviving = selection
    else:
        surviving = None
    return gr.Dropdown(choices=options, value=surviving)
115
 
116
  # ===== math helpers =====
117
 
 
157
  th = np.deg2rad(float(theta_deg))
158
  return _unit(np.cos(th)*v + np.sin(th)*d_perp)
159
 
160
+ # ===== Feature 4: explainer helpers =====
161
+
162
+ def _fmt_nb_inline(pairs):
163
+ return ", ".join(f"{n} ({s:+.2f})" for n, s in pairs)
164
+
165
def _slerp_explainer(m, basket, direction_keys, theta, q, v, d, kind):
    """Build the Markdown "why these results" note for a SLERP query.

    Args:
        m: sibling model, passed through to ``_topk``.
        basket: seed ingredient names; excluded from every neighbour list.
        direction_keys: identifiers of the selected directions, shown joined
            with ``" + "`` (each is converted with ``str`` first).
        theta: rotation angle in degrees.
        q: rotated query vector.
        v: seed-basket centroid vector.
        d: target direction vector.
        kind: ``"supervised"`` selects the "direction pole" wording; any other
            value selects "factor-mode pole".

    Returns:
        A Markdown string, or a short placeholder when ``q``, ``v`` or ``d``
        is ``None``.
    """
    if v is None or d is None or q is None:
        return "_(no rotation applied)_"
    excluded = basket or []
    angle = float(theta)
    cos_theta = float(q @ v)
    # Fraction of a 90-degree rotation, clamped to [0, 1].
    travelled = min(max(angle / 90.0, 0.0), 1.0)
    dir_nb = _topk(m, _unit(d), k=5, exclude=excluded)
    seed_nb = _topk(m, v, k=3, exclude=excluded)
    # The final bullet describes where the *rotated query* lands, so it must be
    # computed from q; the pole's neighbours are the same at every angle.
    query_nb = _topk(m, q, k=3, exclude=excluded)
    query_names = ", ".join(n for n, _ in query_nb)
    label = "direction pole" if kind == "supervised" else "factor-mode pole"
    # str() keeps the join safe if the caller passes non-string identifiers.
    dirs_str = " + ".join(str(key) for key in direction_keys) if direction_keys else "(none)"
    return (
        f"**Why these results**  \n"
        f"- Rotated query vs. seed centroid: cos = {cos_theta:.3f} (theta = {angle:.0f}°; "
        f"{travelled*100:.0f}% of the way to the {label}).  \n"
        f"- {label.capitalize()} ({dirs_str}) nearest in vocab: {_fmt_nb_inline(dir_nb)}.  \n"
        f"- Seed basket's own top-3 (baseline): {_fmt_nb_inline(seed_nb)}.  \n"
        f"- At {angle:.0f}° the query lands near: {query_names}."
    )
183
+
184
def _arithmetic_explainer(m, positives, negatives, q, pos_v, neg_v):
    """Build the Markdown "why these results" note for an arithmetic query.

    Reports the similarity of the result vector ``q`` to each in-vocabulary
    positive and negative input, and compares the best non-input neighbour
    with the best input.  ``pos_v`` and ``neg_v`` are accepted but not used.
    """
    if q is None:
        return "_(no result: missing positives)_"

    def _cosines(names):
        # Similarity of every known input ingredient to the result vector.
        return [
            (n, float(_unit(m.E[m.vocab[n]]) @ q))
            for n in (names or [])
            if n in m.vocab
        ]

    pos_sims = _cosines(positives)
    neg_sims = _cosines(negatives)
    hits = _topk(m, q, k=1, exclude=(positives or []) + (negatives or []))
    if hits:
        top_name, top_sim = hits[0]
    else:
        top_name, top_sim = "(none)", 0.0
    pos_part = _fmt_nb_inline(pos_sims) or "(none)"
    neg_part = _fmt_nb_inline(neg_sims) or "(none)"
    all_sims = pos_sims + neg_sims
    input_max = max((s for _, s in all_sims), default=0.0)
    if not all_sims:
        interp = f"Result top neighbour: {top_name} ({top_sim:+.2f})."
    else:
        gap = top_sim - input_max
        if gap > 0.05:
            interp = (f"Result sits closer to **{top_name}** ({top_sim:+.2f}) "
                      f"than to any input (max {input_max:+.2f}); the embedding separates these concepts.")
        else:
            interp = (f"Result is dominated by the inputs themselves "
                      f"(top neighbour {top_name} only {gap:+.2f} above max input cosine).")
    return (
        f"**Why these results**  \n"
        f"- Result vs. positives: {pos_part}.  \n"
        f"- Result vs. negatives: {neg_part}.  \n"
        f"- {interp}"
    )
210
+
211
+ # ===== heatmap =====
212
 
213
  def _basket_heatmap(m, basket):
214
  valid = [n for n in (basket or []) if n in m.vocab]
 
239
  plt.tight_layout()
240
  return fig
241
 
242
+ # ===== UMAP =====
243
 
244
  def _umap_coords(sibling, three_d):
245
  base = UMAP_DATA[sibling]
 
257
  coords2, z = _umap_coords(sibling, three_d)
258
  m = MODELS[sibling]
259
  n = len(NAMES_BY_IDX)
260
+ colors = [FG_COLORS.get(fg, "#cccccc") for fg in FOOD_GROUPS]
 
 
261
  hover_text = [f"{NAMES_BY_IDX[i]}<br>group: {FOOD_GROUPS[i]}" for i in range(n)]
 
262
  basket_set = set(basket or [])
263
  basket_idxs = [m.vocab[b] for b in (basket or []) if b in m.vocab]
 
264
  neighbour_set: set[str] = set()
265
  if show_neighbours and basket_idxs:
266
  centroid = _basket_centroid(m, basket)
267
  if centroid is not None:
268
  nb_pairs = _topk(m, centroid, k=int(k), exclude=basket)
269
  neighbour_set = {nm for nm, _ in nb_pairs}
270
+ bg_keep = lambda i: NAMES_BY_IDX[i] not in basket_set and NAMES_BY_IDX[i] not in neighbour_set
271
+ bg_x = [float(coords2[i, 0]) for i in range(n) if bg_keep(i)]
272
+ bg_y = [float(coords2[i, 1]) for i in range(n) if bg_keep(i)]
273
+ bg_z = [float(z[i]) for i in range(n) if bg_keep(i)] if three_d else None
274
+ bg_c = [colors[i] for i in range(n) if bg_keep(i)]
275
+ bg_h = [hover_text[i] for i in range(n) if bg_keep(i)]
 
 
 
276
  fig = go.Figure()
 
277
  if three_d:
278
  fig.add_trace(go.Scatter3d(
279
  x=bg_x, y=bg_y, z=bg_z, mode="markers",
280
  marker=dict(size=3, color=bg_c, opacity=0.55, line=dict(width=0)),
281
+ text=bg_h, hovertemplate="%{text}<extra></extra>", name="ingredients", showlegend=False,
 
282
  ))
283
  else:
284
  fig.add_trace(go.Scattergl(
285
  x=bg_x, y=bg_y, mode="markers",
286
  marker=dict(size=5, color=bg_c, opacity=0.65, line=dict(width=0)),
287
+ text=bg_h, hovertemplate="%{text}<extra></extra>", name="ingredients", showlegend=False,
 
288
  ))
 
 
289
  if neighbour_set:
290
  ni = [i for i in range(n) if NAMES_BY_IDX[i] in neighbour_set]
291
  nx = [float(coords2[i, 0]) for i in ni]
292
  ny = [float(coords2[i, 1]) for i in ni]
293
  nz = [float(z[i]) for i in ni] if three_d else None
294
  nlabels = [NAMES_BY_IDX[i] for i in ni]
295
+ marker = dict(size=11 if not three_d else 6, color="#ff8800", opacity=0.95,
 
 
296
  line=dict(color="#ffffff", width=1.2))
297
  TR = go.Scatter3d if three_d else go.Scatter
298
+ kwargs = dict(mode="markers+text", marker=marker, text=nlabels, textposition="top center",
 
299
  textfont=dict(size=10),
300
  hovertemplate="<b>%{text}</b> (neighbour)<extra></extra>",
301
  name=f"top-{k} neighbours")
302
  fig.add_trace(TR(x=nx, y=ny, z=nz, **kwargs) if three_d else TR(x=nx, y=ny, **kwargs))
 
 
303
  if basket_idxs:
304
  bx = [float(coords2[i, 0]) for i in basket_idxs]
305
  by = [float(coords2[i, 1]) for i in basket_idxs]
306
  bz = [float(z[i]) for i in basket_idxs] if three_d else None
307
  blabels = [NAMES_BY_IDX[i] for i in basket_idxs]
308
+ marker = dict(size=18 if not three_d else 9, color=KAIKAKU_ACCENT,
 
309
  symbol="star" if not three_d else "diamond",
310
  line=dict(color="#111111", width=1.5))
311
  TR = go.Scatter3d if three_d else go.Scatter
312
+ kwargs = dict(mode="markers+text", marker=marker, text=blabels, textposition="top center",
 
313
  textfont=dict(size=13, color="#111111"),
314
  hovertemplate="<b>%{text}</b> (basket)<extra></extra>", name="basket")
315
  fig.add_trace(TR(x=bx, y=by, z=bz, **kwargs) if three_d else TR(x=bx, y=by, **kwargs))
 
316
  title_suffix = " (3D)" if three_d else ""
317
  fig.update_layout(
318
+ title=dict(text=f"UMAP of Epicure-{sibling.capitalize()}{title_suffix} - {n} ingredients", font=dict(size=15)),
 
319
  height=650, margin=dict(l=40, r=40, t=60, b=40),
320
  paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
321
  legend=dict(orientation="v", x=1.02, y=1, font=dict(size=11)),
 
324
  fig.update_xaxes(showgrid=True, gridcolor="#eeeeee", zeroline=False, title="UMAP 1")
325
  fig.update_yaxes(showgrid=True, gridcolor="#eeeeee", zeroline=False, title="UMAP 2")
326
  else:
327
+ fig.update_layout(scene=dict(xaxis=dict(title="UMAP 1"), yaxis=dict(title="UMAP 2"),
328
+ zaxis=dict(title="PC1 (z)"), bgcolor="#ffffff"))
 
 
 
 
329
  return fig
330
 
331
+ # ===== tab handlers (with explainers) =====
332
 
333
  def basket_pairings(sibling, basket, k):
334
  m = MODELS[sibling]
 
348
def supervised_slerp_multi(sibling, basket, directions, theta, k):
    """Rotate the basket centroid toward the selected supervised directions.

    Returns ``(rows, explainer)``: ``rows`` is a list of
    ``[name, "0.1234"]`` neighbour entries and ``explainer`` a Markdown note.
    An empty basket yields no rows; with no usable direction the plain
    centroid neighbours are returned.
    """
    m = MODELS[sibling]
    seed = _basket_centroid(m, basket)
    if seed is None:
        return [], "_(empty basket)_"

    def _as_rows(vec):
        # Top-k neighbours of vec, basket excluded, cosine as a 4-decimal string.
        return [[name, f"{score:.4f}"] for name, score in _topk(m, vec, k, basket)]

    pole = _stack_directions(m, directions, use_factor_pole=False)
    if pole is None:
        return _as_rows(seed), "_(no direction selected)_"
    rotated = _slerp(seed, pole, theta)
    rows = _as_rows(rotated)
    note = _slerp_explainer(m, basket, directions or [], theta, rotated, seed, pole, "supervised")
    return rows, note
359
 
360
def emergent_slerp_multi(sibling, basket, mode_labels, theta, k):
    """Rotate the basket centroid toward the selected factor-mode poles.

    ``mode_labels`` are UI strings of the form ``"<label> (<mode_id>)"``;
    labels that match no factor mode of the sibling are ignored.  Returns
    ``(rows, explainer)`` like ``supervised_slerp_multi``.
    """
    m = MODELS[sibling]
    # Map the dropdown's display strings back to factor-mode ids.
    label_to_id = {}
    for mode in m.modes:
        if mode.kind == "factor":
            label_to_id[f"{mode.label} ({mode.mode_id})"] = mode.mode_id
    mode_ids = [label_to_id[lab] for lab in (mode_labels or []) if lab in label_to_id]

    seed = _basket_centroid(m, basket)
    if seed is None:
        return [], "_(empty basket)_"

    def _as_rows(vec):
        # Top-k neighbours of vec, basket excluded, cosine as a 4-decimal string.
        return [[name, f"{score:.4f}"] for name, score in _topk(m, vec, k, basket)]

    pole = _stack_directions(m, mode_ids, use_factor_pole=True)
    if pole is None:
        return _as_rows(seed), "_(no factor mode selected)_"
    rotated = _slerp(seed, pole, theta)
    rows = _as_rows(rotated)
    note = _slerp_explainer(m, basket, mode_ids, theta, rotated, seed, pole, "emergent")
    return rows, note
373
 
374
def arithmetic(sibling, positives, negatives, k):
    """Answer a ``centroid(positives) - centroid(negatives)`` query.

    Returns ``(rows, explainer)``.  Without a positive centroid there are no
    rows.  Without negatives (or when their centroid is ``None``) the positive
    centroid is used as the query unchanged.
    """
    m = MODELS[sibling]
    pos = _basket_centroid(m, positives)
    if pos is None:
        return [], "_(no positives provided)_"
    neg = None
    if negatives:
        neg = _basket_centroid(m, negatives)
    q = pos if neg is None else _unit(pos - neg)
    # Inputs themselves are never offered as results.
    used = (positives or []) + (negatives or [])
    rows = [[name, f"{score:.4f}"] for name, score in _topk(m, q, k, used)]
    note = _arithmetic_explainer(m, positives or [], negatives or [], q, pos, neg)
    return rows, note
383
 
384
  def browse_modes(sibling, kind_filter, query):
385
  m = MODELS[sibling]
 
389
  continue
390
  if q and q not in mode.label.lower() and q not in mode.property.lower():
391
  continue
392
+ rows.append([mode.mode_id, mode.kind, mode.property, mode.label, mode.n_members,
393
+ ", ".join(mode.members[:12])])
394
  rows.sort(key=lambda r: (r[1], -r[4]))
395
  return rows
396
 
 
410
  out.append([[n, f"{s:.4f}"] for n, s in hits])
411
  return out[0], out[1], out[2]
412
 
413
+ # ===== Feature 6: recipe builder (lazy-loaded sentence-transformer) =====
414
+
415
_ST_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_ST = None  # set by the first _get_st() call


def _get_st():
    """Return the shared SentenceTransformer, creating it on first use.

    The ``sentence_transformers`` import is deferred to the first call so
    that importing this module does not load the encoder.
    """
    global _ST
    if _ST is not None:
        return _ST
    print(f"[epicure-explorer] loading {_ST_MODEL_NAME} (first call, ~80MB)", flush=True)
    from sentence_transformers import SentenceTransformer
    _ST = SentenceTransformer(_ST_MODEL_NAME, device="cpu")
    return _ST
424
+
425
@lru_cache(maxsize=4)
def _mode_label_matrix(sibling: str):
    """Encode the labels of a sibling's factor modes (cached per sibling).

    Returns ``(mode_ids, labels, matrix)`` where ``matrix`` is the float32
    result of encoding ``labels`` with normalised embeddings.  When the
    sibling has no factor modes the lists are empty and the matrix has shape
    ``(0, 384)``.  The returned objects are shared between calls through the
    cache, so callers should not mutate them.
    """
    factor_modes = [md for md in MODELS[sibling].modes if md.kind == "factor"]
    if not factor_modes:
        return [], [], np.zeros((0, 384), dtype=np.float32)
    mids, labels = [], []
    for md in factor_modes:
        labels.append(md.label)
        mids.append(md.mode_id)
    encoded = _get_st().encode(labels, normalize_embeddings=True, convert_to_numpy=True)
    return mids, labels, encoded.astype(np.float32)
435
+
436
def _mode_quartile(mode):
    """Return the leading members of ``mode``: about a quarter, 4 to 12 names.

    The count is ``ceil(len(members) / 4)`` clamped to the range 4..12; a mode
    with fewer members than the count returns all of them.  A falsy
    ``mode.members`` yields an empty list.
    """
    members = list(mode.members or [])
    quarter = (len(members) + 3) // 4  # integer ceil(len / 4)
    if quarter > 12:
        take = 12
    elif quarter < 4:
        take = 4
    else:
        take = quarter
    return members[:take]
440
+
441
# Words dropped from a dish prompt before fuzzy-matching tokens to the vocab.
_PROMPT_STOPWORDS = set(
    "i im i'm a an the for of with and or some my me we "
    "make making cook cooking prepare preparing want need to tonight "
    "people person servings dinner lunch dish recipe quick easy "
    "tasty yummy good great food meal style plate plates".split()
)

# A token is a letter followed by one or more letters, hyphens or apostrophes.
_TOKEN_RE = re.compile(r"[A-Za-z][A-Za-z\-']{1,}")
448
+
449
def suggest_basket(prompt, sibling, k=10):
    """Suggest a starting basket for a free-text dish description.

    Two evidence sources are merged:

    * direct - prompt tokens (stopwords and tokens of 2 characters or fewer
      removed) fuzzy-matched against the vocabulary, with underscores shown
      as spaces; score cutoff 88, at most 2 hits per token;
    * thematic - leading members of up to three factor modes whose encoded
      label has cosine >= 0.25 with the encoded prompt, scored as
      ``cosine * 100``.

    An ingredient found by both sources is tagged ``"both"`` and keeps the
    higher of its two scores.

    Args:
        prompt: dish description; blank or ``None`` short-circuits.
        sibling: key into ``MODELS``.
        k: maximum number of suggestions.

    Returns:
        ``(rows, names, explanation)``: ``rows`` is a list of
        ``[name, source, score]`` with source in
        ``{"direct", "thematic", "both"}``, ``names`` the ranked ingredient
        names, and ``explanation`` a Markdown summary of the evidence.
    """
    if not prompt or not prompt.strip():
        return [], [], "Type a dish description first."
    model = MODELS[sibling]
    vocab = list(model.vocab.keys())
    vocab_sp = [v.replace("_", " ") for v in vocab]

    # ---- direct mentions -------------------------------------------------
    tokens = [t for t in _TOKEN_RE.findall(prompt.lower())
              if t not in _PROMPT_STOPWORDS and len(t) > 2]
    direct = {}
    direct_evidence = []
    for tok in tokens:
        hits = fuzz_process.extract(tok, vocab_sp, scorer=fuzz_scorers.token_set_ratio,
                                    score_cutoff=88, limit=2)
        for _sp, score, idx in hits:
            name = vocab[idx]
            if score > direct.get(name, 0):
                direct[name] = float(score)
                direct_evidence.append((tok, name, float(score)))

    # ---- thematic matches ------------------------------------------------
    mids, labels, M = _mode_label_matrix(sibling)
    id_to_mode = {md.mode_id: md for md in model.modes if md.kind == "factor"}
    thematic = {}
    thematic_modes = []
    if M.shape[0] > 0:
        q = _get_st().encode([prompt], normalize_embeddings=True, convert_to_numpy=True)[0]
        sims = M @ q
        order = np.argsort(-sims)
        thematic_modes = [(mids[i], labels[i], float(sims[i]))
                          for i in order[:3] if sims[i] >= 0.25]
        for mid, lab, sim in thematic_modes:
            score = sim * 100.0
            for name in _mode_quartile(id_to_mode[mid]):
                # Keep the score and label of the strongest mode containing the name.
                if name not in thematic or score > thematic[name][0]:
                    thematic[name] = (score, lab)

    # ---- merge -------------------------------------------------------------
    combined = {name: (sc, "direct") for name, sc in direct.items()}
    for name, (sc, _lab) in thematic.items():
        prev = combined.get(name)
        if prev is None:
            combined[name] = (sc, "thematic")
        else:
            # Seen by both sources: always tag "both", keep the better score.
            combined[name] = (max(sc, prev[0]), "both")

    # Highest score first; on ties non-thematic entries lead, then by name.
    ranked = sorted(
        combined.items(),
        key=lambda kv: (-kv[1][0], 0 if kv[1][1] != "thematic" else 1, kv[0]),
    )[:int(k)]
    rows = [[name, src, round(score, 1)] for name, (score, src) in ranked]
    names = [name for name, _ in ranked]

    # ---- explanation -----------------------------------------------------
    lines = []
    if direct_evidence:
        dm = ", ".join(sorted({f"`{n}` (from '{t}')" for t, n, _ in direct_evidence}))
        lines.append(f"**Direct mentions:** {dm}")
    else:
        lines.append("**Direct mentions:** _none cleared score threshold_")
    if thematic_modes:
        bits = []
        for mid, lab, sim in thematic_modes:
            sample = ", ".join(id_to_mode[mid].members[:4])
            bits.append(f"`{lab}` (cos {sim:.2f}; e.g. {sample})")
        lines.append("**Matched factor modes:** " + "; ".join(bits))
    else:
        lines.append("**Matched factor modes:** _no mode label cleared cosine 0.25_")
    return rows, names, "\n\n".join(lines)
509
+
510
  # ===== fridge parser =====
511
 
512
  _LINE_SPLIT = re.compile(r"[\n;]")
513
  _BRACKET = re.compile(r"\([^)]*\)")
514
+ _QTY = (r"(?:\d+(?:[\.,/]\d+)?|a|an|one|two|three|four|five|six|seven|eight|nine|ten|half|quarter)")
515
+ _UNIT = (r"(?:cups?|tbsp\.?|tablespoons?|tsp\.?|teaspoons?|oz\.?|ounces?|lbs?\.?|pounds?|"
516
+ r"grams?|kgs?|kilos?|ml|liters?|litres?|cloves?|bunches?|sprigs?|pinch(?:es)?|"
 
 
517
  r"slices?|pieces?|cans?|packets?|sticks?|leaves?|stalks?|heads?|inch(?:es)?|"
518
  r"splash(?:es)?|dash(?:es)?|drops?|handfuls?|large|small|medium)")
519
  _LEADING_QTY = re.compile(rf"^\s*{_QTY}\s+(?:{_UNIT}\b\s*)?(?:of\s+)?", re.IGNORECASE)
 
521
  _JUICE_OF = re.compile(rf"^\s*(?:juice|zest)\s+(?:of\s+)?(?:{_QTY}\s+)?", re.IGNORECASE)
522
  _LEADING_PREP = re.compile(
523
  r"^\s*(?:fresh|dried|cooked|frozen|raw|ripe|firm|boneless|skinless|smoked|low[- ]fat)\s+",
524
+ re.IGNORECASE)
 
525
  _TRAILING_PREP = re.compile(
526
  r"\s*,\s*(?:chopped|minced|diced|sliced|grated|crushed|whole|ground|peeled|"
527
  r"to taste|optional|finely|coarsely|cubed|shredded|julienned|halved|quartered|warmed|"
528
  r"toasted|roasted|bruised|melted|softened|cooked|drained|rinsed|patted dry|trimmed|"
529
+ r"deveined|seeded|stemmed|crumbled).*$", re.IGNORECASE)
530
+ _KNOWN_PLURALS = {"tortillas":"tortilla","thighs":"thigh","leaves":"leaf","onions":"onion",
531
+ "potatoes":"potato","tomatoes":"tomato","cloves":"clove"}
 
 
 
532
 
533
  def _clean_line(line):
534
  s = line.strip().lower()
 
583
  if n not in seen: seen.add(n); dedup.append(n)
584
  return rows, dedup
585
 
586
+ # ===== Feature 8: public API endpoints =====
587
 
588
def _suggest(name: str, sibling: str, n: int = 5) -> list[str]:
    """Return up to ``n`` vocabulary entries that fuzzily resemble ``name``.

    ``name`` is lower-cased and its spaces are replaced by underscores before
    matching with the ``WRatio`` scorer; a falsy ``name`` is matched as ``""``.
    """
    candidates = list(MODELS[sibling].vocab.keys())
    query = (name or "").lower().replace(" ", "_")
    matches = fuzz_process.extract(query, candidates, scorer=fuzz_scorers.WRatio, limit=n)
    return [match[0] for match in matches]
593
+
594
def _validate_sibling(sibling):
    """Return ``None`` for a known sibling key, otherwise an error payload."""
    if sibling in MODELS:
        return None
    return {
        "error": f"sibling '{sibling}' not in {{cooc, core, chem}}",
        "suggestions": ["cooc", "core", "chem"],
    }
599
+
600
def _validate_ingredient(name, sibling, field="ingredient"):
    """Return ``None`` if ``name`` is in the sibling's vocab, else an error dict.

    ``field`` is the parameter name used in the error text.  Unknown names
    also get fuzzy ``"suggestions"``; non-string or empty names do not.
    """
    if not (isinstance(name, str) and name):
        return {"error": f"{field} must be a non-empty string"}
    if name in MODELS[sibling].vocab:
        return None
    return {
        "error": f"{field} '{name}' not in vocab",
        "suggestions": _suggest(name, sibling),
    }
607
+
608
def api_neighbors(ingredient, sibling="chem", k=5):
    """API: top-``k`` neighbours of one ingredient in the chosen sibling.

    Returns a list of ``{"name", "cosine"}`` dicts (cosine rounded to six
    decimals), or an error dict when the sibling or ingredient is invalid.
    The queried ingredient is excluded from the results.
    """
    problem = _validate_sibling(sibling)
    if not problem:
        problem = _validate_ingredient(ingredient, sibling)
    if problem:
        return problem
    m = MODELS[sibling]
    query = _unit(m.E[m.vocab[ingredient]])
    results = []
    for name, score in _topk(m, query, int(k), exclude=[ingredient]):
        results.append({"name": name, "cosine": round(float(score), 6)})
    return results
615
+
616
def api_slerp(seed, direction, theta_deg=30, sibling="chem", k=5):
    """API: rotate one seed ingredient toward a supervised pole.

    Returns a list of ``{"name", "cosine"}`` dicts for the top-``k``
    neighbours of the rotated query (seed excluded), or an error dict when
    the sibling, seed or direction is invalid.  An unknown direction is
    answered with the first ten pole names in sorted order as suggestions.
    """
    problem = _validate_sibling(sibling) or _validate_ingredient(seed, sibling, "seed")
    if problem:
        return problem
    m = MODELS[sibling]
    if direction not in m.supervised_poles:
        return {
            "error": f"direction '{direction}' not a supervised pole",
            "suggestions": sorted(m.supervised_poles.keys())[:10],
        }
    start = _unit(m.E[m.vocab[seed]])
    target = _unit(m.supervised_poles[direction])
    rotated = _slerp(start, target, float(theta_deg))
    neighbours = _topk(m, rotated, int(k), exclude=[seed])
    return [{"name": name, "cosine": round(float(score), 6)} for name, score in neighbours]
628
+
629
def api_arithmetic(positives, negatives, sibling="chem", k=5):
    """API: neighbours of ``centroid(positives) - centroid(negatives)``.

    Args:
        positives: ingredient names to add; a list, or a single name given
            as a bare string.  Must not be empty.
        negatives: ingredient names to subtract; list, bare string, or
            empty/``None`` for none.
        sibling: key into ``MODELS``.
        k: number of neighbours to return.

    Returns:
        A list of ``{"name", "cosine"}`` dicts (inputs excluded), or an error
        dict.  Unknown names are reported together, each with its own fuzzy
        suggestions.
    """
    err = _validate_sibling(sibling)
    if err:
        return err
    # A bare string is one ingredient; list("miso") would split it into
    # characters and report each letter as an unknown ingredient.
    if isinstance(positives, str):
        positives = [positives]
    if isinstance(negatives, str):
        negatives = [negatives]
    positives = list(positives or [])
    negatives = list(negatives or [])
    if not positives:
        return {"error": "positives must be a non-empty list"}
    m = MODELS[sibling]
    unknown = [x for x in positives + negatives if x not in m.vocab]
    if unknown:
        return {"error": f"unknown ingredients: {unknown}",
                "suggestions": {x: _suggest(x, sibling) for x in unknown}}
    pos = _basket_centroid(m, positives)
    neg = _basket_centroid(m, negatives) if negatives else None
    q = _unit(pos - neg) if neg is not None else pos
    pairs = _topk(m, q, int(k), exclude=positives + negatives)
    return [{"name": n, "cosine": round(float(s), 6)} for n, s in pairs]
646
+
647
def api_embed(ingredient, sibling="chem"):
    """API: return one ingredient's vector, after ``_unit``, as a list of floats.

    Returns an error dict instead when the sibling or ingredient is invalid.
    """
    problem = _validate_sibling(sibling) or _validate_ingredient(ingredient, sibling)
    if problem:
        return problem
    m = MODELS[sibling]
    vector = _unit(m.E[m.vocab[ingredient]])
    return list(map(float, vector.tolist()))
653
+
654
def api_list_directions(sibling="chem"):
    """API: sorted names of the sibling's supervised poles, or an error dict."""
    problem = _validate_sibling(sibling)
    if problem:
        return problem
    pole_names = list(MODELS[sibling].supervised_poles.keys())
    pole_names.sort()
    return pole_names
658
+
659
def api_list_factor_modes(sibling="chem"):
    """API: describe every factor mode of the sibling, or return an error dict.

    Each entry carries ``mode_id``, ``label``, ``kind``, ``property`` and
    ``n_members``; all but ``mode_id`` are coerced to plain ``str``/``int``.
    """
    problem = _validate_sibling(sibling)
    if problem:
        return problem
    described = []
    for mode in MODELS[sibling].modes:
        if mode.kind != "factor":
            continue
        described.append({
            "mode_id": mode.mode_id,
            "label": str(mode.label),
            "kind": str(mode.kind),
            "property": str(mode.property),
            "n_members": int(mode.n_members),
        })
    return described
666
+
667
+ # ===== Feature 9: saved queries helpers =====
668
+
669
# Saved-query tab key -> id of the corresponding gr.Tab.
TAB_IDS = dict(
    basket="tab_basket",
    supervised_slerp="tab_sup",
    emergent_slerp="tab_em",
    arithmetic="tab_ar",
    compare="tab_cmp",
)

# Saved-query tab key -> human-readable tab title.
TAB_LABELS = dict(
    basket="Basket pairings",
    supervised_slerp="Supervised SLERP",
    emergent_slerp="Emergent SLERP",
    arithmetic="Arithmetic",
    compare="Compare siblings",
)
683
+
684
def _summarise(tab, inputs):
    """Return a one-line, human-readable summary of a saved query.

    Args:
        tab: saved-query tab key (``"basket"``, ``"supervised_slerp"``,
            ``"emergent_slerp"``, ``"arithmetic"`` or ``"compare"``).
        inputs: dict of the inputs captured for that tab.

    Returns:
        The summary string, or ``"(unknown)"`` for an unrecognised tab.
        List inputs that are missing or stored as ``None`` are treated as
        empty, and a bare string is treated as a one-item list.
    """
    def _items(key):
        # Saved inputs may hold None (cleared dropdown) or a bare string.
        values = inputs.get(key) or []
        if isinstance(values, str):
            values = [values]
        return list(values)

    def _head(key, limit, sep=", "):
        return sep.join(str(v) for v in _items(key)[:limit])

    sib = inputs.get("sibling", "")
    if tab == "basket":
        return f"[{sib}] basket: {_head('basket', 3)} k={inputs.get('k')}"
    if tab == "supervised_slerp":
        return f"[{sib}] {_head('basket', 2)} +{inputs.get('theta')}° -> {_head('directions', 2)}"
    if tab == "emergent_slerp":
        return (f"[{sib}] {_head('basket', 2)} +{inputs.get('theta')}° -> "
                f"{len(_items('modes'))} factor modes")
    if tab == "arithmetic":
        p = _head("positives", 2, " + ")
        n = _head("negatives", 2, " + ")
        return f"[{sib}] {p}" + (f" - {n}" if n else "")
    if tab == "compare":
        return f"[3 siblings] {_head('basket', 2)} +{inputs.get('theta')}°"
    return "(unknown)"
702
+
703
def save_query(saved, tab, inputs_dict):
    """Prepend a new saved-query record and return the updated state.

    The record gets a fresh UUID, a UTC ISO-8601 timestamp (second
    precision), the tab key, the raw inputs and a one-line summary.  Only the
    200 most recent records are kept.  Returns ``(saved_list, table_rows)``;
    the incoming ``saved`` list is not modified.
    """
    existing = list(saved or [])
    record = {
        "id": str(uuid.uuid4()),
        "created_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "tab": tab,
        "inputs": inputs_dict,
        "summary": _summarise(tab, inputs_dict),
    }
    history = ([record] + existing)[:200]  # newest first, capped
    return history, _render_saved(history)
715
+
716
def delete_query(saved, qid):
    """Drop every saved query whose ``"id"`` equals ``qid``.

    Returns ``(remaining_list, table_rows)``; a falsy ``saved`` is treated as
    an empty list.
    """
    remaining = []
    for entry in saved or []:
        if entry.get("id") != qid:
            remaining.append(entry)
    return remaining, _render_saved(remaining)
719
+
720
def _render_saved(saved):
    """Turn saved-query records into table rows.

    Each row is ``[created_at, tab title, summary, id]``; a tab key without
    an entry in ``TAB_LABELS`` is shown as-is.
    """
    table = []
    for entry in saved or []:
        created = entry["created_at"]
        tab_key = entry["tab"]
        title = TAB_LABELS.get(tab_key, tab_key)
        table.append([created, title, entry["summary"], entry["id"]])
    return table
723
+
724
+ # ===== Theme + CSS =====
725
 
 
 
726
  THEME = gr.themes.Soft(
727
  primary_hue=gr.themes.Color(
728
  c50="#E8F4F1", c100="#C8E6DE", c200=KAIKAKU_ACCENT_LIGHT,
 
733
  neutral_hue="slate",
734
  font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
735
  ).set(
 
736
  block_label_text_color="#1f2937",
737
  block_label_text_weight="600",
738
  block_title_text_color="#0f172a",
739
  block_title_text_weight="700",
740
  body_text_color="#0f172a",
741
  body_text_color_subdued="#475569",
 
742
  button_primary_background_fill=KAIKAKU_ACCENT,
743
  button_primary_background_fill_hover=KAIKAKU_ACCENT_HOVER,
744
  button_primary_text_color="#ffffff",
 
753
  CUSTOM_CSS = f"""
754
  .gradio-container {{max-width: 1280px !important;}}
755
  footer {{visibility: hidden;}}
756
+ .gradio-container label, .gradio-container .label,
 
 
 
757
  .gradio-container [data-testid="block-label"],
758
+ .gradio-container .block-label, .gradio-container .gr-block-label {{
759
+ color: #0f172a !important; font-weight: 600 !important; background: transparent !important;
 
 
 
 
 
 
 
 
 
760
  }}
761
+ .gradio-container button[role="tab"] {{ color: #334155 !important; font-weight: 500 !important; }}
762
  .gradio-container button[role="tab"][aria-selected="true"] {{
763
+ color: {KAIKAKU_ACCENT} !important; border-bottom-color: {KAIKAKU_ACCENT} !important; font-weight: 700 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  }}
765
+ .gradio-container button.primary, .gradio-container .primary > button {{
766
+ background: {KAIKAKU_ACCENT} !important; color: #ffffff !important;
767
+ border-color: {KAIKAKU_ACCENT} !important; font-weight: 600 !important;
 
 
 
 
768
  }}
769
+ .gradio-container button.primary:hover {{ background: {KAIKAKU_ACCENT_HOVER} !important; border-color: {KAIKAKU_ACCENT_HOVER} !important; }}
770
+ .gradio-container table thead th, .gradio-container .gr-dataframe thead th {{
771
+ color: #0f172a !important; font-weight: 700 !important; background: #f8fafc !important;
772
  }}
773
+ .gradio-container table tbody td {{ color: #0f172a !important; }}
 
774
  .sibling-card {{
775
+ border-left: 3px solid {KAIKAKU_ACCENT}; padding: 10px 14px;
776
+ margin: 6px 0; background: #f8fafc; border-radius: 4px;
 
 
 
777
  }}
778
+ .sibling-name {{ color: {KAIKAKU_DARK}; font-weight: 700; font-size: 1.02em; }}
779
+ .sibling-desc {{ color: #334155; font-size: 0.95em; line-height: 1.5; }}
780
  """
781
 
 
782
# Figures rendered once at import time so the plots are populated on page load.
_INITIAL_BASKET = ("chicken", "lemon", "garlic")
_INITIAL_UMAP = umap_view("chem", list(_INITIAL_BASKET), True, 8, three_d=False)
_INITIAL_HEATMAP = _basket_heatmap(MODELS["chem"], list(_INITIAL_BASKET))
784
 
 
797
  </div>
798
  """
799
 
800
+ # ===== Helper for ingredient picker with food-group filter =====
801
+
802
def _ingredient_picker(label, default_value, multiselect=True, max_choices=10):
    """Create a food-group radio wired to an ingredient dropdown.

    The radio is created first and the dropdown second.  Changing the radio
    re-filters the dropdown through ``_filter_dropdown``.  Returns
    ``(radio, dropdown)``.
    """
    group_filter = gr.Radio(
        choices=FOOD_GROUP_CHOICES,
        value="All",
        label=f"{label} - food group filter",
        interactive=True,
    )
    picker = gr.Dropdown(
        choices=ALL_INGREDIENTS,
        value=default_value,
        label=label,
        multiselect=multiselect,
        max_choices=max_choices,
    )
    group_filter.change(
        _filter_dropdown,
        inputs=[group_filter, picker],
        outputs=picker,
        show_progress="hidden",
    )
    return group_filter, picker
809
+
810
+ # ===== UI =====
811
+
812
  with gr.Blocks(title="Epicure Explorer", theme=THEME, css=CUSTOM_CSS) as demo:
813
 
814
+ saved_state = gr.BrowserState(default_value=[], storage_key="epicure_saved_queries_v1")
815
+
816
  gr.Markdown(
817
+ """# Epicure Explorer
818
  Chef-facing operators over three sibling ingredient embeddings (Cooc / Core / Chem) from
819
  [arXiv:2605.22391](https://arxiv.org/abs/2605.22391). 1,790 canonical ingredients across 7 languages,
820
  300-D Metapath2Vec, controlled chemistry-vs-recipe-context spectrum."""
 
826
 
827
  shared_basket = gr.State([])
828
 
829
+ with gr.Tabs() as tabs:
830
+
831
+ # ---------- Tab 1: Basket pairings ----------
832
+ with gr.Tab("Basket pairings", id="tab_basket"):
833
+ gr.Markdown("Pick one or more ingredients. The tool averages their unit vectors and returns nearest neighbours plus closest modes of that centroid.")
834
+ basket_radio, basket = _ingredient_picker("Ingredient basket (pick 1+)", ["chicken","lemon","garlic"])
835
+ k_pair = gr.Slider(1, 15, value=8, step=1, label="K")
836
+ with gr.Row():
837
+ pair_btn = gr.Button("Find pairings", variant="primary")
838
+ save_basket_btn = gr.Button("Save this query", variant="secondary")
839
+ with gr.Row():
840
+ nb_table = gr.Dataframe(headers=["Neighbour","Cosine"], label="Top-K nearest neighbours", interactive=False)
841
+ mode_table = gr.Dataframe(headers=["Mode id","Label","Kind","Cosine"], label="Closest modes", interactive=False)
842
+ heatmap_plot = gr.Plot(value=_INITIAL_HEATMAP, label="Pairwise cosine (matplotlib)")
843
+ pair_btn.click(basket_pairings, inputs=[sibling, basket, k_pair],
844
+ outputs=[nb_table, mode_table, heatmap_plot], show_progress="full")
845
+ gr.Examples(
846
+ examples=[
847
+ ["chem", ["chicken","lemon","garlic"], 8],
848
+ ["core", ["miso","ginger","sesame_oil"], 8],
849
+ ["chem", ["tomato","basil","mozzarella_cheese"], 8],
850
+ ["cooc", ["chocolate","strawberry","cream"], 8],
851
+ ["chem", ["cumin","coriander","turmeric"], 8],
852
+ ["core", ["soy_sauce","ginger","scallion"], 8],
853
+ ["chem", ["red_wine","beef","rosemary"], 8],
854
+ ["core", ["coconut_milk","lemongrass","fish_sauce"], 8],
855
+ ],
856
+ inputs=[sibling, basket, k_pair],
857
+ label="Try one of these baskets",
858
+ )
859
+
860
+ # ---------- Tab 2: Supervised SLERP ----------
861
+ with gr.Tab("Supervised SLERP", id="tab_sup"):
862
+ gr.Markdown("Rotate the seed basket toward one or more supervised pole vectors.")
863
+ sup_radio, sup_basket = _ingredient_picker("Seed basket (pick 1+)", ["rice"])
864
+ sup_dirs = gr.Dropdown(choices=_supervised_choices("chem"), value=["cuisine:South_Asian"],
865
+ label="Supervised directions (pick 1+; summed)",
866
+ multiselect=True, max_choices=5)
867
+ sup_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
868
+ sup_k = gr.Slider(1, 15, value=8, step=1, label="K")
869
+ with gr.Row():
870
+ sup_btn = gr.Button("Rotate", variant="primary")
871
+ save_sup_btn = gr.Button("Save this query", variant="secondary")
872
+ sup_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
873
+ sup_explainer = gr.Markdown()
874
+ sup_btn.click(supervised_slerp_multi,
875
+ inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k],
876
+ outputs=[sup_table, sup_explainer], show_progress="full")
877
+ sibling.change(lambda s: gr.Dropdown(choices=_supervised_choices(s), value=[]),
878
+ inputs=sibling, outputs=sup_dirs)
879
+ gr.Examples(
880
+ examples=[
881
+ ["chem", ["rice"], ["cuisine:South_Asian"], 30, 8],
882
+ ["chem", ["corn"], ["cuisine:Latin_American"], 30, 8],
883
+ ["core", ["chicken"], ["cuisine:Mediterranean"], 45, 8],
884
+ ["core", ["tomato","basil"], ["cuisine:Southeast_Asian"], 45, 8],
885
+ ["chem", ["beef"], ["cuisine:East_Asian"], 60, 8],
886
+ ["cooc", ["chocolate"], ["cuisine:Latin_American"], 30, 8],
887
+ ],
888
+ inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k],
889
+ label="Try one of these rotations",
890
+ )
891
+
892
+ # ---------- Tab 3: Emergent SLERP ----------
893
+ with gr.Tab("Emergent SLERP", id="tab_em"):
894
+ gr.Markdown("Rotate the seed basket toward one or more emergent FastICA factor-mode poles.")
895
+ em_radio, em_basket = _ingredient_picker("Seed basket (pick 1+)", ["chocolate"])
896
+ factor_opts = _factor_mode_choices("chem")
897
+ em_modes = gr.Dropdown(choices=[label for label, _ in factor_opts],
898
+ value=[factor_opts[0][0]] if factor_opts else [],
899
+ label="Factor modes (pick 1+; summed)", multiselect=True, max_choices=5)
900
+ em_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
901
+ em_k = gr.Slider(1, 15, value=8, step=1, label="K")
902
+ with gr.Row():
903
+ em_btn = gr.Button("Rotate", variant="primary")
904
+ save_em_btn = gr.Button("Save this query", variant="secondary")
905
+ em_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
906
+ em_explainer = gr.Markdown()
907
+ em_btn.click(emergent_slerp_multi,
908
+ inputs=[sibling, em_basket, em_modes, em_theta, em_k],
909
+ outputs=[em_table, em_explainer], show_progress="full")
910
+ sibling.change(lambda s: gr.Dropdown(choices=[label for label, _ in _factor_mode_choices(s)], value=[]),
911
+ inputs=sibling, outputs=em_modes)
912
+
913
+ # ---------- Tab 4: Arithmetic ----------
914
+ with gr.Tab("Arithmetic", id="tab_ar"):
915
+ gr.Markdown("Mikolov-style vector arithmetic: `centroid(positives) - centroid(negatives)`, then top-K neighbours. Killer demo: `miso - salt` on Core.")
916
+ pos_radio, pos_box = _ingredient_picker("Positives (added)", ["miso"])
917
+ neg_radio, neg_box = _ingredient_picker("Negatives (subtracted)", ["salt"])
918
+ ar_k = gr.Slider(1, 15, value=8, step=1, label="K")
919
+ with gr.Row():
920
+ ar_btn = gr.Button("Compute", variant="primary")
921
+ save_ar_btn = gr.Button("Save this query", variant="secondary")
922
+ ar_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K nearest to result vector")
923
+ ar_explainer = gr.Markdown()
924
+ ar_btn.click(arithmetic, inputs=[sibling, pos_box, neg_box, ar_k],
925
+ outputs=[ar_table, ar_explainer], show_progress="full")
926
+ gr.Examples(
927
+ examples=[
928
+ ["core", ["miso"], ["salt"], 8],
929
+ ["core", ["chicken","tofu"], ["beef"], 8],
930
+ ["cooc", ["basil","cumin"], ["parsley"], 8],
931
+ ["chem", ["chocolate"], ["sugar"], 8],
932
+ ["chem", ["wine"], ["beer"], 8],
933
+ ["core", ["bread"], ["flour"], 8],
934
+ ["core", ["coffee"], ["milk"], 8],
935
+ ["chem", ["mozzarella_cheese"], ["milk"], 8],
936
+ ],
937
+ inputs=[sibling, pos_box, neg_box, ar_k],
938
+ label="Try one of these arithmetic queries",
939
+ )
940
+
941
+ # ---------- Tab 5: Mode atlas (click row -> UMAP) ----------
942
+ with gr.Tab("Mode atlas", id="tab_atlas"):
943
+ gr.Markdown(
944
+ "Browse the GMM mode atlas. Cooc 150 / Core 193 / Chem 200 modes. "
945
+ "**Click any row** to send that mode's members to the UMAP tab as a basket."
946
+ )
947
+ atlas_kind = gr.Radio(choices=["all","factor","continuous","binary"], value="all", label="Mode kind")
948
+ atlas_search = gr.Textbox(label="Search labels / properties", placeholder="e.g. South Asian, baking, fiber", value="")
949
+ atlas_btn = gr.Button("Browse modes", variant="primary")
950
+ atlas_table = gr.Dataframe(
951
+ headers=["mode_id","kind","property","label","n_members","top members"],
952
+ label="Modes (click a row to highlight on UMAP)",
953
+ wrap=True, interactive=False,
954
+ )
955
+ atlas_btn.click(browse_modes, inputs=[sibling, atlas_kind, atlas_search], outputs=atlas_table, show_progress="full")
956
+
957
+ # ---------- Tab 6: Compare siblings ----------
958
+ with gr.Tab("Compare siblings", id="tab_cmp"):
959
+ gr.Markdown("Same query, three siblings, side by side.")
960
+ cmp_radio, cmp_basket = _ingredient_picker("Seed basket", ["chicken"])
961
+ cmp_dirs = gr.Dropdown(choices=_supervised_choices("chem"), value=[],
962
+ label="Optional directions (empty = pure pairings)",
963
+ multiselect=True, max_choices=5)
964
+ cmp_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
965
+ cmp_k = gr.Slider(1, 15, value=8, step=1, label="K")
966
+ with gr.Row():
967
+ cmp_btn = gr.Button("Compare across siblings", variant="primary")
968
+ save_cmp_btn = gr.Button("Save this query", variant="secondary")
969
+ with gr.Row():
970
+ cmp_cooc = gr.Dataframe(headers=["Cooc neighbour","Cosine"], label="Cooc (recipe-context)")
971
+ cmp_core = gr.Dataframe(headers=["Core neighbour","Cosine"], label="Core (blended)")
972
+ cmp_chem = gr.Dataframe(headers=["Chem neighbour","Cosine"], label="Chem (chemistry)")
973
+ cmp_btn.click(compare_siblings, inputs=[cmp_basket, cmp_dirs, cmp_theta, cmp_k],
974
+ outputs=[cmp_cooc, cmp_core, cmp_chem], show_progress="full")
975
+
976
+ # ---------- Tab 7: UMAP ----------
977
+ with gr.Tab("UMAP visualisation", id="tab_umap"):
978
+ gr.Markdown(
979
+ "2-D UMAP of the 1,790-ingredient embedding (cosine, n_neighbors=30, min_dist=0.03). "
980
+ "Points coloured by food group. Basket members appear as accent stars; top-K neighbours as amber dots."
981
+ )
982
+ umap_radio, umap_basket = _ingredient_picker("Highlight these ingredients", ["chicken","lemon","garlic"])
983
+ with gr.Row():
984
+ umap_show_nb = gr.Checkbox(value=True, label="Show top-K neighbours of basket centroid")
985
+ umap_3d = gr.Checkbox(value=False, label="3-D perspective (UMAP + PC1)")
986
+ umap_k = gr.Slider(1, 20, value=10, step=1, label="K neighbours")
987
+ umap_btn = gr.Button("Update plot", variant="primary")
988
+ umap_plot = gr.Plot(value=_INITIAL_UMAP, label="UMAP")
989
+ umap_btn.click(umap_view, inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
990
+ outputs=umap_plot, show_progress="full")
991
+ sibling.change(umap_view, inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
992
+ outputs=umap_plot)
993
+
994
+ # ---------- Tab 8: Parse my fridge ----------
995
+ with gr.Tab("Parse my fridge", id="tab_fridge"):
996
+ gr.Markdown(
997
+ "Paste a free-text ingredient list. Quantities, units, and prep notes are stripped, "
998
+ "then each line is fuzzy-matched to canonical vocab. Click **Send matched to Basket tab** "
999
+ "to populate the Basket Pairings input."
1000
+ )
1001
+ fridge_text = gr.Textbox(
1002
+ label="Free-text ingredients (one per line or semicolon-separated)",
1003
+ lines=8,
1004
+ value=("2 boneless chicken thighs\n1 cup coconut milk\n1 tbsp fish sauce (or soy sauce)\n"
1005
+ "fresh lemongrass, bruised\n3 cloves garlic, minced\n1 inch fresh ginger\n"
1006
+ "juice of one lime\nsalt to taste"),
1007
+ )
1008
+ fridge_min = gr.Slider(40, 100, value=70, step=5, label="Min match score (rapidfuzz)")
1009
+ with gr.Row():
1010
+ fridge_btn = gr.Button("Parse and match", variant="primary")
1011
+ fridge_send = gr.Button("Send matched to Basket tab", variant="secondary")
1012
+ fridge_table = gr.Dataframe(
1013
+ headers=["Input line", "Canonical match", "Score", "Cleaned"],
1014
+ label="Parsed matches", interactive=False,
1015
+ )
1016
+ fridge_matched = gr.Textbox(label="Matched ingredients", interactive=False)
1017
def _parse(txt, sib, mn):
    """Parse the free-text ingredient list and fan the result out to three outputs.

    Returns, in order: the table rows produced by ``parse_fridge``, the
    matched names joined into one comma-separated display string, and the
    matched names themselves as returned by ``parse_fridge``.
    """
    threshold = int(mn)  # coerce the minimum-score value to an int before matching
    table_rows, matched_names = parse_fridge(txt, sib, threshold)
    display_text = ", ".join(matched_names)
    return table_rows, display_text, matched_names
1020
+ fridge_btn.click(_parse, inputs=[fridge_text, sibling, fridge_min],
1021
+ outputs=[fridge_table, fridge_matched, shared_basket], show_progress="full")
1022
+ fridge_send.click(lambda matches: gr.Dropdown(value=matches[:10] if matches else []),
1023
+ inputs=[shared_basket], outputs=[basket])
1024
+
1025
+ # ---------- Tab 9: Recipe builder ----------
1026
+ with gr.Tab("Recipe builder", id="tab_recipe"):
1027
+ gr.Markdown(
1028
+ "Describe a dish in plain English. Hybrid retrieval: rapidfuzz token matching for direct "
1029
+ "ingredient mentions + sentence-transformer cosine against the sibling's factor-mode labels "
1030
+ "for thematic matches. First call after Space cold-start downloads ~80MB encoder (one-time)."
1031
+ )
1032
+ rb_prompt = gr.Textbox(label="Dish description", lines=3,
1033
+ value="I'm making Thai green curry for 4 people")
1034
+ rb_k = gr.Slider(4, 20, value=10, step=1, label="Suggestions (K)")
1035
+ rb_btn = gr.Button("Suggest starter basket", variant="primary")
1036
+ rb_table = gr.Dataframe(
1037
+ headers=["Ingredient", "Source", "Score"],
1038
+ label="Suggested basket (source = direct / thematic / both)", interactive=False,
1039
+ )
1040
+ rb_explainer = gr.Markdown()
1041
+ rb_matched = gr.State([])
1042
+ rb_send = gr.Button("Send to Basket tab", variant="secondary")
1043
def _rb(prompt, sib, k):
    """Run ``suggest_basket`` and reorder its result for the wired outputs.

    ``suggest_basket`` yields ``(rows, names, markdown)``; this adapter
    returns ``(rows, markdown, names)``.
    """
    suggestion = suggest_basket(prompt, sib, int(k))
    table_rows, ingredient_names, explainer_md = suggestion
    return table_rows, explainer_md, ingredient_names
1046
+ rb_btn.click(_rb, inputs=[rb_prompt, sibling, rb_k],
1047
+ outputs=[rb_table, rb_explainer, rb_matched], show_progress="full")
1048
+ rb_send.click(lambda names: gr.Dropdown(value=(names or [])[:10]),
1049
+ inputs=[rb_matched], outputs=[basket])
1050
+ gr.Examples(
1051
+ examples=[
1052
+ ["I'm making Thai green curry for 4 people", 10],
1053
+ ["spicy vegetarian taco filling", 10],
1054
+ ["weeknight pasta with tomatoes and herbs", 10],
1055
+ ["Japanese miso-glazed salmon and greens", 10],
1056
+ ["Moroccan tagine with lamb and dried fruit", 10],
1057
+ ],
1058
+ inputs=[rb_prompt, rb_k],
1059
+ label="Try one of these prompts",
1060
+ )
1061
+
1062
+ # ---------- Tab 10: Saved queries ----------
1063
+ with gr.Tab("Saved queries", id="tab_saved"):
1064
+ gr.Markdown(
1065
+ "Stored locally in your browser via `localStorage` (gr.BrowserState). "
1066
+ "~5 MB quota; per-browser, not per-account. Clearing browser data wipes them."
1067
+ )
1068
+ saved_table = gr.Dataframe(
1069
+ headers=["created_at", "tab", "summary", "id"],
1070
+ label="Your saved queries (newest first)", interactive=False, wrap=True,
1071
+ )
1072
+ with gr.Row():
1073
+ selected_id = gr.State("")
1074
+ del_btn = gr.Button("Delete selected", variant="secondary")
1075
def _on_select(saved, evt: gr.SelectData):
    """Return the id of the saved query behind the clicked table row.

    Args:
        saved: The stored list of saved-query dicts (may be ``None`` or empty).
        evt: The Gradio select event for the saved-queries table.

    Returns:
        The ``id`` of the selected query, or ``""`` when nothing usable was
        selected.
    """
    if evt is None or evt.index is None:
        return ""
    entries = saved or []

    # The table is labelled "newest first", so its row order is not guaranteed
    # to match the order of the stored list. Prefer the id shown in the
    # clicked row (fourth column per the table headers) whenever it matches a
    # stored entry, so that a later delete targets what the user clicked.
    # NOTE(review): assumes rendered rows follow the declared
    # [created_at, tab, summary, id] column order -- confirm against the
    # rendering helper.
    row_value = getattr(evt, "row_value", None)
    if isinstance(row_value, (list, tuple)) and len(row_value) >= 4:
        shown_id = str(row_value[3])
        if shown_id:
            for entry in entries:
                if isinstance(entry, dict) and str(entry.get("id", "")) == shown_id:
                    return entry.get("id", "")

    # Fallback: positional lookup, as before, but reject negative indices so
    # Python's wrap-around indexing can never select an unrelated entry.
    row = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
    if not 0 <= row < len(entries):
        return ""
    entry = entries[row]
    return entry.get("id", "") if isinstance(entry, dict) else ""
1080
+ saved_table.select(_on_select, inputs=[saved_state], outputs=selected_id)
1081
+ del_btn.click(delete_query, inputs=[saved_state, selected_id],
1082
+ outputs=[saved_state, saved_table])
1083
+ demo.load(lambda s: _render_saved(s), inputs=saved_state, outputs=saved_table)
1084
+
1085
+ # ---- Wire Save buttons (after all tabs exist so all components are in scope) ----
1086
+ save_basket_btn.click(
1087
+ lambda s, sib, b, k: save_query(s, "basket",
1088
+ {"sibling": sib, "basket": b or [], "k": int(k)}),
1089
+ inputs=[saved_state, sibling, basket, k_pair],
1090
+ outputs=[saved_state, saved_table],
1091
+ )
1092
+ save_sup_btn.click(
1093
+ lambda s, sib, b, d, th, k: save_query(s, "supervised_slerp",
1094
+ {"sibling": sib, "basket": b or [], "directions": d or [], "theta": float(th), "k": int(k)}),
1095
+ inputs=[saved_state, sibling, sup_basket, sup_dirs, sup_theta, sup_k],
1096
+ outputs=[saved_state, saved_table],
1097
+ )
1098
+ save_em_btn.click(
1099
+ lambda s, sib, b, m, th, k: save_query(s, "emergent_slerp",
1100
+ {"sibling": sib, "basket": b or [], "modes": m or [], "theta": float(th), "k": int(k)}),
1101
+ inputs=[saved_state, sibling, em_basket, em_modes, em_theta, em_k],
1102
+ outputs=[saved_state, saved_table],
1103
+ )
1104
+ save_ar_btn.click(
1105
+ lambda s, sib, p, n, k: save_query(s, "arithmetic",
1106
+ {"sibling": sib, "positives": p or [], "negatives": n or [], "k": int(k)}),
1107
+ inputs=[saved_state, sibling, pos_box, neg_box, ar_k],
1108
+ outputs=[saved_state, saved_table],
1109
+ )
1110
+ save_cmp_btn.click(
1111
+ lambda s, b, d, th, k: save_query(s, "compare",
1112
+ {"basket": b or [], "directions": d or [], "theta": float(th), "k": int(k)}),
1113
+ inputs=[saved_state, cmp_basket, cmp_dirs, cmp_theta, cmp_k],
1114
+ outputs=[saved_state, saved_table],
1115
+ )
1116
 
1117
+ # ---- Mode atlas row click -> UMAP highlight + jump to UMAP tab ----
1118
def atlas_row_to_umap(sibling_value, table_value, show_nb, k_value, three_d_value, evt: gr.SelectData):
    """Turn a click on a mode-atlas row into a UMAP highlight of that mode.

    Looks up the mode whose id sits in the first column of the clicked row,
    keeps up to ten of its members that are in the selected model's vocab,
    redraws the UMAP figure for them and switches to the UMAP tab.

    Returns a 4-tuple matching the wired outputs: a dropdown update carrying
    the members, the figure, the member list, and a tab selection. When the
    click cannot be resolved, four no-op updates are returned instead.
    """
    def _unchanged():
        # A fresh set of no-op updates, one per output.
        return gr.update(), gr.update(), gr.update(), gr.update()

    if evt is None or evt.index is None or table_value is None:
        return _unchanged()

    row = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index

    # Best-effort read of the first cell of the clicked row; the table value
    # may expose ``.iloc`` or be a plain nested sequence.
    try:
        if hasattr(table_value, "iloc"):
            clicked_mode_id = table_value.iloc[row, 0]
        else:
            clicked_mode_id = table_value[row][0]
    except Exception:
        return _unchanged()

    m = MODELS[sibling_value]

    mode = None
    for candidate in m.modes:
        if candidate.mode_id == clicked_mode_id:
            mode = candidate
            break
    if mode is None:
        return _unchanged()

    in_vocab = [name for name in mode.members if name in m.vocab]
    members = in_vocab[:10]
    if not members:
        return _unchanged()

    fig = umap_view(
        sibling_value,
        members,
        bool(show_nb),
        int(k_value),
        three_d=bool(three_d_value),
    )
    highlight = gr.Dropdown(value=members)
    jump_to_umap = gr.Tabs(selected="tab_umap")
    return highlight, fig, members, jump_to_umap
1141
+ atlas_table.select(
1142
+ atlas_row_to_umap,
1143
+ inputs=[sibling, atlas_table, umap_show_nb, umap_k, umap_3d],
1144
+ outputs=[umap_basket, umap_plot, shared_basket, tabs],
1145
+ show_progress="hidden",
1146
+ )
1147
 
1148
+ # ---- Public API endpoints ----
1149
+ gr.api(api_neighbors, api_name="neighbors")
1150
+ gr.api(api_slerp, api_name="slerp")
1151
+ gr.api(api_arithmetic, api_name="arithmetic")
1152
+ gr.api(api_embed, api_name="embed")
1153
+ gr.api(api_list_directions, api_name="list_directions")
1154
+ gr.api(api_list_factor_modes, api_name="list_factor_modes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1155
 
1156
+ gr.Markdown(
1157
+ """---
1158
+ ### Developer API
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1159
 
1160
+ These operators are also exposed as JSON endpoints. See `/?view=api` for the auto-generated schema.
 
 
 
 
1161
 
1162
+ ```python
1163
+ from gradio_client import Client
1164
+ c = Client("Kaikaku/epicure-explorer")
1165
+ c.predict("garlic", "chem", 5, api_name="/neighbors")
1166
+ c.predict("rice", "cuisine:South_Asian", 30, "chem", 5, api_name="/slerp")
1167
+ c.predict(["miso"], ["salt"], "core", 8, api_name="/arithmetic")
1168
+ c.predict("garlic", "chem", api_name="/embed") # 300-D L2-normalised vector
1169
+ c.predict("chem", api_name="/list_directions")
1170
+ c.predict("chem", api_name="/list_factor_modes")
1171
+ ```
1172
 
1173
+ Endpoints validate inputs and return `{"error": "...", "suggestions": [...]}` on bad input. Free-tier limits: ~1-2 req/sec shared, no auth, Space sleeps after ~48h idle (cold start ~30-60s on next request).
 
 
1174
 
1175
+ ---
1176
+ **Cite:** Radzikowski and Chen, 2026, *Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings*, [arXiv:2605.22391](https://arxiv.org/abs/2605.22391). Artefacts: [epicure-cooc](https://huggingface.co/Kaikaku/epicure-cooc) | [epicure-core](https://huggingface.co/Kaikaku/epicure-core) | [epicure-chem](https://huggingface.co/Kaikaku/epicure-chem) | [corpus dataset](https://huggingface.co/datasets/Kaikaku/epicure-corpus-resources)
1177
  """
1178
  )
1179
 
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
- gradio>=5.0.0
2
  huggingface_hub>=0.24.0
3
  safetensors>=0.4.0
4
  numpy>=1.24
5
  plotly>=5.20.0
6
  matplotlib>=3.8.0
7
  rapidfuzz>=3.6.0
 
8
  audioop-lts; python_version >= "3.13"
 
1
+ gradio>=5.6.0
2
  huggingface_hub>=0.24.0
3
  safetensors>=0.4.0
4
  numpy>=1.24
5
  plotly>=5.20.0
6
  matplotlib>=3.8.0
7
  rapidfuzz>=3.6.0
8
+ sentence-transformers>=2.7.0
9
  audioop-lts; python_version >= "3.13"