josefchen committed on
Commit
8fba7bd
·
verified ·
1 Parent(s): 93e4469

Fix empty heatmap (placeholder when <2 items); render UMAP+heatmap at load; add Soft theme, 3D UMAP toggle, fridge->basket routing, sibling help text

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +232 -201
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,17 +1,4 @@
1
- """Epicure Explorer: chef-facing operators over the three sibling embeddings.
2
-
3
- Eight tabs:
4
- - Basket pairings (with pairwise cosine heatmap of the basket itself)
5
- - Supervised SLERP
6
- - Emergent SLERP
7
- - Arithmetic (Mikolov-style)
8
- - Mode atlas (filter + search the GMM mode atlas)
9
- - Compare siblings (same query, three columns)
10
- - UMAP visualisation (Plotly scatter coloured by food group, basket highlighted)
11
- - Parse my fridge (paste free-text ingredient list, fuzzy-match to canonical vocab)
12
-
13
- Paper: https://arxiv.org/abs/2605.22391
14
- """
15
 
16
  from __future__ import annotations
17
 
@@ -40,9 +27,8 @@ MODELS = {
40
  }
41
  ALL_INGREDIENTS = sorted(MODELS["cooc"].vocab.keys())
42
 
43
- # Load precomputed UMAP coords + food-group labels
44
  _HERE = os.path.dirname(os.path.abspath(__file__))
45
- UMAP = np.load(os.path.join(_HERE, "umap_2d.npz")) # keys: cooc, core, chem ; (1790, 2)
46
  _lab = json.load(open(os.path.join(_HERE, "ingredient_labels.json")))
47
  NAMES_BY_IDX = _lab["names"]
48
  FOOD_GROUPS = _lab["food_groups"]
@@ -55,12 +41,18 @@ FG_COLORS = {
55
  "Spice": "#d62728",
56
  "Pantry": "#ff7f0e",
57
  "Beverage": "#9467bd",
58
- "Other": "#888888",
 
 
 
 
 
 
59
  }
60
 
61
  # ===== math helpers =====
62
 
63
- def _unit(v: np.ndarray, eps: float = 1e-9) -> np.ndarray:
64
  n = np.linalg.norm(v); return v / max(n, eps)
65
 
66
  def _basket_centroid(m, names):
@@ -94,7 +86,7 @@ def _supervised_choices(sibling):
94
  def _factor_mode_choices(sibling):
95
  return [(f"{m.label} ({m.mode_id})", m.mode_id) for m in MODELS[sibling].modes if m.kind == "factor"]
96
 
97
- def _slerp(m, v, d, theta_deg):
98
  d_perp = d - (d @ v) * v
99
  n = np.linalg.norm(d_perp)
100
  if n < 1e-9: return v
@@ -122,9 +114,16 @@ def basket_pairings(sibling, basket, k):
122
  def _basket_heatmap(m, basket):
123
  valid = [n for n in (basket or []) if n in m.vocab]
124
  if len(valid) < 2:
125
- return None
 
 
 
 
 
 
 
126
  idxs = [m.vocab[n] for n in valid]
127
- sub = m.E[idxs] # already L2-normalised
128
  sim = sub @ sub.T
129
  fig = go.Figure(go.Heatmap(
130
  z=sim, x=valid, y=valid,
@@ -133,9 +132,9 @@ def _basket_heatmap(m, basket):
133
  hovertemplate="%{y} <> %{x}<br>cos = %{z:.3f}<extra></extra>",
134
  ))
135
  fig.update_layout(
136
- title="Pairwise cosine between basket members",
137
- height=420, width=520,
138
- margin=dict(l=80, r=20, t=50, b=80),
139
  )
140
  return fig
141
 
@@ -146,7 +145,7 @@ def supervised_slerp_multi(sibling, basket, directions, theta, k):
146
  d = _stack_directions(m, directions, use_factor_pole=False)
147
  if d is None:
148
  return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
149
- q = _slerp(m, v, d, theta)
150
  return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
151
 
152
  def emergent_slerp_multi(sibling, basket, mode_labels, theta, k):
@@ -158,7 +157,7 @@ def emergent_slerp_multi(sibling, basket, mode_labels, theta, k):
158
  d = _stack_directions(m, mode_ids, use_factor_pole=True)
159
  if d is None:
160
  return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
161
- q = _slerp(m, v, d, theta)
162
  return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
163
 
164
  def arithmetic(sibling, positives, negatives, k):
@@ -191,104 +190,129 @@ def compare_siblings(basket, directions, theta, k):
191
  valid_dirs = [d for d in (directions or []) if d in m.supervised_poles]
192
  if valid_dirs:
193
  d_vec = _stack_directions(m, valid_dirs)
194
- q = _slerp(m, v, d_vec, theta) if d_vec is not None else v
195
  else:
196
  q = v
197
  hits = _topk(m, q, k=k, exclude=basket)
198
  out.append([[n, f"{s:.4f}"] for n, s in hits])
199
  return out[0], out[1], out[2]
200
 
201
- def umap_view(sibling, basket, show_neighbours, k):
202
- coords = UMAP[sibling] # (1790, 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  m = MODELS[sibling]
204
  name_to_idx = m.vocab
205
-
206
- fig = go.Figure()
207
-
208
- # Background scatter coloured by food group
209
  by_group = {}
210
  for i, fg in enumerate(FOOD_GROUPS):
211
  by_group.setdefault(fg, []).append(i)
212
- # Plot Other first so it sits behind the colourful groups
213
  order = ["Other"] + [g for g in FG_COLORS if g != "Other"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  for fg in order:
215
  if fg not in by_group: continue
216
  idxs = by_group[fg]
217
- fig.add_trace(go.Scatter(
218
- x=coords[idxs, 0], y=coords[idxs, 1],
219
- mode="markers",
220
- name=fg,
221
- marker=dict(
222
- size=5, color=FG_COLORS.get(fg, "#888888"),
223
- opacity=0.35 if fg == "Other" else 0.55,
224
- line=dict(width=0),
225
- ),
226
- text=[NAMES_BY_IDX[i] for i in idxs],
227
- hovertemplate="%{text}<br>food group: " + fg + "<extra></extra>",
228
- ))
229
 
230
- # Highlight the basket members (red, larger, with text labels)
231
  if basket:
232
  bi = [name_to_idx[b] for b in basket if b in name_to_idx]
233
  if bi:
234
- fig.add_trace(go.Scatter(
235
- x=coords[bi, 0], y=coords[bi, 1],
236
- mode="markers+text",
237
- name="Basket",
238
- marker=dict(size=14, color="#e30613", symbol="star", line=dict(color="white", width=1.5)),
239
- text=[NAMES_BY_IDX[i] for i in bi],
240
- textposition="top center",
241
- textfont=dict(size=12, color="#000000"),
242
- hovertemplate="<b>%{text}</b><extra></extra>",
243
- ))
244
 
245
- # Optionally show top-K neighbours of the basket centroid
246
  if show_neighbours:
247
  centroid = _basket_centroid(m, basket)
248
  if centroid is not None:
249
  nb_pairs = _topk(m, centroid, k=int(k), exclude=basket)
250
  nb_idxs = [name_to_idx[n] for n, _ in nb_pairs if n in name_to_idx]
251
  if nb_idxs:
252
- fig.add_trace(go.Scatter(
253
- x=coords[nb_idxs, 0], y=coords[nb_idxs, 1],
254
- mode="markers+text",
255
- name=f"Top-{k} neighbours",
256
- marker=dict(size=9, color="#ff8800", symbol="circle", line=dict(color="white", width=1)),
257
- text=[NAMES_BY_IDX[i] for i in nb_idxs],
258
- textposition="top center",
259
- textfont=dict(size=10, color="#444444"),
260
- hovertemplate="<b>%{text}</b> (neighbour)<extra></extra>",
261
- ))
262
-
 
263
  fig.update_layout(
264
- title=f"UMAP of Epicure-{sibling.capitalize()} (cosine, n_neighbors=30, min_dist=0.03)",
265
- xaxis_title="UMAP 1", yaxis_title="UMAP 2",
266
- height=650, width=900,
267
- legend=dict(orientation="v", x=1.02, y=1, font=dict(size=11)),
268
- margin=dict(l=60, r=160, t=70, b=60),
269
- plot_bgcolor="#ffffff",
270
  )
271
- fig.update_xaxes(showgrid=True, gridcolor="#eee", zeroline=False)
272
- fig.update_yaxes(showgrid=True, gridcolor="#eee", zeroline=False)
 
 
 
 
 
 
273
  return fig
274
 
275
 
 
 
276
  _LINE_SPLIT = re.compile(r"[\n;]")
277
  _BRACKET = re.compile(r"\([^)]*\)")
278
-
279
- # Number or word number
280
- _QTY = (
281
- r"(?:\d+(?:[\.,/]\d+)?|"
282
- r"a|an|one|two|three|four|five|six|seven|eight|nine|ten|half|quarter)"
283
- )
284
- # Units (word-boundary protected so 'g' does NOT eat the 'g' in 'ginger')
285
- _UNIT = (
286
- r"(?:cups?|tbsp\.?|tablespoons?|tsp\.?|teaspoons?|"
287
- r"oz\.?|ounces?|lbs?\.?|pounds?|grams?|kgs?|kilos?|"
288
- r"ml|liters?|litres?|cloves?|bunches?|sprigs?|pinch(?:es)?|"
289
- r"slices?|pieces?|cans?|packets?|sticks?|leaves?|stalks?|heads?|inch(?:es)?|"
290
- r"splash(?:es)?|dash(?:es)?|drops?|handfuls?|large|small|medium)"
291
- )
292
  _LEADING_QTY = re.compile(rf"^\s*{_QTY}\s+(?:{_UNIT}\b\s*)?(?:of\s+)?", re.IGNORECASE)
293
  _LEADING_UNIT_ONLY = re.compile(rf"^\s*{_UNIT}\b\s*(?:of\s+)?", re.IGNORECASE)
294
  _JUICE_OF = re.compile(rf"^\s*(?:juice|zest)\s+(?:of\s+)?(?:{_QTY}\s+)?", re.IGNORECASE)
@@ -296,26 +320,18 @@ _LEADING_PREP = re.compile(
296
  r"^\s*(?:fresh|dried|cooked|frozen|raw|ripe|firm|boneless|skinless|smoked|low[- ]fat)\s+",
297
  re.IGNORECASE,
298
  )
299
- # Trailing prep: only after a comma (so 'boneless chicken thighs' is not nuked)
300
  _TRAILING_PREP = re.compile(
301
  r"\s*,\s*(?:chopped|minced|diced|sliced|grated|crushed|whole|ground|peeled|"
302
  r"to taste|optional|finely|coarsely|cubed|shredded|julienned|halved|quartered|warmed|"
303
  r"toasted|roasted|bruised|melted|softened|cooked|drained|rinsed|patted dry|trimmed|"
304
- r"deveined|seeded|stemmed|crumbled).*$",
305
- re.IGNORECASE,
306
  )
307
- # Some plural -> singular forms we hand-massage before fuzzy lookup
308
  _KNOWN_PLURALS = {
309
- "tortillas": "tortilla",
310
- "thighs": "thigh",
311
- "leaves": "leaf",
312
- "onions": "onion",
313
- "potatoes": "potato",
314
- "tomatoes": "tomato",
315
- "cloves": "clove",
316
  }
317
 
318
- def _clean_line(line: str) -> str:
319
  s = line.strip().lower()
320
  s = _BRACKET.sub(" ", s)
321
  if "juice" in s or "zest" in s:
@@ -324,95 +340,92 @@ def _clean_line(line: str) -> str:
324
  s = _LEADING_QTY.sub("", s)
325
  s = _LEADING_UNIT_ONLY.sub("", s)
326
  s = _LEADING_PREP.sub("", s)
327
- # Run the leading-prep / unit cleanup once more to catch chains like "fresh whole bean"
328
  s = _LEADING_PREP.sub("", s)
329
- # Hand-massage common plurals so 'tortillas' fuzzy-matches 'tortilla' / 'corn_tortilla' better
330
- tokens = s.split()
331
- tokens = [_KNOWN_PLURALS.get(t, t) for t in tokens]
332
  s = " ".join(tokens)
333
- s = re.sub(r"\s+", " ", s).strip()
334
- return s
335
 
336
- def _fuzzy_lookup(cleaned: str, vocab: list[str], vocab_sp: list[str], min_score: int):
337
- """Pick the best canonical match across three scorers, breaking ties by canonical-name length."""
338
- if not cleaned:
339
- return None, 0.0
340
  candidates = []
341
  for scorer in (fuzz_scorers.token_set_ratio, fuzz_scorers.WRatio, fuzz_scorers.partial_ratio):
342
  hits = fuzz_process.extract(cleaned, vocab_sp, scorer=scorer, score_cutoff=min_score, limit=10)
343
  for _name_sp, score, idx in hits:
344
  candidates.append((vocab[idx], float(score)))
345
- if not candidates:
346
- return None, 0.0
347
- # Tie-break: higher score first, then longer canonical name (prefer 'fish_sauce' over 'fish').
348
- # We also prefer canonical names whose token-set is a subset of the input (avoid 'black_garlic' for 'garlic').
349
- def tokens(name): return set(name.replace("_"," ").split())
350
  cleaned_tokens = set(cleaned.split())
351
  def rank_key(c):
352
  name, score = c
353
- nt = tokens(name)
354
- # 0 if all canonical tokens appear in input, 1 if not (penalty)
355
- extra_penalty = 0 if nt.issubset(cleaned_tokens) else 1
356
- return (-score, extra_penalty, -len(name))
357
  candidates.sort(key=rank_key)
358
  return candidates[0]
359
 
360
- def parse_fridge(raw_text: str, sibling: str, min_score: int = 70):
361
- if not raw_text or not raw_text.strip():
362
- return [], []
363
  vocab = list(MODELS[sibling].vocab.keys())
364
- vocab_sp = [v.replace("_", " ") for v in vocab]
365
- rows, matched_set = [], []
366
  for line in _LINE_SPLIT.split(raw_text):
367
  if not line.strip(): continue
368
  cleaned = _clean_line(line)
369
  if not cleaned:
370
- rows.append([line.strip(), "(empty after cleaning)", 0.0, ""])
371
- continue
372
  match, score = _fuzzy_lookup(cleaned, vocab, vocab_sp, int(min_score))
373
  if match is None:
374
- # last-ditch: drop the last token (handles 'tortillas warmed' -> 'tortillas')
375
  tokens = cleaned.split()
376
  if len(tokens) > 1:
377
  match, score = _fuzzy_lookup(" ".join(tokens[:-1]), vocab, vocab_sp, int(min_score))
378
  if match is None:
379
- rows.append([line.strip(), "(no match)", 0.0, cleaned])
380
- continue
381
  rows.append([line.strip(), match, round(score, 1), cleaned])
382
- matched_set.append(match)
383
  seen, dedup = set(), []
384
- for n in matched_set:
385
- if n not in seen:
386
- seen.add(n); dedup.append(n)
387
  return rows, dedup
388
 
389
 
390
  # ===== UI =====
391
 
392
- with gr.Blocks(title="Epicure Explorer") as demo:
393
- gr.Markdown(
394
- """# Epicure Explorer
 
 
 
395
 
396
- Chef-facing operators over the three Epicure sibling embeddings (Cooc, Core, Chem),
397
- from [arXiv:2605.22391](https://arxiv.org/abs/2605.22391).
 
398
 
399
- - **Cooc** walks recipe co-occurrence only. Neighbours are recipe companions.
400
- - **Core** blends typed FlavorDB compound walks with injected I-I walks. Concentrated geometry, tightest modes.
401
- - **Chem** walks typed FlavorDB compound metapaths only. Strongest supervised-direction recovery; neighbours are flavour-profile peers.
 
 
 
402
 
403
- Pick a sibling, then explore. Each operator tab has worked examples below the form (click any row to populate inputs).
404
- """
 
 
 
405
  )
406
 
407
  sibling = gr.Radio(choices=["cooc","core","chem"], value="chem", label="Sibling embedding")
 
 
 
 
 
408
 
409
  # ---------- Tab 1: Basket pairings + heatmap ----------
410
  with gr.Tab("Basket pairings"):
411
  gr.Markdown(
412
  "Pick one or more ingredients. Tool averages their unit vectors and returns nearest neighbours "
413
- "plus closest modes of that centroid. The heatmap on the right shows how related the basket "
414
- "members already are to each other -- a coherent basket has bright off-diagonals, a scattered "
415
- "basket has dark ones."
416
  )
417
  basket = gr.Dropdown(
418
  choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
@@ -423,11 +436,11 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
423
  with gr.Row():
424
  nb_table = gr.Dataframe(headers=["Neighbour","Cosine"], label="Top-K nearest neighbours", interactive=False)
425
  mode_table = gr.Dataframe(headers=["Mode id","Label","Kind","Cosine"], label="Closest modes", interactive=False)
426
- heatmap_plot = gr.Plot(label="Pairwise cosine within the basket")
427
  pair_btn.click(
428
- basket_pairings,
429
- inputs=[sibling, basket, k_pair],
430
  outputs=[nb_table, mode_table, heatmap_plot],
 
431
  )
432
  gr.Examples(
433
  examples=[
@@ -447,8 +460,8 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
447
  # ---------- Tab 2: Supervised SLERP ----------
448
  with gr.Tab("Supervised SLERP"):
449
  gr.Markdown(
450
- "Rotate the seed basket toward one or more supervised direction poles. Multiple directions "
451
- "are summed and L2-normalised before rotation, matching the paper's multi-constraint queries."
452
  )
453
  sup_basket = gr.Dropdown(
454
  choices=ALL_INGREDIENTS, value=["rice"],
@@ -456,15 +469,16 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
456
  )
457
  sup_dirs = gr.Dropdown(
458
  choices=_supervised_choices("chem"), value=["cuisine:South_Asian"],
459
- label="Supervised directions (pick 1+; summed before rotation)",
460
- multiselect=True, max_choices=5,
461
  )
462
  sup_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
463
  sup_k = gr.Slider(1, 15, value=8, step=1, label="K")
464
  sup_btn = gr.Button("Rotate", variant="primary")
465
  sup_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
466
- sup_btn.click(supervised_slerp_multi, inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k], outputs=sup_table)
467
- sibling.change(lambda s: gr.Dropdown(choices=_supervised_choices(s), value=[]), inputs=sibling, outputs=sup_dirs)
 
 
468
  gr.Examples(
469
  examples=[
470
  ["chem", ["rice"], ["cuisine:South_Asian"], 30, 8],
@@ -482,7 +496,7 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
482
  with gr.Tab("Emergent SLERP"):
483
  gr.Markdown(
484
  "Rotate the seed basket toward one or more emergent factor-mode poles discovered "
485
- "by multi-seed-stable FastICA + GMM. Stack mode targets to combine culinary axes."
486
  )
487
  em_basket = gr.Dropdown(
488
  choices=ALL_INGREDIENTS, value=["chocolate"],
@@ -492,29 +506,29 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
492
  em_modes = gr.Dropdown(
493
  choices=[label for label, _ in factor_opts],
494
  value=[factor_opts[0][0]] if factor_opts else [],
495
- label="Factor modes (pick 1+; summed before rotation)",
496
- multiselect=True, max_choices=5,
497
  )
498
  em_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
499
  em_k = gr.Slider(1, 15, value=8, step=1, label="K")
500
  em_btn = gr.Button("Rotate", variant="primary")
501
  em_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
502
- em_btn.click(emergent_slerp_multi, inputs=[sibling, em_basket, em_modes, em_theta, em_k], outputs=em_table)
503
- sibling.change(lambda s: gr.Dropdown(choices=[label for label, _ in _factor_mode_choices(s)], value=[]), inputs=sibling, outputs=em_modes)
 
 
504
 
505
  # ---------- Tab 4: Arithmetic ----------
506
  with gr.Tab("Arithmetic"):
507
  gr.Markdown(
508
- "Classic Mikolov-style vector arithmetic: `centroid(positives) - centroid(negatives)`, "
509
- "then top-K nearest neighbours. The killer demo is `miso - salt` on Core: returns the "
510
- "Japanese fermented-umami pantry minus the salty component (mirin, kombu, wakame, sake, dashi)."
511
  )
512
  pos_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["miso"], label="Positives", multiselect=True, max_choices=10)
513
  neg_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["salt"], label="Negatives", multiselect=True, max_choices=10)
514
  ar_k = gr.Slider(1, 15, value=8, step=1, label="K")
515
  ar_btn = gr.Button("Compute", variant="primary")
516
  ar_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K nearest to result vector")
517
- ar_btn.click(arithmetic, inputs=[sibling, pos_box, neg_box, ar_k], outputs=ar_table)
518
  gr.Examples(
519
  examples=[
520
  ["core", ["miso"], ["salt"], 8],
@@ -533,39 +547,38 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
533
  # ---------- Tab 5: Mode atlas ----------
534
  with gr.Tab("Mode atlas"):
535
  gr.Markdown(
536
- "Browse the GMM mode atlas of the selected sibling (Cooc 150 / Core 193 / Chem 200 modes). "
537
  "`factor` = emergent FastICA modes; `continuous` = quartile partitions of NOVA/sensory/USDA; "
538
- "`binary` = food-group buckets. Search by label or property substring."
539
  )
540
  atlas_kind = gr.Radio(choices=["all","factor","continuous","binary"], value="all", label="Mode kind")
541
  atlas_search = gr.Textbox(label="Search labels / properties", placeholder="e.g. South Asian, baking, fiber", value="")
542
  atlas_btn = gr.Button("Browse modes", variant="primary")
543
  atlas_table = gr.Dataframe(
544
  headers=["mode_id","kind","property","label","n_members","top members"],
545
- label="Modes (sorted by kind, then size descending)",
546
- wrap=True, interactive=False,
547
  )
548
- atlas_btn.click(browse_modes, inputs=[sibling, atlas_kind, atlas_search], outputs=atlas_table)
549
 
550
  # ---------- Tab 6: Compare siblings ----------
551
  with gr.Tab("Compare siblings"):
552
  gr.Markdown(
553
- "Same query, three siblings, side-by-side. The chemistry-vs-recipe-context spectrum visible in one screen."
554
  )
555
  cmp_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["chicken"], label="Seed basket", multiselect=True, max_choices=10)
556
  cmp_dirs = gr.Dropdown(
557
  choices=_supervised_choices("chem"), value=[],
558
- label="Optional directions (leave empty for pure pairings)",
559
- multiselect=True, max_choices=5,
560
  )
561
- cmp_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg; ignored if no directions)")
562
  cmp_k = gr.Slider(1, 15, value=8, step=1, label="K")
563
  cmp_btn = gr.Button("Compare across siblings", variant="primary")
564
  with gr.Row():
565
  cmp_cooc = gr.Dataframe(headers=["Cooc neighbour","Cosine"], label="Cooc (recipe-context)")
566
  cmp_core = gr.Dataframe(headers=["Core neighbour","Cosine"], label="Core (blended)")
567
  cmp_chem = gr.Dataframe(headers=["Chem neighbour","Cosine"], label="Chem (chemistry)")
568
- cmp_btn.click(compare_siblings, inputs=[cmp_basket, cmp_dirs, cmp_theta, cmp_k], outputs=[cmp_cooc, cmp_core, cmp_chem])
 
569
  gr.Examples(
570
  examples=[
571
  [["chicken"], [], 0, 8],
@@ -582,34 +595,41 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
582
  # ---------- Tab 7: UMAP visualisation ----------
583
  with gr.Tab("UMAP visualisation"):
584
  gr.Markdown(
585
- "2-D UMAP projection of the 1,790-ingredient embedding (cosine metric, "
586
- "n_neighbors=30, min_dist=0.03 -- the paper's Figure 1 hyperparameters). "
587
- "Points coloured by food group when known. Add ingredients to the basket to highlight "
588
- "them as red stars, and optionally show their nearest neighbours as orange circles."
589
- )
590
- umap_basket = gr.Dropdown(
591
- choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
592
- label="Highlight these ingredients", multiselect=True, max_choices=10,
593
  )
594
  with gr.Row():
595
- umap_show_nb = gr.Checkbox(value=True, label="Also show top-K neighbours of the basket centroid")
596
- umap_k = gr.Slider(1, 20, value=10, step=1, label="K neighbours to draw")
597
- umap_btn = gr.Button("Plot UMAP", variant="primary")
598
- umap_plot = gr.Plot(label="UMAP")
599
- umap_btn.click(umap_view, inputs=[sibling, umap_basket, umap_show_nb, umap_k], outputs=umap_plot)
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
  # ---------- Tab 8: Parse my fridge ----------
602
  with gr.Tab("Parse my fridge"):
603
  gr.Markdown(
604
- "Paste a free-text ingredient list (recipe lines, shopping list, fridge contents). "
605
- "Tool strips quantities/units/prep notes and fuzzy-matches each line against the 1,790 canonical "
606
- "vocab via rapidfuzz. Threshold defaults to 70 (out of 100); lower = more lenient. "
607
- "Useful because chefs do not think in `corn_tortilla` -- they write `2 corn tortillas, warmed`."
608
  )
609
  fridge_text = gr.Textbox(
610
  label="Free-text ingredients (one per line or semicolon-separated)",
611
  lines=8,
612
- placeholder=(
613
  "2 boneless chicken thighs\n"
614
  "1 cup coconut milk\n"
615
  "1 tbsp fish sauce (or soy sauce)\n"
@@ -620,23 +640,34 @@ Pick a sibling, then explore. Each operator tab has worked examples below the fo
620
  "salt to taste"
621
  ),
622
  )
623
- fridge_min = gr.Slider(40, 100, value=70, step=5, label="Min match score (rapidfuzz WRatio)")
624
- fridge_btn = gr.Button("Parse and match", variant="primary")
 
 
625
  fridge_table = gr.Dataframe(
626
  headers=["Input line", "Canonical match", "Score", "Cleaned"],
627
  label="Parsed matches", interactive=False,
628
  )
629
- fridge_matched = gr.Textbox(label="Matched ingredients (paste into a Basket dropdown)", interactive=False)
 
630
  def _parse(txt, sib, mn):
631
  rows, matches = parse_fridge(txt, sib, int(mn))
632
- return rows, ", ".join(matches)
633
- fridge_btn.click(_parse, inputs=[fridge_text, sibling, fridge_min], outputs=[fridge_table, fridge_matched])
 
 
 
 
 
 
 
 
634
 
635
  gr.Markdown(
636
  """---
637
  **Cite:** Radzikowski and Chen, 2026, *Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings*, [arXiv:2605.22391](https://arxiv.org/abs/2605.22391).
638
 
639
- Models: [epicure-cooc](https://huggingface.co/Kaikaku/epicure-cooc) | [epicure-core](https://huggingface.co/Kaikaku/epicure-core) | [epicure-chem](https://huggingface.co/Kaikaku/epicure-chem). Dataset: [epicure-corpus-resources](https://huggingface.co/datasets/Kaikaku/epicure-corpus-resources).
640
  """
641
  )
642
 
 
1
+ """Epicure Explorer: chef-facing operators over the three sibling embeddings."""
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
 
27
  }
28
  ALL_INGREDIENTS = sorted(MODELS["cooc"].vocab.keys())
29
 
 
30
  _HERE = os.path.dirname(os.path.abspath(__file__))
31
+ UMAP = np.load(os.path.join(_HERE, "umap_2d.npz"))
32
  _lab = json.load(open(os.path.join(_HERE, "ingredient_labels.json")))
33
  NAMES_BY_IDX = _lab["names"]
34
  FOOD_GROUPS = _lab["food_groups"]
 
41
  "Spice": "#d62728",
42
  "Pantry": "#ff7f0e",
43
  "Beverage": "#9467bd",
44
+ "Other": "#cccccc",
45
+ }
46
+
47
+ SIBLING_BLURBS = {
48
+ "cooc": "**Cooc** walks recipe co-occurrence only. Neighbours are recipe companions: ingredients that *get cooked with* the seed.",
49
+ "core": "**Core** blends typed FlavorDB compound walks with injected I-I walks at ii_repeat=10. Concentrated geometry (PR=94), tightest emergent modes.",
50
+ "chem": "**Chem** walks typed FlavorDB compound metapaths only (ii_repeat=0). Neighbours are flavour-profile peers: ingredients that *share aroma chemistry* with the seed.",
51
  }
52
 
53
  # ===== math helpers =====
54
 
55
+ def _unit(v, eps=1e-9):
56
  n = np.linalg.norm(v); return v / max(n, eps)
57
 
58
  def _basket_centroid(m, names):
 
86
  def _factor_mode_choices(sibling):
87
  return [(f"{m.label} ({m.mode_id})", m.mode_id) for m in MODELS[sibling].modes if m.kind == "factor"]
88
 
89
+ def _slerp(v, d, theta_deg):
90
  d_perp = d - (d @ v) * v
91
  n = np.linalg.norm(d_perp)
92
  if n < 1e-9: return v
 
114
  def _basket_heatmap(m, basket):
115
  valid = [n for n in (basket or []) if n in m.vocab]
116
  if len(valid) < 2:
117
+ # Empty figure with a hint
118
+ fig = go.Figure()
119
+ fig.add_annotation(text="Add 2+ ingredients to see pairwise cosines",
120
+ showarrow=False, xref="paper", yref="paper", x=0.5, y=0.5,
121
+ font=dict(size=14, color="#888"))
122
+ fig.update_layout(height=420, plot_bgcolor="#fafafa", paper_bgcolor="#fafafa")
123
+ fig.update_xaxes(visible=False); fig.update_yaxes(visible=False)
124
+ return fig
125
  idxs = [m.vocab[n] for n in valid]
126
+ sub = m.E[idxs]
127
  sim = sub @ sub.T
128
  fig = go.Figure(go.Heatmap(
129
  z=sim, x=valid, y=valid,
 
132
  hovertemplate="%{y} <> %{x}<br>cos = %{z:.3f}<extra></extra>",
133
  ))
134
  fig.update_layout(
135
+ title=dict(text="Pairwise cosine within the basket", font=dict(size=14)),
136
+ height=420, margin=dict(l=80, r=20, t=50, b=80),
137
+ paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
138
  )
139
  return fig
140
 
 
145
  d = _stack_directions(m, directions, use_factor_pole=False)
146
  if d is None:
147
  return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
148
+ q = _slerp(v, d, theta)
149
  return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
150
 
151
  def emergent_slerp_multi(sibling, basket, mode_labels, theta, k):
 
157
  d = _stack_directions(m, mode_ids, use_factor_pole=True)
158
  if d is None:
159
  return [[n, f"{s:.4f}"] for n, s in _topk(m, v, k, basket)]
160
+ q = _slerp(v, d, theta)
161
  return [[n, f"{s:.4f}"] for n, s in _topk(m, q, k, basket)]
162
 
163
  def arithmetic(sibling, positives, negatives, k):
 
190
  valid_dirs = [d for d in (directions or []) if d in m.supervised_poles]
191
  if valid_dirs:
192
  d_vec = _stack_directions(m, valid_dirs)
193
+ q = _slerp(v, d_vec, theta) if d_vec is not None else v
194
  else:
195
  q = v
196
  hits = _topk(m, q, k=k, exclude=basket)
197
  out.append([[n, f"{s:.4f}"] for n, s in hits])
198
  return out[0], out[1], out[2]
199
 
200
+
201
# Per-sibling cache of the depth (z) coordinate. MODELS and UMAP are loaded
# once at import time, so the SVD below only ever needs to run once per sibling.
_UMAP_Z_CACHE = {}


def _umap_coords(sibling, three_d):
    """Return ``(xy, z)`` plotting coordinates for one sibling embedding.

    Args:
        sibling: key into the module-level ``UMAP`` and ``MODELS`` mappings.
        three_d: when falsy only the stored 2-D layout is returned.

    Returns:
        A pair ``(xy, z)``. ``xy`` is ``UMAP[sibling]`` unchanged (the stored
        2-D layout; presumably one row per ingredient -- confirm against the
        ``.npz`` file). ``z`` is ``None`` in 2-D mode. In 3-D mode it is a
        float32 vector: the mean-centred embedding matrix ``m.E`` projected
        onto its FIRST principal axis, standardised to zero mean / unit
        variance, then scaled to a quarter of the UMAP value range.
    """
    base = UMAP[sibling]
    if not three_d:
        return base, None

    z = _UMAP_Z_CACHE.get(sibling)
    if z is None:
        m = MODELS[sibling]
        centred = m.E - m.E.mean(axis=0, keepdims=True)
        # Only the right singular vectors are needed; row 0 is the first PC.
        _, _, vt = np.linalg.svd(centred, full_matrices=False)
        pc1 = centred @ vt[0]
        pc1 = (pc1 - pc1.mean()) / (pc1.std() + 1e-9)
        # Keep the depth axis visually subordinate to the 2-D UMAP layout.
        scale = (base.max() - base.min()) * 0.25
        z = (pc1 * scale).astype(np.float32)
        _UMAP_Z_CACHE[sibling] = z
    return base, z
216
+
217
def umap_view(sibling, basket, show_neighbours, k, three_d=False):
    """Build the Plotly UMAP figure for one sibling embedding.

    Args:
        sibling: key into ``MODELS`` (and, via ``_umap_coords``, ``UMAP``).
        basket: ingredient names to highlight. ``None`` is treated as an
            empty basket; names missing from the model vocab are ignored.
        show_neighbours: when truthy, also mark the top-``k`` neighbours of
            the basket centroid (only if ``_basket_centroid`` returns one).
        k: number of neighbours to draw; coerced with ``int`` before use.
        three_d: when truthy, draw ``Scatter3d`` traces using the z values
            supplied by ``_umap_coords`` instead of flat ``Scatter`` traces.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    coords2, z = _umap_coords(sibling, three_d)
    m = MODELS[sibling]
    name_to_idx = m.vocab
    basket = list(basket or [])

    # Bucket ingredient indices by food-group label.
    by_group = {}
    for i, fg in enumerate(FOOD_GROUPS):
        by_group.setdefault(fg, []).append(i)

    # "Other" is drawn first so it sits behind everything else. Groups that
    # have no entry in FG_COLORS are still drawn (with the grey fallback
    # colour) instead of being silently dropped from the map.
    unlisted = [g for g in by_group if g not in FG_COLORS and g != "Other"]
    order = ["Other"] + unlisted + [g for g in FG_COLORS if g != "Other"]

    fig = go.Figure()

    def add_scatter(name, idxs, marker, text, hover, mode="markers"):
        # Shared trace options; only the trace class and the z column differ.
        common = dict(
            mode=mode, name=name, marker=marker, text=text,
            hovertemplate=hover, textfont=dict(size=10),
        )
        if three_d:
            fig.add_trace(go.Scatter3d(
                x=coords2[idxs, 0], y=coords2[idxs, 1], z=z[idxs], **common
            ))
        else:
            fig.add_trace(go.Scatter(
                x=coords2[idxs, 0], y=coords2[idxs, 1], **common
            ))

    # Background cloud, one trace per food group.
    for fg in order:
        if fg not in by_group:
            continue
        idxs = by_group[fg]
        label = str(fg)
        marker = dict(
            size=4 if not three_d else 3,
            color=FG_COLORS.get(fg, "#888888"),
            opacity=0.35 if fg == "Other" else 0.7,
            line=dict(width=0),
        )
        add_scatter(label, idxs, marker,
                    [NAMES_BY_IDX[i] for i in idxs],
                    "%{text}<br>group: " + label + "<extra></extra>")

    # Highlighted basket members.
    if basket:
        bi = [name_to_idx[b] for b in basket if b in name_to_idx]
        if bi:
            marker = dict(
                size=16 if not three_d else 8,
                color="#e30613",
                symbol="star" if not three_d else "diamond",
                line=dict(color="white", width=2),
            )
            add_scatter("Basket", bi, marker,
                        [NAMES_BY_IDX[i] for i in bi],
                        "<b>%{text}</b><extra></extra>",
                        mode="markers+text")

    # Optional nearest neighbours of the basket centroid.
    if show_neighbours:
        centroid = _basket_centroid(m, basket)
        if centroid is not None:
            top_k = int(k)  # same integer for the query and the legend label
            nb_pairs = _topk(m, centroid, k=top_k, exclude=basket)
            nb_idxs = [name_to_idx[n] for n, _ in nb_pairs if n in name_to_idx]
            if nb_idxs:
                marker = dict(
                    size=10 if not three_d else 6,
                    color="#ff8800",
                    symbol="circle",
                    line=dict(color="white", width=1),
                )
                add_scatter(f"Top-{top_k} neighbours", nb_idxs, marker,
                            [NAMES_BY_IDX[i] for i in nb_idxs],
                            "<b>%{text}</b> (neighbour)<extra></extra>",
                            mode="markers+text")

    title_suffix = " (3D, PCA z-axis)" if three_d else ""
    fig.update_layout(
        title=dict(text=f"UMAP of Epicure-{sibling.capitalize()}{title_suffix}", font=dict(size=15)),
        height=650,
        legend=dict(orientation="v", x=1.02, y=1, font=dict(size=11), bgcolor="rgba(255,255,255,0.8)"),
        margin=dict(l=40, r=160, t=60, b=40),
        paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
    )
    if not three_d:
        fig.update_xaxes(showgrid=True, gridcolor="#eee", zeroline=False, title="UMAP 1")
        fig.update_yaxes(showgrid=True, gridcolor="#eee", zeroline=False, title="UMAP 2")
    else:
        fig.update_layout(scene=dict(
            xaxis_title="UMAP 1", yaxis_title="UMAP 2", zaxis_title="PC1 (z)",
            bgcolor="#ffffff",
        ))
    return fig
303
 
304
 
305
# ===== fridge parser =====

# Items in the pasted text are separated by newlines or semicolons.
_LINE_SPLIT = re.compile(r"[\n;]")
# Parenthesised asides, e.g. "(or soy sauce)".
_BRACKET = re.compile(r"\([^)]*\)")

# A leading quantity: a number ("2", "1.5", "1/2"), a mixed number ("1 1/2"),
# or a spelled-out amount. The optional trailing fraction is what lets
# "1 1/2 cups flour" be stripped in one go instead of leaving "1/2 cups flour".
_QTY = (r"(?:\d+(?:[\.,/]\d+)?(?:\s+\d+/\d+)?|"
        r"a|an|one|two|three|four|five|six|seven|eight|nine|ten|half|quarter)")
# Units of measure plus the size adjectives that sit in the same position.
_UNIT = (r"(?:cups?|tbsp\.?|tablespoons?|tsp\.?|teaspoons?|"
         r"oz\.?|ounces?|lbs?\.?|pounds?|grams?|kgs?|kilos?|"
         r"ml|liters?|litres?|cloves?|bunches?|sprigs?|pinch(?:es)?|"
         r"slices?|pieces?|cans?|packets?|sticks?|leaves?|stalks?|heads?|inch(?:es)?|"
         r"splash(?:es)?|dash(?:es)?|drops?|handfuls?|large|small|medium)")

# The unit is terminated with (?!\w) rather than \b: several units may end in
# "." ("tbsp.", "oz.", "lbs."), and there is no word boundary between "." and
# the following space, so \b forced the regex to give the dot back and
# "1 tbsp. sugar" was cleaned to ". sugar". (?!\w) still rejects partial-word
# hits such as "can" inside "canned".
_LEADING_QTY = re.compile(rf"^\s*{_QTY}\s+(?:{_UNIT}(?!\w)\s*)?(?:of\s+)?", re.IGNORECASE)
_LEADING_UNIT_ONLY = re.compile(rf"^\s*{_UNIT}(?!\w)\s*(?:of\s+)?", re.IGNORECASE)
# "juice of 1 lemon" / "zest of lime" -> keep only the ingredient.
_JUICE_OF = re.compile(rf"^\s*(?:juice|zest)\s+(?:of\s+)?(?:{_QTY}\s+)?", re.IGNORECASE)
 
320
  r"^\s*(?:fresh|dried|cooked|frozen|raw|ripe|firm|boneless|skinless|smoked|low[- ]fat)\s+",
321
  re.IGNORECASE,
322
  )
 
323
  _TRAILING_PREP = re.compile(
324
  r"\s*,\s*(?:chopped|minced|diced|sliced|grated|crushed|whole|ground|peeled|"
325
  r"to taste|optional|finely|coarsely|cubed|shredded|julienned|halved|quartered|warmed|"
326
  r"toasted|roasted|bruised|melted|softened|cooked|drained|rinsed|patted dry|trimmed|"
327
+ r"deveined|seeded|stemmed|crumbled).*$", re.IGNORECASE,
 
328
  )
 
329
  _KNOWN_PLURALS = {
330
+ "tortillas":"tortilla","thighs":"thigh","leaves":"leaf","onions":"onion",
331
+ "potatoes":"potato","tomatoes":"tomato","cloves":"clove",
 
 
 
 
 
332
  }
333
 
334
+ def _clean_line(line):
335
  s = line.strip().lower()
336
  s = _BRACKET.sub(" ", s)
337
  if "juice" in s or "zest" in s:
 
340
  s = _LEADING_QTY.sub("", s)
341
  s = _LEADING_UNIT_ONLY.sub("", s)
342
  s = _LEADING_PREP.sub("", s)
 
343
  s = _LEADING_PREP.sub("", s)
344
+ tokens = [_KNOWN_PLURALS.get(t, t) for t in s.split()]
 
 
345
  s = " ".join(tokens)
346
+ return re.sub(r"\s+", " ", s).strip()
 
347
 
348
+ def _fuzzy_lookup(cleaned, vocab, vocab_sp, min_score):
349
+ if not cleaned: return None, 0.0
 
 
350
  candidates = []
351
  for scorer in (fuzz_scorers.token_set_ratio, fuzz_scorers.WRatio, fuzz_scorers.partial_ratio):
352
  hits = fuzz_process.extract(cleaned, vocab_sp, scorer=scorer, score_cutoff=min_score, limit=10)
353
  for _name_sp, score, idx in hits:
354
  candidates.append((vocab[idx], float(score)))
355
+ if not candidates: return None, 0.0
 
 
 
 
356
  cleaned_tokens = set(cleaned.split())
357
  def rank_key(c):
358
  name, score = c
359
+ nt = set(name.replace("_"," ").split())
360
+ return (-score, 0 if nt.issubset(cleaned_tokens) else 1, -len(name))
 
 
361
  candidates.sort(key=rank_key)
362
  return candidates[0]
363
 
364
def parse_fridge(raw_text, sibling, min_score=70):
    """Match a free-text ingredient list against the vocabulary of ``sibling``.

    ``raw_text`` is split on newlines and semicolons. Each non-blank item is
    normalised with ``_clean_line`` and fuzzy-matched with ``_fuzzy_lookup``;
    if the full cleaned string finds nothing and has more than one token, the
    lookup is retried once without its last token.

    Args:
        raw_text: The pasted ingredient text; ``None``, empty, or
            whitespace-only yields ``([], [])``.
        sibling: Key into ``MODELS`` selecting which vocabulary to match.
        min_score: Match-score cutoff, truncated to ``int`` before use.

    Returns:
        ``(rows, matches)``. ``rows`` has one
        ``[input line, canonical match, score, cleaned]`` entry per non-blank
        item; the match column is ``"(empty)"`` when cleaning leaves nothing
        and ``"(no match)"`` when no candidate reaches the cutoff. ``matches``
        lists the matched canonical names, first occurrence order, without
        duplicates.
    """
    if not raw_text or not raw_text.strip():
        return [], []

    vocab = list(MODELS[sibling].vocab.keys())
    vocab_sp = [v.replace("_", " ") for v in vocab]
    threshold = int(min_score)

    rows, matched = [], []
    for line in _LINE_SPLIT.split(raw_text):
        shown = line.strip()
        if not shown:
            continue

        cleaned = _clean_line(line)
        if not cleaned:
            rows.append([shown, "(empty)", 0.0, ""])
            continue

        match, score = _fuzzy_lookup(cleaned, vocab, vocab_sp, threshold)
        if match is None:
            # Retry once without the last token of the cleaned string.
            tokens = cleaned.split()
            if len(tokens) > 1:
                match, score = _fuzzy_lookup(" ".join(tokens[:-1]), vocab, vocab_sp, threshold)
        if match is None:
            rows.append([shown, "(no match)", 0.0, cleaned])
            continue

        rows.append([shown, match, round(score, 1), cleaned])
        matched.append(match)

    # dict keys keep insertion order, so this drops repeats but keeps first-seen order.
    return rows, list(dict.fromkeys(matched))
387
 
388
 
389
  # ===== UI =====
390
 
391
THEME = gr.themes.Soft(
    primary_hue="red",
    secondary_hue="orange",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Render the default figures once at import time so the Basket and UMAP tabs
# are not empty on first open. The arguments mirror the default values of the
# corresponding controls (sibling "chem", basket chicken/lemon/garlic,
# neighbours shown, 2-D). K is 10 to match the "K neighbours" slider default;
# with any other value the first "Update plot" click would redraw a different
# neighbour set even though no control had been touched.
_INITIAL_UMAP = umap_view("chem", ["chicken", "lemon", "garlic"], True, 10, three_d=False)
_INITIAL_HEATMAP = _basket_heatmap(MODELS["chem"], ["chicken", "lemon", "garlic"])
401
 
402
+ with gr.Blocks(title="Epicure Explorer", theme=THEME, css="""
403
+ .gradio-container {max-width: 1280px !important;}
404
+ footer {visibility: hidden;}
405
+ h1 {margin-bottom: 0.2em;}
406
+ .subtitle {color: #666; font-size: 0.95em; margin-top: 0;}
407
+ """) as demo:
408
 
409
+ gr.Markdown(
410
+ """# Epicure Explorer
411
+ <p class="subtitle">Chef-facing operators over three sibling ingredient embeddings (Cooc / Core / Chem) from
412
+ <a href="https://arxiv.org/abs/2605.22391" target="_blank">arXiv:2605.22391</a>.
413
+ 1,790 canonical ingredients across 7 languages, 300-D Metapath2Vec embeddings, controlled chemistry-vs-recipe-context spectrum.</p>"""
414
  )
415
 
416
  sibling = gr.Radio(choices=["cooc","core","chem"], value="chem", label="Sibling embedding")
417
+ sibling_help = gr.Markdown(SIBLING_BLURBS["chem"])
418
+ sibling.change(lambda s: SIBLING_BLURBS[s], inputs=sibling, outputs=sibling_help)
419
+
420
+ # Shared state for cross-tab routing (e.g. Parse fridge -> Basket)
421
+ shared_basket = gr.State([])
422
 
423
  # ---------- Tab 1: Basket pairings + heatmap ----------
424
  with gr.Tab("Basket pairings"):
425
  gr.Markdown(
426
  "Pick one or more ingredients. Tool averages their unit vectors and returns nearest neighbours "
427
+ "plus closest modes of that centroid. The heatmap shows whether the basket is coherent "
428
+ "(bright off-diagonals) or scattered."
 
429
  )
430
  basket = gr.Dropdown(
431
  choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
 
436
  with gr.Row():
437
  nb_table = gr.Dataframe(headers=["Neighbour","Cosine"], label="Top-K nearest neighbours", interactive=False)
438
  mode_table = gr.Dataframe(headers=["Mode id","Label","Kind","Cosine"], label="Closest modes", interactive=False)
439
+ heatmap_plot = gr.Plot(value=_INITIAL_HEATMAP, label="Pairwise cosine within the basket")
440
  pair_btn.click(
441
+ basket_pairings, inputs=[sibling, basket, k_pair],
 
442
  outputs=[nb_table, mode_table, heatmap_plot],
443
+ show_progress="full",
444
  )
445
  gr.Examples(
446
  examples=[
 
460
  # ---------- Tab 2: Supervised SLERP ----------
461
  with gr.Tab("Supervised SLERP"):
462
  gr.Markdown(
463
+ "Rotate the seed basket toward one or more supervised direction poles (cuisine, food group, "
464
+ "NOVA, sensory, USDA macros). Multiple directions are summed before rotation."
465
  )
466
  sup_basket = gr.Dropdown(
467
  choices=ALL_INGREDIENTS, value=["rice"],
 
469
  )
470
  sup_dirs = gr.Dropdown(
471
  choices=_supervised_choices("chem"), value=["cuisine:South_Asian"],
472
+ label="Supervised directions (pick 1+; summed)", multiselect=True, max_choices=5,
 
473
  )
474
  sup_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
475
  sup_k = gr.Slider(1, 15, value=8, step=1, label="K")
476
  sup_btn = gr.Button("Rotate", variant="primary")
477
  sup_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
478
+ sup_btn.click(supervised_slerp_multi, inputs=[sibling, sup_basket, sup_dirs, sup_theta, sup_k],
479
+ outputs=sup_table, show_progress="full")
480
+ sibling.change(lambda s: gr.Dropdown(choices=_supervised_choices(s), value=[]),
481
+ inputs=sibling, outputs=sup_dirs)
482
  gr.Examples(
483
  examples=[
484
  ["chem", ["rice"], ["cuisine:South_Asian"], 30, 8],
 
496
  with gr.Tab("Emergent SLERP"):
497
  gr.Markdown(
498
  "Rotate the seed basket toward one or more emergent factor-mode poles discovered "
499
+ "by multi-seed-stable FastICA + GMM."
500
  )
501
  em_basket = gr.Dropdown(
502
  choices=ALL_INGREDIENTS, value=["chocolate"],
 
506
  em_modes = gr.Dropdown(
507
  choices=[label for label, _ in factor_opts],
508
  value=[factor_opts[0][0]] if factor_opts else [],
509
+ label="Factor modes (pick 1+; summed)", multiselect=True, max_choices=5,
 
510
  )
511
  em_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
512
  em_k = gr.Slider(1, 15, value=8, step=1, label="K")
513
  em_btn = gr.Button("Rotate", variant="primary")
514
  em_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K rotated-query neighbours")
515
+ em_btn.click(emergent_slerp_multi, inputs=[sibling, em_basket, em_modes, em_theta, em_k],
516
+ outputs=em_table, show_progress="full")
517
+ sibling.change(lambda s: gr.Dropdown(choices=[label for label, _ in _factor_mode_choices(s)], value=[]),
518
+ inputs=sibling, outputs=em_modes)
519
 
520
  # ---------- Tab 4: Arithmetic ----------
521
  with gr.Tab("Arithmetic"):
522
  gr.Markdown(
523
+ "Mikolov-style vector arithmetic: `centroid(positives) - centroid(negatives)`, "
524
+ "then top-K nearest neighbours. The killer demo is `miso - salt` on Core."
 
525
  )
526
  pos_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["miso"], label="Positives", multiselect=True, max_choices=10)
527
  neg_box = gr.Dropdown(choices=ALL_INGREDIENTS, value=["salt"], label="Negatives", multiselect=True, max_choices=10)
528
  ar_k = gr.Slider(1, 15, value=8, step=1, label="K")
529
  ar_btn = gr.Button("Compute", variant="primary")
530
  ar_table = gr.Dataframe(headers=["Ingredient","Cosine"], label="Top-K nearest to result vector")
531
+ ar_btn.click(arithmetic, inputs=[sibling, pos_box, neg_box, ar_k], outputs=ar_table, show_progress="full")
532
  gr.Examples(
533
  examples=[
534
  ["core", ["miso"], ["salt"], 8],
 
547
  # ---------- Tab 5: Mode atlas ----------
548
  with gr.Tab("Mode atlas"):
549
  gr.Markdown(
550
+ "Browse the GMM mode atlas of the selected sibling. Cooc 150 modes / Core 193 / Chem 200. "
551
  "`factor` = emergent FastICA modes; `continuous` = quartile partitions of NOVA/sensory/USDA; "
552
+ "`binary` = food-group buckets."
553
  )
554
  atlas_kind = gr.Radio(choices=["all","factor","continuous","binary"], value="all", label="Mode kind")
555
  atlas_search = gr.Textbox(label="Search labels / properties", placeholder="e.g. South Asian, baking, fiber", value="")
556
  atlas_btn = gr.Button("Browse modes", variant="primary")
557
  atlas_table = gr.Dataframe(
558
  headers=["mode_id","kind","property","label","n_members","top members"],
559
+ label="Modes (sorted by kind, then size descending)", wrap=True, interactive=False,
 
560
  )
561
+ atlas_btn.click(browse_modes, inputs=[sibling, atlas_kind, atlas_search], outputs=atlas_table, show_progress="full")
562
 
563
  # ---------- Tab 6: Compare siblings ----------
564
  with gr.Tab("Compare siblings"):
565
  gr.Markdown(
566
+ "Same query, three siblings, side by side. The spectrum-of-models thesis visible in one screen."
567
  )
568
  cmp_basket = gr.Dropdown(choices=ALL_INGREDIENTS, value=["chicken"], label="Seed basket", multiselect=True, max_choices=10)
569
  cmp_dirs = gr.Dropdown(
570
  choices=_supervised_choices("chem"), value=[],
571
+ label="Optional directions (leave empty for pure pairings)", multiselect=True, max_choices=5,
 
572
  )
573
+ cmp_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
574
  cmp_k = gr.Slider(1, 15, value=8, step=1, label="K")
575
  cmp_btn = gr.Button("Compare across siblings", variant="primary")
576
  with gr.Row():
577
  cmp_cooc = gr.Dataframe(headers=["Cooc neighbour","Cosine"], label="Cooc (recipe-context)")
578
  cmp_core = gr.Dataframe(headers=["Core neighbour","Cosine"], label="Core (blended)")
579
  cmp_chem = gr.Dataframe(headers=["Chem neighbour","Cosine"], label="Chem (chemistry)")
580
+ cmp_btn.click(compare_siblings, inputs=[cmp_basket, cmp_dirs, cmp_theta, cmp_k],
581
+ outputs=[cmp_cooc, cmp_core, cmp_chem], show_progress="full")
582
  gr.Examples(
583
  examples=[
584
  [["chicken"], [], 0, 8],
 
595
  # ---------- Tab 7: UMAP visualisation ----------
596
  with gr.Tab("UMAP visualisation"):
597
  gr.Markdown(
598
+ "2-D UMAP projection of the 1,790-ingredient embedding (cosine metric, n_neighbors=30, min_dist=0.03 "
599
+ "-- paper Figure 1 hyperparameters). Points coloured by food group. Add ingredients to the basket "
600
+ "to highlight them as red stars; their nearest neighbours appear as orange circles. "
601
+ "Toggle 3D for a perspective view (third axis is PC1 of the embedding)."
 
 
 
 
602
  )
603
  with gr.Row():
604
+ umap_basket = gr.Dropdown(
605
+ choices=ALL_INGREDIENTS, value=["chicken","lemon","garlic"],
606
+ label="Highlight these ingredients", multiselect=True, max_choices=10,
607
+ )
608
+ with gr.Row():
609
+ umap_show_nb = gr.Checkbox(value=True, label="Show top-K neighbours of basket centroid")
610
+ umap_3d = gr.Checkbox(value=False, label="3-D perspective (UMAP + PC1)")
611
+ umap_k = gr.Slider(1, 20, value=10, step=1, label="K neighbours")
612
+ umap_btn = gr.Button("Update plot", variant="primary")
613
+ umap_plot = gr.Plot(value=_INITIAL_UMAP, label="UMAP")
614
+ umap_btn.click(umap_view,
615
+ inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
616
+ outputs=umap_plot, show_progress="full")
617
+ # Auto-refresh on sibling change
618
+ sibling.change(umap_view,
619
+ inputs=[sibling, umap_basket, umap_show_nb, umap_k, umap_3d],
620
+ outputs=umap_plot)
621
+ gr.Markdown("*Tip: scroll-zoom and box-zoom are enabled. Double-click to reset. Click a legend item to hide that food group.*")
622
 
623
  # ---------- Tab 8: Parse my fridge ----------
624
  with gr.Tab("Parse my fridge"):
625
  gr.Markdown(
626
+ "Paste a free-text ingredient list. Tool strips quantities and prep notes, then fuzzy-matches "
627
+ "each line to canonical vocab. Hit **Send to Basket** to route the matched set into the Basket-pairings tab."
 
 
628
  )
629
  fridge_text = gr.Textbox(
630
  label="Free-text ingredients (one per line or semicolon-separated)",
631
  lines=8,
632
+ value=(
633
  "2 boneless chicken thighs\n"
634
  "1 cup coconut milk\n"
635
  "1 tbsp fish sauce (or soy sauce)\n"
 
640
  "salt to taste"
641
  ),
642
  )
643
+ fridge_min = gr.Slider(40, 100, value=70, step=5, label="Min match score (rapidfuzz)")
644
+ with gr.Row():
645
+ fridge_btn = gr.Button("Parse and match", variant="primary")
646
+ fridge_send = gr.Button("Send matched to Basket tab", variant="secondary")
647
  fridge_table = gr.Dataframe(
648
  headers=["Input line", "Canonical match", "Score", "Cleaned"],
649
  label="Parsed matches", interactive=False,
650
  )
651
+ fridge_matched = gr.Textbox(label="Matched ingredients", interactive=False)
652
+
653
  def _parse(txt, sib, mn):
654
  rows, matches = parse_fridge(txt, sib, int(mn))
655
+ return rows, ", ".join(matches), matches
656
+ fridge_btn.click(
657
+ _parse, inputs=[fridge_text, sibling, fridge_min],
658
+ outputs=[fridge_table, fridge_matched, shared_basket],
659
+ show_progress="full",
660
+ )
661
+
662
+ def _send_to_basket(matches):
663
+ return gr.Dropdown(value=matches[:10] if matches else [])
664
+ fridge_send.click(_send_to_basket, inputs=[shared_basket], outputs=[basket])
665
 
666
  gr.Markdown(
667
  """---
668
  **Cite:** Radzikowski and Chen, 2026, *Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings*, [arXiv:2605.22391](https://arxiv.org/abs/2605.22391).
669
 
670
+ Artefacts: [epicure-cooc](https://huggingface.co/Kaikaku/epicure-cooc) | [epicure-core](https://huggingface.co/Kaikaku/epicure-core) | [epicure-chem](https://huggingface.co/Kaikaku/epicure-chem) | [corpus dataset](https://huggingface.co/datasets/Kaikaku/epicure-corpus-resources)
671
  """
672
  )
673