josefchen committed on
Commit
7200047
·
verified ·
1 Parent(s): f060061

Initial release: Epicure Explorer Gradio demo. Paper arxiv 2605.22391.

Browse files
Files changed (5) hide show
  1. LICENSE +21 -0
  2. README.md +30 -6
  3. app.py +199 -0
  4. epicure.py +208 -0
  5. requirements.txt +4 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Creative Commons Attribution 4.0 International (CC BY 4.0)
2
+
3
+ Copyright (c) 2026 Jakub Radzikowski and Josef Chen (KAIKAKU.AI)
4
+
5
+ You are free to:
6
+ Share -- copy and redistribute the material in any medium or format
7
+ Adapt -- remix, transform, and build upon the material for any purpose, even commercially
8
+
9
+ Under the following terms:
10
+ Attribution -- You must give appropriate credit, provide a link to the license,
11
+ and indicate if changes were made. You may do so in any reasonable manner, but
12
+ not in any way that suggests the licensor endorses you or your use.
13
+
14
+ No additional restrictions -- You may not apply legal terms or technological
15
+ measures that legally restrict others from doing anything the license permits.
16
+
17
+ Full text: https://creativecommons.org/licenses/by/4.0/legalcode
18
+
19
+ Citation:
20
+ Radzikowski, J. and Chen, J. (2026). Epicure: Navigating the Emergent Geometry
21
+ of Food Ingredient Embeddings. arXiv:2605.22391.
README.md CHANGED
@@ -1,13 +1,37 @@
1
  ---
2
  title: Epicure Explorer
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.15.0
8
- python_version: '3.13'
9
  app_file: app.py
10
  pinned: false
 
 
 
 
 
 
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Epicure Explorer
3
+ emoji: "🌶"
4
+ colorFrom: green
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.44.0
 
8
  app_file: app.py
9
  pinned: false
10
+ license: cc-by-4.0
11
+ short_description: Operators over the three Epicure ingredient embeddings
12
+ models:
13
+ - Kaikaku/epicure-cooc
14
+ - Kaikaku/epicure-core
15
+ - Kaikaku/epicure-chem
16
+ datasets:
17
+ - Kaikaku/epicure-corpus-resources
18
  ---
19
 
20
+ # Epicure Explorer
21
+
22
+ Interactive chef-facing demo of the three Epicure sibling ingredient embeddings (Cooc, Core, Chem). Three operator tabs:
23
+
24
+ 1. **Pairings**: top-K cosine neighbours plus the closest emergent mode for any of 1,790 ingredients.
25
+ 2. **Supervised SLERP**: rotate a seed toward a supervised direction (cuisine macro-region, food group, NOVA level, sensory category, USDA macro) by a continuous angle.
26
+ 3. **Emergent SLERP**: rotate a seed toward an unsupervised factor-mode pole discovered via multi-seed-stable FastICA + GMM.
27
+
28
+ Paper: [Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings](https://arxiv.org/abs/2605.22391).
29
+
30
+ ## Try
31
+
32
+ - Pairings, `chicken`, Cooc -> garlic, onion, black_pepper, turkey, carrot (recipe companions).
33
+ - Pairings, `chicken`, Chem -> beef, pork, cream_of_chicken_soup, buffalo_wing_sauce, peanut (chemistry peers).
34
+ - Supervised SLERP, `rice` + `cuisine:South_Asian`, 30 deg, Core -> turmeric, mustard_seed, fenugreek_seed, coriander, cumin.
35
+ - Supervised SLERP, `corn` + `cuisine:Latin_American`, 30 deg, Chem -> poblano_pepper, corn_tortilla, salsa, queso_fresco, chipotle_pepper.
36
+
37
+ Citation: Radzikowski and Chen, 2026.
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Epicure Explorer: a chef-facing interactive demo of the three sibling embeddings.
2
+
3
+ Three tabs:
4
+ - Pairings: top-K cosine neighbours + closest emergent mode for a chosen ingredient.
5
+ - Supervised SLERP: rotate a seed toward a supervised pole (cuisine, food group,
6
+ NOVA, sensory, USDA macros) by a chosen angle.
7
+ - Emergent SLERP: rotate a seed toward an emergent factor-mode pole.
8
+
9
+ Loads all three siblings on startup from their HF model repos.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import sys
16
+ import gradio as gr
17
+
18
+ # Prefer a local epicure.py (e.g. one copied into this Space's root for offline
19
+ # development); if it is not importable, download it from the cooc repo at runtime.
20
+ try:
21
+ from epicure import Epicure # noqa: F401
22
+ except ImportError:
23
+ from huggingface_hub import hf_hub_download
24
+ epicure_py = hf_hub_download("Kaikaku/epicure-cooc", "epicure.py")
25
+ sys.path.insert(0, os.path.dirname(epicure_py))
26
+ from epicure import Epicure # noqa: F401
27
+
28
# Load each sibling embedding once at import time; the UI callbacks below only
# look models up in this mapping by their short name.
MODELS = {}
MODELS["cooc"] = Epicure.from_pretrained("Kaikaku/epicure-cooc")
MODELS["core"] = Epicure.from_pretrained("Kaikaku/epicure-core")
MODELS["chem"] = Epicure.from_pretrained("Kaikaku/epicure-chem")

# Ingredient names offered by the dropdowns, taken from the cooc vocabulary.
ALL_INGREDIENTS = sorted(MODELS["cooc"].vocab)
35
+
36
+
37
def _supervised_choices(sibling: str) -> list[str]:
    """Return the supervised pole keys of ``sibling`` in sorted order."""
    poles = MODELS[sibling].supervised_poles
    return sorted(poles)
39
+
40
+
41
def _factor_modes(sibling: str) -> list[tuple[str, str]]:
    """List ``(display text, mode_id)`` pairs for the sibling's modes of kind ``"factor"``.

    The display text is formatted as ``"<mode_id> - <label>"``.
    """
    pairs = []
    for mode in MODELS[sibling].modes:
        if mode.kind != "factor":
            continue
        pairs.append((f"{mode.mode_id} - {mode.label}", mode.mode_id))
    return pairs
47
+
48
+
49
def pairings(sibling: str, ingredient: str, k: int):
    """Build the two result tables of the Pairings tab.

    Returns a pair ``(neighbour_rows, mode_rows)``:

    - ``neighbour_rows``: ``[name, cosine]`` rows from ``Epicure.neighbors``.
    - ``mode_rows``: ``[mode_id, label, cosine]`` rows from
      ``Epicure.closest_mode`` with no kind filter.

    Cosines are formatted to four decimals. Two empty lists are returned when
    ``ingredient`` is empty or missing from the sibling's vocabulary.
    """
    if not ingredient:
        return [], []
    model = MODELS[sibling]
    if ingredient not in model.vocab:
        return [], []

    neighbour_hits = model.neighbors(ingredient, k=k)
    mode_hits = model.closest_mode(ingredient, kind=None, k=k)

    neighbour_rows = [[name, f"{sim:.4f}"] for name, sim in neighbour_hits]
    mode_rows = [[mid, label, f"{sim:.4f}"] for mid, label, sim in mode_hits]
    return neighbour_rows, mode_rows
59
+
60
+
61
def supervised_slerp(sibling: str, seed: str, direction: str, theta: float, k: int):
    """Rotate ``seed`` toward a supervised pole and return table rows.

    ``direction`` must be a key of the sibling's ``supervised_poles``. The
    result is a list of ``[name, cosine]`` rows with the cosine formatted to
    four decimals; an empty list is returned when the seed is empty or not in
    the vocabulary, or when the direction is not a known pole.
    """
    if not seed:
        return []
    model = MODELS[sibling]
    if seed not in model.vocab:
        return []
    if direction not in model.supervised_poles:
        return []
    hits = model.slerp(seed, direction, theta_deg=theta, k=k)
    return [[name, f"{sim:.4f}"] for name, sim in hits]
68
+
69
+
70
def emergent_slerp(sibling: str, seed: str, factor_mode_id: str, theta: float, k: int):
    """Rotate ``seed`` toward the pole of the mode whose id is ``factor_mode_id``.

    The first mode with a matching ``mode_id`` supplies the pole vector. The
    result is a list of ``[name, cosine]`` rows with the cosine formatted to
    four decimals; an empty list is returned when the seed is empty or not in
    the vocabulary, or when no pole is found for the given id.
    """
    if not seed:
        return []
    model = MODELS[sibling]
    if seed not in model.vocab:
        return []

    # First matching mode wins; None when nothing matches.
    pole = next(
        (mode.pole for mode in model.modes if mode.mode_id == factor_mode_id),
        None,
    )
    if pole is None:
        return []

    hits = model.slerp(seed, pole, theta_deg=theta, k=k)
    return [[name, f"{sim:.4f}"] for name, sim in hits]
83
+
84
+
85
# Gradio UI: one shared sibling selector plus three operator tabs. Every button
# callback receives the currently selected sibling as its first input.
with gr.Blocks(title="Epicure Explorer") as demo:
    gr.Markdown(
        """# Epicure Explorer

Interactive chef-facing operators over the three Epicure sibling embeddings (Cooc, Core, Chem),
from the paper [Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings](https://arxiv.org/abs/2605.22391).

Each sibling sits at a different point on the chemistry-vs-recipe-context spectrum:
- **Cooc** walks recipe co-occurrence only. Neighbours are recipe companions.
- **Core** blends typed FlavorDB compound walks with injected ingredient-ingredient walks. Concentrated geometry, tightest modes.
- **Chem** walks typed FlavorDB compound metapaths only. Strongest supervised-direction recovery; neighbours are flavour-profile peers.
"""
    )

    sibling = gr.Radio(
        choices=["cooc", "core", "chem"],
        value="chem",
        label="Sibling embedding",
    )

    with gr.Tab("Pairings"):
        ingredient = gr.Dropdown(
            choices=ALL_INGREDIENTS, value="chicken", label="Ingredient", allow_custom_value=False
        )
        k_pair = gr.Slider(1, 10, value=5, step=1, label="K")
        pair_btn = gr.Button("Find pairings", variant="primary")
        with gr.Row():
            nb_table = gr.Dataframe(
                headers=["Neighbour", "Cosine"], label="Top-K nearest neighbours", interactive=False
            )
            mode_table = gr.Dataframe(
                headers=["Mode id", "Label", "Cosine"], label="Closest modes", interactive=False
            )
        pair_btn.click(
            pairings, inputs=[sibling, ingredient, k_pair], outputs=[nb_table, mode_table]
        )

    with gr.Tab("Supervised SLERP"):
        sup_seed = gr.Dropdown(
            choices=ALL_INGREDIENTS, value="rice", label="Seed ingredient"
        )
        # Initial choices come from "chem", matching the default of the sibling radio.
        sup_dir = gr.Dropdown(
            choices=_supervised_choices("chem"),
            value="cuisine:South_Asian",
            label="Supervised direction",
        )
        sup_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
        sup_k = gr.Slider(1, 10, value=5, step=1, label="K")
        sup_btn = gr.Button("Rotate", variant="primary")
        sup_table = gr.Dataframe(
            headers=["Ingredient", "Cosine"], label="Top-K rotated-query neighbours"
        )
        sup_btn.click(
            supervised_slerp,
            inputs=[sibling, sup_seed, sup_dir, sup_theta, sup_k],
            outputs=sup_table,
        )
        # Switching sibling swaps in that sibling's pole keys and clears the selection.
        sibling.change(
            lambda s: gr.Dropdown(choices=_supervised_choices(s), value=None),
            inputs=sibling,
            outputs=sup_dir,
        )

    with gr.Tab("Emergent SLERP"):
        em_seed = gr.Dropdown(
            choices=ALL_INGREDIENTS, value="chocolate", label="Seed ingredient"
        )
        factor_options = _factor_modes("chem")
        # Entries are rendered by _factor_modes as "<mode_id> - <label>", so the
        # dropdown label states the same order (it previously said the reverse).
        em_mode = gr.Dropdown(
            choices=[label for label, _ in factor_options],
            value=factor_options[0][0] if factor_options else None,
            label="Emergent factor mode (mode_id - label)",
        )
        em_theta = gr.Slider(0, 90, value=30, step=5, label="Rotation angle (deg)")
        em_k = gr.Slider(1, 10, value=5, step=1, label="K")
        em_btn = gr.Button("Rotate", variant="primary")
        em_table = gr.Dataframe(
            headers=["Ingredient", "Cosine"], label="Top-K rotated-query neighbours"
        )

        def _resolve_factor(sib, label, seed, theta, k):
            """Map the selected dropdown text back to a mode id, then run the rotation.

            Falls back to the sibling's first factor mode when the text matches
            no entry (e.g. the selection was cleared by a sibling switch), and
            returns no rows when the sibling has no factor modes at all.
            """
            options = _factor_modes(sib)
            mode_id = next((mid for lab, mid in options if lab == label), None)
            if mode_id is None and options:
                mode_id = options[0][1]
            if mode_id is None:
                return []
            return emergent_slerp(sib, seed, mode_id, theta, k)

        em_btn.click(
            _resolve_factor,
            inputs=[sibling, em_mode, em_seed, em_theta, em_k],
            outputs=em_table,
        )
        # Switching sibling swaps in that sibling's factor modes and clears the selection.
        sibling.change(
            lambda s: gr.Dropdown(choices=[label for label, _ in _factor_modes(s)], value=None),
            inputs=sibling,
            outputs=em_mode,
        )

    gr.Markdown(
        """---
**Cite:** Radzikowski and Chen 2026, *Epicure: Navigating the Emergent Geometry of Food Ingredient Embeddings*, arXiv:2605.22391.

Models: [epicure-cooc](https://huggingface.co/Kaikaku/epicure-cooc), [epicure-core](https://huggingface.co/Kaikaku/epicure-core), [epicure-chem](https://huggingface.co/Kaikaku/epicure-chem).
Dataset: [epicure-corpus-resources](https://huggingface.co/datasets/Kaikaku/epicure-corpus-resources).
"""
    )

if __name__ == "__main__":
    demo.launch()
epicure.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Epicure: minimal loader for the three sibling ingredient embeddings.
3
+
4
+ Usage
5
+ -----
6
+ from epicure import Epicure
7
+ m = Epicure.from_pretrained("Kaikaku/epicure-cooc")
8
+ m.neighbors("chicken", k=5)
9
+ m.slerp("rice", "cuisine:South_Asian", theta_deg=30, k=5)
10
+ m.closest_mode("miso", kind="factor", k=3)
11
+
12
+ The three repos (epicure-cooc, epicure-core, epicure-chem) ship the same loader.
13
+ Paper: https://arxiv.org/abs/2605.22391
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ from dataclasses import dataclass
21
+ from typing import Iterable
22
+
23
+ import numpy as np
24
+
25
+
26
def _try_hf_download(repo_id: str, filename: str, revision: str | None = None) -> str:
    """Fetch ``filename`` from the Hub repo ``repo_id`` and return what ``hf_hub_download`` returns.

    ``huggingface_hub`` is imported lazily so the module can be imported
    without it; a missing package is re-raised as an ``ImportError`` carrying
    an installation hint.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError as exc:
        hint = (
            "huggingface_hub is required for from_pretrained(). "
            "Install with: pip install huggingface_hub safetensors numpy"
        )
        raise ImportError(hint) from exc
    else:
        return hf_hub_download(repo_id=repo_id, filename=filename, revision=revision)
35
+
36
+
37
def _load_safetensors(path: str) -> np.ndarray:
    """Read the safetensors file at ``path`` and return its ``"embeddings"`` tensor.

    ``safetensors`` is imported lazily; a missing package is re-raised as an
    ``ImportError`` carrying an installation hint.
    """
    try:
        from safetensors.numpy import load_file
    except ImportError as exc:
        raise ImportError("safetensors required. pip install safetensors") from exc
    tensors = load_file(path)
    return tensors["embeddings"]
43
+
44
+
45
def _unit(v: np.ndarray, axis: int = -1, eps: float = 1e-9) -> np.ndarray:
    """Scale ``v`` to unit Euclidean length along ``axis``.

    Norms smaller than ``eps`` are clamped to ``eps`` before dividing, so an
    all-zero vector stays all-zero instead of producing NaNs.
    """
    lengths = np.linalg.norm(v, axis=axis, keepdims=True)
    safe_lengths = np.maximum(lengths, eps)
    return v / safe_lengths
48
+
49
+
50
@dataclass
class ModeEntry:
    """One mode record, built from an entry of ``modes.json``.

    Equality is defined explicitly because the dataclass-generated ``__eq__``
    compares fields with ``==``; for the ndarray ``pole`` that yields an array
    and raises ``ValueError`` as soon as two distinct pole arrays are compared.
    """

    mode_id: str  # identifier used for lookups (e.g. by ``Epicure.mode_members``)
    kind: str  # mode kind, e.g. "factor"
    property: str
    label: str  # human-readable label shown next to ``mode_id``
    n_members: int
    members: list[str]
    pole: np.ndarray  # (d_model,) unit-normalised

    def __eq__(self, other: object) -> bool:
        """Compare all fields by value, using ``np.array_equal`` for ``pole``."""
        if other.__class__ is not self.__class__:
            return NotImplemented
        mine = (
            self.mode_id, self.kind, self.property,
            self.label, self.n_members, self.members,
        )
        theirs = (
            other.mode_id, other.kind, other.property,
            other.label, other.n_members, other.members,
        )
        return mine == theirs and bool(np.array_equal(self.pole, other.pole))
59
+
60
+
61
class Epicure:
    """Lookup-table embedding with neighbour, SLERP, and closest-mode operators.

    Attributes:
        E_raw: float32 copy of the embedding matrix, one row per vocabulary index.
        E: ``E_raw`` with every row scaled to unit length (used for cosine scores).
        vocab: ingredient name -> row index.
        itos: row index -> ingredient name (inverse of ``vocab``).
        modes: list of ``ModeEntry`` records.
        supervised_poles: pole key -> direction vector.
        config: contents of ``config.json``.
    """

    def __init__(
        self,
        E: np.ndarray,
        vocab: dict[str, int],
        modes: list[ModeEntry],
        supervised_poles: dict[str, np.ndarray],
        config: dict,
    ):
        # np.array copies, so later in-place edits of the caller's matrix do not leak in.
        self.E_raw = np.array(E, dtype=np.float32)
        self.E = _unit(self.E_raw)
        self.vocab = vocab
        self.itos = {i: n for n, i in vocab.items()}
        self.modes = modes
        self.supervised_poles = supervised_poles
        self.config = config

    # ----- constructors -----

    @classmethod
    def from_pretrained(cls, repo_id_or_path: str, revision: str | None = None) -> "Epicure":
        """Load a model from a local directory or, failing that, from a Hub repo id.

        Reads ``embeddings.safetensors``, ``vocab.json``, ``modes.json``,
        ``supervised_poles.json`` and ``config.json``. ``revision`` is only
        used when downloading.
        """
        is_local = os.path.isdir(repo_id_or_path)

        def resolve(filename: str) -> str:
            if is_local:
                return os.path.join(repo_id_or_path, filename)
            return _try_hf_download(repo_id_or_path, filename, revision=revision)

        def read_json(filename: str):
            # JSON is UTF-8 by specification; do not depend on the platform default.
            with open(resolve(filename), encoding="utf-8") as f:
                return json.load(f)

        E = _load_safetensors(resolve("embeddings.safetensors"))
        vocab = read_json("vocab.json")
        modes_raw = read_json("modes.json")
        sup_raw = read_json("supervised_poles.json")
        config = read_json("config.json")
        modes = [
            ModeEntry(
                mode_id=m["mode_id"],
                kind=m["kind"],
                property=m["property"],
                label=m["label"],
                n_members=m["n_members"],
                members=m["members"],
                pole=np.array(m["pole"], dtype=np.float32),
            )
            for m in modes_raw
        ]
        supervised_poles = {k: np.array(v, dtype=np.float32) for k, v in sup_raw.items()}
        return cls(E, vocab, modes, supervised_poles, config)

    # ----- core operators -----

    def _top_k(self, sims: np.ndarray, k: int, exclude: int | None = None) -> list[tuple[str, float]]:
        """Return the ``k`` best ``(name, score)`` pairs, optionally dropping row ``exclude``.

        The excluded row is removed by index rather than by rank, so it can
        never appear in the result, whatever its score and however large ``k`` is.
        """
        order = np.argsort(-sims, kind="stable")  # stable: ties resolve by row index
        if exclude is not None:
            order = order[order != exclude]
        limit = max(int(k), 0)  # tolerate float k (e.g. from a UI slider)
        return [(self.itos[int(i)], float(sims[i])) for i in order[:limit]]

    def vec(self, name: str, normalised: bool = True) -> np.ndarray:
        """Return the embedding row of ``name`` (unit-length unless ``normalised`` is false).

        Raises ``KeyError`` for a name that is not in the vocabulary.
        """
        i = self.vocab[name]
        return self.E[i] if normalised else self.E_raw[i]

    def neighbors(self, name: str, k: int = 5, exclude_self: bool = True) -> list[tuple[str, float]]:
        """Return the ``k`` rows most cosine-similar to ``name`` as ``(name, cosine)`` pairs.

        With ``exclude_self=True`` the query row itself is removed by index,
        which stays correct when another row ties with it (duplicate vectors).
        """
        idx = self.vocab[name]
        sims = self.E @ self.E[idx]
        return self._top_k(sims, k, exclude=idx if exclude_self else None)

    def slerp(
        self,
        seed: str,
        direction: str | np.ndarray,
        theta_deg: float,
        k: int = 5,
        exclude_seed: bool = True,
    ) -> list[tuple[str, float]]:
        """Rotate the seed vector toward a unit direction by angle theta on the unit sphere.

        ``direction`` is either a supervised pole key (e.g.
        ``"cuisine:South_Asian"``) or a raw (d_model,) np.ndarray.
        At theta=0 the query is the seed. At theta=60deg cosine to seed = 0.5.
        With ``exclude_seed=True`` (default) the seed ingredient is removed from results
        (the paper's reported tables also exclude it).

        Raises ``KeyError`` for an unknown seed or an unknown pole key.
        """
        seed_idx = self.vocab[seed]
        v = self.E[seed_idx]
        d = self.supervised_poles[direction] if isinstance(direction, str) else direction
        d = _unit(np.asarray(d, dtype=np.float32))
        # Gram-Schmidt: orthogonal component of d relative to v
        d_perp = d - (d @ v) * v
        n_perp = np.linalg.norm(d_perp)
        # The vectors are float32, so a residual this small is rounding noise
        # rather than a real direction; treat it as colinear.
        if n_perp < 1e-6:
            # d is colinear with v: rotation has no defined plane; return seed
            # neighbours, honouring the caller's exclude_seed choice.
            return self.neighbors(seed, k=k, exclude_self=exclude_seed)
        d_perp = d_perp / n_perp
        theta = np.deg2rad(float(theta_deg))
        q = _unit(np.cos(theta) * v + np.sin(theta) * d_perp)
        sims = self.E @ q
        return self._top_k(sims, k, exclude=seed_idx if exclude_seed else None)

    def closest_mode(
        self,
        name: str,
        kind: str | None = None,
        k: int = 3,
    ) -> list[tuple[str, str, float]]:
        """Return the top-k closest modes to the named ingredient.

        ``kind`` filters by mode kind: 'factor', 'cuisine', 'food_group',
        'nova_level', 'cf_sensory', 'usda_nutrient' or None for all.
        Each result is ``(mode_id, label, cosine)``, best first.
        """
        v = self.vec(name)
        scored = [
            (m.mode_id, m.label, float(_unit(m.pole) @ v))
            for m in self.modes
            if kind is None or m.kind == kind
        ]
        scored.sort(key=lambda item: -item[2])
        return scored[:max(int(k), 0)]

    def mode_members(self, mode_id: str, k: int | None = None) -> list[str]:
        """Return the members of mode ``mode_id`` (the first ``k`` if ``k`` is given).

        Raises ``KeyError`` when no mode has that id.
        """
        for m in self.modes:
            if m.mode_id == mode_id:
                return m.members[:k] if k is not None else m.members
        raise KeyError(mode_id)

    # ----- introspection -----

    def list_supervised_poles(self, prefix: str | None = None) -> list[str]:
        """Return the supervised pole keys, optionally only those starting with ``prefix``."""
        if prefix is None:
            return list(self.supervised_poles)
        return [key for key in self.supervised_poles if key.startswith(prefix)]

    def list_modes(self, kind: str | None = None) -> list[tuple[str, str]]:
        """Return ``(mode_id, label)`` pairs, optionally only for modes of ``kind``."""
        return [(m.mode_id, m.label) for m in self.modes if kind is None or m.kind == kind]

    def __repr__(self) -> str:
        return (
            f"Epicure(schema={self.config.get('schema')!r}, "
            f"d_model={self.config.get('d_model')}, "
            f"vocab_size={self.config.get('vocab_size')}, "
            f"modes={len(self.modes)}, "
            f"supervised_poles={len(self.supervised_poles)})"
        )
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.40.0
2
+ huggingface_hub>=0.24.0
3
+ safetensors>=0.4.0
4
+ numpy>=1.24