Spaces:

basilboy
/

microbiome-space

Running

App Files Community

the-puzzler committed 16 days ago

Commit

082f6c4

1 Parent(s): 6ea791c

Color UMAPs by final logit

Browse files

Files changed (1) hide show

app.py +6 -7

app.py CHANGED Viewed

@@ -516,7 +516,7 @@ def _infer_logits_and_final_embeddings(input_embeddings: np.ndarray, models: Loa
     return logits.squeeze(0).detach().cpu().numpy(), final_hidden.squeeze(0).detach().cpu().numpy()
-def _plot_umap(vectors: np.ndarray, labels: List[str], title: str):
     if len(vectors) < 2:
         raise gr.Error("UMAP needs at least 2 sequences.")
@@ -530,10 +530,9 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], title: str):
         init="random" if n_points <= 3 else "spectral",
     )
     coords = reducer.fit_transform(vectors)
-    norms = np.linalg.norm(vectors, axis=1)
     x_values = [float(value) for value in coords[:, 0]]
     y_values = [float(value) for value in coords[:, 1]]
-    color_values = [float(value) for value in norms]
     fig = go.Figure(
         data=[
@@ -543,7 +542,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], title: str):
                 mode="markers",
                 text=labels,
                 customdata=np.array(color_values).reshape(-1, 1),
-                hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>norm=%{customdata[0]:.3f}<extra></extra>",
                 marker={
                     "size": 10,
                     "color": color_values,
@@ -551,7 +550,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], title: str):
                     "line": {"width": 0.6, "color": "#1d2a1f"},
                     "opacity": 0.92,
                     "showscale": True,
-                    "colorbar": {"title": "vector norm"},
                 },
             )
         ]
@@ -620,8 +619,8 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
     input_embeddings = _embed_sequences(seqs, models)
     logits, final_embeddings = _infer_logits_and_final_embeddings(input_embeddings, models)
-    input_umap = _plot_umap(input_embeddings, labels, "UMAP of Input DNA Embeddings")
-    final_umap = _plot_umap(final_embeddings, labels, "UMAP of Final Transformer Embeddings")
     logits_hist = _plot_logits(logits, labels)
     rows = []

     return logits.squeeze(0).detach().cpu().numpy(), final_hidden.squeeze(0).detach().cpu().numpy()
+def _plot_umap(vectors: np.ndarray, labels: List[str], logits: np.ndarray, title: str):
     if len(vectors) < 2:
         raise gr.Error("UMAP needs at least 2 sequences.")
         init="random" if n_points <= 3 else "spectral",
     )
     coords = reducer.fit_transform(vectors)
     x_values = [float(value) for value in coords[:, 0]]
     y_values = [float(value) for value in coords[:, 1]]
+    color_values = [float(value) for value in logits]
     fig = go.Figure(
         data=[
                 mode="markers",
                 text=labels,
                 customdata=np.array(color_values).reshape(-1, 1),
+                hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>logit=%{customdata[0]:.4f}<extra></extra>",
                 marker={
                     "size": 10,
                     "color": color_values,
                     "line": {"width": 0.6, "color": "#1d2a1f"},
                     "opacity": 0.92,
                     "showscale": True,
+                    "colorbar": {"title": "final logit"},
                 },
             )
         ]
     input_embeddings = _embed_sequences(seqs, models)
     logits, final_embeddings = _infer_logits_and_final_embeddings(input_embeddings, models)
+    input_umap = _plot_umap(input_embeddings, labels, logits, "UMAP of Input DNA Embeddings")
+    final_umap = _plot_umap(final_embeddings, labels, logits, "UMAP of Final Transformer Embeddings")
     logits_hist = _plot_logits(logits, labels)
     rows = []