Spaces:
Sleeping
Sleeping
Add API documentation tab
Browse files
app.py
CHANGED
|
@@ -180,38 +180,85 @@ with gr.Blocks(title="BERT Metagenome Embeddings") as demo:
|
|
| 180 |
Extract embeddings from DNA sequences. BERT model (430M params) pretrained on metagenomic sequences.
|
| 181 |
""")
|
| 182 |
|
| 183 |
-
with gr.Row():
|
| 184 |
-
with gr.Column(scale=1):
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
| 208 |
|
| 209 |
btn.click(
|
| 210 |
process,
|
| 211 |
inputs=[seq_input, mode_input, stride_input],
|
| 212 |
-
outputs=[output, download]
|
|
|
|
| 213 |
)
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
gr.Markdown("""
|
| 216 |
---
|
| 217 |
**Model**: [genomenet/bert-metagenome](https://huggingface.co/genomenet/bert-metagenome) |
|
|
|
|
| 180 |
Extract embeddings from DNA sequences. BERT model (430M params) pretrained on metagenomic sequences.
|
| 181 |
""")
|
| 182 |
|
| 183 |
+
with gr.Tab("Extract"):
|
| 184 |
+
with gr.Row():
|
| 185 |
+
with gr.Column(scale=1):
|
| 186 |
+
seq_input = gr.Textbox(
|
| 187 |
+
label="sequence",
|
| 188 |
+
placeholder="Paste DNA sequence (FASTA or raw)...",
|
| 189 |
+
lines=8,
|
| 190 |
+
value=EXAMPLE_SEQUENCE,
|
| 191 |
+
info="min 1000 bp"
|
| 192 |
+
)
|
| 193 |
+
mode_input = gr.Radio(
|
| 194 |
+
choices=["mean", "max", "per-window"],
|
| 195 |
+
value="mean",
|
| 196 |
+
label="pooling",
|
| 197 |
+
info="mean/max: single 768-dim vector | per-window: (n, 768) matrix"
|
| 198 |
+
)
|
| 199 |
+
stride_input = gr.Slider(
|
| 200 |
+
minimum=50, maximum=500, value=100, step=50,
|
| 201 |
+
label="stride",
|
| 202 |
+
info="step size between windows"
|
| 203 |
+
)
|
| 204 |
+
btn = gr.Button("extract", variant="primary")
|
| 205 |
+
|
| 206 |
+
with gr.Column(scale=1):
|
| 207 |
+
output = gr.Markdown()
|
| 208 |
+
download = gr.File(label="download")
|
| 209 |
|
| 210 |
btn.click(
|
| 211 |
process,
|
| 212 |
inputs=[seq_input, mode_input, stride_input],
|
| 213 |
+
outputs=[output, download],
|
| 214 |
+
api_name="embed"
|
| 215 |
)
|
| 216 |
|
| 217 |
+
with gr.Tab("API"):
|
| 218 |
+
gr.Markdown("""
|
| 219 |
+
### API
|
| 220 |
+
|
| 221 |
+
```python
|
| 222 |
+
from gradio_client import Client
|
| 223 |
+
import numpy as np
|
| 224 |
+
|
| 225 |
+
client = Client("genomenet/bert-embedding")
|
| 226 |
+
|
| 227 |
+
# Extract embedding (mean pooled)
|
| 228 |
+
result = client.predict(
|
| 229 |
+
sequence="ATGCGATCGATCG...", # min 1000 bp
|
| 230 |
+
mode="mean", # "mean", "max", or "per-window"
|
| 231 |
+
stride=100, # step size between windows
|
| 232 |
+
api_name="/embed"
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
# result = (summary_text, embedding_file_path)
|
| 236 |
+
summary, emb_path = result
|
| 237 |
+
embedding = np.load(emb_path)
|
| 238 |
+
print(embedding.shape) # (768,) for mean/max, (n_windows, 768) for per-window
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
**Modes**:
|
| 242 |
+
- `mean`: Single 768-dim vector (mean pooled across all windows)
|
| 243 |
+
- `max`: Single 768-dim vector (max pooled across all windows)
|
| 244 |
+
- `per-window`: Matrix of shape `(n_windows, 768)` - one embedding per sliding window
|
| 245 |
+
|
| 246 |
+
**Parameters**:
|
| 247 |
+
| param | type | description |
|
| 248 |
+
|-------|------|-------------|
|
| 249 |
+
| sequence | str | DNA sequence (min 1000 bp) |
|
| 250 |
+
| mode | str | Pooling mode: "mean", "max", "per-window" |
|
| 251 |
+
| stride | int | Step size between windows (50-500) |
|
| 252 |
+
|
| 253 |
+
**Local**:
|
| 254 |
+
```bash
|
| 255 |
+
git clone https://huggingface.co/spaces/genomenet/bert-embedding
|
| 256 |
+
cd bert-embedding
|
| 257 |
+
pip install -r requirements.txt
|
| 258 |
+
python app.py
|
| 259 |
+
```
|
| 260 |
+
""")
|
| 261 |
+
|
| 262 |
gr.Markdown("""
|
| 263 |
---
|
| 264 |
**Model**: [genomenet/bert-metagenome](https://huggingface.co/genomenet/bert-metagenome) |
|