Spaces:
Running on L40S
Running on L40S
Update examples, Help tab, and css (#137)
Browse files
- Trimmed long audio examples to 10s
- Updated task dropdown list to match core tasks
- Updated Help tab with new prompting tips
- Fixed border around chat component and waveform color
---------
Co-authored-by: David <david@earthspecies.org>
- README.md +2 -2
- app.py +25 -18
- assets/American Crow - Corvus brachyrhynchos.mp3 +2 -2
- assets/Lazuli_Bunting_yell-YELLLAZB20160625SM303143.mp3 +2 -2
- assets/nri-GreenTreeFrogEvergladesNP.mp3 +2 -2
- assets/yell-YELLAMRO20160506SM3.mp3 +2 -2
- static/help.html +39 -47
- static/onboarding.html +2 -2
- static/style.css +8 -2
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: NatureLM
|
| 3 |
emoji: 🔈
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: green
|
|
@@ -14,7 +14,7 @@ thumbnail: >-
|
|
| 14 |
|
| 15 |
---
|
| 16 |
|
| 17 |
-
# NatureLM-audio
|
| 18 |
|
| 19 |
This is a demo of the NatureLM-audio model. Users can upload an audio file containing animal vocalizations and ask questions about them in a chat interface.
|
| 20 |
|
|
|
|
| 1 |
---
|
| 2 |
+
title: NatureLM-Audio
|
| 3 |
emoji: 🔈
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: green
|
|
|
|
| 14 |
|
| 15 |
---
|
| 16 |
|
| 17 |
+
# NatureLM-audio
|
| 18 |
|
| 19 |
This is a demo of the NatureLM-audio model. Users can upload an audio file containing animal vocalizations and ask questions about them in a chat interface.
|
| 20 |
|
app.py
CHANGED
|
@@ -252,29 +252,30 @@ def main() -> tuple[gr.Blocks, gr.themes.Base, str]:
|
|
| 252 |
robin_audio = ASSETS_DIR / "yell-YELLAMRO20160506SM3.mp3"
|
| 253 |
whale_audio = ASSETS_DIR / "Humpback Whale - Megaptera novaeangliae.wav"
|
| 254 |
crow_audio = ASSETS_DIR / "American Crow - Corvus brachyrhynchos.mp3"
|
|
|
|
| 255 |
|
| 256 |
examples = {
|
| 257 |
-
"
|
| 258 |
str(laz_audio),
|
| 259 |
"What is the common name for the focal species in the audio?",
|
| 260 |
],
|
| 261 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
str(frog_audio),
|
| 263 |
-
"
|
| 264 |
],
|
| 265 |
"Caption the audio (American Robin)": [
|
| 266 |
str(robin_audio),
|
| 267 |
"Caption the audio, using the scientific name for any animal species.",
|
| 268 |
],
|
| 269 |
-
"
|
| 270 |
-
str(whale_audio),
|
| 271 |
-
"What is the scientific name for the focal species in the audio?",
|
| 272 |
-
],
|
| 273 |
-
"Speaker Count (American Crow)": [
|
| 274 |
str(crow_audio),
|
| 275 |
-
"
|
| 276 |
],
|
| 277 |
-
"
|
| 278 |
}
|
| 279 |
|
| 280 |
gr.set_static_paths(paths=[ASSETS_DIR])
|
|
@@ -313,6 +314,7 @@ def main() -> tuple[gr.Blocks, gr.themes.Base, str]:
|
|
| 313 |
interactive=True,
|
| 314 |
sources=["upload"],
|
| 315 |
type="filepath",
|
|
|
|
| 316 |
)
|
| 317 |
# Validate audio duration and sample rate on upload
|
| 318 |
audio_input.change(
|
|
@@ -332,15 +334,19 @@ def main() -> tuple[gr.Blocks, gr.themes.Base, str]:
|
|
| 332 |
task_dropdown = gr.Dropdown(
|
| 333 |
[
|
| 334 |
"What are the common names for the species in the audio, if any?",
|
| 335 |
-
"
|
| 336 |
-
"
|
| 337 |
-
"
|
| 338 |
-
"What is the common name for the focal species in the audio?",
|
| 339 |
-
"What is the family of the focal species in the audio?",
|
| 340 |
"What is the genus of the focal species in the audio?",
|
| 341 |
-
"What is the
|
| 342 |
-
"
|
| 343 |
-
"What
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
],
|
| 345 |
label="Pre-Loaded Tasks",
|
| 346 |
info="Select a task, or write your own prompt below.",
|
|
@@ -504,6 +510,7 @@ def main() -> tuple[gr.Blocks, gr.themes.Base, str]:
|
|
| 504 |
gr.Audio(
|
| 505 |
filepath,
|
| 506 |
label=label,
|
|
|
|
| 507 |
)
|
| 508 |
|
| 509 |
with gr.Tab("💡 Help"):
|
|
|
|
| 252 |
robin_audio = ASSETS_DIR / "yell-YELLAMRO20160506SM3.mp3"
|
| 253 |
whale_audio = ASSETS_DIR / "Humpback Whale - Megaptera novaeangliae.wav"
|
| 254 |
crow_audio = ASSETS_DIR / "American Crow - Corvus brachyrhynchos.mp3"
|
| 255 |
+
walrus_audio = ASSETS_DIR / "Walrus - Odobenus rosmarus.wav"
|
| 256 |
|
| 257 |
examples = {
|
| 258 |
+
"Species Identification (Lazuli Bunting)": [
|
| 259 |
str(laz_audio),
|
| 260 |
"What is the common name for the focal species in the audio?",
|
| 261 |
],
|
| 262 |
+
"Species Detection (Humpback Whale)": [
|
| 263 |
+
str(whale_audio),
|
| 264 |
+
"What are the common names for the species in the audio, if any?",
|
| 265 |
+
],
|
| 266 |
+
"Call Type (Green Tree Frog)": [
|
| 267 |
str(frog_audio),
|
| 268 |
+
"What type of call is the frog making in this recording?",
|
| 269 |
],
|
| 270 |
"Caption the audio (American Robin)": [
|
| 271 |
str(robin_audio),
|
| 272 |
"Caption the audio, using the scientific name for any animal species.",
|
| 273 |
],
|
| 274 |
+
"Multiple Species Identification (American Crow)": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
str(crow_audio),
|
| 276 |
+
"List the common names of all species vocalizing in this audio clip.",
|
| 277 |
],
|
| 278 |
+
"Taxonomy (Walrus)": [str(walrus_audio), "What is the taxonomic name of the focal species in the audio?"],
|
| 279 |
}
|
| 280 |
|
| 281 |
gr.set_static_paths(paths=[ASSETS_DIR])
|
|
|
|
| 314 |
interactive=True,
|
| 315 |
sources=["upload"],
|
| 316 |
type="filepath",
|
| 317 |
+
waveform_options=gr.WaveformOptions(waveform_progress_color="#3b82f6"),
|
| 318 |
)
|
| 319 |
# Validate audio duration and sample rate on upload
|
| 320 |
audio_input.change(
|
|
|
|
| 334 |
task_dropdown = gr.Dropdown(
|
| 335 |
[
|
| 336 |
"What are the common names for the species in the audio, if any?",
|
| 337 |
+
"What species is vocalizing in this audio recording? Common name?",
|
| 338 |
+
"Which of these is the focal species in the audio? Options: [add your options here]",
|
| 339 |
+
"List the scientific names of all species vocalizing in this audio clip.",
|
|
|
|
|
|
|
| 340 |
"What is the genus of the focal species in the audio?",
|
| 341 |
+
"What is the common name of the species vocalizing in this audio recording?"
|
| 342 |
+
" Provide your top 3 predictions in ranked order.",
|
| 343 |
+
"What type of vocalization or call is this?",
|
| 344 |
+
"Is the focal species an adult or juvenile?",
|
| 345 |
+
"Caption the audio, using common names for any animal species.",
|
| 346 |
+
"Is there a bird vocalizing in this recording? Answer: Yes or No.",
|
| 347 |
+
"Based on the sounds, what habitat or environment do you think this was recorded in?",
|
| 348 |
+
"How many individual vocalizations can you detect in this audio?",
|
| 349 |
+
"First describe what you hear, then identify the species.",
|
| 350 |
],
|
| 351 |
label="Pre-Loaded Tasks",
|
| 352 |
info="Select a task, or write your own prompt below.",
|
|
|
|
| 510 |
gr.Audio(
|
| 511 |
filepath,
|
| 512 |
label=label,
|
| 513 |
+
waveform_options=gr.WaveformOptions(waveform_progress_color="#3b82f6"),
|
| 514 |
)
|
| 515 |
|
| 516 |
with gr.Tab("💡 Help"):
|
assets/American Crow - Corvus brachyrhynchos.mp3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91b67d8df44c265cd8c2aece4078b00f3d67706156e40c0b799b0db42ea3aa09
|
| 3 |
+
size 402834
|
assets/Lazuli_Bunting_yell-YELLLAZB20160625SM303143.mp3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b266ceff4be4c64bc2abf668b1b7de17fdc17bb84e64102d045d885f8a6b989e
|
| 3 |
+
size 244523
|
assets/nri-GreenTreeFrogEvergladesNP.mp3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6015cac9794e7ca94c84df11927718395fc4cbef9dfcb997040a7149bb13df24
|
| 3 |
+
size 155036
|
assets/yell-YELLAMRO20160506SM3.mp3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57f2595f1db3d29be3eae5908964f1b61f65ed375b14f68f6b69f6f1f78f7f79
|
| 3 |
+
size 165763
|
static/help.html
CHANGED
|
@@ -23,8 +23,7 @@
|
|
| 23 |
</li>
|
| 24 |
<li style="margin-bottom: 8px;">
|
| 25 |
<strong>Trim your audio (if needed)</strong> by clicking the scissors
|
| 26 |
-
icon on the bottom right of the audio panel.
|
| 27 |
-
to 10 seconds or less.
|
| 28 |
</li>
|
| 29 |
<li style="margin-bottom: 8px;">
|
| 30 |
<strong>View the Spectrogram (optional)</strong>. You can easily
|
|
@@ -45,41 +44,41 @@
|
|
| 45 |
</div>
|
| 46 |
<div class="guide-section">
|
| 47 |
<h3>Tips</h3>
|
| 48 |
-
<b>Prompting
|
| 49 |
<ul style="margin-top: 12px; padding-left: 20px;
|
| 50 |
color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
<
|
| 60 |
-
</ul>
|
| 61 |
-
<ul>✅ Do ask:
|
| 62 |
-
<i>"What species made this sound?"</i>
|
| 63 |
</ul>
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
<ul>
|
| 67 |
-
<
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
</ul>
|
| 70 |
-
<ul>
|
| 71 |
-
|
| 72 |
</ul>
|
| 73 |
-
<li>Giving the model
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
<i>
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
<
|
| 80 |
-
<i>"Classify the audio into one of the following categories:
|
| 81 |
-
Cetaceans, Aves, or None."</i>
|
| 82 |
</ul>
|
|
|
|
| 83 |
</ul>
|
| 84 |
<br>
|
| 85 |
<b>Audio Files</b>
|
|
@@ -97,23 +96,16 @@
|
|
| 97 |
<h3>Learn More</h3>
|
| 98 |
<ul style="margin-top: 12px; padding-left: 20px;
|
| 99 |
color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 100 |
-
<li>
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
<li>Check out the
|
| 105 |
<a href="https://arxiv.org/abs/2411.07186"
|
| 106 |
-
target="_blank">published paper</a>
|
| 107 |
-
|
| 108 |
-
<li>
|
| 109 |
-
|
| 110 |
-
target="_blank">NatureLM-audio Demo Page</a>
|
| 111 |
-
for additional context, a demo video, and more examples
|
| 112 |
-
of the model in action.</li>
|
| 113 |
-
<li>Sign up for our
|
| 114 |
-
<a href="https://forms.gle/WjrbmFhKkzmEgwvY7"
|
| 115 |
-
target="_blank">closed beta waitlist</a>,
|
| 116 |
-
if you're interested in testing upcoming features like
|
| 117 |
-
longer audio files and batch processing.</li>
|
| 118 |
</ul>
|
| 119 |
</div>
|
|
|
|
| 23 |
</li>
|
| 24 |
<li style="margin-bottom: 8px;">
|
| 25 |
<strong>Trim your audio (if needed)</strong> by clicking the scissors
|
| 26 |
+
icon on the bottom right of the audio panel. Only the first 10 seconds of audio will be analyzed, so trim to the most relevant section of your recording.
|
|
|
|
| 27 |
</li>
|
| 28 |
<li style="margin-bottom: 8px;">
|
| 29 |
<strong>View the Spectrogram (optional)</strong>. You can easily
|
|
|
|
| 44 |
</div>
|
| 45 |
<div class="guide-section">
|
| 46 |
<h3>Tips</h3>
|
| 47 |
+
<b>Prompting Tips</b> (see full <a href="https://projects.earthspecies.org/naturelm-audio/prompting_guide.html" target="_blank">Prompting Guide</a> for more)
|
| 48 |
<ul style="margin-top: 12px; padding-left: 20px;
|
| 49 |
color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 50 |
+
|
| 51 |
+
<li><strong>For Yes/No questions, always include "Answer: Yes or No."</strong> Without this, the model may respond with species names rather than a yes or no answer.</li>
|
| 52 |
+
<ul>
|
| 53 |
+
<li>
|
| 54 |
+
<i>Is an alarm call present in this recording? Answer: Yes or No.</i>
|
| 55 |
+
</li>
|
| 56 |
+
<li>
|
| 57 |
+
<i>Is there a frog or amphibian vocalizing in this recording? Answer: Yes or No.</i>
|
| 58 |
+
</li>
|
|
|
|
|
|
|
|
|
|
| 59 |
</ul>
|
| 60 |
+
|
| 61 |
+
<li><strong>Providing geographic or temporal context</strong> can help narrow identification.</li>
|
| 62 |
+
<ul>
|
| 63 |
+
<li>
|
| 64 |
+
<i>Given the context: '[context]', what is the common name for the focal species in the audio?</i>
|
| 65 |
+
</li>
|
| 66 |
+
<li>
|
| 67 |
+
Replace [context] with whatever metadata you have, e.g. "country: BR", "coordinates: -23.5, -46.6", or "recorded in temperate forest, June".
|
| 68 |
+
</li>
|
| 69 |
</ul>
|
| 70 |
+
<ul>
|
| 71 |
+
|
| 72 |
</ul>
|
| 73 |
+
<li><strong>Giving the model a candidate list</strong> to choose from can improve accuracy. </li>
|
| 74 |
+
<ul>
|
| 75 |
+
<li>
|
| 76 |
+
<i>Which of these is the focal species in the audio? Options: [species_choices]</i>
|
| 77 |
+
</li><li>
|
| 78 |
+
Replace [species_choices] with a comma-separated list, e.g. <i>Turdus merula, Erithacus rubecula, Fringilla coelebs, Parus major, Phylloscopus collybita</i>.
|
| 79 |
+
</li>
|
|
|
|
|
|
|
| 80 |
</ul>
|
| 81 |
+
|
| 82 |
</ul>
|
| 83 |
<br>
|
| 84 |
<b>Audio Files</b>
|
|
|
|
| 96 |
<h3>Learn More</h3>
|
| 97 |
<ul style="margin-top: 12px; padding-left: 20px;
|
| 98 |
color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 99 |
+
<li>Visit the <a href="https://projects.earthspecies.org/naturelm-audio/prompting_guide.html"
|
| 100 |
+
target="_blank">NatureLM-audio Project Page</a>
|
| 101 |
+
for more details, examples, and the full Prompting Guide</li>
|
| 102 |
+
<li>Read our <a href="https://huggingface.co/blog/EarthSpeciesProject/nature-lm-audio-ui-demo/"
|
| 103 |
+
target="_blank">blog post</a> with a step-by-step tutorial</li>
|
| 104 |
<li>Check out the
|
| 105 |
<a href="https://arxiv.org/abs/2411.07186"
|
| 106 |
+
target="_blank">published paper</a> for a deeper technical dive on NatureLM-audio</li>
|
| 107 |
+
|
| 108 |
+
<li>Sign up for our <a href="https://forms.gle/WjrbmFhKkzmEgwvY7"
|
| 109 |
+
target="_blank">closed beta waitlist</a>, if you're interested in testing upcoming features like longer audio files and batch processing.</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
</ul>
|
| 111 |
</div>
|
static/onboarding.html
CHANGED
|
@@ -8,6 +8,6 @@
|
|
| 8 |
</div>
|
| 9 |
</div>
|
| 10 |
</div>
|
| 11 |
-
<a href="https://
|
| 12 |
-
target="_blank" class="link-btn">
|
| 13 |
</div>
|
|
|
|
| 8 |
</div>
|
| 9 |
</div>
|
| 10 |
</div>
|
| 11 |
+
<a href="https://projects.earthspecies.org/naturelm-audio/quick_start.html"
|
| 12 |
+
target="_blank" class="link-btn">Quick Start Guide</a>
|
| 13 |
</div>
|
static/style.css
CHANGED
|
@@ -7,10 +7,16 @@
|
|
| 7 |
margin: 2px 6px;
|
| 8 |
align-self: center;
|
| 9 |
}
|
|
|
|
|
|
|
|
|
|
| 10 |
#spectrogram-plot {
|
| 11 |
padding: 12px;
|
| 12 |
margin: 12px;
|
| 13 |
}
|
|
|
|
|
|
|
|
|
|
| 14 |
.banner {
|
| 15 |
background: white;
|
| 16 |
border: 1px solid #e5e7eb;
|
|
@@ -35,7 +41,7 @@
|
|
| 35 |
color: #6b7280;
|
| 36 |
line-height: 1.4;
|
| 37 |
}
|
| 38 |
-
.link-btn {
|
| 39 |
padding: 6px 12px;
|
| 40 |
border-radius: 6px;
|
| 41 |
font-size: 13px;
|
|
@@ -48,7 +54,7 @@
|
|
| 48 |
display: inline-block;
|
| 49 |
transition: background 0.2s ease;
|
| 50 |
}
|
| 51 |
-
.link-btn:hover {
|
| 52 |
background: #2563eb;
|
| 53 |
}
|
| 54 |
|
|
|
|
| 7 |
margin: 2px 6px;
|
| 8 |
align-self: center;
|
| 9 |
}
|
| 10 |
+
#chatbot {
|
| 11 |
+
border-style: none !important;
|
| 12 |
+
}
|
| 13 |
#spectrogram-plot {
|
| 14 |
padding: 12px;
|
| 15 |
margin: 12px;
|
| 16 |
}
|
| 17 |
+
.gradio-style a {
|
| 18 |
+
padding: 0;
|
| 19 |
+
}
|
| 20 |
.banner {
|
| 21 |
background: white;
|
| 22 |
border: 1px solid #e5e7eb;
|
|
|
|
| 41 |
color: #6b7280;
|
| 42 |
line-height: 1.4;
|
| 43 |
}
|
| 44 |
+
.gradio-style .link-btn {
|
| 45 |
padding: 6px 12px;
|
| 46 |
border-radius: 6px;
|
| 47 |
font-size: 13px;
|
|
|
|
| 54 |
display: inline-block;
|
| 55 |
transition: background 0.2s ease;
|
| 56 |
}
|
| 57 |
+
.gradio-style .link-btn:hover {
|
| 58 |
background: #2563eb;
|
| 59 |
}
|
| 60 |
|