Spaces:
Running
on
Zero
Running
on
Zero
Updating app.py to be up to date with UI changes
#4
by
dianekim
- opened
app.py
CHANGED
|
@@ -287,7 +287,7 @@ def main(
|
|
| 287 |
vireo_audio = assets_dir / "yell-YELLWarblingVireoMammoth20150614T29ms.mp3"
|
| 288 |
|
| 289 |
examples = {
|
| 290 |
-
"
|
| 291 |
str(laz_audio),
|
| 292 |
"What is the common name for the focal species in the audio?",
|
| 293 |
],
|
|
@@ -299,17 +299,37 @@ def main(
|
|
| 299 |
str(robin_audio),
|
| 300 |
"Caption the audio, using the scientific name for any animal species.",
|
| 301 |
],
|
| 302 |
-
"Caption the audio (Warbling Vireo)": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
}
|
| 304 |
|
| 305 |
with gr.Blocks(
|
| 306 |
-
title="NatureLM-audio",
|
| 307 |
theme=gr.themes.Base(
|
| 308 |
primary_hue="blue", font=[gr.themes.GoogleFont("Noto Sans")]
|
| 309 |
),
|
|
|
|
| 310 |
) as app:
|
| 311 |
header = gr.HTML("""
|
| 312 |
-
<div style="display: flex; align-items: center; gap: 12px;"><h2 style="margin: 0;">NatureLM-audio<span style="font-size: 0.55em; color: #28a745; background: #e6f4ea; padding: 2px 6px; border-radius: 4px; margin-left: 8px; display: inline-block; vertical-align: top;">BETA</span></h2></div>
|
| 313 |
|
| 314 |
""")
|
| 315 |
|
|
@@ -322,50 +342,25 @@ def main(
|
|
| 322 |
# label="Model Status",
|
| 323 |
# interactive=False,
|
| 324 |
# visible=True,
|
| 325 |
-
# )
|
|
|
|
| 326 |
with gr.Column(visible=True) as onboarding_message:
|
| 327 |
gr.HTML(
|
| 328 |
"""
|
| 329 |
-
<div
|
| 330 |
-
background: transparent;
|
| 331 |
-
border: 1px solid #e5e7eb;
|
| 332 |
-
border-radius: 8px;
|
| 333 |
-
padding: 16px 20px;
|
| 334 |
-
display: flex;
|
| 335 |
-
align-items: center;
|
| 336 |
-
justify-content: space-between;
|
| 337 |
-
margin-bottom: 16px;
|
| 338 |
-
margin-left: 0;
|
| 339 |
-
margin-right: 0;
|
| 340 |
-
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 341 |
-
">
|
| 342 |
<div style="display: flex; padding: 0px; align-items: center; flex: 1;">
|
| 343 |
<div style="font-size: 20px; margin-right: 12px;">👋</div>
|
| 344 |
<div style="flex: 1;">
|
| 345 |
<div style="font-size: 16px; font-weight: 600; color: #374151; margin-bottom: 4px;">Welcome to NatureLM-audio!</div>
|
| 346 |
-
<div style="font-size: 14px; color: #6b7280; line-height: 1.4;">Upload your first audio file below or
|
| 347 |
</div>
|
| 348 |
</div>
|
| 349 |
-
<a href="https://www.earthspecies.org/blog" target="_blank"
|
| 350 |
-
padding: 6px 12px;
|
| 351 |
-
border-radius: 6px;
|
| 352 |
-
font-size: 13px;
|
| 353 |
-
font-weight: 500;
|
| 354 |
-
cursor: pointer;
|
| 355 |
-
border: none;
|
| 356 |
-
background: #3b82f6;
|
| 357 |
-
color: white;
|
| 358 |
-
text-decoration: none;
|
| 359 |
-
display: inline-block;
|
| 360 |
-
transition: background 0.2s ease;
|
| 361 |
-
"
|
| 362 |
-
onmouseover="this.style.background='#2563eb';"
|
| 363 |
-
onmouseout="this.style.background='#3b82f6';"
|
| 364 |
-
>View Tutorial</a>
|
| 365 |
</div>
|
| 366 |
""",
|
| 367 |
padding=False,
|
| 368 |
-
)
|
|
|
|
| 369 |
with gr.Column(visible=True) as upload_section:
|
| 370 |
audio_input = gr.Audio(
|
| 371 |
type="filepath",
|
|
@@ -373,17 +368,17 @@ def main(
|
|
| 373 |
interactive=True,
|
| 374 |
sources=["upload"],
|
| 375 |
)
|
| 376 |
-
with gr.
|
| 377 |
plotter = gr.Plot(
|
| 378 |
get_spectrogram(torch.zeros(1, SAMPLE_RATE)),
|
| 379 |
label="Spectrogram",
|
| 380 |
visible=False,
|
| 381 |
elem_id="spectrogram-plot",
|
| 382 |
)
|
|
|
|
| 383 |
task_dropdown = gr.Dropdown(
|
| 384 |
[
|
| 385 |
-
"What are the common names for the species in the audio, if any?",
|
| 386 |
-
"Caption the audio.",
|
| 387 |
"Caption the audio, using the scientific name for any animal species.",
|
| 388 |
"Caption the audio, using the common name for any animal species.",
|
| 389 |
"What is the scientific name for the focal species in the audio?",
|
|
@@ -394,13 +389,15 @@ def main(
|
|
| 394 |
"What call types are heard from the focal species in the audio?",
|
| 395 |
"What is the life stage of the focal species in the audio?",
|
| 396 |
],
|
| 397 |
-
label="Pre-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
value=None,
|
| 401 |
-
)
|
|
|
|
| 402 |
chatbot = gr.Chatbot(
|
| 403 |
elem_id="chatbot",
|
|
|
|
| 404 |
type="messages",
|
| 405 |
label="Chat",
|
| 406 |
render_markdown=False,
|
|
@@ -413,130 +410,272 @@ def main(
|
|
| 413 |
"other",
|
| 414 |
],
|
| 415 |
resizeable=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
)
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
inputs=[task_dropdown],
|
| 454 |
-
outputs=[chat_input],
|
| 455 |
-
)
|
| 456 |
-
|
| 457 |
-
clear_button = gr.ClearButton(
|
| 458 |
-
components=[chatbot, chat_input, audio_input, plotter],
|
| 459 |
-
visible=False,
|
| 460 |
-
)
|
| 461 |
-
|
| 462 |
-
def start_chat_interface(audio_path):
|
| 463 |
-
return (
|
| 464 |
-
gr.update(visible=False), # hide onboarding message
|
| 465 |
-
gr.update(visible=True), # show upload section
|
| 466 |
-
gr.update(visible=True), # show chat box
|
| 467 |
-
gr.update(visible=True), # show plotter
|
| 468 |
-
)
|
| 469 |
-
|
| 470 |
-
# When audio added, set spectrogram
|
| 471 |
-
audio_input.change(
|
| 472 |
-
fn=start_chat_interface,
|
| 473 |
-
inputs=[audio_input],
|
| 474 |
-
outputs=[onboarding_message, upload_section, chat, plotter],
|
| 475 |
-
).then(
|
| 476 |
-
fn=update_current_audio,
|
| 477 |
-
inputs=[audio_input],
|
| 478 |
-
outputs=[],
|
| 479 |
-
).then(
|
| 480 |
-
fn=make_spectrogram_figure,
|
| 481 |
-
inputs=[audio_input],
|
| 482 |
-
outputs=[plotter],
|
| 483 |
-
)
|
| 484 |
|
| 485 |
-
# When submit clicked first:
|
| 486 |
-
# 1. Validate and add user query to chat history
|
| 487 |
-
# 2. Get response from model
|
| 488 |
-
# 3. Clear the chat input box
|
| 489 |
-
# 4. Show clear button
|
| 490 |
-
chat_input.submit(
|
| 491 |
-
validate_and_submit,
|
| 492 |
-
inputs=[chatbot, chat_input],
|
| 493 |
-
outputs=[chatbot, chat_input],
|
| 494 |
-
).then(
|
| 495 |
-
get_response,
|
| 496 |
-
inputs=[chatbot, audio_input],
|
| 497 |
-
outputs=[chatbot],
|
| 498 |
-
).then(
|
| 499 |
-
lambda: gr.update(visible=True), # Show clear button
|
| 500 |
-
None,
|
| 501 |
-
[clear_button],
|
| 502 |
)
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
-
|
| 509 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
[audio_input, chat_input],
|
| 514 |
-
[audio_input, chat_input],
|
| 515 |
-
example_labels=list(examples.keys()),
|
| 516 |
-
examples_per_page=20,
|
| 517 |
)
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
gr.
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
"""
|
| 541 |
|
| 542 |
return app
|
|
|
|
| 287 |
vireo_audio = assets_dir / "yell-YELLWarblingVireoMammoth20150614T29ms.mp3"
|
| 288 |
|
| 289 |
examples = {
|
| 290 |
+
"Identifying Focal Species (Lazuli Bunting)": [
|
| 291 |
str(laz_audio),
|
| 292 |
"What is the common name for the focal species in the audio?",
|
| 293 |
],
|
|
|
|
| 299 |
str(robin_audio),
|
| 300 |
"Caption the audio, using the scientific name for any animal species.",
|
| 301 |
],
|
| 302 |
+
"Caption the audio (Warbling Vireo)": [
|
| 303 |
+
str(vireo_audio),
|
| 304 |
+
"Caption the audio."
|
| 305 |
+
],
|
| 306 |
+
"Speaker Count (Lazuli Bunting)": [
|
| 307 |
+
str(laz_audio),
|
| 308 |
+
"How many individuals are vocalizing in this audio?",
|
| 309 |
+
],
|
| 310 |
+
"Caption the audio (Green Tree Frog)": [
|
| 311 |
+
str(frog_audio),
|
| 312 |
+
"Caption the audio, using the common name for any animal species.",
|
| 313 |
+
],
|
| 314 |
+
"Caption the audio (American Robin)": [
|
| 315 |
+
str(robin_audio),
|
| 316 |
+
"Caption the audio, using the scientific name for any animal species.",
|
| 317 |
+
],
|
| 318 |
+
"Caption the audio (Warbling Vireo)": [
|
| 319 |
+
str(vireo_audio),
|
| 320 |
+
"Caption the audio."
|
| 321 |
+
],
|
| 322 |
}
|
| 323 |
|
| 324 |
with gr.Blocks(
|
| 325 |
+
title="NatureLM-audio",
|
| 326 |
theme=gr.themes.Base(
|
| 327 |
primary_hue="blue", font=[gr.themes.GoogleFont("Noto Sans")]
|
| 328 |
),
|
| 329 |
+
css="styles.css",
|
| 330 |
) as app:
|
| 331 |
header = gr.HTML("""
|
| 332 |
+
<div style="display: flex; align-items: center; gap: 12px;"><img src="https://huggingface.co/spaces/EarthSpeciesProject/NatureLM-Audio/resolve/main/assets/esp_logo.png" style="height: 40px; width: auto;"><h2 style="margin: 0;">NatureLM-audio<span style="font-size: 0.55em; color: #28a745; background: #e6f4ea; padding: 2px 6px; border-radius: 4px; margin-left: 8px; display: inline-block; vertical-align: top;">BETA</span></h2></div>
|
| 333 |
|
| 334 |
""")
|
| 335 |
|
|
|
|
| 342 |
# label="Model Status",
|
| 343 |
# interactive=False,
|
| 344 |
# visible=True,
|
| 345 |
+
# )
|
| 346 |
+
|
| 347 |
with gr.Column(visible=True) as onboarding_message:
|
| 348 |
gr.HTML(
|
| 349 |
"""
|
| 350 |
+
<div class="banner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
<div style="display: flex; padding: 0px; align-items: center; flex: 1;">
|
| 352 |
<div style="font-size: 20px; margin-right: 12px;">👋</div>
|
| 353 |
<div style="flex: 1;">
|
| 354 |
<div style="font-size: 16px; font-weight: 600; color: #374151; margin-bottom: 4px;">Welcome to NatureLM-audio!</div>
|
| 355 |
+
<div style="font-size: 14px; color: #6b7280; line-height: 1.4;">Upload your first audio file below or select a pre-loaded example below.</div>
|
| 356 |
</div>
|
| 357 |
</div>
|
| 358 |
+
<a href="https://www.earthspecies.org/blog" target="_blank" class="link-btn">View Tutorial</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
</div>
|
| 360 |
""",
|
| 361 |
padding=False,
|
| 362 |
+
)
|
| 363 |
+
|
| 364 |
with gr.Column(visible=True) as upload_section:
|
| 365 |
audio_input = gr.Audio(
|
| 366 |
type="filepath",
|
|
|
|
| 368 |
interactive=True,
|
| 369 |
sources=["upload"],
|
| 370 |
)
|
| 371 |
+
with gr.Accordion(label="Toggle Spectrogram", open=False, visible=False) as spectrogram:
|
| 372 |
plotter = gr.Plot(
|
| 373 |
get_spectrogram(torch.zeros(1, SAMPLE_RATE)),
|
| 374 |
label="Spectrogram",
|
| 375 |
visible=False,
|
| 376 |
elem_id="spectrogram-plot",
|
| 377 |
)
|
| 378 |
+
with gr.Column(visible=False) as tasks:
|
| 379 |
task_dropdown = gr.Dropdown(
|
| 380 |
[
|
| 381 |
+
"What are the common names for the species in the audio, if any?",
|
|
|
|
| 382 |
"Caption the audio, using the scientific name for any animal species.",
|
| 383 |
"Caption the audio, using the common name for any animal species.",
|
| 384 |
"What is the scientific name for the focal species in the audio?",
|
|
|
|
| 389 |
"What call types are heard from the focal species in the audio?",
|
| 390 |
"What is the life stage of the focal species in the audio?",
|
| 391 |
],
|
| 392 |
+
label="Pre-Loaded Tasks",
|
| 393 |
+
info="Select a task, or write your own prompt below.",
|
| 394 |
+
allow_custom_value=False,
|
| 395 |
+
value=None,
|
| 396 |
+
)
|
| 397 |
+
with gr.Group(visible=False) as chat:
|
| 398 |
chatbot = gr.Chatbot(
|
| 399 |
elem_id="chatbot",
|
| 400 |
+
height=250,
|
| 401 |
type="messages",
|
| 402 |
label="Chat",
|
| 403 |
render_markdown=False,
|
|
|
|
| 410 |
"other",
|
| 411 |
],
|
| 412 |
resizeable=True,
|
| 413 |
+
)
|
| 414 |
+
with gr.Column() as text:
|
| 415 |
+
chat_input = gr.Textbox(
|
| 416 |
+
placeholder="Type your message and press Enter to send",
|
| 417 |
+
type="text",
|
| 418 |
+
lines=1,
|
| 419 |
+
show_label=False,
|
| 420 |
+
submit_btn="Send",
|
| 421 |
+
container=False,
|
| 422 |
+
autofocus=True,
|
| 423 |
+
elem_id="chat-input",
|
| 424 |
+
)
|
| 425 |
+
|
| 426 |
+
with gr.Column() as examples_section:
|
| 427 |
+
gr.Examples(
|
| 428 |
+
list(examples.values()),
|
| 429 |
+
[audio_input, chat_input],
|
| 430 |
+
[audio_input, chat_input],
|
| 431 |
+
example_labels=list(examples.keys()),
|
| 432 |
+
examples_per_page=20,
|
| 433 |
)
|
| 434 |
+
def validate_and_submit(chatbot_history, chat_input):
    """Validate the typed message and, if usable, append it to the chat.

    Args:
        chatbot_history: Current chat history, passed through to
            ``add_user_query``.
        chat_input: Text currently in the chat textbox.

    Returns:
        A ``(history, textbox_value)`` pair. For an empty or
        whitespace-only message both inputs are returned unchanged after a
        warning is shown; otherwise the history returned by
        ``add_user_query`` is paired with ``""`` so the textbox is cleared.
    """
    has_text = bool(chat_input and chat_input.strip())
    if not has_text:
        # Nothing to send: warn the user and leave history/textbox untouched.
        gr.Warning("Please enter a question or message before sending.")
        return chatbot_history, chat_input

    return add_user_query(chatbot_history, chat_input), ""
|
| 441 |
+
|
| 442 |
+
def update_current_audio(audio_input):
    """Record ``audio_input`` in the module-level ``CURRENT_AUDIO``.

    The global is only rebound when the incoming value differs from the one
    already stored. Nothing is returned.

    NOTE(review): ``CURRENT_AUDIO`` is process-wide state; presumably it is
    shared by every visitor of the app. Confirm whether per-session state is
    needed.
    """
    global CURRENT_AUDIO
    if audio_input == CURRENT_AUDIO:
        # Same audio as before: nothing to update.
        return
    CURRENT_AUDIO = audio_input
|
| 446 |
+
|
| 447 |
+
clear_button = gr.ClearButton(
|
| 448 |
+
components=[chatbot, chat_input, audio_input, plotter],
|
| 449 |
+
visible=False,
|
| 450 |
+
)
|
| 451 |
+
# if task_dropdown is selected, set chat_input to that value
|
| 452 |
+
def set_query(task):
    """Build the textbox update for a task-dropdown selection.

    Args:
        task: The selected dropdown entry; may be falsy when nothing is
            selected.

    Returns:
        A ``gr.update`` whose value is ``task`` when it is truthy and the
        empty string otherwise.
    """
    return gr.update(value=task if task else "")
|
| 456 |
+
|
| 457 |
+
task_dropdown.select(
|
| 458 |
+
fn=set_query,
|
| 459 |
+
inputs=[task_dropdown],
|
| 460 |
+
outputs=[chat_input],
|
| 461 |
+
)
|
| 462 |
+
def start_chat_interface(audio_path):
    """Return the visibility updates applied when the audio input changes.

    The result is a 6-tuple consumed positionally by the
    ``audio_input.change`` listener, in this order:

    1. onboarding message -> hidden
    2. upload section     -> shown
    3. spectrogram        -> shown
    4. tasks              -> shown
    5. chat box           -> shown
    6. plotter            -> shown

    Args:
        audio_path: Value of the audio component. It is not inspected; the
            same updates are returned whatever it is.
    """
    onboarding_hidden = gr.update(visible=False)
    # One independent update object per component that becomes visible.
    revealed = tuple(gr.update(visible=True) for _ in range(5))
    return (onboarding_hidden,) + revealed
|
| 472 |
|
| 473 |
+
# When audio added, set spectrogram
|
| 474 |
+
audio_input.change(
|
| 475 |
+
fn=start_chat_interface,
|
| 476 |
+
inputs=[audio_input],
|
| 477 |
+
outputs=[onboarding_message, upload_section, spectrogram, tasks, chat, plotter],
|
| 478 |
+
).then(
|
| 479 |
+
fn=update_current_audio,
|
| 480 |
+
inputs=[audio_input],
|
| 481 |
+
outputs=[],
|
| 482 |
+
).then(
|
| 483 |
+
fn=make_spectrogram_figure,
|
| 484 |
+
inputs=[audio_input],
|
| 485 |
+
outputs=[plotter],
|
| 486 |
+
)
|
| 487 |
|
| 488 |
+
# When submit clicked first:
|
| 489 |
+
# 1. Validate and add user query to chat history
|
| 490 |
+
# 2. Get response from model
|
| 491 |
+
# 3. Clear the chat input box
|
| 492 |
+
# 4. Show clear button
|
| 493 |
+
chat_input.submit(
|
| 494 |
+
validate_and_submit,
|
| 495 |
+
inputs=[chatbot, chat_input],
|
| 496 |
+
outputs=[chatbot, chat_input],
|
| 497 |
+
).then(
|
| 498 |
+
get_response,
|
| 499 |
+
inputs=[chatbot, audio_input],
|
| 500 |
+
outputs=[chatbot],
|
| 501 |
+
).then(
|
| 502 |
+
lambda: gr.update(visible=True), # Show clear button
|
| 503 |
+
None,
|
| 504 |
+
[clear_button],
|
| 505 |
+
)
|
| 506 |
|
| 507 |
+
clear_button.click(
|
| 508 |
+
lambda: gr.ClearButton(visible=False), None, [clear_button]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
)
|
| 510 |
+
|
| 511 |
+
with gr.Tab("Sample Library"):
|
| 512 |
+
with gr.Row():
|
| 513 |
+
with gr.Column():
|
| 514 |
+
gr.Markdown("### Download Sample Audio")
|
| 515 |
+
gr.Markdown(
|
| 516 |
+
"""Feel free to explore these sample audio files. To download, click the button in the top-right corner of each audio file, or **Download All**. You can also find a large collection of publicly available animal sounds on
|
| 517 |
+
[Xenocanto](https://xeno-canto.org/explore/taxonomy) and [Watkins Marine Mammal Sound Database](https://whoicf2.whoi.edu/science/B/whalesounds/index.cfm).""")
|
| 518 |
+
samples=[
|
| 519 |
+
("assets/Lazuli_Bunting_yell-YELLLAZB20160625SM303143.m4a", "Lazuli Bunting"),
|
| 520 |
+
("assets/nri-GreenTreeFrogEvergladesNP.mp3", "Green Tree Frog"),
|
| 521 |
+
("assets/Eastern Gray Squirrel - Sciurus carolinensis.wav", "Eastern Gray Squirrel"),
|
| 522 |
+
("assets/Gray Wolf - Canis lupus italicus.m4a", "Gray Wolf"),
|
| 523 |
+
("assets/Humpback Whale - Megaptera novaeangliae.wav", "Humpback Whale"),
|
| 524 |
+
("assets/Walrus - Odobenus rosmarus.wav", "Walrus"),
|
| 525 |
+
]
|
| 526 |
+
for row_i in range(0, len(samples), 3):
|
| 527 |
+
with gr.Row():
|
| 528 |
+
for filepath, label in samples[row_i:row_i+3]:
|
| 529 |
+
with gr.Column():
|
| 530 |
+
gr.Audio(
|
| 531 |
+
filepath,
|
| 532 |
+
label=label,
|
| 533 |
+
type="filepath",
|
| 534 |
+
show_download_button=True
|
| 535 |
+
)
|
| 536 |
+
with gr.Row():
|
| 537 |
+
gr.HTML("""<center>
|
| 538 |
+
<a href="assets/Sample Audio Files NatureLM_audio.zip" download class="download-btn">Download All</a></center>
|
| 539 |
+
"""
|
| 540 |
+
)
|
| 541 |
+
|
| 542 |
+
with gr.Tab("💡 Help"):
|
| 543 |
+
gr.HTML("""
|
| 544 |
+
<div class="guide-section">
|
| 545 |
+
<h3>Getting Started</h3>
|
| 546 |
+
<ol style="margin-top: 12px; padding-left: 20px; color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 547 |
+
<li style="margin-bottom: 8px;"><strong>Upload your audio</strong> - Click the upload area or drag and drop your audio file containing animal vocalizations.</li>
|
| 548 |
+
<li style="margin-bottom: 8px;"><strong>Trim your audio (if needed)</strong> - Try to keep your audio to 10 seconds or less.</li>
|
| 549 |
+
<li style="margin-bottom: 8px;"><strong>View the Spectrogram (optional)</strong> - You can easily view/hide the spectrogram of your audio for closer analysis.</li>
|
| 550 |
+
<li style="margin-bottom: 8px;"><strong>Select a task or write your own</strong> - Select an option from pre-loaded tasks. This will auto-fill the text box with a prompt, so all you have to do is hit Send. Or, type a custom prompt directly into the chat.</li>
|
| 551 |
+
<li style="margin-bottom: 0;"><strong>Send and Analyze Audio</strong> - Press "Send" or type Enter to begin processing your audio. Ask follow-up questions or press "Clear" to start a new conversation.</li>
|
| 552 |
+
</ol>
|
| 553 |
+
<p></p>
|
| 554 |
+
</div>
|
| 555 |
+
|
| 556 |
+
<div class="guide-section">
|
| 557 |
+
<h3>Tips & Tricks</h3>
|
| 558 |
+
<b>Prompting Best Practices</b>
|
| 559 |
+
<ul style="margin-top: 12px; padding-left: 20px; color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 560 |
+
<li>Be specific about what you want to know (e.g., "What species made this call?" vs "Analyze this audio")</li>
|
| 561 |
+
<li>Mention the context if known (geographic area/location, time of day or year, habitat type)</li>
|
| 562 |
+
<li>[TO ADD: examples of classification prompts that do and don't work well]</li>
|
| 563 |
+
</ul>
|
| 564 |
+
<b>Audio Files</b>
|
| 565 |
+
<ul style="margin-top: 12px; padding-left: 20px; color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 566 |
+
<li>Supported formats: .wav, .mp3, .aac, .flac, .ogg, .webm, .midi, .aiff, .wma, .opus, .amr</li>
|
| 567 |
+
<li>If you are uploading an .mp4, please check that it is not an MPEG-4 Movie file. </li>
|
| 568 |
+
<li>For best results, use high-quality recordings with minimal background noise.</li>
|
| 569 |
+
</ul>
|
| 570 |
+
</div>
|
| 571 |
+
|
| 572 |
+
<div class="guide-section">
|
| 573 |
+
<h3>Learn More</h3>
|
| 574 |
+
<ul style="margin-top: 12px; padding-left: 20px; color: #6b7280; font-size: 14px; line-height: 1.6;">
|
| 575 |
+
<li>Read our <a href="https://earthspecies.org/blog" target="_blank">recent blog post</a> with a step-by-step tutorial</li>
|
| 576 |
+
<li>Check out the <a href="https://openreview.net/forum?id=hJVdwBpWjt" target="_blank">published paper</a> for a deeper technical dive on NatureLM-audio.</li>
|
| 577 |
+
<li>Visit the <a href="https://earthspecies.github.io/naturelm-audio-demo/" target="_blank">NatureLM-audio Demo Page</a> for additional context, a demo video, and more examples of the model in action.</li>
|
| 578 |
+
<li>Sign up for our <a href="https://forms.gle/WjrbmFhKkzmEgwvY7" target="_blank">closed beta waitlist</a>, if you’re interested in testing upcoming features like longer audio files and batch processing.</li>
|
| 579 |
+
</ul>
|
| 580 |
+
</div>
|
| 581 |
+
<div class="guide-section">
|
| 582 |
+
<h4>Help us improve the model!</h4>
|
| 583 |
+
<p>Found an issue or have suggestions? Please join us on <a href="https://earthspeciesproject.discourse.group/" target="_blank">Discourse</a> to share any feedback, questions, bug reports, or other ideas. Your input helps make NatureLM-audio better for everyone.</p>
|
| 584 |
+
</div>
|
| 585 |
+
</div>
|
| 586 |
+
""")
|
| 587 |
+
|
| 588 |
+
app.css = """
|
| 589 |
+
#chat-input {
|
| 590 |
+
background: white;
|
| 591 |
+
padding: 10px;
|
| 592 |
+
min-height: 44px;
|
| 593 |
+
display: flex;
|
| 594 |
+
align-items: center;
|
| 595 |
+
}
|
| 596 |
+
#chat-input textarea {
|
| 597 |
+
background: white;
|
| 598 |
+
flex: 1;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
#chat-input .submit-button {
|
| 602 |
+
padding: 10px;
|
| 603 |
+
margin: 2px 6px;
|
| 604 |
+
align-self: center;
|
| 605 |
+
}
|
| 606 |
+
#spectrogram-plot {
|
| 607 |
+
padding: 12px;
|
| 608 |
+
margin: 12px;
|
| 609 |
+
}
|
| 610 |
+
.banner {
|
| 611 |
+
background: transparent;
|
| 612 |
+
border: 1px solid #e5e7eb;
|
| 613 |
+
border-radius: 8px;
|
| 614 |
+
padding: 16px 20px;
|
| 615 |
+
display: flex;
|
| 616 |
+
align-items: center;
|
| 617 |
+
justify-content: space-between;
|
| 618 |
+
margin-bottom: 16px;
|
| 619 |
+
margin-left: 0;
|
| 620 |
+
margin-right: 0;
|
| 621 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 622 |
+
}
|
| 623 |
+
.link-btn {
|
| 624 |
+
padding: 6px 12px;
|
| 625 |
+
border-radius: 6px;
|
| 626 |
+
font-size: 13px;
|
| 627 |
+
font-weight: 500;
|
| 628 |
+
cursor: pointer;
|
| 629 |
+
border: none;
|
| 630 |
+
background: #3b82f6;
|
| 631 |
+
color: white;
|
| 632 |
+
text-decoration: none;
|
| 633 |
+
display: inline-block;
|
| 634 |
+
transition: background 0.2s ease;
|
| 635 |
+
}
|
| 636 |
+
|
| 637 |
+
.link-btn:hover {
|
| 638 |
+
background: #2563eb;
|
| 639 |
+
}
|
| 640 |
+
.download-btn {
|
| 641 |
+
padding: 10px 20px;
|
| 642 |
+
border-radius: 6px;
|
| 643 |
+
font-size: 13px;
|
| 644 |
+
font-weight: 500;
|
| 645 |
+
cursor: pointer;
|
| 646 |
+
border: none;
|
| 647 |
+
background: #3b82f6;
|
| 648 |
+
color: white;
|
| 649 |
+
text-decoration: none;
|
| 650 |
+
display: block;
|
| 651 |
+
text-align: center;
|
| 652 |
+
transition: background 0.2s ease;
|
| 653 |
+
width: 200px;
|
| 654 |
+
box-sizing: border-box;
|
| 655 |
+
}
|
| 656 |
+
|
| 657 |
+
.download-btn:hover {
|
| 658 |
+
background: #2563eb;
|
| 659 |
+
}
|
| 660 |
+
.guide-section {
|
| 661 |
+
margin-bottom: 32px;
|
| 662 |
+
background: white;
|
| 663 |
+
border-radius: 8px;
|
| 664 |
+
padding: 14px;
|
| 665 |
+
border: 1px solid #e5e7eb;
|
| 666 |
+
}
|
| 667 |
+
|
| 668 |
+
.guide-section h3 {
|
| 669 |
+
color: #1f2937;
|
| 670 |
+
margin-top: 4px;
|
| 671 |
+
margin-bottom: 16px;
|
| 672 |
+
border-bottom: 1px solid #e5e7eb;
|
| 673 |
+
padding-bottom: 12px;
|
| 674 |
+
}
|
| 675 |
+
.guide-section h4 {
|
| 676 |
+
color: #1f2937;
|
| 677 |
+
margin-top: 4px;
|
| 678 |
+
}
|
| 679 |
"""
|
| 680 |
|
| 681 |
return app
|