stivenDR14
feat: Introduce audio captioning and categorization model with ONNX/ExecuTorch hybrid inference and category embedding generation.
5c8d855
{
"categories": [
{
"id": "dog_bark",
"label": "bark of a dog",
"description": "dog barking sound, woofing, growling or howling from a canine"
},
{
"id": "doorbell",
"label": "doorbell ringing",
"description": "ding-dong, chime or alert sound of a doorbell at a house entrance"
},
{
"id": "baby_crying",
"label": "baby crying",
"description": "infant crying, wailing, sobbing or distressed baby sounds"
},
{
"id": "glass_breaking",
"label": "glass breaking",
"description": "sound of glass shattering, breaking or crashing"
},
{
"id": "car_horn",
"label": "car horn",
"description": "vehicle horn honking, beeping or car alert sound"
},
{
"id": "alarm_clock",
"label": "alarm clock",
"description": "alarm clock ringing, beeping or buzzing wake-up sound"
},
{
"id": "fire_alarm",
"label": "fire alarm",
"description": "fire alarm siren, emergency alert or smoke detector beeping"
},
{
"id": "door_closing",
"label": "window or door closing",
"description": "sound of door or window shutting, closing or slamming"
},
{
"id": "door_opening",
"label": "window or door opening",
"description": "sound of door or window opening, creaking or unlocking"
},
{
"id": "stagger_swipe",
"label": "staggering or swiping",
"description": "staggering footsteps, stumbling or swiping movement sound"
}
]
}