tmp_rc / prompt.json
Dongchao's picture
Upload v0.1
d1b63e9 verified
{
"asr": [
"<Speech><SpeechHere></Speech> Can you transcribe the speech into a written format?",
"<Speech><SpeechHere></Speech> Listen to the speech and write down its content.",
"<Speech><SpeechHere></Speech> What is the content of the speech you heard?",
"<Speech><SpeechHere></Speech> Please write down the transcription of the speech.",
"<Speech><SpeechHere></Speech> Please transcribe the speech into a written format.",
"<Speech><SpeechHere></Speech> Write down the content of the speech you heard.",
"<Speech><SpeechHere></Speech> Can you write down the transcription of the speech?",
"<Speech><SpeechHere></Speech> Put the speech into a written format.",
"<Speech><SpeechHere></Speech> Please help me to transcribe the speech into a written format.",
"<Speech><SpeechHere></Speech> Recognize the content of the speech you heard.",
"<Speech><SpeechHere></Speech> Can you recognize what you heard in the speech?",
"<Speech><SpeechHere></Speech> Recognize the speech and write it down in a written format.",
"<Speech><SpeechHere></Speech> Listen to the speech and recognize its content."
],
"audiocaption": [
"<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
"<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
"<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
"<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
"<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
"<Speech><SpeechHere></Speech> What does this audio describe?",
"<Speech><SpeechHere></Speech> Please describe the audio."
],
"audiocaption_v2": [
"<Speech><SpeechHere></Speech> Please write down what you hear in the audio."
],
"QA": [
"<Speech><SpeechHere></Speech> {}"
],
"inference_QA": [
"<Speech><SpeechHere></Speech> {}"
],
"gender_QA": [
"<Speech><SpeechHere></Speech> {}"
],
"gender_recognition": [
"<Speech><SpeechHere></Speech> What is the gender of the speaker?",
"<Speech><SpeechHere></Speech> Use one word to describe the speaker's gender.",
"<Speech><SpeechHere></Speech> Describe the speaker's gender.",
"<Speech><SpeechHere></Speech> Can you accurately identify the gender of the speaker?",
"<Speech><SpeechHere></Speech> Can you distinguish the gender of the speaker?",
"<Speech><SpeechHere></Speech> Describe the gender of the person speaking.",
"<Speech><SpeechHere></Speech> What is the speaker's gender based on the audio?",
"<Speech><SpeechHere></Speech> Tell me about the gender of the person you hear.",
"<Speech><SpeechHere></Speech> Is the speaker male or female?"
],
"emotion_recognition": [
"<Speech><SpeechHere></Speech> Describe the emotion of the speaker in one word.",
"<Speech><SpeechHere></Speech> Use one word to describe the speaker's emotion."
],
"emotion_recognitions": [
"<Speech><SpeechHere></Speech> Describe the emotion of the speaker in one word.",
"<Speech><SpeechHere></Speech> Use one word to describe the speaker's emotion."
],
"music_caption": [
"<Speech><SpeechHere></Speech> Listen to this music clip and describe the music.",
"<Speech><SpeechHere></Speech> Please describe the music.",
"<Speech><SpeechHere></Speech> Provide a description of the music.",
"<Speech><SpeechHere></Speech> Analyze the music in this clip and offer a description.",
"<Speech><SpeechHere></Speech> Give me a description of the music in this clip."
],
"lyric_recognize": [
"<Speech><SpeechHere></Speech> Listen to the music and write down music's lyric.",
"<Speech><SpeechHere></Speech> What is the lyric of the music you heard?",
"<Speech><SpeechHere></Speech> Please write down the lyric of the music.",
"<Speech><SpeechHere></Speech> Write down the lyric of the music you heard.",
"<Speech><SpeechHere></Speech> Recognize the lyric of the music you heard.",
"<Speech><SpeechHere></Speech> Recognize the music and give me the lyric."
],
"speaker_verification": [
"<Speech><SpeechHere></Speech> Are the two people speaking successively the same person? Answer yes or no.",
"<Speech><SpeechHere></Speech> Do you only hear the same person talking? Answer yes or no.",
"<Speech><SpeechHere></Speech> Is only one person speaking in the audio? Answer yes or no."
],
"music_type_classification": [
"<Speech><SpeechHere></Speech> Describe the music type of the music in one word. Choose from ['reggae', 'disco', 'rock', 'metal', 'blues', 'classical', 'country', 'hiphop', 'jazz', 'pop']"
],
"VocalSound_classification": [
"<Speech><SpeechHere></Speech> Listen to the audio, and describe the vocal sound type. Choose from [laughter, sighs, coughs, throat clearing, sneezes, sniffs]."
],
"zerospeech_recognition": [
"<Speech><SpeechHere></Speech> Listen to the audio and judge whether the audio is mute or not. Answer Yes or No"
],
"speech_caption": [
"<Speech><SpeechHere></Speech> Listen to the speech and describe its timbre, speaking speed, and style.",
"<Speech><SpeechHere></Speech> Listen to this speech and provide its caption (timbre, speaking speed, style, and so on).",
"<Speech><SpeechHere></Speech> Describe the following speech in a caption (timbre, speaking speed, style, and so on)."
],
"music_analysis": [
"<Speech><SpeechHere></Speech> Listen carefully to the song. Identify: (1) main genre, (2) vocal gender & timbre, (3) instrumentation, (4) lyric, (5) mood progression.",
"<Speech><SpeechHere></Speech> Please analyse the audio as a musical piece. Report the singer’s voice type, dominant instruments, tempo/BPM, key or mode, and summarise the lyric content.",
"<Speech><SpeechHere></Speech> After listening, give a detailed breakdown: lyric, style (e.g., pop, jazz), vocalist identity clues (gender, age range), production quality, emotional tone.",
"<Speech><SpeechHere></Speech> Evaluate this music clip. Describe the genre, arrangement (rhythm section, melodic instruments), vocal technique, recording ambience, and provide the lyric summary."
],
"speech_analysis": [
"<Speech><SpeechHere></Speech> Listen to the speech, and tell us the speech's transcription, speaker information, emotion information, and so on.",
"<Speech><SpeechHere></Speech> Listen to the spoken audio. Output: full transcription, speaker count & gender, speech rate (fast/medium/slow), emotional tone...",
"<Speech><SpeechHere></Speech> Analyse this speech clip: transcribe it, classify emotion (e.g., calm, excited), describe prosody features (pauses, emphasis), and estimate speaker gender."
],
"audio_caption": [
"<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
"<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
"<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
"<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
"<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
"<Speech><SpeechHere></Speech> What does this audio describe?",
"<Speech><SpeechHere></Speech> Please describe the audio."
],
"audio caption": [
"<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
"<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
"<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
"<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
"<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
"<Speech><SpeechHere></Speech> What does this audio describe?",
"<Speech><SpeechHere></Speech> Please describe the audio."
],
"speech_reasoning": [
"<Speech><SpeechHere></Speech> Provide a multi-level analysis of this speech: Identify basic audio properties, Analyse phonetic structure and prosody, Transcribe the speech and extract key semantics."
],
"sound_reasoning": [
"<Speech><SpeechHere></Speech> Provide a fine-grained reasoning analysis for this sound: Level 1: Coarse / Surface; Level 2: Event / Acoustic; Level 3: Semantic"
],
"music_reasoning": [
"<Speech><SpeechHere></Speech> Provide a fine-grained understanding for this music: e.g. Audio quality, Genre family, Main instruments / presence of vocals, Musical / Acoustic, Semantic / Expressive"
],
"music_3level": [
"<Speech><SpeechHere></Speech> Perform a three-level music analysis: Level1 (Audio quality, Genre, Instruments/Vocals), Level2 (Tempo, Key, Meter, Melody, Timbre, Dynamics), Level3 (Mood, Lyrics, Techniques, Hook, Structure)"
],
"sound_3level": [
"<Speech><SpeechHere></Speech> Conduct three-level soundscape analysis: Level1 (Scene type, Spectral band), Level2 (Events, Patterns, Acoustics, Localization), Level3 (Sources, Emotion, Interactions)"
],
"speech_3level": [
"<Speech><SpeechHere></Speech> Conduct three-level speech analysis: Level1 (Audio quality, Speaker gender, Language), Level2 (Prosody, Rhythm, Pronunciation), Level3 (Transcription, Semantics, Emotion)"
]
}