tmp_rc

File size: 9,814 Bytes

d1b63e9

{
    "asr": [
        "<Speech><SpeechHere></Speech> Can you transcribe the speech into a written format?",
        "<Speech><SpeechHere></Speech> Listen to the speech and write down its content.",
        "<Speech><SpeechHere></Speech> What is the content of the speech you heard?",
        "<Speech><SpeechHere></Speech> Please write down the transcription of the speech.",
        "<Speech><SpeechHere></Speech> Please transcribe the speech into a written format.",
        "<Speech><SpeechHere></Speech> Write down the content of the speech you heard.",
        "<Speech><SpeechHere></Speech> Can you write down the transcription of the speech?",
        "<Speech><SpeechHere></Speech> Put the speech into a written format.",
        "<Speech><SpeechHere></Speech> Please help me to transcribe the speech into a written format.",
        "<Speech><SpeechHere></Speech> Recognize the content of the speech you heard.",
        "<Speech><SpeechHere></Speech> Can you recognize what you heard in the speech?",
        "<Speech><SpeechHere></Speech> Recognize the speech and write it down in a written format.",
        "<Speech><SpeechHere></Speech> Listen to the speech and recognize its content."
    ],
    "audiocaption": [
        "<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
        "<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
        "<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
        "<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
        "<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
        "<Speech><SpeechHere></Speech> What does this audio describe?",
        "<Speech><SpeechHere></Speech> Please describe the audio."
    ],
    "audiocaption_v2": [
        "<Speech><SpeechHere></Speech> Please write down what you hear in the audio."
    ],
    "QA": [
        "<Speech><SpeechHere></Speech> {}"
    ],
    "inference_QA": [
        "<Speech><SpeechHere></Speech> {}"
    ],
    "gender_QA": [
        "<Speech><SpeechHere></Speech> {}"
    ],
    "gender_recognition": [
        "<Speech><SpeechHere></Speech> What is the gender of the speaker?",
        "<Speech><SpeechHere></Speech> Use one word to describe the speaker's gender.",
        "<Speech><SpeechHere></Speech> Describe the speaker's gender.",
        "<Speech><SpeechHere></Speech> Can you accurately identify the gender of the speaker?",
        "<Speech><SpeechHere></Speech> Can you distinguish the gender of the speaker?",
        "<Speech><SpeechHere></Speech> Describe the gender of the person speaking.",
        "<Speech><SpeechHere></Speech> What is the speaker's gender based on the audio?",
        "<Speech><SpeechHere></Speech> Tell me about the gender of the person you hear.",
        "<Speech><SpeechHere></Speech> Is the speaker male or female?"
    ],
    "emotion_recognition": [
        "<Speech><SpeechHere></Speech> Describe the emotion of the speaker in one word.",
        "<Speech><SpeechHere></Speech> Use one word to describe the speaker's emotion."
    ],
    "emotion_recognitions": [
        "<Speech><SpeechHere></Speech> Describe the emotion of the speaker in one word.",
        "<Speech><SpeechHere></Speech> Use one word to describe the speaker's emotion."
    ],
    "music_caption": [
        "<Speech><SpeechHere></Speech> Listen to this music clip and describe the music.",
        "<Speech><SpeechHere></Speech> Please describe the music.",
        "<Speech><SpeechHere></Speech> Provide a description of the music.",
        "<Speech><SpeechHere></Speech> Analyze the music in this clip and offer a description.",
        "<Speech><SpeechHere></Speech> Give me a description of the music in this clip."
    ],
    "lyric_recognize": [
        "<Speech><SpeechHere></Speech> Listen to the music and write down music's lyric.",
        "<Speech><SpeechHere></Speech> What is the lyric of the music you heard?",
        "<Speech><SpeechHere></Speech> Please write down the lyric of the music.",
        "<Speech><SpeechHere></Speech> Write down the lyric of the music you heard.",
        "<Speech><SpeechHere></Speech> Recognize the lyric of the music you heard.",
        "<Speech><SpeechHere></Speech> Recognize the music and give me the lyric."
    ],
    "speaker_verification": [
        "<Speech><SpeechHere></Speech> Are the two people speaking successively the same person? Answer yes or no.",
        "<Speech><SpeechHere></Speech> Do you only hear the same person talking? Answer yes or no.",
        "<Speech><SpeechHere></Speech> Is only one person speaking in the audio? Answer yes or no."
    ],
    "music_type_classification": [
        "<Speech><SpeechHere></Speech> Describe the music type of the music in one word. Choose from ['reggae', 'disco', 'rock', 'metal', 'blues', 'classical', 'country', 'hiphop', 'jazz', 'pop']"
    ],
    "VocalSound_classification": [
        "<Speech><SpeechHere></Speech> Listen to the audio and describe the vocal sound type. Choose from [laughter, sighs, coughs, throat clearing, sneezes, sniffs]."
    ],
    "zerospeech_recognition": [
        "<Speech><SpeechHere></Speech> Listen to the audio and judge whether the audio is mute or not. Answer Yes or No."
    ],
    "speech_caption": [
        "<Speech><SpeechHere></Speech> Listen to the speech and describe its timbre, speaking speed, and style.",
        "<Speech><SpeechHere></Speech> Listen to this speech and provide its caption (timbre, speaking speed, style, and so on).",
        "<Speech><SpeechHere></Speech> Describe the following speech in a caption (timbre, speaking speed, style, and so on)."
    ],
    "music_analysis": [
        "<Speech><SpeechHere></Speech> Listen carefully to the song. Identify: (1) main genre, (2) vocal gender & timbre, (3) instrumentation, (4) lyric, (5) mood progression.",
        "<Speech><SpeechHere></Speech> Please analyse the audio as a musical piece. Report the singer’s voice type, dominant instruments, tempo/BPM, key or mode, and summarise the lyric content.",    
        "<Speech><SpeechHere></Speech> After listening, give a detailed breakdown: lyric, style (e.g., pop, jazz), vocalist identity clues (gender, age range), production quality, emotional tone.",
        "<Speech><SpeechHere></Speech> Evaluate this music clip. Describe the genre, arrangement (rhythm section, melodic instruments), vocal technique, recording ambience, and provide the lyric summary."
    ],
    "speech_analysis": [
        "<Speech><SpeechHere></Speech> Listen to the speech, and tell us the speech's transcription, speaker information, emotion information, and so on.",
        "<Speech><SpeechHere></Speech> Listen to the spoken audio. Output: full transcription, speaker count & gender, speech rate (fast/medium/slow), emotional tone...",
        "<Speech><SpeechHere></Speech> Analyse this speech clip: transcribe it, classify emotion (e.g., calm, excited), describe prosody features (pauses, emphasis), and estimate speaker gender."
    ],
    "audio_caption": [
        "<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
        "<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
        "<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
        "<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
        "<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
        "<Speech><SpeechHere></Speech> What does this audio describe?",
        "<Speech><SpeechHere></Speech> Please describe the audio."
    ],
    "audio caption": [
        "<Speech><SpeechHere></Speech> Listen to this audio clip and provide its caption.",
        "<Speech><SpeechHere></Speech> Describe the following audio in a caption.",
        "<Speech><SpeechHere></Speech> Based on the sound you hear, create a caption for this audio.",
        "<Speech><SpeechHere></Speech> Can you describe the scene or event depicted in this audio?",
        "<Speech><SpeechHere></Speech> Could you summarise what's happening in this audio?",
        "<Speech><SpeechHere></Speech> What does this audio describe?",
        "<Speech><SpeechHere></Speech> Please describe the audio."
    ],
    "speech_reasoning": [
        "<Speech><SpeechHere></Speech> Provide a multi-level analysis of this speech: Identify basic audio properties, Analyse phonetic structure and prosody, Transcribe the speech and extract key semantics."
    ],
    "sound_reasoning": [
        "<Speech><SpeechHere></Speech> Provide a fine-grained reasoning analysis for this sound: Level 1: Coarse / Surface; Level 2: Event / Acoustic; Level 3: Semantic."
    ],
    "music_reasoning": [
        "<Speech><SpeechHere></Speech> Provide a fine-grained understanding for this music: e.g. Audio quality, Genre family, Main instruments / presence of vocals, Musical / Acoustic, Semantic / Expressive"
    ],
    "music_3level": [
        "<Speech><SpeechHere></Speech> Perform a three-level music analysis: Level1 (Audio quality, Genre, Instruments/Vocals), Level2 (Tempo, Key, Meter, Melody, Timbre, Dynamics), Level3 (Mood, Lyrics, Techniques, Hook, Structure)"
    ],
    "sound_3level": [
        "<Speech><SpeechHere></Speech> Conduct three-level soundscape analysis: Level1 (Scene type, Spectral band), Level2 (Events, Patterns, Acoustics, Localization), Level3 (Sources, Emotion, Interactions)"
    ],
    "speech_3level": [
        "<Speech><SpeechHere></Speech> Conduct three-level speech analysis: Level1 (Audio quality, Speaker gender, Language), Level2 (Prosody, Rhythm, Pronunciation), Level3 (Transcription, Semantics, Emotion)"
    ]
}