diff --git a/index.html b/index.html
index c1449eaab8b20089c86f96dda74beac664735e2e..23c21010a0e0e6d1478794dcf4bab2f0613e1276 100644
--- a/index.html
+++ b/index.html
@@ -22,7 +22,7 @@
- To evaluate the potential of our dataset, we fine-tune SoulX-Podcast with our dataset and compare it with other models that support the corresponding capabilities. Since our current dataset is primarily Chinese, most demos are presented in Chinese. We also collected about 200 hours of English data using the same construction pipeline and used it for fine-tuning, which produced strong results as well. The audio samples show that our data construction method has strong cross-lingual transferability.
+ This page presents the results obtained by fine-tuning SoulX-Podcast on our dataset. We compare our model with other models that possess related capabilities across multiple tasks, in order to demonstrate the superiority of our dataset. Since our dataset is primarily composed of Chinese data, we mainly present the performance of our model on Chinese speech generation. In addition, we supplement the training data with 100 hours of English audiobook data for fine-tuning, which demonstrates that our dataset construction method transfers across languages.
@@ -124,8 +124,11 @@
item.variants.forEach((variant, index) => {
const context = document.createElement("p");
context.className = "variant-context";
- const label = hasMultipleVariants ? `instruction${index + 1}` : "instruction";
- context.innerHTML = `
${label} ${escapeHtml(variant.context)}`;
+ if (hasMultipleVariants) {
+ context.innerHTML = `
instruction${index + 1} ${escapeHtml(variant.context)}`;
+ } else {
+ context.innerHTML = `
${escapeHtml(variant.context)}`;
+ }
wrapper.appendChild(context);
});
@@ -213,10 +216,11 @@
const row = document.createElement("div");
row.className = "variant-audio-row";
- const label = document.createElement("span");
- label.textContent = hasMultipleVariants ? `instruction${index + 1}` : "instruction";
-
- row.appendChild(label);
+ if (hasMultipleVariants) {
+ const label = document.createElement("span");
+ label.textContent = `instruction${index + 1}`;
+ row.appendChild(label);
+ }
row.appendChild(createAudio(`${taskRoot}/${model}/${variant.output_audio}`));
wrapper.appendChild(row);
});
@@ -291,10 +295,19 @@
links.className = "task-directory-links";
tasks.forEach(({ id, task }) => {
+ const item = document.createElement("div");
+ item.className = "task-directory-item";
+
const link = document.createElement("a");
link.href = `#task-${id}`;
link.textContent = task.name;
- links.appendChild(link);
+
+ const description = document.createElement("p");
+ description.textContent = task.description;
+
+ item.appendChild(link);
+ item.appendChild(description);
+ links.appendChild(item);
});
directory.replaceChildren(title, links);
diff --git a/static/css/index.css b/static/css/index.css
index 2f64b0da260cf83708519ee44af164fabb86ae63..f2f6bc740ec76399f9d343fa465e5fe442170266 100644
--- a/static/css/index.css
+++ b/static/css/index.css
@@ -82,19 +82,32 @@ body {
.task-directory-links {
display: grid;
- gap: 0.45rem;
+ gap: 0.7rem;
+}
+
+.task-directory-item {
+ display: grid;
+ gap: 0.2rem;
}
.task-directory a {
color: var(--link);
text-decoration: none;
line-height: 1.35;
+ font-weight: 700;
}
.task-directory a:hover {
text-decoration: underline;
}
+.task-directory-item p {
+ margin: 0;
+ color: var(--muted);
+ font-size: 0.9rem;
+ line-height: 1.45;
+}
+
.task-list {
display: grid;
gap: 1.6rem;
diff --git a/static/task/emo/task.yaml b/static/task/emo/task.yaml
index fcc9fc2cd65a7531f369085a2c152748a2d2d15f..1725cbdc2335bb19e01096d735943910d871dc18 100644
--- a/static/task/emo/task.yaml
+++ b/static/task/emo/task.yaml
@@ -1,5 +1,5 @@
name: Emotion Control
-description: Single-sentence showcase of 18 emotion labels.
+description: The Emotion Control task is designed to evaluate the model’s controllability under conventional emotion labels. In this task, we expand the set of emotion categories and assess the performance of different models across 18 emotion labels. Notably, our dataset was not explicitly constructed with emotion-labeled data; nevertheless, the results show that our dataset generalizes to emotion-labeled speech generation tasks.
show_reference: false
models:
- CosyVoice3
diff --git a/static/task/mixed-style/CosyVoice2/manifest.jsonl b/static/task/mixed-style/CosyVoice2/manifest.jsonl
index 70ee76b3e561e41c64fd567f0e3f939a3e757f5b..cb90b837d22deee5d9a03199b3b7dcc525941b04 100644
--- a/static/task/mixed-style/CosyVoice2/manifest.jsonl
+++ b/static/task/mixed-style/CosyVoice2/manifest.jsonl
@@ -1,32 +1,20 @@
-{"utt":"utt_001","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
-{"utt":"utt_002","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
-{"utt":"utt_003","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
-{"utt":"utt_004","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
-{"utt":"utt_005","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
-{"utt":"utt_006","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
-{"utt":"utt_007","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
-{"utt":"utt_008","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
-{"utt":"utt_0001","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"}]}
-{"utt":"utt_0002","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
-{"utt":"utt_0003","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"}]}
-{"utt":"utt_0004","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
-{"utt":"utt_0005","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"}]}
-{"utt":"utt_0006","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
-{"utt":"utt_0009","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"}]}
-{"utt":"utt_0010","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
-{"utt":"utt_0011","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"}]}
-{"utt":"utt_0012","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
-{"utt":"utt_0013","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"}]}
-{"utt":"utt_0014","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
-{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
-{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
-{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
-{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
-{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
-{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
-{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
-{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
-{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
-{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
-{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
-{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
+{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"sample/utt_001.wav"}]}
+{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"sample/utt_002.wav"}]}
+{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"sample/utt_003.wav"}]}
+{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"sample/utt_004.wav"}]}
+{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"sample/utt_005.wav"}]}
+{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"sample/utt_006.wav"}]}
+{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"sample/utt_007.wav"}]}
+{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"sample/utt_008.wav"}]}
+{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"sample/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"sample/utt_0002.wav"}]}
+{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"sample/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"sample/utt_0004.wav"}]}
+{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"sample/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"sample/utt_0006.wav"}]}
+{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"sample/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"sample/utt_0010.wav"}]}
+{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"sample/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"sample/utt_0012.wav"}]}
+{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"sample/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"sample/utt_0014.wav"}]}
+{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"sample/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"sample/utt_0016.wav"}]}
+{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"sample/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"sample/utt_0022.wav"}]}
+{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"sample/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"sample/utt_0024.wav"}]}
+{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"sample/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"sample/utt_0026.wav"}]}
+{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"sample/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"sample/utt_0030.wav"}]}
+{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"sample/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"sample/utt_0040.wav"}]}
diff --git a/static/task/mixed-style/CosyVoice3/manifest.jsonl b/static/task/mixed-style/CosyVoice3/manifest.jsonl
index 70ee76b3e561e41c64fd567f0e3f939a3e757f5b..cb90b837d22deee5d9a03199b3b7dcc525941b04 100644
--- a/static/task/mixed-style/CosyVoice3/manifest.jsonl
+++ b/static/task/mixed-style/CosyVoice3/manifest.jsonl
@@ -1,32 +1,20 @@
-{"utt":"utt_001","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
-{"utt":"utt_002","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
-{"utt":"utt_003","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
-{"utt":"utt_004","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
-{"utt":"utt_005","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
-{"utt":"utt_006","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
-{"utt":"utt_007","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
-{"utt":"utt_008","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
-{"utt":"utt_0001","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"}]}
-{"utt":"utt_0002","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
-{"utt":"utt_0003","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"}]}
-{"utt":"utt_0004","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
-{"utt":"utt_0005","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"}]}
-{"utt":"utt_0006","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
-{"utt":"utt_0009","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"}]}
-{"utt":"utt_0010","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
-{"utt":"utt_0011","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"}]}
-{"utt":"utt_0012","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
-{"utt":"utt_0013","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"}]}
-{"utt":"utt_0014","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
-{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
-{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
-{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
-{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
-{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
-{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
-{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
-{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
-{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
-{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
-{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
-{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
+{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"sample/utt_001.wav"}]}
+{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"sample/utt_002.wav"}]}
+{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"sample/utt_003.wav"}]}
+{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"sample/utt_004.wav"}]}
+{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"sample/utt_005.wav"}]}
+{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"sample/utt_006.wav"}]}
+{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"sample/utt_007.wav"}]}
+{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"sample/utt_008.wav"}]}
+{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"sample/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"sample/utt_0002.wav"}]}
+{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"sample/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"sample/utt_0004.wav"}]}
+{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"sample/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"sample/utt_0006.wav"}]}
+{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"sample/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"sample/utt_0010.wav"}]}
+{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"sample/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"sample/utt_0012.wav"}]}
+{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"sample/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"sample/utt_0014.wav"}]}
+{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"sample/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"sample/utt_0016.wav"}]}
+{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"sample/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"sample/utt_0022.wav"}]}
+{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"sample/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"sample/utt_0024.wav"}]}
+{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"sample/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"sample/utt_0026.wav"}]}
+{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"sample/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"sample/utt_0030.wav"}]}
+{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"sample/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"sample/utt_0040.wav"}]}
diff --git a/static/task/mixed-style/MiMo-V2.5/manifest.jsonl b/static/task/mixed-style/MiMo-V2.5/manifest.jsonl
index 70ee76b3e561e41c64fd567f0e3f939a3e757f5b..58db9b1fe734bca1ded785d4dfbde45799d910b8 100644
--- a/static/task/mixed-style/MiMo-V2.5/manifest.jsonl
+++ b/static/task/mixed-style/MiMo-V2.5/manifest.jsonl
@@ -1,32 +1,20 @@
-{"utt":"utt_001","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
-{"utt":"utt_002","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
-{"utt":"utt_003","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
-{"utt":"utt_004","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
-{"utt":"utt_005","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
-{"utt":"utt_006","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
-{"utt":"utt_007","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
-{"utt":"utt_008","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
-{"utt":"utt_0001","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"}]}
-{"utt":"utt_0002","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
-{"utt":"utt_0003","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"}]}
-{"utt":"utt_0004","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
-{"utt":"utt_0005","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"}]}
-{"utt":"utt_0006","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
-{"utt":"utt_0009","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"}]}
-{"utt":"utt_0010","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
-{"utt":"utt_0011","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"}]}
-{"utt":"utt_0012","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
-{"utt":"utt_0013","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"}]}
-{"utt":"utt_0014","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
-{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
-{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
-{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
-{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
-{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
-{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
-{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
-{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
-{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
-{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
-{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
-{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
+{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
+{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
+{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
+{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
+{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
+{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
+{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
+{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
+{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
+{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
+{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
+{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
+{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
+{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
+{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
+{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
+{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
+{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
+{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
+{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
diff --git a/static/task/mixed-style/Ours/manifest.jsonl b/static/task/mixed-style/Ours/manifest.jsonl
index 70ee76b3e561e41c64fd567f0e3f939a3e757f5b..58db9b1fe734bca1ded785d4dfbde45799d910b8 100644
--- a/static/task/mixed-style/Ours/manifest.jsonl
+++ b/static/task/mixed-style/Ours/manifest.jsonl
@@ -1,32 +1,20 @@
-{"utt":"utt_001","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
-{"utt":"utt_002","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
-{"utt":"utt_003","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
-{"utt":"utt_004","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
-{"utt":"utt_005","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
-{"utt":"utt_006","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
-{"utt":"utt_007","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
-{"utt":"utt_008","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
-{"utt":"utt_0001","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"}]}
-{"utt":"utt_0002","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
-{"utt":"utt_0003","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"}]}
-{"utt":"utt_0004","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
-{"utt":"utt_0005","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"}]}
-{"utt":"utt_0006","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
-{"utt":"utt_0009","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"}]}
-{"utt":"utt_0010","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
-{"utt":"utt_0011","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"}]}
-{"utt":"utt_0012","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
-{"utt":"utt_0013","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"}]}
-{"utt":"utt_0014","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
-{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
-{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
-{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
-{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
-{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
-{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
-{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
-{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
-{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
-{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
-{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
-{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
+{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
+{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
+{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
+{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
+{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
+{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
+{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
+{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
+{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
+{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
+{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
+{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
+{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
+{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
+{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
+{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
+{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
+{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
+{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
+{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
diff --git a/static/task/mixed-style/Qwen3/manifest.jsonl b/static/task/mixed-style/Qwen3/manifest.jsonl
index 70ee76b3e561e41c64fd567f0e3f939a3e757f5b..58db9b1fe734bca1ded785d4dfbde45799d910b8 100644
--- a/static/task/mixed-style/Qwen3/manifest.jsonl
+++ b/static/task/mixed-style/Qwen3/manifest.jsonl
@@ -1,32 +1,20 @@
-{"utt":"utt_001","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
-{"utt":"utt_002","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
-{"utt":"utt_003","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
-{"utt":"utt_004","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
-{"utt":"utt_005","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
-{"utt":"utt_006","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
-{"utt":"utt_007","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
-{"utt":"utt_008","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
-{"utt":"utt_0001","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"}]}
-{"utt":"utt_0002","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
-{"utt":"utt_0003","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"}]}
-{"utt":"utt_0004","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
-{"utt":"utt_0005","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"}]}
-{"utt":"utt_0006","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
-{"utt":"utt_0009","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"}]}
-{"utt":"utt_0010","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
-{"utt":"utt_0011","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"}]}
-{"utt":"utt_0012","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
-{"utt":"utt_0013","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"}]}
-{"utt":"utt_0014","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
-{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
-{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
-{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
-{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
-{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
-{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
-{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
-{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
-{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
-{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
-{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
-{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
+{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
+{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
+{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
+{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
+{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
+{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
+{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
+{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
+{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
+{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
+{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
+{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
+{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
+{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
+{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
+{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
+{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
+{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
+{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
+{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
diff --git a/static/task/mixed-style/task.yaml b/static/task/mixed-style/task.yaml
index 14ef69bc90e8439b672b4ffbda6b9ea75bb19717..921160c92ee1562e80c1f65b7e465aac16383da1 100644
--- a/static/task/mixed-style/task.yaml
+++ b/static/task/mixed-style/task.yaml
@@ -1,5 +1,5 @@
name: Mixed Style Control
-description: To better illustrate the stylistic differences in Chinese speech, we use comparative examples in the Chinese data presentation.
+description: The Mixed Style Control task is designed to evaluate whether the model can accurately follow natural language instructions under mixed-style conditions. To better illustrate the stylistic differences in Chinese speech, we use comparative examples in the Chinese data presentation.
show_reference: false
models:
- CosyVoice2
diff --git a/static/task/multi/MOSS-TTSD-V1/manifest.jsonl b/static/task/multi/MOSS-TTSD-V1(no instruction)/manifest.jsonl
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/manifest.jsonl
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/manifest.jsonl
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case4__prompt0.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case4__prompt0.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case4__prompt0.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case4__prompt0.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case4__prompt1.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case4__prompt1.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case4__prompt1.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case4__prompt1.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case5__prompt0.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case5__prompt0.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case5__prompt0.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case5__prompt0.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case5__prompt1.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case5__prompt1.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case5__prompt1.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case5__prompt1.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case6__prompt0.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case6__prompt0.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case6__prompt0.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case6__prompt0.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case6__prompt1.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case6__prompt1.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case6__prompt1.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case6__prompt1.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case7__prompt0.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case7__prompt0.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case7__prompt0.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case7__prompt0.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/refer/case7__prompt1.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case7__prompt1.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/refer/case7__prompt1.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/refer/case7__prompt1.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/samples/case4.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case4.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/samples/case4.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case4.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/samples/case5.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case5.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/samples/case5.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case5.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/samples/case6.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case6.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/samples/case6.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case6.wav
diff --git a/static/task/multi/MOSS-TTSD-V1/samples/case7.wav b/static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case7.wav
similarity index 100%
rename from static/task/multi/MOSS-TTSD-V1/samples/case7.wav
rename to static/task/multi/MOSS-TTSD-V1(no instruction)/samples/case7.wav
diff --git a/static/task/multi/Ours(without instruction)/manifest.jsonl b/static/task/multi/Ours(no instruction)/manifest.jsonl
similarity index 100%
rename from static/task/multi/Ours(without instruction)/manifest.jsonl
rename to static/task/multi/Ours(no instruction)/manifest.jsonl
diff --git a/static/task/multi/Ours(without instruction)/refer/case4__prompt0.wav b/static/task/multi/Ours(no instruction)/refer/case4__prompt0.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case4__prompt0.wav
rename to static/task/multi/Ours(no instruction)/refer/case4__prompt0.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case4__prompt1.wav b/static/task/multi/Ours(no instruction)/refer/case4__prompt1.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case4__prompt1.wav
rename to static/task/multi/Ours(no instruction)/refer/case4__prompt1.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case5__prompt0.wav b/static/task/multi/Ours(no instruction)/refer/case5__prompt0.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case5__prompt0.wav
rename to static/task/multi/Ours(no instruction)/refer/case5__prompt0.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case5__prompt1.wav b/static/task/multi/Ours(no instruction)/refer/case5__prompt1.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case5__prompt1.wav
rename to static/task/multi/Ours(no instruction)/refer/case5__prompt1.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case6__prompt0.wav b/static/task/multi/Ours(no instruction)/refer/case6__prompt0.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case6__prompt0.wav
rename to static/task/multi/Ours(no instruction)/refer/case6__prompt0.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case6__prompt1.wav b/static/task/multi/Ours(no instruction)/refer/case6__prompt1.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case6__prompt1.wav
rename to static/task/multi/Ours(no instruction)/refer/case6__prompt1.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case7__prompt0.wav b/static/task/multi/Ours(no instruction)/refer/case7__prompt0.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case7__prompt0.wav
rename to static/task/multi/Ours(no instruction)/refer/case7__prompt0.wav
diff --git a/static/task/multi/Ours(without instruction)/refer/case7__prompt1.wav b/static/task/multi/Ours(no instruction)/refer/case7__prompt1.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/refer/case7__prompt1.wav
rename to static/task/multi/Ours(no instruction)/refer/case7__prompt1.wav
diff --git a/static/task/multi/Ours(without instruction)/samples/case4.wav b/static/task/multi/Ours(no instruction)/samples/case4.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/samples/case4.wav
rename to static/task/multi/Ours(no instruction)/samples/case4.wav
diff --git a/static/task/multi/Ours(without instruction)/samples/case5.wav b/static/task/multi/Ours(no instruction)/samples/case5.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/samples/case5.wav
rename to static/task/multi/Ours(no instruction)/samples/case5.wav
diff --git a/static/task/multi/Ours(without instruction)/samples/case6.wav b/static/task/multi/Ours(no instruction)/samples/case6.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/samples/case6.wav
rename to static/task/multi/Ours(no instruction)/samples/case6.wav
diff --git a/static/task/multi/Ours(without instruction)/samples/case7.wav b/static/task/multi/Ours(no instruction)/samples/case7.wav
similarity index 100%
rename from static/task/multi/Ours(without instruction)/samples/case7.wav
rename to static/task/multi/Ours(no instruction)/samples/case7.wav
diff --git a/static/task/multi/SoulX-Podcast/manifest.jsonl b/static/task/multi/SoulX-Podcast(no instruction)/manifest.jsonl
similarity index 100%
rename from static/task/multi/SoulX-Podcast/manifest.jsonl
rename to static/task/multi/SoulX-Podcast(no instruction)/manifest.jsonl
diff --git a/static/task/multi/SoulX-Podcast/refer/case4__prompt0.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case4__prompt0.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case4__prompt0.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case4__prompt0.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case4__prompt1.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case4__prompt1.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case4__prompt1.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case4__prompt1.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case5__prompt0.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case5__prompt0.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case5__prompt0.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case5__prompt0.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case5__prompt1.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case5__prompt1.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case5__prompt1.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case5__prompt1.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case6__prompt0.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case6__prompt0.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case6__prompt0.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case6__prompt0.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case6__prompt1.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case6__prompt1.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case6__prompt1.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case6__prompt1.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case7__prompt0.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case7__prompt0.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case7__prompt0.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case7__prompt0.wav
diff --git a/static/task/multi/SoulX-Podcast/refer/case7__prompt1.wav b/static/task/multi/SoulX-Podcast(no instruction)/refer/case7__prompt1.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/refer/case7__prompt1.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/refer/case7__prompt1.wav
diff --git a/static/task/multi/SoulX-Podcast/samples/case4.wav b/static/task/multi/SoulX-Podcast(no instruction)/samples/case4.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/samples/case4.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/samples/case4.wav
diff --git a/static/task/multi/SoulX-Podcast/samples/case5.wav b/static/task/multi/SoulX-Podcast(no instruction)/samples/case5.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/samples/case5.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/samples/case5.wav
diff --git a/static/task/multi/SoulX-Podcast/samples/case6.wav b/static/task/multi/SoulX-Podcast(no instruction)/samples/case6.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/samples/case6.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/samples/case6.wav
diff --git a/static/task/multi/SoulX-Podcast/samples/case7.wav b/static/task/multi/SoulX-Podcast(no instruction)/samples/case7.wav
similarity index 100%
rename from static/task/multi/SoulX-Podcast/samples/case7.wav
rename to static/task/multi/SoulX-Podcast(no instruction)/samples/case7.wav
diff --git a/static/task/multi/task.yaml b/static/task/multi/task.yaml
index 28acd66f744c59430437e7e232bdef14118fa56a..89cb8f23f8ae6340892625ab3925811009e57a83 100644
--- a/static/task/multi/task.yaml
+++ b/static/task/multi/task.yaml
@@ -1,7 +1,7 @@
name: Multi-Speaker Dialogue Generation
-description: Demos showcasing multi-speaker dialogue synthesis. Unlike existing dialogue synthesis models, ours supports instruction-based control, with demos presented both with and without instructions.
+description: "The Multi-Speaker Dialogue Generation task evaluates the model’s ability to generate speech in multi-turn dialogue scenarios. We report results under two input settings: with and without instructions. This comparison demonstrates the model’s natural controllability through contextual scene understanding, as well as its guided controllability when provided with instructions. It should be noted that the compared baseline models do not provide explicit controllability, and are therefore evaluated only under their default generation settings."
models:
- - MOSS-TTSD-V1
- - SoulX-Podcast
- - Ours(without instruction)
+ - MOSS-TTSD-V1(no instruction)
+ - SoulX-Podcast(no instruction)
+ - Ours(no instruction)
- Ours(with instruction)
diff --git a/static/task/paral/task.yaml b/static/task/paral/task.yaml
index 1544dea4b5904f2545ad11e5f066192fbed2921e..688e11c628fbed48e6f7fb242e41ba774a970132 100644
--- a/static/task/paral/task.yaml
+++ b/static/task/paral/task.yaml
@@ -1,5 +1,5 @@
name: Paralinguistic Cue Generation
-description: Demos for generating non-verbal paralinguistic sounds conditioned on natural text input
+description: The Paralinguistic Cue Generation task is designed to evaluate whether the model can understand and generate paralinguistic vocal cues expressed in text, such as laughter, sighs, and coughs.
show_reference: false
models:
- CosyVoice3
diff --git a/static/task/punct/task.yaml b/static/task/punct/task.yaml
index 5302cdc3089e07c3828aafc988fc824e5b1d0a0f..7e8b781f341ce683bf1c47285f314dec2244e9da 100644
--- a/static/task/punct/task.yaml
+++ b/static/task/punct/task.yaml
@@ -1,5 +1,6 @@
name: Punctuation Control
-description: Single-utterance demos with punctuation-rich expressive delivery.
+description: The Punctuation Control task aims to evaluate the model’s ability to understand and realize prosodic cues expressed through punctuation, such as pauses, lengthening, and tonal transitions.
+
show_reference: false
models:
- MiMo-V2.5