Spaces:
Running
Running
Anonymous committed on
Commit ·
ff857f3
1
Parent(s): ec2fd98
update
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- index.html +21 -8
- static/css/index.css +14 -1
- static/task/emo/task.yaml +1 -1
- static/task/mixed-style/CosyVoice2/manifest.jsonl +20 -32
- static/task/mixed-style/CosyVoice3/manifest.jsonl +20 -32
- static/task/mixed-style/MiMo-V2.5/manifest.jsonl +20 -32
- static/task/mixed-style/Ours/manifest.jsonl +20 -32
- static/task/mixed-style/Qwen3/manifest.jsonl +20 -32
- static/task/mixed-style/task.yaml +1 -1
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/manifest.jsonl +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case4__prompt0.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case4__prompt1.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case5__prompt0.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case5__prompt1.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case6__prompt0.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case6__prompt1.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case7__prompt0.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case7__prompt1.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case4.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case5.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case6.wav +0 -0
- static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case7.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/manifest.jsonl +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case4__prompt0.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case4__prompt1.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case5__prompt0.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case5__prompt1.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case6__prompt0.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case6__prompt1.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case7__prompt0.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case7__prompt1.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case4.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case5.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case6.wav +0 -0
- static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case7.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/manifest.jsonl +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case4__prompt0.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case4__prompt1.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case5__prompt0.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case5__prompt1.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case6__prompt0.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case6__prompt1.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case7__prompt0.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case7__prompt1.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case4.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case5.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case6.wav +0 -0
- static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case7.wav +0 -0
- static/task/multi/task.yaml +4 -4
- static/task/paral/task.yaml +1 -1
index.html
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
<div class="container wide-container">
|
| 23 |
<div class="demo-intro">
|
| 24 |
<p>
|
| 25 |
-
|
| 26 |
</p>
|
| 27 |
</div>
|
| 28 |
|
|
@@ -124,8 +124,11 @@
|
|
| 124 |
item.variants.forEach((variant, index) => {
|
| 125 |
const context = document.createElement("p");
|
| 126 |
context.className = "variant-context";
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
| 129 |
wrapper.appendChild(context);
|
| 130 |
});
|
| 131 |
|
|
@@ -213,10 +216,11 @@
|
|
| 213 |
const row = document.createElement("div");
|
| 214 |
row.className = "variant-audio-row";
|
| 215 |
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
| 220 |
row.appendChild(createAudio(`${taskRoot}/${model}/${variant.output_audio}`));
|
| 221 |
wrapper.appendChild(row);
|
| 222 |
});
|
|
@@ -291,10 +295,19 @@
|
|
| 291 |
links.className = "task-directory-links";
|
| 292 |
|
| 293 |
tasks.forEach(({ id, task }) => {
|
|
|
|
|
|
|
|
|
|
| 294 |
const link = document.createElement("a");
|
| 295 |
link.href = `#task-${id}`;
|
| 296 |
link.textContent = task.name;
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
});
|
| 299 |
|
| 300 |
directory.replaceChildren(title, links);
|
|
|
|
| 22 |
<div class="container wide-container">
|
| 23 |
<div class="demo-intro">
|
| 24 |
<p>
|
| 25 |
+
This page presents the results obtained by fine-tuning SoulX-Podcast on our dataset. We compare our model against other models with related capabilities across multiple tasks in order to demonstrate the superiority of our dataset. Because our dataset consists primarily of Chinese data, we mainly present our model's performance on Chinese speech generation. In addition, we supplement the training data with 100 hours of English audiobook data during fine-tuning, to demonstrate that our dataset construction method transfers across languages.
|
| 26 |
</p>
|
| 27 |
</div>
|
| 28 |
|
|
|
|
| 124 |
item.variants.forEach((variant, index) => {
|
| 125 |
const context = document.createElement("p");
|
| 126 |
context.className = "variant-context";
|
| 127 |
+
if (hasMultipleVariants) {
|
| 128 |
+
context.innerHTML = `<strong>instruction${index + 1}</strong> <span class="context-cue">${escapeHtml(variant.context)}</span>`;
|
| 129 |
+
} else {
|
| 130 |
+
context.innerHTML = `<span class="context-cue">${escapeHtml(variant.context)}</span>`;
|
| 131 |
+
}
|
| 132 |
wrapper.appendChild(context);
|
| 133 |
});
|
| 134 |
|
|
|
|
| 216 |
const row = document.createElement("div");
|
| 217 |
row.className = "variant-audio-row";
|
| 218 |
|
| 219 |
+
if (hasMultipleVariants) {
|
| 220 |
+
const label = document.createElement("span");
|
| 221 |
+
label.textContent = `instruction${index + 1}`;
|
| 222 |
+
row.appendChild(label);
|
| 223 |
+
}
|
| 224 |
row.appendChild(createAudio(`${taskRoot}/${model}/${variant.output_audio}`));
|
| 225 |
wrapper.appendChild(row);
|
| 226 |
});
|
|
|
|
| 295 |
links.className = "task-directory-links";
|
| 296 |
|
| 297 |
tasks.forEach(({ id, task }) => {
|
| 298 |
+
const item = document.createElement("div");
|
| 299 |
+
item.className = "task-directory-item";
|
| 300 |
+
|
| 301 |
const link = document.createElement("a");
|
| 302 |
link.href = `#task-${id}`;
|
| 303 |
link.textContent = task.name;
|
| 304 |
+
|
| 305 |
+
const description = document.createElement("p");
|
| 306 |
+
description.textContent = task.description;
|
| 307 |
+
|
| 308 |
+
item.appendChild(link);
|
| 309 |
+
item.appendChild(description);
|
| 310 |
+
links.appendChild(item);
|
| 311 |
});
|
| 312 |
|
| 313 |
directory.replaceChildren(title, links);
|
static/css/index.css
CHANGED
|
@@ -82,19 +82,32 @@ body {
|
|
| 82 |
|
| 83 |
.task-directory-links {
|
| 84 |
display: grid;
|
| 85 |
-
gap: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
}
|
| 87 |
|
| 88 |
.task-directory a {
|
| 89 |
color: var(--link);
|
| 90 |
text-decoration: none;
|
| 91 |
line-height: 1.35;
|
|
|
|
| 92 |
}
|
| 93 |
|
| 94 |
.task-directory a:hover {
|
| 95 |
text-decoration: underline;
|
| 96 |
}
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
.task-list {
|
| 99 |
display: grid;
|
| 100 |
gap: 1.6rem;
|
|
|
|
| 82 |
|
| 83 |
.task-directory-links {
|
| 84 |
display: grid;
|
| 85 |
+
gap: 0.7rem;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
.task-directory-item {
|
| 89 |
+
display: grid;
|
| 90 |
+
gap: 0.2rem;
|
| 91 |
}
|
| 92 |
|
| 93 |
.task-directory a {
|
| 94 |
color: var(--link);
|
| 95 |
text-decoration: none;
|
| 96 |
line-height: 1.35;
|
| 97 |
+
font-weight: 700;
|
| 98 |
}
|
| 99 |
|
| 100 |
.task-directory a:hover {
|
| 101 |
text-decoration: underline;
|
| 102 |
}
|
| 103 |
|
| 104 |
+
.task-directory-item p {
|
| 105 |
+
margin: 0;
|
| 106 |
+
color: var(--muted);
|
| 107 |
+
font-size: 0.9rem;
|
| 108 |
+
line-height: 1.45;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
.task-list {
|
| 112 |
display: grid;
|
| 113 |
gap: 1.6rem;
|
static/task/emo/task.yaml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
name: Emotion Control
|
| 2 |
-
description:
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice3
|
|
|
|
| 1 |
name: Emotion Control
|
| 2 |
+
description: The Emotion Control task is designed to evaluate the model’s controllability under conventional emotion labels. In this task, we expand the set of emotion categories and assess the performance of different models across 18 emotion labels. Notably, our dataset was not explicitly constructed with emotion-labeled data; nevertheless, the results show that our dataset generalizes to emotion-labeled speech generation tasks.
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice3
|
static/task/mixed-style/CosyVoice2/manifest.jsonl
CHANGED
|
@@ -1,32 +1,20 @@
|
|
| 1 |
-
{"utt":"
|
| 2 |
-
{"utt":"
|
| 3 |
-
{"utt":"
|
| 4 |
-
{"utt":"
|
| 5 |
-
{"utt":"
|
| 6 |
-
{"utt":"
|
| 7 |
-
{"utt":"
|
| 8 |
-
{"utt":"
|
| 9 |
-
{"utt":"
|
| 10 |
-
{"utt":"
|
| 11 |
-
{"utt":"
|
| 12 |
-
{"utt":"
|
| 13 |
-
{"utt":"
|
| 14 |
-
{"utt":"
|
| 15 |
-
{"utt":"
|
| 16 |
-
{"utt":"
|
| 17 |
-
{"utt":"
|
| 18 |
-
{"utt":"
|
| 19 |
-
{"utt":"
|
| 20 |
-
{"utt":"
|
| 21 |
-
{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
|
| 22 |
-
{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 23 |
-
{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
|
| 24 |
-
{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 25 |
-
{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
|
| 26 |
-
{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 27 |
-
{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
|
| 28 |
-
{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 29 |
-
{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
|
| 30 |
-
{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 31 |
-
{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
|
| 32 |
-
{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
| 1 |
+
{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"sample/utt_001.wav"}]}
|
| 2 |
+
{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"sample/utt_002.wav"}]}
|
| 3 |
+
{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"sample/utt_003.wav"}]}
|
| 4 |
+
{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"sample/utt_004.wav"}]}
|
| 5 |
+
{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"sample/utt_005.wav"}]}
|
| 6 |
+
{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"sample/utt_006.wav"}]}
|
| 7 |
+
{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"sample/utt_007.wav"}]}
|
| 8 |
+
{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"sample/utt_008.wav"}]}
|
| 9 |
+
{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"sample/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"sample/utt_0002.wav"}]}
|
| 10 |
+
{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"sample/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"sample/utt_0004.wav"}]}
|
| 11 |
+
{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"sample/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"sample/utt_0006.wav"}]}
|
| 12 |
+
{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"sample/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"sample/utt_0010.wav"}]}
|
| 13 |
+
{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"sample/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"sample/utt_0012.wav"}]}
|
| 14 |
+
{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"sample/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"sample/utt_0014.wav"}]}
|
| 15 |
+
{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"sample/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"sample/utt_0016.wav"}]}
|
| 16 |
+
{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"sample/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"sample/utt_0022.wav"}]}
|
| 17 |
+
{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"sample/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"sample/utt_0024.wav"}]}
|
| 18 |
+
{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"sample/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"sample/utt_0026.wav"}]}
|
| 19 |
+
{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"sample/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"sample/utt_0030.wav"}]}
|
| 20 |
+
{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"sample/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"sample/utt_0040.wav"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/task/mixed-style/CosyVoice3/manifest.jsonl
CHANGED
|
@@ -1,32 +1,20 @@
|
|
| 1 |
-
{"utt":"
|
| 2 |
-
{"utt":"
|
| 3 |
-
{"utt":"
|
| 4 |
-
{"utt":"
|
| 5 |
-
{"utt":"
|
| 6 |
-
{"utt":"
|
| 7 |
-
{"utt":"
|
| 8 |
-
{"utt":"
|
| 9 |
-
{"utt":"
|
| 10 |
-
{"utt":"
|
| 11 |
-
{"utt":"
|
| 12 |
-
{"utt":"
|
| 13 |
-
{"utt":"
|
| 14 |
-
{"utt":"
|
| 15 |
-
{"utt":"
|
| 16 |
-
{"utt":"
|
| 17 |
-
{"utt":"
|
| 18 |
-
{"utt":"
|
| 19 |
-
{"utt":"
|
| 20 |
-
{"utt":"
|
| 21 |
-
{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
|
| 22 |
-
{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 23 |
-
{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
|
| 24 |
-
{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 25 |
-
{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
|
| 26 |
-
{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 27 |
-
{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
|
| 28 |
-
{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 29 |
-
{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
|
| 30 |
-
{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 31 |
-
{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
|
| 32 |
-
{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
| 1 |
+
{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"sample/utt_001.wav"}]}
|
| 2 |
+
{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"sample/utt_002.wav"}]}
|
| 3 |
+
{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"sample/utt_003.wav"}]}
|
| 4 |
+
{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"sample/utt_004.wav"}]}
|
| 5 |
+
{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"sample/utt_005.wav"}]}
|
| 6 |
+
{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"sample/utt_006.wav"}]}
|
| 7 |
+
{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"sample/utt_007.wav"}]}
|
| 8 |
+
{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"sample/utt_008.wav"}]}
|
| 9 |
+
{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"sample/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"sample/utt_0002.wav"}]}
|
| 10 |
+
{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"sample/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"sample/utt_0004.wav"}]}
|
| 11 |
+
{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"sample/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"sample/utt_0006.wav"}]}
|
| 12 |
+
{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"sample/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"sample/utt_0010.wav"}]}
|
| 13 |
+
{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"sample/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"sample/utt_0012.wav"}]}
|
| 14 |
+
{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"sample/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"sample/utt_0014.wav"}]}
|
| 15 |
+
{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"sample/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"sample/utt_0016.wav"}]}
|
| 16 |
+
{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"sample/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"sample/utt_0022.wav"}]}
|
| 17 |
+
{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"sample/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"sample/utt_0024.wav"}]}
|
| 18 |
+
{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"sample/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"sample/utt_0026.wav"}]}
|
| 19 |
+
{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"sample/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"sample/utt_0030.wav"}]}
|
| 20 |
+
{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"sample/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"sample/utt_0040.wav"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/task/mixed-style/MiMo-V2.5/manifest.jsonl
CHANGED
|
@@ -1,32 +1,20 @@
|
|
| 1 |
-
{"utt":"
|
| 2 |
-
{"utt":"
|
| 3 |
-
{"utt":"
|
| 4 |
-
{"utt":"
|
| 5 |
-
{"utt":"
|
| 6 |
-
{"utt":"
|
| 7 |
-
{"utt":"
|
| 8 |
-
{"utt":"
|
| 9 |
-
{"utt":"
|
| 10 |
-
{"utt":"
|
| 11 |
-
{"utt":"
|
| 12 |
-
{"utt":"
|
| 13 |
-
{"utt":"
|
| 14 |
-
{"utt":"
|
| 15 |
-
{"utt":"
|
| 16 |
-
{"utt":"
|
| 17 |
-
{"utt":"
|
| 18 |
-
{"utt":"
|
| 19 |
-
{"utt":"
|
| 20 |
-
{"utt":"
|
| 21 |
-
{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
|
| 22 |
-
{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 23 |
-
{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
|
| 24 |
-
{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 25 |
-
{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
|
| 26 |
-
{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 27 |
-
{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
|
| 28 |
-
{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 29 |
-
{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
|
| 30 |
-
{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 31 |
-
{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
|
| 32 |
-
{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
| 1 |
+
{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
|
| 2 |
+
{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
|
| 3 |
+
{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
|
| 4 |
+
{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
|
| 5 |
+
{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
|
| 6 |
+
{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
|
| 7 |
+
{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
|
| 8 |
+
{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
|
| 9 |
+
{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
|
| 10 |
+
{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
|
| 11 |
+
{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
|
| 12 |
+
{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
|
| 13 |
+
{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
|
| 14 |
+
{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
|
| 15 |
+
{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 16 |
+
{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 17 |
+
{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 18 |
+
{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 19 |
+
{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 20 |
+
{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/task/mixed-style/Ours/manifest.jsonl
CHANGED
|
@@ -1,32 +1,20 @@
|
|
| 1 |
-
{"utt":"
|
| 2 |
-
{"utt":"
|
| 3 |
-
{"utt":"
|
| 4 |
-
{"utt":"
|
| 5 |
-
{"utt":"
|
| 6 |
-
{"utt":"
|
| 7 |
-
{"utt":"
|
| 8 |
-
{"utt":"
|
| 9 |
-
{"utt":"
|
| 10 |
-
{"utt":"
|
| 11 |
-
{"utt":"
|
| 12 |
-
{"utt":"
|
| 13 |
-
{"utt":"
|
| 14 |
-
{"utt":"
|
| 15 |
-
{"utt":"
|
| 16 |
-
{"utt":"
|
| 17 |
-
{"utt":"
|
| 18 |
-
{"utt":"
|
| 19 |
-
{"utt":"
|
| 20 |
-
{"utt":"
|
| 21 |
-
{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
|
| 22 |
-
{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 23 |
-
{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
|
| 24 |
-
{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 25 |
-
{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
|
| 26 |
-
{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 27 |
-
{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
|
| 28 |
-
{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 29 |
-
{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
|
| 30 |
-
{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 31 |
-
{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
|
| 32 |
-
{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
| 1 |
+
{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
|
| 2 |
+
{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
|
| 3 |
+
{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
|
| 4 |
+
{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
|
| 5 |
+
{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
|
| 6 |
+
{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
|
| 7 |
+
{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
|
| 8 |
+
{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
|
| 9 |
+
{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
|
| 10 |
+
{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
|
| 11 |
+
{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
|
| 12 |
+
{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
|
| 13 |
+
{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
|
| 14 |
+
{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
|
| 15 |
+
{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 16 |
+
{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 17 |
+
{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 18 |
+
{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 19 |
+
{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 20 |
+
{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/task/mixed-style/Qwen3/manifest.jsonl
CHANGED
|
@@ -1,32 +1,20 @@
|
|
| 1 |
-
{"utt":"
|
| 2 |
-
{"utt":"
|
| 3 |
-
{"utt":"
|
| 4 |
-
{"utt":"
|
| 5 |
-
{"utt":"
|
| 6 |
-
{"utt":"
|
| 7 |
-
{"utt":"
|
| 8 |
-
{"utt":"
|
| 9 |
-
{"utt":"
|
| 10 |
-
{"utt":"
|
| 11 |
-
{"utt":"
|
| 12 |
-
{"utt":"
|
| 13 |
-
{"utt":"
|
| 14 |
-
{"utt":"
|
| 15 |
-
{"utt":"
|
| 16 |
-
{"utt":"
|
| 17 |
-
{"utt":"
|
| 18 |
-
{"utt":"
|
| 19 |
-
{"utt":"
|
| 20 |
-
{"utt":"
|
| 21 |
-
{"utt":"utt_0015","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"}]}
|
| 22 |
-
{"utt":"utt_0016","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 23 |
-
{"utt":"utt_0021","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"}]}
|
| 24 |
-
{"utt":"utt_0022","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 25 |
-
{"utt":"utt_0023","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"}]}
|
| 26 |
-
{"utt":"utt_0024","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 27 |
-
{"utt":"utt_0025","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"}]}
|
| 28 |
-
{"utt":"utt_0026","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 29 |
-
{"utt":"utt_0029","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"}]}
|
| 30 |
-
{"utt":"utt_0030","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 31 |
-
{"utt":"utt_0039","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"}]}
|
| 32 |
-
{"utt":"utt_0040","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
| 1 |
+
{"utt":"That's not what I meant, I swear.","target_text":"That's not what I meant, I swear.","variants":[{"utt":"utt_001","context":"Defensive, speaking quickly with slight panic","output_audio":"samples/utt_001.wav"}]}
|
| 2 |
+
{"utt":"Oh. Oh no.","target_text":"Oh. Oh no.","variants":[{"utt":"utt_002","context":"Realization dawning slowly, voice dropping to a whisper","output_audio":"samples/utt_002.wav"}]}
|
| 3 |
+
{"utt":"I... I don't know what to say.","target_text":"I... I don't know what to say.","variants":[{"utt":"utt_003","context":"nervous and hesitant, with the voice gradually trailing off","output_audio":"samples/utt_003.wav"}]}
|
| 4 |
+
{"utt":"Wait— just give me a second.","target_text":"Wait— just give me a second.","variants":[{"utt":"utt_004","context":"flustered, interrupting oneself mid-sentence","output_audio":"samples/utt_004.wav"}]}
|
| 5 |
+
{"utt":"Fine. Do whatever you want.","target_text":"Fine. Do whatever you want.","variants":[{"utt":"utt_005","context":"resigned, with a calm tone tinged with faint bitterness","output_audio":"samples/utt_005.wav"}]}
|
| 6 |
+
{"utt":"Don't you dare talk to me like that.","target_text":"Don't you dare talk to me like that.","variants":[{"utt":"utt_006","context":"coldly angry, with every word articulated clearly and forcefully","output_audio":"samples/utt_006.wav"}]}
|
| 7 |
+
{"utt":"I trusted you. I actually trusted you.","target_text":"I trusted you. I actually trusted you.","variants":[{"utt":"utt_007","context":"angry, with the heartbreak of betrayal, the voice catching in the throat","output_audio":"samples/utt_007.wav"}]}
|
| 8 |
+
{"utt":"I'm not— I'm not drunk, you're drunk.","target_text":"I'm not— I'm not drunk, you're drunk.","variants":[{"utt":"utt_008","context":"drunkenly denying it, slurring the words and speaking incoherently","output_audio":"samples/utt_008.wav"}]}
|
| 9 |
+
{"utt":"呼——终于做完了,可以放假啦!","target_text":"呼——终于做完了,可以放假啦!","variants":[{"utt":"utt_0001","context":"如释重负,笑意藏不住地感叹着","output_audio":"samples/utt_0001.wav"},{"utt":"utt_0002","context":"兴奋得不行,音量抬高语速加快地喊着","output_audio":"samples/utt_0002.wav"}]}
|
| 10 |
+
{"utt":"哎,这雨怎么说下就下啊?","target_text":"哎,这雨怎么说下就下啊?","variants":[{"utt":"utt_0003","context":"无奈得很,拖着调子抱怨着","output_audio":"samples/utt_0003.wav"},{"utt":"utt_0004","context":"压低声音,带着烦躁小声嘟囔着","output_audio":"samples/utt_0004.wav"}]}
|
| 11 |
+
{"utt":"别挤别挤,一个个来!","target_text":"别挤别挤,一个个来!","variants":[{"utt":"utt_0005","context":"语气很冲,音量抬高地喝止着","output_audio":"samples/utt_0005.wav"},{"utt":"utt_0006","context":"急得发紧,语速飞快几乎不停顿地喊着","output_audio":"samples/utt_0006.wav"}]}
|
| 12 |
+
{"utt":"真的假的?你别骗我!","target_text":"真的假的?你别骗我!","variants":[{"utt":"utt_0009","context":"惊得一愣,语调上扬地反问着","output_audio":"samples/utt_0009.wav"},{"utt":"utt_0010","context":"带着笑意,半真半假地追问着","output_audio":"samples/utt_0010.wav"}]}
|
| 13 |
+
{"utt":"嘘,小点声,有人来了。","target_text":"嘘,小点声,有人来了。","variants":[{"utt":"utt_0011","context":"压着嗓子,用气声又快又轻地提醒着","output_audio":"samples/utt_0011.wav"},{"utt":"utt_0012","context":"紧张得发虚,声音更轻更低地说着","output_audio":"samples/utt_0012.wav"}]}
|
| 14 |
+
{"utt":"我都说了不是我干的!","target_text":"我都说了不是我干的!","variants":[{"utt":"utt_0013","context":"委屈得很,急着解释地喊着","output_audio":"samples/utt_0013.wav"},{"utt":"utt_0014","context":"火气上来,嗓门拔高地吼着","output_audio":"samples/utt_0014.wav"}]}
|
| 15 |
+
{"utt":"等我两分钟,我马上回来。","target_text":"等我两分钟,我马上回来。","variants":[{"utt":"utt_0015","context":"语速很快,干脆利落地交代着","output_audio":"samples/utt_0015.wav"},{"utt":"utt_0016","context":"语气笃定,声音稳稳地下压着说","output_audio":"samples/utt_0016.wav"}]}
|
| 16 |
+
{"utt":"嗯?你刚刚叫我吗?","target_text":"嗯?你刚刚叫我吗?","variants":[{"utt":"utt_0021","context":"满是困惑,语调上扬地问着","output_audio":"samples/utt_0021.wav"},{"utt":"utt_0022","context":"压低声音,停顿一下更谨慎地问着","output_audio":"samples/utt_0022.wav"}]}
|
| 17 |
+
{"utt":"别担心,我在呢。","target_text":"别担心,我在呢。","variants":[{"utt":"utt_0023","context":"声音软下来,温温柔柔地哄着","output_audio":"samples/utt_0023.wav"},{"utt":"utt_0024","context":"语气稳得很,声音下压地说着","output_audio":"samples/utt_0024.wav"}]}
|
| 18 |
+
{"utt":"不是我说你,这也太慢了。","target_text":"不是我说你,这也太慢了。","variants":[{"utt":"utt_0025","context":"不耐烦得很,语速偏快地吐槽着","output_audio":"samples/utt_0025.wav"},{"utt":"utt_0026","context":"语气冷冷的,带着讥讽地吐槽着","output_audio":"samples/utt_0026.wav"}]}
|
| 19 |
+
{"utt":"我再确认一遍:你确定要这样做?","target_text":"我再确认一遍:你确定要这样做?","variants":[{"utt":"utt_0029","context":"语气严谨,放慢语速反复确认地问着","output_audio":"samples/utt_0029.wav"},{"utt":"utt_0030","context":"语气更严肃,音量抬高地追问着","output_audio":"samples/utt_0030.wav"}]}
|
| 20 |
+
{"utt":"我不太确定,但我可以试试。","target_text":"我不太确定,但我可以试试。","variants":[{"utt":"utt_0039","context":"犹豫得很,声音放轻地说着","output_audio":"samples/utt_0039.wav"},{"utt":"utt_0040","context":"深吸一口气,语气笃定地说着","output_audio":"samples/utt_0040.wav"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/task/mixed-style/task.yaml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
name: Mixed Style Control
|
| 2 |
-
description: To better illustrate the stylistic differences in Chinese speech, we use comparative examples in the Chinese data presentation.
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice2
|
|
|
|
| 1 |
name: Mixed Style Control
|
| 2 |
+
description: The Mixed Style Control task is designed to evaluate whether the model can accurately follow natural language instructions under mixed-style conditions. To better illustrate the stylistic differences in Chinese speech, we use comparative examples in the Chinese data presentation.
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice2
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/manifest.jsonl
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case4__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case4__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case5__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case5__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case6__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case6__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case7__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/refer/case7__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case4.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case5.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case6.wav
RENAMED
|
File without changes
|
static/task/multi/{MOSS-TTSD-V1 → MOSS-TTSD-V1(no instruction)}/samples/case7.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/manifest.jsonl
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case4__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case4__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case5__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case5__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case6__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case6__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case7__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/refer/case7__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case4.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case5.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case6.wav
RENAMED
|
File without changes
|
static/task/multi/{Ours(without instruction) → Ours(no instruction)}/samples/case7.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/manifest.jsonl
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case4__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case4__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case5__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case5__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case6__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case6__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case7__prompt0.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/refer/case7__prompt1.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case4.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case5.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case6.wav
RENAMED
|
File without changes
|
static/task/multi/{SoulX-Podcast → SoulX-Podcast(no instruction)}/samples/case7.wav
RENAMED
|
File without changes
|
static/task/multi/task.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
name: Multi-Speaker Dialogue Generation
|
| 2 |
-
description:
|
| 3 |
models:
|
| 4 |
-
- MOSS-TTSD-V1
|
| 5 |
-
- SoulX-Podcast
|
| 6 |
-
- Ours(
|
| 7 |
- Ours(with instruction)
|
|
|
|
| 1 |
name: Multi-Speaker Dialogue Generation
|
| 2 |
+
description: The Multi-Speaker Dialogue Generation task evaluates the model’s ability to generate speech in multi-turn dialogue scenarios. We report results under two input settings, with and without instructions. This comparison demonstrates the model’s natural controllability through contextual scene understanding, as well as its guided controllability when provided with instructions. It should be noted that the compared baseline models do not provide explicit controllability, and are therefore evaluated only under their default generation settings.
|
| 3 |
models:
|
| 4 |
+
- MOSS-TTSD-V1(no instruction)
|
| 5 |
+
- SoulX-Podcast(no instruction)
|
| 6 |
+
- Ours(no instruction)
|
| 7 |
- Ours(with instruction)
|
static/task/paral/task.yaml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
name: Paralinguistic Cue Generation
|
| 2 |
-
description:
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice3
|
|
|
|
| 1 |
name: Paralinguistic Cue Generation
|
| 2 |
+
description: The Paralinguistic Cue Generation task is designed to evaluate whether the model can understand and generate paralinguistic vocal cues expressed in text, such as laughter, sighs, and coughs.
|
| 3 |
show_reference: false
|
| 4 |
models:
|
| 5 |
- CosyVoice3
|