Anonymous committed on
Commit
f69c8ff
·
1 Parent(s): f0fe2fc

Update demo tasks and audio manifests

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. index.html +45 -2
  2. static/css/index.css +18 -0
  3. static/task/emo/CosyVoice3/manifest.csv +19 -0
  4. static/task/emo/CosyVoice3/manifest.jsonl +18 -0
  5. static/task/{multi/MOSS-TTSD-V1/refer/case32__prompt0.wav → emo/CosyVoice3/refer/utt_0001_fear.wav} +2 -2
  6. static/task/{multi/MOSS-TTSD-V1/refer/case32__prompt1.wav → emo/CosyVoice3/refer/utt_0002_nervous.wav} +2 -2
  7. static/task/{multi/Ours(with instruction)/refer/case32__prompt1.wav → emo/CosyVoice3/refer/utt_0003_tired.wav} +2 -2
  8. static/task/{multi/Ours(with instruction)/refer/case32__prompt0.wav → emo/CosyVoice3/refer/utt_0004_peaceful.wav} +2 -2
  9. static/task/emo/CosyVoice3/refer/utt_0005_enthusiastic.wav +3 -0
  10. static/task/emo/CosyVoice3/refer/utt_0006_proud.wav +3 -0
  11. static/task/emo/CosyVoice3/refer/utt_0007_happy.wav +3 -0
  12. static/task/emo/CosyVoice3/refer/utt_0008_confused.wav +3 -0
  13. static/task/emo/CosyVoice3/refer/utt_0009_surprise.wav +3 -0
  14. static/task/emo/CosyVoice3/refer/utt_0010_worried.wav +3 -0
  15. static/task/emo/CosyVoice3/refer/utt_0011_sadness.wav +3 -0
  16. static/task/emo/CosyVoice3/refer/utt_0012_frustration.wav +3 -0
  17. static/task/emo/CosyVoice3/refer/utt_0013_shameful.wav +3 -0
  18. static/task/emo/CosyVoice3/refer/utt_0014_disappointed.wav +3 -0
  19. static/task/emo/CosyVoice3/refer/utt_0015_anger.wav +3 -0
  20. static/task/emo/CosyVoice3/refer/utt_0016_disgust.wav +3 -0
  21. static/task/emo/CosyVoice3/refer/utt_0017_contempt.wav +3 -0
  22. static/task/emo/CosyVoice3/refer/utt_0018_pity.wav +3 -0
  23. static/task/emo/CosyVoice3/sample/utt_0001_fear.wav +3 -0
  24. static/task/emo/CosyVoice3/sample/utt_0002_nervous.wav +3 -0
  25. static/task/emo/CosyVoice3/sample/utt_0003_tired.wav +3 -0
  26. static/task/emo/CosyVoice3/sample/utt_0004_peaceful.wav +3 -0
  27. static/task/emo/CosyVoice3/sample/utt_0005_enthusiastic.wav +3 -0
  28. static/task/emo/CosyVoice3/sample/utt_0006_proud.wav +3 -0
  29. static/task/emo/CosyVoice3/sample/utt_0007_happy.wav +3 -0
  30. static/task/emo/CosyVoice3/sample/utt_0008_confused.wav +3 -0
  31. static/task/emo/CosyVoice3/sample/utt_0009_surprise.wav +3 -0
  32. static/task/emo/CosyVoice3/sample/utt_0010_worried.wav +3 -0
  33. static/task/emo/CosyVoice3/sample/utt_0011_sadness.wav +3 -0
  34. static/task/emo/CosyVoice3/sample/utt_0012_frustration.wav +3 -0
  35. static/task/emo/CosyVoice3/sample/utt_0013_shameful.wav +3 -0
  36. static/task/emo/CosyVoice3/sample/utt_0014_disappointed.wav +3 -0
  37. static/task/emo/CosyVoice3/sample/utt_0015_anger.wav +3 -0
  38. static/task/emo/CosyVoice3/sample/utt_0016_disgust.wav +3 -0
  39. static/task/emo/CosyVoice3/sample/utt_0017_contempt.wav +3 -0
  40. static/task/emo/CosyVoice3/sample/utt_0018_pity.wav +3 -0
  41. static/task/emo/IndexTTS2/manifest.csv +19 -0
  42. static/task/emo/IndexTTS2/manifest.jsonl +18 -0
  43. static/task/emo/IndexTTS2/refer/utt_0001_fear.wav +3 -0
  44. static/task/emo/IndexTTS2/refer/utt_0002_nervous.wav +3 -0
  45. static/task/emo/IndexTTS2/refer/utt_0003_tired.wav +3 -0
  46. static/task/emo/IndexTTS2/refer/utt_0004_peaceful.wav +3 -0
  47. static/task/emo/IndexTTS2/refer/utt_0005_enthusiastic.wav +3 -0
  48. static/task/emo/IndexTTS2/refer/utt_0006_proud.wav +3 -0
  49. static/task/emo/IndexTTS2/refer/utt_0007_happy.wav +3 -0
  50. static/task/emo/IndexTTS2/refer/utt_0008_confused.wav +3 -0
index.html CHANGED
@@ -50,7 +50,10 @@
50
  <script>
51
  const TASKS = [
52
  { id: "multi", root: "./static/task/multi" },
53
- { id: "paral", root: "./static/task/paral" }
 
 
 
54
  ];
55
 
56
  function parseTaskYaml(source) {
@@ -120,6 +123,21 @@
120
  function createScriptCell(item, highlightParalinguistic = false) {
121
  const wrapper = document.createElement("div");
122
  wrapper.className = "script-cell";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  const texts = Array.isArray(item.target_text) ? item.target_text : [item.target_text];
124
  const speakers = Array.isArray(item.speaker) ? item.speaker : [];
125
  const contexts = Array.isArray(item.context) ? item.context : [];
@@ -188,6 +206,29 @@
188
  return card;
189
  }
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  function renderTable(task, manifests, taskRoot) {
192
  const wrapper = document.createElement("div");
193
  wrapper.className = "demo-cases";
@@ -218,7 +259,9 @@
218
  const modelItem = manifests[model].find((entry) => entry.utt === item.utt);
219
  const content = document.createElement("div");
220
  content.className = "audio-card-content";
221
- if (modelItem) {
 
 
222
  content.appendChild(createAudio(`${taskRoot}/${model}/${modelItem.output_audio}`));
223
  }
224
  audioGrid.appendChild(createAudioCard(model, content));
 
50
  <script>
51
  const TASKS = [
52
  { id: "multi", root: "./static/task/multi" },
53
+ { id: "paral", root: "./static/task/paral" },
54
+ { id: "punct", root: "./static/task/punct" },
55
+ { id: "emo", root: "./static/task/emo" },
56
+ { id: "mixed-style", root: "./static/task/mixed-style" }
57
  ];
58
 
59
  function parseTaskYaml(source) {
 
123
  function createScriptCell(item, highlightParalinguistic = false) {
124
  const wrapper = document.createElement("div");
125
  wrapper.className = "script-cell";
126
+ if (Array.isArray(item.variants)) {
127
+ const target = document.createElement("p");
128
+ target.innerHTML = formatScriptText(item.target_text, highlightParalinguistic);
129
+ wrapper.appendChild(target);
130
+
131
+ item.variants.forEach((variant) => {
132
+ const context = document.createElement("p");
133
+ context.className = "variant-context";
134
+ context.innerHTML = `<strong>${escapeHtml(variant.utt)}</strong> ${escapeHtml(variant.context)}`;
135
+ wrapper.appendChild(context);
136
+ });
137
+
138
+ return wrapper;
139
+ }
140
+
141
  const texts = Array.isArray(item.target_text) ? item.target_text : [item.target_text];
142
  const speakers = Array.isArray(item.speaker) ? item.speaker : [];
143
  const contexts = Array.isArray(item.context) ? item.context : [];
 
206
  return card;
207
  }
208
 
209
+ function createVariantAudioCell(modelItem, taskRoot, model) {
210
+ const wrapper = document.createElement("div");
211
+ wrapper.className = "variant-audio-list";
212
+
213
+ if (!modelItem || !Array.isArray(modelItem.variants)) {
214
+ return wrapper;
215
+ }
216
+
217
+ modelItem.variants.forEach((variant) => {
218
+ const row = document.createElement("div");
219
+ row.className = "variant-audio-row";
220
+
221
+ const label = document.createElement("span");
222
+ label.textContent = variant.utt;
223
+
224
+ row.appendChild(label);
225
+ row.appendChild(createAudio(`${taskRoot}/${model}/${variant.output_audio}`));
226
+ wrapper.appendChild(row);
227
+ });
228
+
229
+ return wrapper;
230
+ }
231
+
232
  function renderTable(task, manifests, taskRoot) {
233
  const wrapper = document.createElement("div");
234
  wrapper.className = "demo-cases";
 
259
  const modelItem = manifests[model].find((entry) => entry.utt === item.utt);
260
  const content = document.createElement("div");
261
  content.className = "audio-card-content";
262
+ if (modelItem && Array.isArray(modelItem.variants)) {
263
+ content.appendChild(createVariantAudioCell(modelItem, taskRoot, model));
264
+ } else if (modelItem) {
265
  content.appendChild(createAudio(`${taskRoot}/${model}/${modelItem.output_audio}`));
266
  }
267
  audioGrid.appendChild(createAudioCard(model, content));
static/css/index.css CHANGED
@@ -244,6 +244,11 @@ body {
244
  content: ")";
245
  }
246
 
 
 
 
 
 
247
  .paralinguistic-cue {
248
  font-style: italic;
249
  font-weight: 700;
@@ -262,6 +267,19 @@ body {
262
  font-weight: 700;
263
  }
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  audio {
266
  display: block;
267
  width: 100%;
 
244
  content: ")";
245
  }
246
 
247
+ .variant-context {
248
+ color: var(--muted);
249
+ font-size: 0.88rem;
250
+ }
251
+
252
  .paralinguistic-cue {
253
  font-style: italic;
254
  font-weight: 700;
 
267
  font-weight: 700;
268
  }
269
 
270
+ .variant-audio-list {
271
+ display: grid;
272
+ gap: 0.6rem;
273
+ }
274
+
275
+ .variant-audio-row span {
276
+ display: block;
277
+ margin-bottom: 0.28rem;
278
+ color: var(--muted);
279
+ font-size: 0.82rem;
280
+ font-weight: 700;
281
+ }
282
+
283
  audio {
284
  display: block;
285
  width: 100%;
static/task/emo/CosyVoice3/manifest.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wav,target_path,target_text,target_context,reference_path,reference_text,reference_context,speaker,generate_path
2
+ utt_0001_fear,,别、别过来……我们可以好好说,求你了,我真的没做什么,别靠近我,好吗?,害怕,refer/utt_0001_fear.wav,小偷却一点也不气馁,继续在抽屉里翻找。,,,sample/utt_0001_fear.wav
3
+ utt_0002_nervous,,我……我不是想拖延,我只是——能不能再给我一点时间?我怕说错话……真的,我现在脑子有点乱,手也在抖。,紧张,refer/utt_0002_nervous.wav,那么比黄光波长更短的绿光蓝光,和紫光为什么不用呢。,,,sample/utt_0002_nervous.wav
4
+ utt_0003_tired,,先别催了,给我两分钟喘口气……唉,昨晚基本没睡,脑子像糊住了。让我坐一下,喝口水,等我缓过来再说行吗?,疲惫,refer/utt_0003_tired.wav,因为有可能用老规矩付佣金。,,,sample/utt_0003_tired.wav
5
+ utt_0004_peaceful,,嗯,别急,我们就按自己的步子来。先坐一会儿,喝口温水——现在这样就挺好。,平静,refer/utt_0004_peaceful.wav,每年二百一十万儿童,死于装修污染。,,,sample/utt_0004_peaceful.wav
6
+ utt_0005_enthusiastic,,太给力了!就按这个来!我现在就去把人叫上、把材料备好,你把时间敲定——十分钟后咱们直接开干!,热情,refer/utt_0005_enthusiastic.wav,然而阿卡显然已经拿定主意,要援救黑老鼠。,,,sample/utt_0005_enthusiastic.wav
7
+ utt_0006_proud,,这块交给我吧,我已经把方案推了三遍,坑都踩过了。你只管等结果,今晚给你一个漂亮的答复——放心,看我的。,自信,refer/utt_0006_proud.wav,公寓四周的街道上,都布设了警卫暗哨。,,,sample/utt_0006_proud.wav
8
+ utt_0007_happy,,太好了!我就说肯定没问题的!走走走,今晚我请客,必须好好庆祝一下!,开心,refer/utt_0007_happy.wav,火精灵又出现了,猴腿,帕克挠挠头。,,,sample/utt_0007_happy.wav
9
+ utt_0008_confused,,等一下,我有点没听明白……你说的‘那个’是现在要做,还是等会儿?为啥跟我理解的不太一样啊?,困惑,refer/utt_0008_confused.wav,梁永祥又如何与创业挑战面对面对决。,,,sample/utt_0008_confused.wav
10
+ utt_0009_surprise,,等、等等,这是真的?你别逗我——我刚才没看错吧?!,惊讶,refer/utt_0009_surprise.wav,他在位于奥格登市郊外的家族土地上养了。,,,sample/utt_0009_surprise.wav
11
+ utt_0010_worried,,对不起我又给你打了……可是电话一直不通,你没事吧?能不能回我一下,好吗?就发个字也行,我真的有点慌。,担忧,refer/utt_0010_worried.wav,咪咪想抓住它,可转了一百圈还是没抓住。,,,sample/utt_0010_worried.wav
12
+ utt_0011_sadness,,我以为会好起来的……结果还是没撑住。对不起,我现在真的不想说话,给我一点时间,好吗?,悲伤,refer/utt_0011_sadness.wav,把铺盖给抖抖,铺上白床单,一会我就来睡。,,,sample/utt_0011_sadness.wav
13
+ utt_0012_frustration,,我真的是尽力了,可还是搞不定……唉,怎么就差一点点呢?算了,你们先说吧,我现在脑子一团糟。,沮丧,refer/utt_0012_frustration.wav,鼓励优秀电影创作者,及优良影片。,,,sample/utt_0012_frustration.wav
14
+ utt_0013_shameful,,我……我不是那个意思啦,别一直看我嘛。嗯……要不我们改天再说?,害羞,refer/utt_0013_shameful.wav,美父亲为女儿打造独特万圣节礼物,形似雷云。,,,sample/utt_0013_shameful.wav
15
+ utt_0014_disappointed,,好吧,我懂了。我以为这次会不一样,结果……还是这样。算了,你别解释了,我没事。,失望,refer/utt_0014_disappointed.wav,总是要点缀三两女侠,一二胭脂。,,,sample/utt_0014_disappointed.wav
16
+ utt_0015_anger,,你这是在耍我吗?!说好的呢?别跟我打太极,我现在就要一个痛快的说法——要么立刻兑现,要么别在这儿装无辜!,愤怒,refer/utt_0015_anger.wav,才出现了,包含各种冬季运动项目的冬季奥运会。,,,sample/utt_0015_anger.wav
17
+ utt_0016_disgust,,呃……别靠近我行吗?这味儿真的受不了,把它拿走,好吗?我现在特别不舒服。,不快,refer/utt_0016_disgust.wav,多云有阵雨,暴雷有大风。,,,sample/utt_0016_disgust.wav
18
+ utt_0017_contempt,,呵——就这?别把小把戏端到我面前,挺丢人的。省省吧,我没空陪你演。,鄙视,refer/utt_0017_contempt.wav,花草茶的口味,一般比普通的茶叶要苦一些。,,,sample/utt_0017_contempt.wav
19
+ utt_0018_pity,,唉……别急,慢慢来,好吗?我在这儿。先擦擦眼睛,要不要喝口水,或者我们出去透透气?,同情,refer/utt_0018_pity.wav,我吃了点燕麦片,煎鸡蛋,还喝了点橙汁。,,,sample/utt_0018_pity.wav
static/task/emo/CosyVoice3/manifest.jsonl ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"utt": "utt_0001_fear", "target_text": "别、别过来……我们可以好好说,求你了,我真的没做什么,别靠近我,好吗?", "speaker": "", "output_audio": "sample/utt_0001_fear.wav"}
2
+ {"utt": "utt_0002_nervous", "target_text": "我……我不是想拖延,我只是——能不能再给我一点时间?我怕说错话……真的,我现在脑子有点乱,手也在抖。", "speaker": "", "output_audio": "sample/utt_0002_nervous.wav"}
3
+ {"utt": "utt_0003_tired", "target_text": "先别催了,给我两分钟喘口气……唉,昨晚基本没睡,脑子像糊住了。让我坐一下,喝口水,等我缓过来再说行吗?", "speaker": "", "output_audio": "sample/utt_0003_tired.wav"}
4
+ {"utt": "utt_0004_peaceful", "target_text": "嗯,别急,我们就按自己的步子来。先坐一会儿,喝口温水——现在这样就挺好。", "speaker": "", "output_audio": "sample/utt_0004_peaceful.wav"}
5
+ {"utt": "utt_0005_enthusiastic", "target_text": "太给力了!就按这个来!我现在就去把人叫上、把材料备好,你把时间敲定——十分钟后咱们直接开干!", "speaker": "", "output_audio": "sample/utt_0005_enthusiastic.wav"}
6
+ {"utt": "utt_0006_proud", "target_text": "这块交给我吧,我已经把方案推了三遍,坑都踩过了。你只管等结果,今晚给你一个漂亮的答复——放心,看我的。", "speaker": "", "output_audio": "sample/utt_0006_proud.wav"}
7
+ {"utt": "utt_0007_happy", "target_text": "太好了!我就说肯定没问题的!走走走,今晚我请客,必须好好庆祝一下!", "speaker": "", "output_audio": "sample/utt_0007_happy.wav"}
8
+ {"utt": "utt_0008_confused", "target_text": "等一下,我有点没听明白……你说的‘那个’是现在要做,还是等会儿?为啥跟我理解的不太一样啊?", "speaker": "", "output_audio": "sample/utt_0008_confused.wav"}
9
+ {"utt": "utt_0009_surprise", "target_text": "等、等等,这是真的?你别逗我——我刚才没看错吧?!", "speaker": "", "output_audio": "sample/utt_0009_surprise.wav"}
10
+ {"utt": "utt_0010_worried", "target_text": "对不起我又给你打了……可是电话一直不通,你没事吧?能不能回我一下,好吗?就发个字也行,我真的有点慌。", "speaker": "", "output_audio": "sample/utt_0010_worried.wav"}
11
+ {"utt": "utt_0011_sadness", "target_text": "我以为会好起来的……结果还是没撑住。对不起,我现在真的不想说话,给我一点时间,好吗?", "speaker": "", "output_audio": "sample/utt_0011_sadness.wav"}
12
+ {"utt": "utt_0012_frustration", "target_text": "我真的是尽力了,可还是搞不定……唉,怎么就差一点点呢?算了,你们先说吧,我现在脑子一团糟。", "speaker": "", "output_audio": "sample/utt_0012_frustration.wav"}
13
+ {"utt": "utt_0013_shameful", "target_text": "我……我不是那个意思啦,别一直看我嘛。嗯……要不我们改天再说?", "speaker": "", "output_audio": "sample/utt_0013_shameful.wav"}
14
+ {"utt": "utt_0014_disappointed", "target_text": "好吧,我懂了。我以为这次会不一样,结果……还是这样。算了,你别解释了,我没事。", "speaker": "", "output_audio": "sample/utt_0014_disappointed.wav"}
15
+ {"utt": "utt_0015_anger", "target_text": "你这是在耍我吗?!说好的呢?别跟我打太极,我现在就要一个痛快的说法——要么立刻兑现,要么别在这儿装无辜!", "speaker": "", "output_audio": "sample/utt_0015_anger.wav"}
16
+ {"utt": "utt_0016_disgust", "target_text": "呃……别靠近我行吗?这味儿真的受不了,把它拿走,好吗?我现在特别不舒服。", "speaker": "", "output_audio": "sample/utt_0016_disgust.wav"}
17
+ {"utt": "utt_0017_contempt", "target_text": "呵——就这?别把小把戏端到我面前,挺丢人的。省省吧,我没空陪你演。", "speaker": "", "output_audio": "sample/utt_0017_contempt.wav"}
18
+ {"utt": "utt_0018_pity", "target_text": "唉……别急,慢慢来,好吗?我在这儿。先擦擦眼睛,要不要喝口水,或者我们出去透透气?", "speaker": "", "output_audio": "sample/utt_0018_pity.wav"}
static/task/{multi/MOSS-TTSD-V1/refer/case32__prompt0.wav → emo/CosyVoice3/refer/utt_0001_fear.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba7331a856eafd70fb06db9a78aa06495ccf6ede7f10c8ddeefffb5f76a12766
3
- size 210476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631608f5c8b931ece1d45adc7f40a3b3b0ae2ec056a8a08a3565b04cc5750a4b
3
+ size 243244
static/task/{multi/MOSS-TTSD-V1/refer/case32__prompt1.wav → emo/CosyVoice3/refer/utt_0002_nervous.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c501163c355e7648a6e6fc918c32c3fc62f781ed80ccc13b16c9178b1e0ae0f
3
- size 225580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1280a95715ecdd2dfb3da9eb303f67e9a87a23071a06668416cb0b9e156a86e0
3
+ size 289324
static/task/{multi/Ours(with instruction)/refer/case32__prompt1.wav → emo/CosyVoice3/refer/utt_0003_tired.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c501163c355e7648a6e6fc918c32c3fc62f781ed80ccc13b16c9178b1e0ae0f
3
- size 225580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8321bfebcc27f54737deeb4d0f2498f53de45228b5d4bc7b6d712a9a7161ab52
3
+ size 198188
static/task/{multi/Ours(with instruction)/refer/case32__prompt0.wav → emo/CosyVoice3/refer/utt_0004_peaceful.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba7331a856eafd70fb06db9a78aa06495ccf6ede7f10c8ddeefffb5f76a12766
3
- size 210476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9bccc3feed3d89250b23c3117fecf7e4fdada04f08002dad23fc7fcd629256
3
+ size 195628
static/task/emo/CosyVoice3/refer/utt_0005_enthusiastic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7954f48a79e4478750d07eb88a4b88a61fde5476e8d5bd0a45dbdbe5b02c6f4b
3
+ size 255788
static/task/emo/CosyVoice3/refer/utt_0006_proud.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa639a29c8f644ee642b24b62db45e7a1cd27b39a3fb478db5dcfdabce7a028
3
+ size 246060
static/task/emo/CosyVoice3/refer/utt_0007_happy.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d17b104e8a6a1403e0b53fd669dc021ed8c223f35fa414750c3f38ca8ab549c
3
+ size 196652
static/task/emo/CosyVoice3/refer/utt_0008_confused.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db9ed60dac2dbfd7f5cb0cd38a9392451263e0c41a9b360b6e7fadb6a57aec2
3
+ size 216108
static/task/emo/CosyVoice3/refer/utt_0009_surprise.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98143363f8beb7938c4f6d8344f07fbd2b5d40bd0d8214c9637ff70012563d9e
3
+ size 206636
static/task/emo/CosyVoice3/refer/utt_0010_worried.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739ab2183a65576f14eec6f72ea7de6954e02014491b2876b1c528feea4ba5bc
3
+ size 214316
static/task/emo/CosyVoice3/refer/utt_0011_sadness.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd3b5fb1c50e4ed1e2f4124b54123467bf0d66a41c0bfb1f3485306fc255c08
3
+ size 224556
static/task/emo/CosyVoice3/refer/utt_0012_frustration.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a8991e8065a7901318a77bb7abbc054eebd81cd40afe25bb9fbb2dd15ee4709
3
+ size 195372
static/task/emo/CosyVoice3/refer/utt_0013_shameful.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a81ba8e187276ac58944acd5e44e49eda57fe41830b876a780bb94ceb4c344c
3
+ size 242476
static/task/emo/CosyVoice3/refer/utt_0014_disappointed.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472de0782c2f87d4cffe450d6e33d0b34a2e34224d813199194a5e7a234335d5
3
+ size 267052
static/task/emo/CosyVoice3/refer/utt_0015_anger.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c475e50b7fe1b24d7de7fa406ea0077944f2d19d9b36faa548f1fc974bd069de
3
+ size 226604
static/task/emo/CosyVoice3/refer/utt_0016_disgust.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a104203a1daa2ff2df19f9fb22658d2acc4b6adff98c91d9cc745fe461855554
3
+ size 200060
static/task/emo/CosyVoice3/refer/utt_0017_contempt.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:435f496aad99ce446c7c0a5fd6fbecda3034a7754e096cb1c8ad1b608723c5bf
3
+ size 210988
static/task/emo/CosyVoice3/refer/utt_0018_pity.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6316b34f2e39fb0cb0d4db0518be5c9ab621d71ce99a9aeefadaa6200ca9d53
3
+ size 209196
static/task/emo/CosyVoice3/sample/utt_0001_fear.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934e93493208e5947d425ceb3eab804120886a054cbd640c3fd80622b92390a8
3
+ size 480078
static/task/emo/CosyVoice3/sample/utt_0002_nervous.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d121cff911e5bbd41bdd6b5983d54598e9d1a8c1a59c84b2d05afb3407b408e
3
+ size 570318
static/task/emo/CosyVoice3/sample/utt_0003_tired.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220611609f13efad3a624aadcb5742e29213a5500f6c218788a2df3ac4918a3d
3
+ size 560718
static/task/emo/CosyVoice3/sample/utt_0004_peaceful.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a22f0b2fea000f0f5c6c6fa621fafa88dcd86cd77367ea3539e261aa4810a68
3
+ size 361038
static/task/emo/CosyVoice3/sample/utt_0005_enthusiastic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704fec3832d9f7716a6c7566f21840ee7440f2394cd4d85541f563afb092609e
3
+ size 384078
static/task/emo/CosyVoice3/sample/utt_0006_proud.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1656dc4aad49fee8890b0c79b3b0dc11cd6a4f90a637797e9a7aa40c895aad
3
+ size 416718
static/task/emo/CosyVoice3/sample/utt_0007_happy.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd8bc572fe809ec2c972c9ceb821a19c6befaaac9b8f7bfa8b8232b88798190a
3
+ size 299598
static/task/emo/CosyVoice3/sample/utt_0008_confused.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15fdcc938f76bc807f7d0c4a20d51364d2f7592c4258e404e95f735bc102baa4
3
+ size 399438
static/task/emo/CosyVoice3/sample/utt_0009_surprise.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e798d2d5d43ccde2eb7d1280462eccf96809bacfd9bc957b160436d17a0da8d
3
+ size 263118
static/task/emo/CosyVoice3/sample/utt_0010_worried.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6bdc03b95dd25bee7256dc38d6dd9a709289f33cfc0f4440fe7f8872e09edf
3
+ size 591438
static/task/emo/CosyVoice3/sample/utt_0011_sadness.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3465babecbecb9e9a4951c099f34ffe111964153423d3776a9ab9d4a0c0dd003
3
+ size 485838
static/task/emo/CosyVoice3/sample/utt_0012_frustration.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f21935940cd5212fae46952511bd00d0af319ee9fa511ae0d1207bc27183d5e
3
+ size 435918
static/task/emo/CosyVoice3/sample/utt_0013_shameful.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c1b810c85c45fe0a1eb0914c6b3af0850268591007e06c4adaadad726d0283
3
+ size 432078
static/task/emo/CosyVoice3/sample/utt_0014_disappointed.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157abd61be679e98b3c1bedb5c0f6ee0a13564a8c637cb5a689978a08c820b38
3
+ size 503118
static/task/emo/CosyVoice3/sample/utt_0015_anger.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a36d00d84506bbf6a164e55e79bea7c678fd66bd451533b5d1dee5b41392535
3
+ size 487758
static/task/emo/CosyVoice3/sample/utt_0016_disgust.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af45349fd786ee385ec333dbbe4bd1861f7c7e9a3f894f938ff6bcefa862e90
3
+ size 401358
static/task/emo/CosyVoice3/sample/utt_0017_contempt.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25b7e56a72b14d9868e8797f728323c6c576c87136e05af7943c1b78dafcb3f
3
+ size 370638
static/task/emo/CosyVoice3/sample/utt_0018_pity.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a9190aa30d40decacb52dff85f2a9856e2c14d73b5f30512babaa3c5e7dce8
3
+ size 458958
static/task/emo/IndexTTS2/manifest.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wav,target_path,target_text,target_context,reference_path,reference_text,reference_context,speaker,generate_path
2
+ utt_0001_fear,,别、别过来……我们可以好好说,求你了,我真的没做什么,别靠近我,好吗?,害怕,refer/utt_0001_fear.wav,小偷却一点也不气馁,继续在抽屉里翻找。,,,sample/utt_0001_fear.wav
3
+ utt_0002_nervous,,我……我不是想拖延,我只是——能不能再给我一点时间?我怕说错话……真的,我现在脑子有点乱,手也在抖。,紧张,refer/utt_0002_nervous.wav,那么比黄光波长更短的绿光蓝光,和紫光为什么不用呢。,,,sample/utt_0002_nervous.wav
4
+ utt_0003_tired,,先别催了,给我两分钟喘口气……唉,昨晚基本没睡,脑子像糊住了。让我坐一下,喝口水,等我缓过来再说行吗?,疲惫,refer/utt_0003_tired.wav,因为有可能用老规矩付佣金。,,,sample/utt_0003_tired.wav
5
+ utt_0004_peaceful,,嗯,别急,我们就按自己的步子来。先坐一会儿,喝口温水——现在这样就挺好。,平静,refer/utt_0004_peaceful.wav,每年二百一十万儿童,死于装修污染。,,,sample/utt_0004_peaceful.wav
6
+ utt_0005_enthusiastic,,太给力了!就按这个来!我现在就去把人叫上、把材料备好,你把时间敲定——十分钟后咱们直接开干!,热情,refer/utt_0005_enthusiastic.wav,然而阿卡显然已经拿定主意,要援救黑老鼠。,,,sample/utt_0005_enthusiastic.wav
7
+ utt_0006_proud,,这块交给我吧,我已经把方案推了三遍,坑都踩过了。你只管等结果,今晚给你一个漂亮的答复——放心,看我的。,自信,refer/utt_0006_proud.wav,公寓四周的街道上,都布设了警卫暗哨。,,,sample/utt_0006_proud.wav
8
+ utt_0007_happy,,太好了!我就说肯定没问题的!走走走,今晚我请客,必须好好庆祝一下!,开心,refer/utt_0007_happy.wav,火精灵又出现了,猴腿,帕克挠挠头。,,,sample/utt_0007_happy.wav
9
+ utt_0008_confused,,等一下,我有点没听明白……你说的‘那个’是现在要做,还是等会儿?为啥跟我理解的不太一样啊?,困惑,refer/utt_0008_confused.wav,梁永祥又如何与创业挑战面对面对决。,,,sample/utt_0008_confused.wav
10
+ utt_0009_surprise,,等、等等,这是真的?你别逗我——我刚才没看错吧?!,惊讶,refer/utt_0009_surprise.wav,他在位于奥格登市郊外的家族土地上养了。,,,sample/utt_0009_surprise.wav
11
+ utt_0010_worried,,对不起我又给你打了……可是电话一直不通,你没事吧?能不能回我一下,好吗?就发个字也行,我真的有点慌。,担忧,refer/utt_0010_worried.wav,咪咪想抓住它,可转了一百圈还是没抓住。,,,sample/utt_0010_worried.wav
12
+ utt_0011_sadness,,我以为会好起来的……结果还是没撑住。对不起,我现在真的不想说话,给我一点时间,好吗?,悲伤,refer/utt_0011_sadness.wav,把铺盖给抖抖,铺上白床单,一会我就来睡。,,,sample/utt_0011_sadness.wav
13
+ utt_0012_frustration,,我真的是尽力了,可还是搞不定……唉,怎么就差一点点呢?算了,你们先说吧,我现在脑子一团糟。,沮丧,refer/utt_0012_frustration.wav,鼓励优秀电影创作者,及优良影片。,,,sample/utt_0012_frustration.wav
14
+ utt_0013_shameful,,我……我不是那个意思啦,别一直看我嘛。嗯……要不我们改天再说?,害羞,refer/utt_0013_shameful.wav,美父亲为女儿打造独特万圣节礼物,形似雷云。,,,sample/utt_0013_shameful.wav
15
+ utt_0014_disappointed,,好吧,我懂了。我以为这次会不一样,结果……还是这样。算了,你别解释了,我没事。,失望,refer/utt_0014_disappointed.wav,总是要点缀三两女侠,一二胭脂。,,,sample/utt_0014_disappointed.wav
16
+ utt_0015_anger,,你这是在耍我吗?!说好的呢?别跟我打太极,我现在就要一个痛快的说法——要么立刻兑现,要么别在这儿装无辜!,愤怒,refer/utt_0015_anger.wav,才出现了,包含各种冬季运动项目的冬季奥运会。,,,sample/utt_0015_anger.wav
17
+ utt_0016_disgust,,呃……别靠近我行吗?这味儿真的受不了,把它拿走,好吗?我现在特别不舒服。,不快,refer/utt_0016_disgust.wav,多云有阵雨,暴雷有大风。,,,sample/utt_0016_disgust.wav
18
+ utt_0017_contempt,,呵——就这?别把小把戏端到我面前,挺丢人的。省省吧,我没空陪你演。,鄙视,refer/utt_0017_contempt.wav,花草茶的口味,一般比普通的茶叶要苦一些。,,,sample/utt_0017_contempt.wav
19
+ utt_0018_pity,,唉……别急,慢慢来,好吗?我在这儿。先擦擦眼睛,要不要喝口水,或者我们出去透透气?,同情,refer/utt_0018_pity.wav,我吃了点燕麦片,煎鸡蛋,还喝了点橙汁。,,,sample/utt_0018_pity.wav
static/task/emo/IndexTTS2/manifest.jsonl ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"utt": "utt_0001_fear", "target_text": "别、别过来……我们可以好好说,求你了,我真的没做什么,别靠近我,好吗?", "speaker": "", "output_audio": "sample/utt_0001_fear.wav"}
2
+ {"utt": "utt_0002_nervous", "target_text": "我……我不是想拖延,我只是——能不能再给我一点时间?我怕说错话……真的,我现在脑子有点乱,手也在抖。", "speaker": "", "output_audio": "sample/utt_0002_nervous.wav"}
3
+ {"utt": "utt_0003_tired", "target_text": "先别催了,给我两分钟喘口气……唉,昨晚基本没睡,脑子像糊住了。让我坐一下,喝口水,等我缓过来再说行吗?", "speaker": "", "output_audio": "sample/utt_0003_tired.wav"}
4
+ {"utt": "utt_0004_peaceful", "target_text": "嗯,别急,我们就按自己的步子来。先坐一会儿,喝口温水——现在这样就挺好。", "speaker": "", "output_audio": "sample/utt_0004_peaceful.wav"}
5
+ {"utt": "utt_0005_enthusiastic", "target_text": "太给力了!就按这个来!我现在就去把人叫上、把材料备好,你把时间敲定——十分钟后咱们直接开干!", "speaker": "", "output_audio": "sample/utt_0005_enthusiastic.wav"}
6
+ {"utt": "utt_0006_proud", "target_text": "这块交给我吧,我已经把方案推了三遍,坑都踩过了。你只管等结果,今晚给你一个漂亮的答复——放心,看我的。", "speaker": "", "output_audio": "sample/utt_0006_proud.wav"}
7
+ {"utt": "utt_0007_happy", "target_text": "太好了!我就说肯定没问题的!走走走,今晚我请客,必须好好庆祝一下!", "speaker": "", "output_audio": "sample/utt_0007_happy.wav"}
8
+ {"utt": "utt_0008_confused", "target_text": "等一下,我有点没听明白……你说的‘那个’是现在要做,还是等会儿?为啥跟我理解的不太一样啊?", "speaker": "", "output_audio": "sample/utt_0008_confused.wav"}
9
+ {"utt": "utt_0009_surprise", "target_text": "等、等等,这是真的?你别逗我——我刚才没看错吧?!", "speaker": "", "output_audio": "sample/utt_0009_surprise.wav"}
10
+ {"utt": "utt_0010_worried", "target_text": "对不起我又给你打了……可是电话一直不通,你没事吧?能不能回我一下,好吗?就发个字也行,我真的有点慌。", "speaker": "", "output_audio": "sample/utt_0010_worried.wav"}
11
+ {"utt": "utt_0011_sadness", "target_text": "我以为会好起来的……结果还是没撑住。对不起,我现在真的不想说话,给我一点时间,好吗?", "speaker": "", "output_audio": "sample/utt_0011_sadness.wav"}
12
+ {"utt": "utt_0012_frustration", "target_text": "我真的是尽力了,可还是搞不定……唉,怎么就差一点点呢?算了,你们先说吧,我现在脑子一团糟。", "speaker": "", "output_audio": "sample/utt_0012_frustration.wav"}
13
+ {"utt": "utt_0013_shameful", "target_text": "我……我不是那个意思啦,别一直看我嘛。嗯……要不我们改天再说?", "speaker": "", "output_audio": "sample/utt_0013_shameful.wav"}
14
+ {"utt": "utt_0014_disappointed", "target_text": "好吧,我懂了。我以为这次会不一样,结果……还是这样。算了,你别解释了,我没事。", "speaker": "", "output_audio": "sample/utt_0014_disappointed.wav"}
15
+ {"utt": "utt_0015_anger", "target_text": "你这是在耍我吗?!说好的呢?别跟我打太极,我现在就要一个痛快的说法——要么立刻兑现,要么别在这儿装无辜!", "speaker": "", "output_audio": "sample/utt_0015_anger.wav"}
16
+ {"utt": "utt_0016_disgust", "target_text": "呃……别靠近我行吗?这味儿真的受不了,把它拿走,好吗?我现在特别不舒服。", "speaker": "", "output_audio": "sample/utt_0016_disgust.wav"}
17
+ {"utt": "utt_0017_contempt", "target_text": "呵——就这?别把小把戏端到我面前,挺丢人的。省省吧,我没空陪你演。", "speaker": "", "output_audio": "sample/utt_0017_contempt.wav"}
18
+ {"utt": "utt_0018_pity", "target_text": "唉……别急,慢慢来,好吗?我在这儿。先擦擦眼睛,要不要喝口水,或者我们出去透透气?", "speaker": "", "output_audio": "sample/utt_0018_pity.wav"}
static/task/emo/IndexTTS2/refer/utt_0001_fear.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631608f5c8b931ece1d45adc7f40a3b3b0ae2ec056a8a08a3565b04cc5750a4b
3
+ size 243244
static/task/emo/IndexTTS2/refer/utt_0002_nervous.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1280a95715ecdd2dfb3da9eb303f67e9a87a23071a06668416cb0b9e156a86e0
3
+ size 289324
static/task/emo/IndexTTS2/refer/utt_0003_tired.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8321bfebcc27f54737deeb4d0f2498f53de45228b5d4bc7b6d712a9a7161ab52
3
+ size 198188
static/task/emo/IndexTTS2/refer/utt_0004_peaceful.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9bccc3feed3d89250b23c3117fecf7e4fdada04f08002dad23fc7fcd629256
3
+ size 195628
static/task/emo/IndexTTS2/refer/utt_0005_enthusiastic.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7954f48a79e4478750d07eb88a4b88a61fde5476e8d5bd0a45dbdbe5b02c6f4b
3
+ size 255788
static/task/emo/IndexTTS2/refer/utt_0006_proud.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa639a29c8f644ee642b24b62db45e7a1cd27b39a3fb478db5dcfdabce7a028
3
+ size 246060
static/task/emo/IndexTTS2/refer/utt_0007_happy.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d17b104e8a6a1403e0b53fd669dc021ed8c223f35fa414750c3f38ca8ab549c
3
+ size 196652
static/task/emo/IndexTTS2/refer/utt_0008_confused.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db9ed60dac2dbfd7f5cb0cd38a9392451263e0c41a9b360b6e7fadb6a57aec2
3
+ size 216108