File size: 11,118 Bytes
2e90754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435f76d
2e90754
 
 
 
 
435f76d
6586b49
 
 
a4e61d9
6586b49
 
 
 
2e90754
 
 
 
 
 
87bf86d
 
f69c8ff
 
 
 
87bf86d
2e90754
 
87bf86d
2e90754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87bf86d
 
 
 
2e90754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4552c2a
 
 
 
 
f0fe2fc
4552c2a
 
 
2e90754
 
f69c8ff
 
 
 
 
a4e61d9
f69c8ff
 
a4e61d9
f69c8ff
 
 
 
 
 
87bf86d
 
5d8c977
2e90754
87bf86d
2e90754
87bf86d
e67d3e6
dc40f4d
 
4552c2a
87bf86d
4552c2a
87bf86d
4552c2a
87bf86d
2e90754
 
 
 
 
 
 
 
 
 
87bf86d
 
2e90754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bede684
 
 
 
 
 
 
9f2b52f
 
bede684
9f2b52f
 
 
 
bede684
 
 
089c290
 
f69c8ff
 
 
 
 
 
 
 
a4e61d9
f69c8ff
 
 
 
a4e61d9
f69c8ff
 
 
 
 
 
 
 
 
87bf86d
435f76d
bede684
4552c2a
2e90754
 
bede684
 
0fd02ae
bede684
 
 
 
 
 
4552c2a
bede684
 
 
 
87bf86d
 
 
bede684
87bf86d
2e90754
 
 
9f2b52f
bede684
f69c8ff
 
 
9f2b52f
2e90754
bede684
2e90754
 
bede684
 
2e90754
 
435f76d
2e90754
 
435f76d
 
 
 
 
 
 
 
87bf86d
 
435f76d
 
 
 
87bf86d
 
435f76d
 
87bf86d
 
 
2e90754
 
 
87bf86d
2e90754
 
87bf86d
 
 
435f76d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87bf86d
 
 
 
435f76d
 
 
 
2e90754
 
 
435f76d
 
2e90754
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description" content="PromptDialog multi-speaker dialogue audio demo.">
  <meta name="keywords" content="PromptDialog, text-to-speech, dialogue, audio demo">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>PromptDialog</title>

  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
</head>
<body>
  <main>
    <section class="prompt-hero">
      <div class="container wide-container">
        <h1 class="publication-title">PromptDialog</h1>
      </div>
    </section>

    <section class="section demo-section" id="audio-demo">
      <div class="container wide-container">
        <nav class="task-directory" id="task-directory" aria-label="Audio demo tasks">
          <p class="loading">Loading task directory...</p>
        </nav>

        <div class="task-list" id="task-list" aria-live="polite">
          <div class="task-card">
            <p class="loading">Loading audio demos...</p>
          </div>
        </div>
      </div>
    </section>
  </main>

  <script>
    // Demo tasks shown on the page, in display order. `id` is used to build each
    // card's `task-<id>` element id and the matching directory link; `root` is
    // the directory from which loadTask fetches `task.yaml` and each model's
    // `manifest.jsonl`.
    const TASKS = [
      { id: "multi", root: "./static/task/multi" },
      { id: "paral", root: "./static/task/paral" },
      { id: "punct", root: "./static/task/punct" },
      { id: "emo", root: "./static/task/emo" },
      { id: "mixed-style", root: "./static/task/mixed-style" }
    ];

    /**
     * Parse the small YAML subset used by `task.yaml` files.
     *
     * Supported syntax: `key: value` scalars (optionally quoted), `true`/`false`
     * booleans, block lists (`key:` followed by `- item` lines), inline flow
     * lists (`key: [a, b]`), blank lines and `#` comment lines. Lines are
     * trimmed before matching, so indentation carries no meaning and nested
     * mappings are not supported. Inline list items are split on every comma,
     * so an item may not itself contain a comma.
     *
     * @param {string} source - Raw text of a task.yaml file.
     * @returns {{name: string, description: string, show_reference: boolean, models: string[]}}
     *   The parsed task. Keys absent from `source` keep their defaults; scalar
     *   keys not listed here are added to the object as-is.
     */
    function parseTaskYaml(source) {
      const task = { name: "", description: "", show_reference: true, models: [] };
      const stripQuotes = (text) => text.trim().replace(/^["']|["']$/g, "");
      let listKey = "";

      source.split(/\r?\n/).forEach((rawLine) => {
        const line = rawLine.trim();
        if (!line || line.startsWith("#")) return;

        // A "- item" line belongs to the most recently opened block list.
        if (line.startsWith("- ") && listKey) {
          task[listKey].push(stripQuotes(line.slice(2)));
          return;
        }

        const match = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
        if (!match) return;

        const key = match[1];
        const rawValue = match[2].trim();

        // List-valued keys are handled before scalars so that a stray scalar
        // (e.g. `models: true`) can never replace the array callers iterate.
        if (Array.isArray(task[key])) {
          const inlineList = rawValue.match(/^\[(.*)\]$/);
          if (inlineList) {
            inlineList[1]
              .split(",")
              .map(stripQuotes)
              .filter((entry) => entry !== "")
              .forEach((entry) => task[key].push(entry));
            listKey = "";
          } else {
            listKey = key;
          }
          return;
        }

        const value = stripQuotes(rawValue);
        task[key] = value === "true" || value === "false" ? value === "true" : value;
        listKey = "";
      });

      return task;
    }

    /**
     * Fetch a resource and resolve with its body as text.
     *
     * @param {string} path - URL or relative path passed to `fetch`.
     * @returns {Promise<string>} The response body.
     * @throws {Error} When the response's `ok` flag is false.
     */
    async function fetchText(path) {
      const reply = await fetch(path);
      if (reply.ok) {
        return reply.text();
      }
      throw new Error(`Failed to load ${path}`);
    }

    /**
     * Parse JSON Lines text: one JSON value per non-blank line.
     *
     * @param {string} source - Raw JSONL text; lines are trimmed and blank lines skipped.
     * @returns {Array<*>} The parsed values, in file order.
     * @throws {SyntaxError} When a line is not valid JSON. The message names the
     *   1-based line number and the original parser error is kept as `cause`.
     */
    function parseJsonl(source) {
      const records = [];

      source.split(/\r?\n/).forEach((rawLine, index) => {
        const line = rawLine.trim();
        if (!line) return;

        try {
          records.push(JSON.parse(line));
        } catch (error) {
          // The bare JSON.parse message carries no position within the file,
          // which makes a broken manifest hard to locate.
          throw new SyntaxError(`Invalid JSON on line ${index + 1}: ${error.message}`, { cause: error });
        }
      });

      return records;
    }

    /**
     * Create an `<audio>` element with visible controls for the given source.
     * `preload` is set to "none".
     *
     * @param {string} src - URL assigned to the element's `src`.
     * @returns {HTMLAudioElement} The configured, detached element.
     */
    function createAudio(src) {
      return Object.assign(document.createElement("audio"), {
        controls: true,
        preload: "none",
        src
      });
    }

    /**
     * HTML-escape a script line and optionally wrap paralinguistic characters
     * in `<em class="paralinguistic-cue">`.
     *
     * @param {string} text - Raw script text.
     * @param {boolean} highlightParalinguistic - When truthy, wrap each matching character.
     * @returns {string} HTML-safe markup.
     */
    function formatScriptText(text, highlightParalinguistic) {
      const safeText = escapeHtml(text);
      return highlightParalinguistic
        ? safeText.replace(/[唉嗯哈呵嘿哼呼咳啧]/g, '<em class="paralinguistic-cue">$&</em>')
        : safeText;
    }

    /**
     * Build the script cell for one manifest item.
     *
     * Two item shapes are handled:
     *  - items with a `variants` array: one paragraph for `target_text`, then
     *    one "instructionN" paragraph per variant showing `variant.context`;
     *  - all other items: one paragraph per entry of `target_text`, prefixed
     *    with a speaker label when `speaker[i]` is present and followed by the
     *    matching `context[i]` cue with trailing punctuation removed.
     *
     * All manifest-derived text is escaped before being assigned to `innerHTML`.
     *
     * @param {object} item - Manifest entry (`target_text`, `speaker`, `context`, `variants`).
     * @param {boolean} [highlightParalinguistic=false] - Forwarded to formatScriptText.
     * @returns {HTMLDivElement} A detached `div.script-cell`.
     */
    function createScriptCell(item, highlightParalinguistic = false) {
      const wrapper = document.createElement("div");
      wrapper.className = "script-cell";

      // Manifest fields come from JSON and may be missing or non-string;
      // normalise them so the escaping helpers never receive a non-string.
      const asText = (value) => (value == null ? "" : String(value));

      if (Array.isArray(item.variants)) {
        const target = document.createElement("p");
        target.innerHTML = formatScriptText(asText(item.target_text), highlightParalinguistic);
        wrapper.appendChild(target);

        item.variants.forEach((variant, index) => {
          const context = document.createElement("p");
          context.className = "variant-context";
          context.innerHTML = `<strong>instruction${index + 1}</strong> <span class="context-cue">${escapeHtml(asText(variant?.context))}</span>`;
          wrapper.appendChild(context);
        });

        return wrapper;
      }

      const texts = Array.isArray(item.target_text) ? item.target_text : [item.target_text];
      const speakers = Array.isArray(item.speaker) ? item.speaker : [];
      const contexts = Array.isArray(item.context) ? item.context : [item.context];

      // Trailing punctuation to trim from a cue: ASCII marks plus their
      // full-width forms (U+FF0C , U+FF01 ! U+FF1F ? U+FF1B ; U+FF1A :), the
      // ideographic full stop/comma (U+3002, U+3001) and whitespace.
      const trailingPunctuation = /[,\uFF0C\u3002.!\uFF01?\uFF1F;\uFF1B:\uFF1A\u3001\s]+$/u;

      texts.forEach((text, index) => {
        const turn = document.createElement("p");
        const speaker = speakers[index];
        const cleanContext = asText(contexts[index]).replace(trailingPunctuation, "");
        const contextBadge = cleanContext ? ` <span class="context-cue">${escapeHtml(cleanContext)}</span>` : "";
        const formattedText = formatScriptText(asText(text), highlightParalinguistic);

        if (speaker != null && speaker !== "") {
          // Numeric speakers are zero-based indices shown as S1, S2, ...;
          // any other label is shown verbatim instead of rendering "SNaN".
          const speakerIndex = Number(speaker);
          const label = Number.isFinite(speakerIndex) ? `S${speakerIndex + 1}` : escapeHtml(String(speaker));
          turn.innerHTML = `<strong>[${label}]</strong> ${formattedText}${contextBadge}`;
        } else {
          turn.innerHTML = `${formattedText}${contextBadge}`;
        }
        wrapper.appendChild(turn);
      });

      return wrapper;
    }

    /**
     * Build the reference cell: one labelled audio player per prompt clip.
     *
     * @param {object} item - Manifest entry; `prompt_audio` is used only when it is an array.
     * @param {string} modelRoot - Directory prefix joined to each prompt path with "/".
     * @returns {HTMLDivElement} A detached `div.reference-cell` (empty when there are no prompts).
     */
    function createReferenceCell(item, modelRoot) {
      const cell = document.createElement("div");
      cell.className = "reference-cell";

      if (!Array.isArray(item.prompt_audio)) {
        return cell;
      }

      item.prompt_audio.forEach((promptPath, position) => {
        const caption = document.createElement("span");
        caption.textContent = `Speaker ${position + 1}`;

        const entry = document.createElement("div");
        entry.appendChild(caption);
        entry.appendChild(createAudio(`${modelRoot}/${promptPath}`));
        cell.appendChild(entry);
      });

      return cell;
    }

    /**
     * Escape the five HTML-significant characters (& < > " ') so the result can
     * be interpolated into markup assigned to `innerHTML`.
     *
     * @param {*} text - Value to escape. Non-strings are converted with
     *   `String()`; `null` and `undefined` become the empty string instead of
     *   throwing, since manifest fields may be absent or numeric.
     * @returns {string} The escaped text.
     */
    function escapeHtml(text) {
      const entities = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#039;"
      };
      return String(text ?? "").replace(/[&<>"']/g, (char) => entities[char]);
    }

    /**
     * Build a titled card: a `div.audio-card` holding a title div followed by
     * a body div.
     *
     * @param {string} label - Text shown in the card title.
     * @param {Node} [content] - Node appended to the card body when truthy.
     * @returns {HTMLDivElement} The detached card element.
     */
    function createAudioCard(label, content) {
      const makeDiv = (className) => {
        const node = document.createElement("div");
        node.className = className;
        return node;
      };

      const card = makeDiv("audio-card");

      const heading = makeDiv("audio-card-title");
      heading.textContent = label;

      const body = makeDiv("audio-card-body");
      if (content) {
        body.appendChild(content);
      }

      card.appendChild(heading);
      card.appendChild(body);
      return card;
    }

    /**
     * Build a list of labelled audio players, one per variant of a manifest item.
     *
     * @param {object} modelItem - Manifest entry; rows are produced only when `variants` is an array.
     * @param {string} taskRoot - Task directory.
     * @param {string} model - Model directory name under `taskRoot`.
     * @returns {HTMLDivElement} A detached `div.variant-audio-list` (empty when there are no variants).
     */
    function createVariantAudioCell(modelItem, taskRoot, model) {
      const list = document.createElement("div");
      list.className = "variant-audio-list";

      const variants = modelItem ? modelItem.variants : undefined;
      if (Array.isArray(variants)) {
        variants.forEach((variant, position) => {
          const caption = document.createElement("span");
          caption.textContent = `instruction${position + 1}`;

          const row = document.createElement("div");
          row.className = "variant-audio-row";
          row.appendChild(caption);
          row.appendChild(createAudio(`${taskRoot}/${model}/${variant.output_audio}`));

          list.appendChild(row);
        });
      }

      return list;
    }

    /**
     * Render every demo case of a task: the script, an optional reference
     * card, and one audio card per model.
     *
     * Cases are driven by the first model's manifest; the other models are
     * matched to each case by `utt`. A model with no entry for a case still
     * gets an (empty) card so the cards stay aligned across cases.
     *
     * @param {object} task - Parsed task (`name`, `show_reference`, `models`).
     * @param {Object<string, object[]>} manifests - Manifest entries keyed by model name.
     * @param {string} taskRoot - Task directory used to build audio URLs.
     * @returns {HTMLDivElement} A detached `div.demo-cases`; empty when the
     *   task lists no models or the first model has no manifest.
     */
    function renderTable(task, manifests, taskRoot) {
      const wrapper = document.createElement("div");
      wrapper.className = "demo-cases";
      const highlightParalinguistic = task.name === "Paralinguistic Cue Generation";

      const [primaryModel] = task.models;
      const primaryItems = manifests[primaryModel];
      if (!Array.isArray(primaryItems)) {
        // No models, or no manifest for the first one: nothing to render.
        return wrapper;
      }

      // Index each manifest by utterance id once instead of scanning it for
      // every case. The first entry per id wins, matching Array.find.
      const entriesByModel = new Map(task.models.map((model) => {
        const byUtt = new Map();
        (manifests[model] ?? []).forEach((entry) => {
          if (!byUtt.has(entry.utt)) {
            byUtt.set(entry.utt, entry);
          }
        });
        return [model, byUtt];
      }));

      primaryItems.forEach((item) => {
        const caseBlock = document.createElement("article");
        caseBlock.className = "case-block";

        const scriptBlock = document.createElement("div");
        scriptBlock.className = "case-script";
        const scriptTitle = document.createElement("div");
        scriptTitle.className = "case-section-title";
        scriptTitle.textContent = "Script";
        scriptBlock.appendChild(scriptTitle);
        scriptBlock.appendChild(createScriptCell(item, highlightParalinguistic));
        caseBlock.appendChild(scriptBlock);

        const audioGrid = document.createElement("div");
        audioGrid.className = "audio-grid";

        if (task.show_reference) {
          // Reference prompt paths are resolved against the first model's directory.
          const modelRoot = `${taskRoot}/${primaryModel}`;
          audioGrid.appendChild(createAudioCard("Reference", createReferenceCell(item, modelRoot)));
        }

        task.models.forEach((model) => {
          const modelItem = entriesByModel.get(model).get(item.utt);
          const content = document.createElement("div");
          content.className = "audio-card-content";
          if (modelItem && Array.isArray(modelItem.variants)) {
            content.appendChild(createVariantAudioCell(modelItem, taskRoot, model));
          } else if (modelItem) {
            content.appendChild(createAudio(`${taskRoot}/${model}/${modelItem.output_audio}`));
          }
          audioGrid.appendChild(createAudioCard(model, content));
        });

        caseBlock.appendChild(audioGrid);
        wrapper.appendChild(caseBlock);
      });

      return wrapper;
    }

    /**
     * Replace the contents of `#task-directory` with a "Tasks" title and one
     * in-page link per loaded task.
     *
     * @param {Array<{id: string, task: {name: string}}>} tasks - Loaded tasks;
     *   each link targets `#task-<id>` and shows the task name.
     * @returns {void}
     */
    function renderDirectory(tasks) {
      const container = document.getElementById("task-directory");

      const heading = document.createElement("div");
      heading.className = "task-directory-title";
      heading.textContent = "Tasks";

      const linkList = document.createElement("div");
      linkList.className = "task-directory-links";

      for (const entry of tasks) {
        const anchor = document.createElement("a");
        anchor.href = `#task-${entry.id}`;
        anchor.textContent = entry.task.name;
        linkList.appendChild(anchor);
      }

      container.replaceChildren(heading, linkList);
    }

    /**
     * Load one task: its `task.yaml`, then every listed model's
     * `manifest.jsonl` (the manifests are fetched concurrently).
     *
     * @param {{id: string, root: string}} config - Entry from TASKS.
     * @returns {Promise<object>} `config` extended with the parsed `task` and a
     *   `manifests` object keyed by model name.
     */
    async function loadTask(config) {
      const yamlSource = await fetchText(`${config.root}/task.yaml`);
      const task = parseTaskYaml(yamlSource);

      const manifests = {};
      const pending = task.models.map((model) =>
        fetchText(`${config.root}/${model}/manifest.jsonl`).then((jsonlSource) => {
          manifests[model] = parseJsonl(jsonlSource);
        })
      );
      await Promise.all(pending);

      return { ...config, task, manifests };
    }

    /**
     * Build the card for one loaded task: a heading (name and description)
     * followed by the rendered demo cases.
     *
     * @param {object} loaded - Loaded task record.
     * @param {string} loaded.id - Used for the card's `task-<id>` element id.
     * @param {string} loaded.root - Task directory passed on to renderTable.
     * @param {object} loaded.task - Parsed task metadata.
     * @param {object} loaded.manifests - Manifest entries keyed by model name.
     * @returns {HTMLElement} A detached `section.task-card`.
     */
    function renderTaskCard({ id, root, task, manifests }) {
      const section = document.createElement("section");
      section.className = "task-card";
      section.id = `task-${id}`;

      const header = document.createElement("div");
      header.className = "task-card-heading";

      const nameNode = document.createElement("h3");
      nameNode.textContent = task.name;
      header.appendChild(nameNode);

      const summaryNode = document.createElement("p");
      summaryNode.textContent = task.description;
      header.appendChild(summaryNode);

      section.appendChild(header);
      section.appendChild(renderTable(task, manifests, root));
      return section;
    }

    /**
     * Load every task in TASKS concurrently, then render the task directory
     * and replace the contents of `#task-list` with one card per task.
     * Rejects if any task fails to load.
     *
     * @returns {Promise<void>}
     */
    async function initDemo() {
      const pendingTasks = TASKS.map((config) => loadTask(config));
      const loadedTasks = await Promise.all(pendingTasks);

      renderDirectory(loadedTasks);

      const listNode = document.getElementById("task-list");
      const cards = loadedTasks.map((loaded) => renderTaskCard(loaded));
      listNode.replaceChildren(...cards);
    }

    // Page entry point: start loading, and on any failure replace the two
    // loading placeholders with an error notice.
    initDemo().catch((error) => {
      // The page only shows the message; keep the full error (stack, cause)
      // available in the console for debugging.
      console.error(error);
      document.getElementById("task-directory").textContent = "Failed to load task metadata.";
      // A rejection value is not guaranteed to be an Error instance.
      document.getElementById("task-list").textContent = error instanceof Error ? error.message : String(error);
    });
  </script>
</body>
</html>