t0m1ab committed on
Commit
e2fa2a6
·
verified ·
1 Parent(s): c3a64cf

html + js

Browse files
Files changed (2) hide show
  1. helper.js +205 -0
  2. index.html +271 -17
helper.js ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Flag emoji displayed next to each source language, keyed by two-letter language code.
const langFlags = {'fr': '🇫🇷', 'es': '🇪🇸', 'pt': '🇵🇹', 'de': '🇩🇪', 'it': '🇮🇹'};
// English display name of each source language, keyed by the same codes as langFlags.
const langNames = {'fr': 'French', 'es': 'Spanish', 'pt': 'Portuguese', 'de': 'German', 'it': 'Italian'};

// Sample filenames for the short-form tables, per source language.
// generateSamplesTable() sizes its row groups from the first language it is given,
// so every language rendered in the same table must list the same number of files.
// The filenames after `//` are not part of the arrays (presumably samples that were
// disabled on purpose — confirm before re-enabling them).
const shortformFilenamesPerLang = {
    'fr': ["30ef344ae8687926.mp3", "4539f03d07ce7fbf.mp3"], // "6d6261093edc78c2.mp3", "6d6261093edc78c2.mp3"],
    'es': ["5dc1d533e21f43b2.mp3", "963de6cbb0eaee36.mp3"], // "a22a3eff8576211c.mp3", "ff65061e3b636834.mp3"],
    'pt': ["1263b98457966b2a.mp3", "3a2a8fd3a3bd2feb.mp3"], // "6cf8e09e87612d2f.mp3", "70a4955ff0149f5f.mp3"],
    'de': ["2d05ea9d4a065778.mp3", "3f5d622c2955df4c.mp3"], // "64fbd8fd8ecd4d63.mp3", "93cce2bd8093062f.mp3"],
    'it': ["61fada964460ad67.mp3", "9c6657d3fe647ecb.mp3", "84fbf6f8271c43b4.mp3", "83fcc138b2a8df7f.mp3"], // "a1fa8e69d4019e03.mp3", "f30cef780a80ca78.mp3"],
};

// Sample filenames for the long-form table, per source language (no Italian entry).
// Filenames must not carry surrounding whitespace: they are concatenated verbatim
// into the audio URLs.
const longformFilenamesPerLang = {
    'fr': ["ee67adf3f3768b1d_11labs.mp3", "f9fcfb48c566cfad_11labs.mp3"],
    'es': ["02fc8ce1843e4638_11labs.mp3", "bb3e91e3f0488a24_11labs.mp3"],
    'pt': ["73725fb3cf2cf669_cartesia.mp3", "7b42a118f93b1867_cartesia.mp3"],
    'de': ["02df47e0d27a8b80_cartesia.mp3", "b0e7b4b91e9d91db_gradium.mp3"],
};
18
+
19
/**
 * Build the markup of an inline audio player for a single file.
 *
 * The `src` attribute is quoted and HTML-escaped so that paths containing
 * spaces, quotes or angle brackets cannot break the generated markup, and the
 * `type` attribute is derived from the file extension instead of always
 * claiming `audio/wav`.
 *
 * @param {string} path - URL or relative path of the audio file.
 * @returns {string} HTML for an `<audio>` element with one `<source>` child.
 */
function createAudioHTML(path) {
    // Extension -> MIME type; unknown extensions keep the historical "audio/wav".
    const mimeTypes = new Map([
        ['mp3', 'audio/mpeg'],
        ['wav', 'audio/wav'],
        ['ogg', 'audio/ogg'],
        ['flac', 'audio/flac'],
        ['m4a', 'audio/mp4'],
    ]);

    const src = String(path).trim();
    // Ignore any query string / fragment when looking for the extension.
    const extension = src.split(/[?#]/)[0].split('.').pop().toLowerCase();
    const type = mimeTypes.get(extension) || 'audio/wav';

    const escapedSrc = src
        .replace(/&/g, '&amp;')
        .replace(/"/g, '&quot;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    return '<audio controls controlslist="nodownload" class="px-1"> <source src="' +
        escapedSrc +
        '" type="' + type + '">Your browser does not support the audio element.</audio>';
}
24
+
25
/**
 * Fill one body row of a samples table.
 *
 * On the first row of a language group the first cell receives the language
 * name and flag and is stretched over the whole group with `rowspan`; the
 * placeholder first cell of every other row in the group is removed so the
 * remaining cells stay aligned with the table header. Each entry of `dirs`
 * then fills one cell with either an audio player or, for paths ending in
 * "txt", the text fetched from that path.
 *
 * @param {HTMLTableRowElement} table_row - Row to fill; its parent must expose `rows`.
 * @param {string} base_dir - Root directory of the samples.
 * @param {string} lang - Language code, used as a path segment and to look up name/flag.
 * @param {string[]} dirs - One sub-directory per content column, in column order.
 * @param {string} filename - Sample file name inside each `dirs` entry.
 * @param {number} row_idx - Index of `table_row` within its parent's `rows`.
 * @param {number} n_files_per_lang - Number of rows in each language group.
 */
function generateExampleRow(table_row, base_dir, lang, dirs, filename, row_idx, n_files_per_lang) {
    const isFirstRowOfLang = row_idx % n_files_per_lang === 0;

    // Put the flag and full language name in the first column
    if (isFirstRowOfLang) {
        const langCell = table_row.cells[0];
        langCell.innerHTML = `
            <div style="display: flex; align-items: center; justify-content: center; gap: 0.5em">
                <span style="font-size: 1em;">${langNames[lang] || lang}</span>
                <span style="font-size: 2em;">${langFlags[lang] || ''}</span>
            </div>
        `;
        langCell.setAttribute('rowspan', n_files_per_lang);
        langCell.style.verticalAlign = "middle";

        // The rowspan covers the n_files_per_lang - 1 rows that follow, so each of
        // them (not only the next one) must lose its placeholder first cell.
        const siblingRows = table_row.parentElement.rows;
        for (let offset = 1; offset < n_files_per_lang; offset++) {
            const coveredRow = siblingRows[row_idx + offset];
            if (coveredRow) {
                coveredRow.deleteCell(0);
            }
        }
    }

    // Only the first row of a group still owns a language cell; in the other
    // rows the content columns start at index 0.
    const col_offset = isFirstRowOfLang ? 1 : 0;

    dirs.forEach((dir, col_idx) => {
        const cell = table_row.cells[col_idx + col_offset];
        const p = base_dir + '/' + lang + '/' + dir + '/' + filename;

        const container = cell.querySelector('div') || cell;

        if (p.endsWith('txt')) {
            const req = new XMLHttpRequest();
            req.onreadystatechange = function () {
                // Read everything from `this` so each callback uses its own request.
                if (this.readyState === this.DONE) {
                    container.innerHTML += '<font size="-1">' + this.responseText + '</font>';
                }
            };
            req.open('GET', p);
            req.send(null);
        } else {
            // Wrap the player in a flex box so it is centered inside the cell.
            container.innerHTML += `
                <div style="display: flex; justify-content: center; align-items: center;">
                    ${createAudioHTML(p)}
                </div>
            `;
        }
    });
}
72
+
73
/**
 * Populate the body of a pre-built samples table, one row per sample file.
 *
 * Rows are addressed as `n_files_per_lang * lang_idx + sample_idx`, where
 * `n_files_per_lang` is taken from the first language in `langs`; all
 * languages rendered in one table must therefore list the same number of
 * files. A missing table, body, language entry or row is reported with
 * `console.warn` and skipped instead of throwing, so that one incomplete
 * table does not stop the rest of the script.
 *
 * @param {string} tableId - `id` of the `<table>`; it must contain a `<tbody>` with empty rows.
 * @param {string} base_dir - Root directory of the samples.
 * @param {Object<string, string[]>} fnames_per_lang - Sample filenames keyed by language code.
 * @param {string[]} langs - Language codes to render, in row-group order.
 */
function generateSamplesTable(tableId, base_dir, fnames_per_lang, langs) {
    const table = document.getElementById(tableId);
    const tbody = table ? table.querySelector('tbody') : null;
    if (!tbody) {
        console.warn(`generateSamplesTable: no <tbody> found for table "${tableId}"`);
        return;
    }
    if (langs.length === 0) {
        return;
    }

    const n_files_per_lang = (fnames_per_lang[langs[0]] || []).length; // all langs must have the same number of samples
    const dirs = ['source', 'hibiki-zero', 'seamless'];

    langs.forEach((lang, lang_idx) => {
        const fnames = fnames_per_lang[lang];
        if (!fnames) {
            console.warn(`generateSamplesTable: no filenames for language "${lang}" in table "${tableId}"`);
            return;
        }
        fnames.forEach((fname, sample_idx) => {
            const row_idx = n_files_per_lang * lang_idx + sample_idx;
            const row = tbody.rows[row_idx];
            if (!row) {
                console.warn(`generateSamplesTable: table "${tableId}" has no row ${row_idx}`);
                return;
            }
            generateExampleRow(row, base_dir, lang, dirs, fname, row_idx, n_files_per_lang);
        });
    });
}
86
+
87
// Fill the three static sample tables declared in index.html.
// Each entry is [table id, sample root directory, filename table, languages to show].
for (const tableArgs of [
    ['shortform-table', 'data/europarl_st', shortformFilenamesPerLang, ['fr', 'es', 'pt', 'de']],
    ['longform-table', 'data/audio_ntrex_4L', longformFilenamesPerLang, ['fr', 'es', 'pt', 'de']],
    ['shortform-table-it', 'data/europarl_st', shortformFilenamesPerLang, ['it']],
]) {
    generateSamplesTable(...tableArgs);
}
90
+
91
// Borrowed from https://nu-dialogue.github.io/j-moshi/
// Once the DOM is ready, build the "#multistream-table": one row per source
// language, with a WaveSurfer waveform and a play/pause button for each system.
$(document).ready(function () {

    // Header labels of the waveform columns, in the same order as the files in `rows`.
    const columns = ['Hibiki-Zero', 'Seamless'];

    // Language code of each entry in `rows` (parallel arrays).
    const rowLangIds = ['fr', 'es', 'pt', 'de'];

    // One entry per table row; each entry lists the audio file of every column.
    const rows = [
        [
            'data-stereo/hibiki-zero_fr_3963c038b9f8d311_gradium.mp3',
            'data-stereo/seamless_fr_3963c038b9f8d311_gradium.mp3'
        ],
        [
            'data-stereo/hibiki-zero_es_949ebe18ff5f86ec_cartesia.mp3',
            'data-stereo/seamless_es_949ebe18ff5f86ec_cartesia.mp3'
        ],
        [
            'data-stereo/hibiki-zero_pt_4bb12dfdfd3877d8_11labs.mp3',
            'data-stereo/seamless_pt_4bb12dfdfd3877d8_11labs.mp3'
        ],
        [
            'data-stereo/hibiki-zero_de_3bf4c877f039e01a_11labs.mp3',
            'data-stereo/seamless_de_3bf4c877f039e01a_11labs.mp3'
        ],
    ];

    const table = $('#multistream-table');

    /* ---------- Header ---------- */
    const thead = $('<thead>');
    const headerRow = $('<tr>');

    headerRow.append($('<th>').text('Source language').css({'white-space': 'nowrap', 'text-align': 'center'}));

    columns.forEach(header => {
        headerRow.append($('<th style="text-align: center">').text(header));
    });

    thead.append(headerRow);
    table.append(thead);

    /* ---------- Body ---------- */
    const tbody = $('<tbody>');

    rows.forEach((files, i) => {
        const row = $('<tr>');

        // Language label cell with big flag. Fall back to the raw code (or an
        // empty string) so an unknown language never renders as "undefined".
        const langName = langNames[rowLangIds[i]] || rowLangIds[i] || '';
        const flag = langFlags[rowLangIds[i]] || '';
        row.append(
            $('<td>')
                .css({
                    'font-weight': 'bold',
                    'white-space': 'nowrap',
                    'vertical-align': 'middle'
                })
                .html(
                    `<div style="display: flex; align-items: center; justify-content: center; gap: 0.5em">
                        <span style="font-size: 1em;">${langName}</span>
                        <span style="font-size: 2em;">${flag}</span>
                    </div>`
                )
        );

        files.forEach((_file, j) => {
            const waveCell = $('<td style="text-align: center; vertical-align: middle;">');
            // Empty container that WaveSurfer looks up by id further below.
            const waveform = $('<div>').attr('id', `waveform-${i}-${j}`);
            waveCell.append(waveform);

            const playPauseButton = `
                <button class="btn btn-secondary mt-1" id="play-pause-${i}-${j}">
                    <i class="bi bi-play-fill"></i> Play /
                    <i class="bi bi-pause-fill"></i> Pause
                </button>
            `;
            waveCell.append(playPauseButton);
            row.append(waveCell);
        });

        tbody.append(row);
    });

    table.append(tbody);

    /* ---------- WaveSurfer ---------- */
    // Done in a second pass, after the body is attached to the table, because
    // the containers and buttons are looked up by id selector.
    rows.forEach((files, i) => {
        files.forEach((file, j) => {
            const wavesurfer = WaveSurfer.create({
                container: `#waveform-${i}-${j}`,
                url: file,
                // Two option sets, i.e. one colour scheme per audio channel.
                splitChannels: [
                    {
                        waveColor: '#39f2aeff',
                        progressColor: '#808080',
                    },
                    {
                        waveColor: '#ffab40ff',
                        progressColor: '#000000',
                    }
                ],
                barWidth: 2,
                height: 55,
                width: 650,
                normalize: true,
            });

            // .on('click', ...) replaces the deprecated .click(handler) shorthand.
            $(`#play-pause-${i}-${j}`).on('click', () => {
                wavesurfer.playPause();
            });
        });
    });

});
index.html CHANGED
@@ -1,19 +1,273 @@
1
- <!doctype html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
  <html>
3
+ <head>
4
+ <title>Hibiki</title>
5
+ <link
6
+ href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css"
7
+ rel="stylesheet"
8
+ />
9
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
10
+ <meta charset="utf-8" />
11
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
12
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
13
+ <script src="https://unpkg.com/wavesurfer.js@7"></script>
14
+ <script src="helper.js" defer></script>
15
+ <!-- <script>
16
+ window.addEventListener('DOMContentLoaded', () => {
17
+ const disclaimer = document.getElementById('browser-disclaimer');
18
+ if (!disclaimer) return;
19
+ const isChrome = /Chrome/.test(navigator.userAgent) && /Google Inc/.test(navigator.vendor);
20
+ if (isChrome) {
21
+ disclaimer.style.display = 'none';
22
+ }
23
+ });
24
+ </script> -->
25
+ <style>
26
+ h1, h2, h3, h4, h5, h6, body, b, strong {color: #595959}
27
+ .container {max-width: 1620px}
28
+ .no-hover:hover td {box-shadow: none !important}
29
+ .centered-video {
30
+ display: block;
31
+ margin: 0 auto;
32
+ max-width: 90%;
33
+ min-width: 400px;
34
+ }
35
+ .video-legend {
36
+ margin-top: 6px;
37
+ font-size: 1em;
38
+ color: #555;
39
+ text-align: center;
40
+ }
41
+ </style>
42
+ </head>
43
+
44
+ <body>
45
+
46
+ <!-- DISCLAIMER -->
47
+ <div id="browser-disclaimer" style="
48
+ background-color: rgb(221, 255, 243);
49
+ color: #333;
50
+ padding: 10px 20px;
51
+ text-align: center;
52
+ font-family: sans-serif;
53
+ font-size: 14px;
54
+ ">
55
+ For faster loading of audio samples, we recommend using <strong>Google Chrome</strong>.
56
+ </div>
57
+
58
+ <!-- HEADER -->
59
+ <div class="container pt-5 mt-5 shadow p-5 mb-5 bg-white rounded">
60
+ <div class="text-center">
61
+ <h1>Simultaneous Speech-to-Speech Translation Without Aligned Data</h1>
62
+ <p class="lead">
63
+ [...]
64
+ </p>
65
+ <p class="fst-italic mb-0">
66
+ Anonymous Authors
67
+ </p>
68
+ <p><b>Anonymous Institution</b></p>
69
+ </div>
70
+
71
+ <p>
72
+ <b>Abstract.</b>
73
+ Simultaneous speech translation requires translating source speech into a target language in real-time while handling non-monotonic word dependencies.
74
+ Traditional approaches rely on supervised training with word-level aligned data, which is difficult to collect at scale and thus depends on synthetic alignments using language-specific heuristics that are suboptimal.
75
+ We propose <i>Hibiki-Zero</i>, which eliminates the need for word-level alignments entirely.
76
+ This fundamentally simplifies the training pipeline and enables seamless scaling to diverse languages with varying grammatical structures, removing the bottleneck of designing language-specific alignment heuristics.
77
+ We first train on sentence-level aligned data to learn speech translation at high latency, then apply a novel reinforcement learning strategy using GRPO to optimize latency while preserving translation quality.
78
+ Hibiki-Zero achieves state-of-the-art performance in translation accuracy, latency, voice transfer, and naturalness across five X-to-English tasks.
79
+ Moreover, we demonstrate that our model can be adapted to support a new input language with less than 1000h of speech data.
80
+ We provide examples as well as models and we release a benchmark containing 15h of multilingual data for speech translation evaluation.
81
+ </p>
82
+
83
+ </div>
84
+
85
+ <!-- IN THE WILD -->
86
+ <div class="container shadow p-5 mb-5 bg-white rounded">
87
+ <h3>In the Wild Examples 🇫🇷🇪🇸🇵🇹🇩🇪</h3>
88
+ <p class="mb-0">
89
+ </p>
90
+ <div class="container pt-3 table-responsive">
91
+ <table class="table table-hover" width="100%">
92
+ <tr class="no-hover">
93
+ <!-- FR video -->
94
+ <td>
95
+ <video class="embed-responsive-item centered-video" controls>
96
+ <source src="videos/clip_fr_translated.mp4" type="video/mp4">
97
+ Your browser does not support HTML video.
98
+ </video>
99
+ <div class="video-legend">
100
+ Source:
101
+ <a href="https://www.youtube.com/watch?v=3nox96KbhV0" target="_blank">
102
+ The legendary Paris 2024 Olympic Games of Léon Marchand.
103
+ </a>
104
+ - <i>Eurosport France</i>
105
+ </div>
106
+ </td>
107
+ <!-- DE video -->
108
+ <td>
109
+ <video class="embed-responsive-item centered-video" controls>
110
+ <source src="videos/clip_de_translated.mp4" type="video/mp4">
111
+ Your browser does not support HTML video.
112
+ </video>
113
+ <div class="video-legend">
114
+ Source:
115
+ <a href="https://www.youtube.com/watch?v=4kFw5gi9JKI" target="_blank">
116
+ Biathlon 2025: Franziska Preuß wins her first World Championship.
117
+ </a>
118
+ - <i>Eurosport Germany</i>
119
+ </div>
120
+ </td>
121
+ </tr>
122
+ <tr class="no-hover">
123
+ <!-- ES video -->
124
+ <td>
125
+ <video class="embed-responsive-item centered-video" controls>
126
+ <source src="videos/clip_es_translated.mp4" type="video/mp4">
127
+ Your browser does not support HTML video.
128
+ </video>
129
+ <div class="video-legend">
130
+ Source:
131
+ <a href="https://www.youtube.com/watch?v=O0M-o7CnmUE" target="_blank">
132
+ Australian Open 2026 Final: Carlos Alcaraz vs. Novak Djokovic.
133
+ </a>
134
+ - <i>Eurosport España</i>
135
+ </div>
136
+ </td>
137
+ <!-- PT video -->
138
+ <td>
139
+ <video class="embed-responsive-item centered-video" controls>
140
+ <source src="videos/clip_pt_translated.mp4" type="video/mp4">
141
+ Your browser does not support HTML video.
142
+ </video>
143
+ <div class="video-legend">
144
+ Source:
145
+ <a href="https://www.facebook.com/share/v/1HAYhUFVm3/" target="_blank">
146
+ Iuri Leitao and Rui Oliveira win gold for Portugal at the Paris 2024 Olympics.
147
+ </a>
148
+ - <i>Facebook</i>
149
+ </div>
150
+ </td>
151
+ </tr>
152
+ </table>
153
+ </div>
154
+ </div>
155
+
156
+ <!-- MULTISTREAM -->
157
+ <div class="container shadow p-5 mb-5 bg-white rounded">
158
+ <h3>Multistream Visualization</h3>
159
+ <p class="mb-0">
160
+ The source audios (from our long-form evaluation dataset Audio-NTREX-4L) and translated versions are on different channels.
161
+ The volume of the sources is reduced so that it's easier to hear the translations.
162
+ </p>
163
+ <div class="container pt-3 table-responsive">
164
+ <table class="table" id="multistream-table"></table>
165
+ </div>
166
+ </div>
167
+
168
+ <!-- SHORTFORM -->
169
+ <div class="container shadow p-5 mb-5 bg-white rounded">
170
+ <h3>Short-form Simultaneous Translations</h3>
171
+ <p class="mb-0"> The source audios come from our Europarl-ST evaluation data. </p>
172
+ <div class="container pt-3 table-responsive">
173
+ <table class="table" id="shortform-table">
174
+ <thead>
175
+ <tr>
176
+ <th style="text-align: center; min-width: 150px;">Source language</th>
177
+ <th style="text-align: center;min-width: 200px;">Source</th>
178
+ <th style="text-align: center;">Hibiki-Zero</th>
179
+ <th style="text-align: center">Seamless</th>
180
+ </tr>
181
+ </thead>
182
+ <tbody>
183
+ <!-- fr -->
184
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
185
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
186
+ <!-- <tr> <td></td> <td></td> <td></td> <td></td></tr>
187
+ <tr> <td></td> <td></td> <td></td> <td></td></tr> -->
188
+ <!-- es -->
189
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
190
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
191
+ <!-- <tr> <td></td> <td></td> <td></td> <td></td></tr>
192
+ <tr> <td></td> <td></td> <td></td> <td></td></tr> -->
193
+ <!-- pt -->
194
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
195
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
196
+ <!-- <tr> <td></td> <td></td> <td></td> <td></td></tr>
197
+ <tr> <td></td> <td></td> <td></td> <td></td></tr> -->
198
+ <!-- de -->
199
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
200
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
201
+ <!-- <tr> <td></td> <td></td> <td></td> <td></td></tr>
202
+ <tr> <td></td> <td></td> <td></td> <td></td></tr> -->
203
+ </tbody>
204
+ </table>
205
+ </div>
206
+ </div>
207
+
208
+ <!-- LONGFORM -->
209
+ <div class="container shadow p-5 mb-5 bg-white rounded">
210
+ <h3>Long-form Simultaneous Translations</h3>
211
+ <p class="mb-0"> The source audios are taken from our Audio-NTREX-4L evaluation dataset.</p>
212
+ <div class="container pt-3 table-responsive">
213
+ <table class="table" id="longform-table">
214
+ <thead>
215
+ <tr>
216
+ <th style="text-align: center; min-width: 150px;">Source language</th>
217
+ <th style="text-align: center;min-width: 200px;">Source</th>
218
+ <th style="text-align: center;">Hibiki-Zero</th>
219
+ <th style="text-align: center;">Seamless</th>
220
+ </tr>
221
+ </thead>
222
+ <tbody>
223
+ <!-- fr -->
224
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
225
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
226
+ <!-- es -->
227
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
228
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
229
+ <!-- pt -->
230
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
231
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
232
+ <!-- de -->
233
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
234
+ <tr> <td></td> <td></td> <td></td> <td></td> </tr>
235
+ </tbody>
236
+ </table>
237
+ </div>
238
+ </div>
239
+
240
+ <!-- SHORTFORM ITALIAN -->
241
+ <div class="container shadow p-5 mb-5 bg-white rounded">
242
+ <h3>Short-form Simultaneous Translations from Italian</h3>
243
+ <p class="mb-0"> The source audios come from our Europarl-ST evaluation data. Hibiki-Zero-IT denotes our model adapted for translation from Italian with less than 1000 hours of Italian-to-English data. </p>
244
+ <div class="container pt-3 table-responsive">
245
+ <table class="table" id="shortform-table-it">
246
+ <thead>
247
+ <tr>
248
+ <th style="text-align: center; min-width: 150px;">Source language</th>
249
+ <th style="text-align: center;min-width: 200px;">Source</th>
250
+ <th style="text-align: center;">Hibiki-Zero-IT</th>
251
+ <th style="text-align: center">Seamless</th>
252
+ </tr>
253
+ </thead>
254
+ <tbody>
255
+ <!-- it -->
256
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
257
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
258
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
259
+ <tr> <td></td> <td></td> <td></td> <td></td></tr>
260
+ </tbody>
261
+ </table>
262
+ </div>
263
+ </div>
264
+
265
+ <!-- TAIL -->
266
+ <div class="container p-5 mb-5 bg-white rounded">
267
+ <p class="mb-0">
268
+ This page was adapted from the <a href="https://google-research.github.io/seanet/soundstorm/examples">SoundStorm project page</a>.
269
+ </p>
270
+ </div>
271
+
272
+ </body>
273
  </html>