woiceatus committed on
Commit
740e55f
·
1 Parent(s): adb34c2

Make sending WAV audio files work

Browse files
public/chatclient/app.js CHANGED
@@ -25,6 +25,7 @@ const settingsToggle = document.querySelector("#settings-toggle");
25
  const statusLine = document.querySelector("#status-line");
26
  const sendButton = document.querySelector("#send-button");
27
  const responseOutput = document.querySelector("#response-output");
 
28
  const rawJson = document.querySelector("#raw-json");
29
  const errorToast = document.querySelector("#error-toast");
30
  const previewController = createAttachmentPreviewController();
@@ -41,10 +42,10 @@ function bindEvents() {
41
  audioOutputInput.addEventListener("change", syncAudioFields);
42
  attachmentInput.addEventListener("change", handleAttachmentSelect);
43
  document.querySelector("#attachment-trigger").addEventListener("click", () => toggleAttachmentPicker());
44
- document.querySelector("#attachment-upload-trigger").addEventListener("click", () => attachmentInput.click());
45
  document.querySelector("#attachment-link-add").addEventListener("click", handleLinkAdd);
46
  attachmentLinkUrl.addEventListener("keydown", handleLinkKeyDown);
47
  document.querySelector("#send-button").addEventListener("click", handleSend);
 
48
  document.querySelector(".editor-toolbar").addEventListener("click", handleFormatClick);
49
  editor.addEventListener("keydown", handleEditorKeyDown);
50
  editor.addEventListener("input", persistDraft);
@@ -54,7 +55,7 @@ function bindEvents() {
54
  }
55
 
56
  for (const button of document.querySelectorAll(".picker-mode-button")) {
57
- button.addEventListener("click", () => setAttachmentMode(button.dataset.mode));
58
  }
59
 
60
  for (const element of [endpointInput, modelInput, systemPromptInput, attachmentLinkType]) {
@@ -108,6 +109,13 @@ function setAttachmentMode(mode) {
108
  persistDraft();
109
  }
110
 
 
 
 
 
 
 
 
111
  function setActiveTab(tabName) {
112
  for (const button of document.querySelectorAll(".tab-button")) {
113
  const isActive = button.dataset.tab === tabName;
@@ -140,9 +148,9 @@ function handleEditorKeyDown(event) {
140
  }
141
  }
142
 
143
- function handleAttachmentSelect(event) {
144
  try {
145
- const nextItems = createAttachments(event.target.files);
146
  if (nextItems.length === 0) {
147
  return;
148
  }
@@ -233,6 +241,7 @@ async function handleSend() {
233
  async function buildPayload() {
234
  const text = readEditorText();
235
  const content = [];
 
236
 
237
  if (text) {
238
  content.push({ type: "text", text });
@@ -248,6 +257,10 @@ async function buildPayload() {
248
  messages: []
249
  };
250
 
 
 
 
 
251
  if (!payload.model) {
252
  throw new Error("Enter a model name in settings.");
253
  }
@@ -337,3 +350,21 @@ function renderRequestError(error, stage) {
337
  function showPendingOutput() {
338
  responseOutput.innerHTML = '<p class="empty-state">Sending request...</p>';
339
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  const statusLine = document.querySelector("#status-line");
26
  const sendButton = document.querySelector("#send-button");
27
  const responseOutput = document.querySelector("#response-output");
28
+ const rawCopyButton = document.querySelector("#raw-copy-button");
29
  const rawJson = document.querySelector("#raw-json");
30
  const errorToast = document.querySelector("#error-toast");
31
  const previewController = createAttachmentPreviewController();
 
42
  audioOutputInput.addEventListener("change", syncAudioFields);
43
  attachmentInput.addEventListener("change", handleAttachmentSelect);
44
  document.querySelector("#attachment-trigger").addEventListener("click", () => toggleAttachmentPicker());
 
45
  document.querySelector("#attachment-link-add").addEventListener("click", handleLinkAdd);
46
  attachmentLinkUrl.addEventListener("keydown", handleLinkKeyDown);
47
  document.querySelector("#send-button").addEventListener("click", handleSend);
48
+ rawCopyButton.addEventListener("click", handleRawCopy);
49
  document.querySelector(".editor-toolbar").addEventListener("click", handleFormatClick);
50
  editor.addEventListener("keydown", handleEditorKeyDown);
51
  editor.addEventListener("input", persistDraft);
 
55
  }
56
 
57
  for (const button of document.querySelectorAll(".picker-mode-button")) {
58
+ button.addEventListener("click", () => handleAttachmentModeSelect(button.dataset.mode));
59
  }
60
 
61
  for (const element of [endpointInput, modelInput, systemPromptInput, attachmentLinkType]) {
 
109
  persistDraft();
110
  }
111
 
112
+ function handleAttachmentModeSelect(mode) {
113
+ setAttachmentMode(mode);
114
+ if (mode === "upload") {
115
+ attachmentInput.click();
116
+ }
117
+ }
118
+
119
  function setActiveTab(tabName) {
120
  for (const button of document.querySelectorAll(".tab-button")) {
121
  const isActive = button.dataset.tab === tabName;
 
148
  }
149
  }
150
 
151
+ async function handleAttachmentSelect(event) {
152
  try {
153
+ const nextItems = await createAttachments(event.target.files);
154
  if (nextItems.length === 0) {
155
  return;
156
  }
 
241
  async function buildPayload() {
242
  const text = readEditorText();
243
  const content = [];
244
+ const hasAudioAttachment = state.attachments.some((attachment) => attachment.kind === "audio");
245
 
246
  if (text) {
247
  content.push({ type: "text", text });
 
257
  messages: []
258
  };
259
 
260
+ if (hasAudioAttachment) {
261
+ payload.modalities = ["text", "audio"];
262
+ }
263
+
264
  if (!payload.model) {
265
  throw new Error("Enter a model name in settings.");
266
  }
 
350
  function showPendingOutput() {
351
  responseOutput.innerHTML = '<p class="empty-state">Sending request...</p>';
352
  }
353
+
354
+ async function handleRawCopy() {
355
+ try {
356
+ await navigator.clipboard.writeText(rawJson.textContent);
357
+ setStatus(statusLine, "Raw response copied.", true);
358
+ rawCopyButton.textContent = "Copied";
359
+ } catch (_error) {
360
+ showError(errorToast, "Failed to copy raw response.");
361
+ rawCopyButton.textContent = "Copy Failed";
362
+ }
363
+
364
+ window.clearTimeout(handleRawCopy.timer);
365
+ handleRawCopy.timer = window.setTimeout(() => {
366
+ rawCopyButton.textContent = "Copy Raw";
367
+ }, 1600);
368
+ }
369
+
370
+ handleRawCopy.timer = 0;
public/chatclient/draft.js CHANGED
@@ -1,3 +1,5 @@
 
 
1
  const DRAFT_KEY = "oapix.chatclient.draft";
2
 
3
  export function loadDraft() {
@@ -17,23 +19,28 @@ export function loadDraft() {
17
  }
18
 
19
  export function saveDraft({ editor, attachmentMode, attachmentLinkType, attachmentLinkUrl, attachments }) {
20
- window.localStorage.setItem(DRAFT_KEY, JSON.stringify({
21
- editorHtml: editor.innerHTML,
22
- attachmentMode,
23
- attachmentLinkType,
24
- attachmentLinkUrl,
25
- attachments: attachments
26
- .filter((attachment) => attachment.sourceType === "link")
27
- .map((attachment) => ({
28
- id: attachment.id,
29
- kind: attachment.kind,
30
- name: attachment.name,
31
- sizeLabel: attachment.sizeLabel,
32
- previewUrl: attachment.previewUrl,
33
- sourceType: attachment.sourceType,
34
- url: attachment.url
35
- }))
36
- }));
 
 
 
 
 
37
  }
38
 
39
  export function clearDraft() {
@@ -45,13 +52,9 @@ function sanitizeAttachments(value) {
45
  return [];
46
  }
47
 
48
- return value.filter((attachment) => {
49
- return attachment
50
- && attachment.sourceType === "link"
51
- && (attachment.kind === "image" || attachment.kind === "audio")
52
- && typeof attachment.url === "string"
53
- && typeof attachment.name === "string";
54
- });
55
  }
56
 
57
  function emptyDraft() {
@@ -63,3 +66,51 @@ function emptyDraft() {
63
  attachments: []
64
  };
65
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { hydrateAttachment } from "/chatclient/media.js";
2
+
3
  const DRAFT_KEY = "oapix.chatclient.draft";
4
 
5
  export function loadDraft() {
 
19
  }
20
 
21
  export function saveDraft({ editor, attachmentMode, attachmentLinkType, attachmentLinkUrl, attachments }) {
22
+ try {
23
+ window.localStorage.setItem(DRAFT_KEY, JSON.stringify({
24
+ editorHtml: editor.innerHTML,
25
+ attachmentMode,
26
+ attachmentLinkType,
27
+ attachmentLinkUrl,
28
+ attachments: attachments
29
+ .map((attachment) => serializeAttachment(attachment))
30
+ .filter(Boolean)
31
+ }));
32
+ } catch (_error) {
33
+ window.localStorage.setItem(DRAFT_KEY, JSON.stringify({
34
+ editorHtml: editor.innerHTML,
35
+ attachmentMode,
36
+ attachmentLinkType,
37
+ attachmentLinkUrl,
38
+ attachments: attachments
39
+ .filter((attachment) => attachment.sourceType === "link")
40
+ .map((attachment) => serializeAttachment(attachment))
41
+ .filter(Boolean)
42
+ }));
43
+ }
44
  }
45
 
46
  export function clearDraft() {
 
52
  return [];
53
  }
54
 
55
+ return value
56
+ .map((attachment) => sanitizeAttachment(attachment))
57
+ .filter(Boolean);
 
 
 
 
58
  }
59
 
60
  function emptyDraft() {
 
66
  attachments: []
67
  };
68
  }
69
+
70
+ function serializeAttachment(attachment) {
71
+ if (!attachment || (attachment.kind !== "image" && attachment.kind !== "audio")) {
72
+ return null;
73
+ }
74
+
75
+ if (attachment.sourceType === "link") {
76
+ return {
77
+ id: attachment.id,
78
+ kind: attachment.kind,
79
+ name: attachment.name,
80
+ sizeLabel: attachment.sizeLabel,
81
+ previewUrl: attachment.previewUrl,
82
+ sourceType: "link",
83
+ url: attachment.url
84
+ };
85
+ }
86
+
87
+ if (attachment.sourceType === "file" && typeof attachment.dataUrl === "string") {
88
+ return {
89
+ id: attachment.id,
90
+ kind: attachment.kind,
91
+ name: attachment.name,
92
+ sizeLabel: attachment.sizeLabel,
93
+ sourceType: "file",
94
+ dataUrl: attachment.dataUrl,
95
+ mimeType: attachment.mimeType
96
+ };
97
+ }
98
+
99
+ return null;
100
+ }
101
+
102
+ function sanitizeAttachment(attachment) {
103
+ if (!attachment || (attachment.kind !== "image" && attachment.kind !== "audio") || typeof attachment.name !== "string") {
104
+ return null;
105
+ }
106
+
107
+ if (attachment.sourceType === "link" && typeof attachment.url === "string") {
108
+ return hydrateAttachment(attachment);
109
+ }
110
+
111
+ if (attachment.sourceType === "file" && typeof attachment.dataUrl === "string") {
112
+ return hydrateAttachment(attachment);
113
+ }
114
+
115
+ return null;
116
+ }
public/chatclient/index.html CHANGED
@@ -51,19 +51,6 @@
51
  ></div>
52
  </article>
53
 
54
- <section class="attachments-card" aria-labelledby="attachments-heading" hidden>
55
- <div class="section-head">
56
- <div>
57
- <p class="eyebrow">Attachments</p>
58
- <h3 id="attachments-heading">Image and Audio Attachments</h3>
59
- </div>
60
- <p id="attachment-summary" class="section-copy">No files added.</p>
61
- </div>
62
- <div id="attachment-list" class="attachment-list">
63
- <p class="empty-state">Add an image or audio file or link to include it with the next request.</p>
64
- </div>
65
- </section>
66
-
67
  <div class="tool-row" aria-label="Request tools">
68
  <button id="settings-toggle" class="tool-button icon-button" type="button" aria-label="Open settings" title="Settings">
69
  <svg viewBox="0 0 24 24" aria-hidden="true">
@@ -108,8 +95,7 @@
108
  </div>
109
 
110
  <div id="attachment-picker-upload" class="picker-panel is-active">
111
- <p class="picker-copy">Select one or more image/audio files from this device.</p>
112
- <button id="attachment-upload-trigger" class="picker-action-button" type="button">Choose Files</button>
113
  </div>
114
 
115
  <div id="attachment-picker-link" class="picker-panel" hidden>
@@ -132,6 +118,19 @@
132
  </div>
133
  </section>
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  <section id="settings-panel" class="settings-panel" hidden>
136
  <div class="section-head">
137
  <div>
@@ -199,8 +198,9 @@
199
  <p class="eyebrow">Debug</p>
200
  <h3>Raw Response</h3>
201
  </div>
202
- <p class="section-copy">This tab shows the exact JSON returned by the proxy.</p>
203
  </div>
 
204
  <pre id="raw-json" class="raw-output">{}</pre>
205
  </article>
206
  </section>
 
51
  ></div>
52
  </article>
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  <div class="tool-row" aria-label="Request tools">
55
  <button id="settings-toggle" class="tool-button icon-button" type="button" aria-label="Open settings" title="Settings">
56
  <svg viewBox="0 0 24 24" aria-hidden="true">
 
95
  </div>
96
 
97
  <div id="attachment-picker-upload" class="picker-panel is-active">
98
+ <p class="picker-copy">Click <strong>Upload Files</strong> above to open the system picker immediately.</p>
 
99
  </div>
100
 
101
  <div id="attachment-picker-link" class="picker-panel" hidden>
 
118
  </div>
119
  </section>
120
 
121
+ <section class="attachments-card" aria-labelledby="attachments-heading">
122
+ <div class="section-head">
123
+ <div>
124
+ <p class="eyebrow">Attachments</p>
125
+ <h3 id="attachments-heading">Image and Audio Attachments</h3>
126
+ </div>
127
+ <p id="attachment-summary" class="section-copy">No files added.</p>
128
+ </div>
129
+ <div id="attachment-list" class="attachment-list">
130
+ <p class="empty-state">Add an image or audio file or link to include it with the next request.</p>
131
+ </div>
132
+ </section>
133
+
134
  <section id="settings-panel" class="settings-panel" hidden>
135
  <div class="section-head">
136
  <div>
 
198
  <p class="eyebrow">Debug</p>
199
  <h3>Raw Response</h3>
200
  </div>
201
+ <button id="raw-copy-button" class="secondary-button" type="button">Copy Raw</button>
202
  </div>
203
+ <p class="section-copy">This tab shows the exact JSON returned by the proxy.</p>
204
  <pre id="raw-json" class="raw-output">{}</pre>
205
  </article>
206
  </section>
public/chatclient/media.js CHANGED
@@ -1,8 +1,8 @@
1
  const IMAGE_PREFIX = "image/";
2
  const AUDIO_PREFIX = "audio/";
3
 
4
- export function createAttachments(fileList) {
5
- return Array.from(fileList, (file) => createFileAttachment(file));
6
  }
7
 
8
  export function createLinkAttachment(kind, url) {
@@ -31,21 +31,44 @@ export async function buildAttachmentParts(attachments) {
31
  }
32
 
33
  export function releaseAttachment(attachment) {
34
- if (attachment?.sourceType === "file" && attachment.previewUrl) {
35
  URL.revokeObjectURL(attachment.previewUrl);
36
  }
37
  }
38
 
39
- function createFileAttachment(file) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  const kind = classifyFile(file);
 
41
  return {
42
  id: crypto.randomUUID(),
43
- file,
44
  kind,
45
  name: file.name,
46
  sizeLabel: formatBytes(file.size),
47
- previewUrl: URL.createObjectURL(file),
48
- sourceType: "file"
 
 
49
  };
50
  }
51
 
@@ -60,7 +83,7 @@ async function buildAttachmentPart(attachment) {
60
  return {
61
  type: "image_url",
62
  image_url: {
63
- url: await readFileAsDataUrl(attachment.file)
64
  }
65
  };
66
  }
@@ -68,8 +91,8 @@ async function buildAttachmentPart(attachment) {
68
  return {
69
  type: "input_audio",
70
  input_audio: {
71
- data: await readFileAsBase64(attachment.file),
72
- format: inferAudioFormat(attachment.file)
73
  }
74
  };
75
  }
@@ -87,8 +110,9 @@ function classifyFile(file) {
87
  }
88
 
89
  function inferAudioFormat(file) {
90
- const name = file.name.toLowerCase();
91
- if (file.type === "audio/wav" || name.endsWith(".wav")) {
 
92
  return "wav";
93
  }
94
 
@@ -106,6 +130,10 @@ function readFileAsDataUrl(file) {
106
 
107
  async function readFileAsBase64(file) {
108
  const dataUrl = await readFileAsDataUrl(file);
 
 
 
 
109
  return dataUrl.split(",")[1] || "";
110
  }
111
 
 
1
  const IMAGE_PREFIX = "image/";
2
  const AUDIO_PREFIX = "audio/";
3
 
4
+ export async function createAttachments(fileList) {
5
+ return Promise.all(Array.from(fileList, (file) => createFileAttachment(file)));
6
  }
7
 
8
  export function createLinkAttachment(kind, url) {
 
31
  }
32
 
33
  export function releaseAttachment(attachment) {
34
+ if (attachment?.sourceType === "file" && attachment.previewUrl?.startsWith("blob:")) {
35
  URL.revokeObjectURL(attachment.previewUrl);
36
  }
37
  }
38
 
39
+ export function hydrateAttachment(savedAttachment) {
40
+ if (savedAttachment?.sourceType === "link") {
41
+ return savedAttachment;
42
+ }
43
+
44
+ if (savedAttachment?.sourceType === "file" && typeof savedAttachment.dataUrl === "string") {
45
+ return {
46
+ id: savedAttachment.id,
47
+ kind: savedAttachment.kind,
48
+ name: savedAttachment.name,
49
+ sizeLabel: savedAttachment.sizeLabel,
50
+ previewUrl: savedAttachment.dataUrl,
51
+ sourceType: "file",
52
+ dataUrl: savedAttachment.dataUrl,
53
+ mimeType: savedAttachment.mimeType
54
+ };
55
+ }
56
+
57
+ return null;
58
+ }
59
+
60
+ async function createFileAttachment(file) {
61
  const kind = classifyFile(file);
62
+ const dataUrl = await readFileAsDataUrl(file);
63
  return {
64
  id: crypto.randomUUID(),
 
65
  kind,
66
  name: file.name,
67
  sizeLabel: formatBytes(file.size),
68
+ previewUrl: dataUrl,
69
+ sourceType: "file",
70
+ dataUrl,
71
+ mimeType: file.type
72
  };
73
  }
74
 
 
83
  return {
84
  type: "image_url",
85
  image_url: {
86
+ url: attachment.dataUrl ?? await readFileAsDataUrl(attachment.file)
87
  }
88
  };
89
  }
 
91
  return {
92
  type: "input_audio",
93
  input_audio: {
94
+ data: extractBase64(attachment.dataUrl ?? await readFileAsDataUrl(attachment.file)),
95
+ format: inferAudioFormat(attachment.file ?? attachment)
96
  }
97
  };
98
  }
 
110
  }
111
 
112
  function inferAudioFormat(file) {
113
+ const mimeType = file?.type ?? file?.mimeType ?? "";
114
+ const name = (file?.name ?? "").toLowerCase();
115
+ if (mimeType === "audio/wav" || mimeType === "audio/x-wav" || name.endsWith(".wav")) {
116
  return "wav";
117
  }
118
 
 
130
 
131
  async function readFileAsBase64(file) {
132
  const dataUrl = await readFileAsDataUrl(file);
133
+ return extractBase64(dataUrl);
134
+ }
135
+
136
+ function extractBase64(dataUrl) {
137
  return dataUrl.split(",")[1] || "";
138
  }
139
 
public/chatclient/render.js CHANGED
@@ -1,7 +1,6 @@
1
  import { escapeHtml, renderRichText } from "/chatclient/richText.js";
2
 
3
  export function renderAttachments(card, container, summary, attachments, onRemove, onPreview) {
4
- card.hidden = attachments.length === 0;
5
  summary.textContent = buildAttachmentSummary(attachments);
6
  container.innerHTML = "";
7
 
 
1
  import { escapeHtml, renderRichText } from "/chatclient/richText.js";
2
 
3
  export function renderAttachments(card, container, summary, attachments, onRemove, onPreview) {
 
4
  summary.textContent = buildAttachmentSummary(attachments);
5
  container.innerHTML = "";
6
 
public/chatclient/styles.css CHANGED
@@ -75,6 +75,7 @@ button {
75
  .format-button,
76
  .tool-button,
77
  .send-button,
 
78
  .attachment-remove,
79
  .attachment-preview-trigger,
80
  .attachment-preview__close {
@@ -178,7 +179,8 @@ h3 {
178
 
179
  .format-button,
180
  .tool-button,
181
- .send-button {
 
182
  min-height: 48px;
183
  border-radius: 18px;
184
  padding: 0 16px;
@@ -232,6 +234,10 @@ h3 {
232
  cursor: progress;
233
  }
234
 
 
 
 
 
235
  .tool-row .send-button {
236
  margin-left: auto;
237
  }
 
75
  .format-button,
76
  .tool-button,
77
  .send-button,
78
+ .secondary-button,
79
  .attachment-remove,
80
  .attachment-preview-trigger,
81
  .attachment-preview__close {
 
179
 
180
  .format-button,
181
  .tool-button,
182
+ .send-button,
183
+ .secondary-button {
184
  min-height: 48px;
185
  border-radius: 18px;
186
  padding: 0 16px;
 
234
  cursor: progress;
235
  }
236
 
237
+ .secondary-button {
238
+ white-space: nowrap;
239
+ }
240
+
241
  .tool-row .send-button {
242
  margin-left: auto;
243
  }
src/services/audioConversionService.js CHANGED
@@ -5,15 +5,14 @@ import { spawn } from "node:child_process";
5
  import ffmpegPath from "ffmpeg-static";
6
  import { HttpError } from "../utils/httpError.js";
7
 
8
- function runFfmpeg(inputPath, outputPath) {
9
  return new Promise((resolve, reject) => {
10
  const child = spawn(ffmpegPath, [
11
  "-y",
12
  "-i",
13
  inputPath,
14
  "-vn",
15
- "-acodec",
16
- "libmp3lame",
17
  outputPath
18
  ]);
19
 
@@ -55,22 +54,59 @@ export function createAudioConversionService({ fetchImpl = fetch, maxAudioDownlo
55
  throw new HttpError(413, `Audio URL exceeded ${maxAudioDownloadMb}MB download limit.`);
56
  }
57
 
58
- const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "oapix-audio-"));
59
- const inputPath = path.join(tempDir, "input-media");
60
- const outputPath = path.join(tempDir, "output.mp3");
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  try {
63
- await fs.writeFile(inputPath, audioBuffer);
64
- await runFfmpeg(inputPath, outputPath);
65
- const mp3Buffer = await fs.readFile(outputPath);
66
-
67
- return {
68
- data: mp3Buffer.toString("base64"),
69
- format: "mp3"
70
- };
71
- } finally {
72
- await fs.rm(tempDir, { force: true, recursive: true });
73
  }
74
  }
75
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
 
5
  import ffmpegPath from "ffmpeg-static";
6
  import { HttpError } from "../utils/httpError.js";
7
 
8
+ function runFfmpeg(inputPath, outputPath, outputArgs) {
9
  return new Promise((resolve, reject) => {
10
  const child = spawn(ffmpegPath, [
11
  "-y",
12
  "-i",
13
  inputPath,
14
  "-vn",
15
+ ...outputArgs,
 
16
  outputPath
17
  ]);
18
 
 
54
  throw new HttpError(413, `Audio URL exceeded ${maxAudioDownloadMb}MB download limit.`);
55
  }
56
 
57
+ return transcodeAudioBuffer(audioBuffer, "mp3");
58
+ },
59
+
60
+ async normalizeBase64Audio({ data, format }) {
61
+ const audioBuffer = Buffer.from(data, "base64");
62
+ if (audioBuffer.length === 0) {
63
+ throw new HttpError(400, "Audio input must include base64 data.");
64
+ }
65
+
66
+ if (audioBuffer.length > maxBytes) {
67
+ throw new HttpError(413, `Audio input exceeded ${maxAudioDownloadMb}MB upload limit.`);
68
+ }
69
+
70
+ if (!["mp3", "wav"].includes(format)) {
71
+ throw new HttpError(400, "Audio input format must be mp3 or wav.");
72
+ }
73
 
74
  try {
75
+ return await transcodeAudioBuffer(audioBuffer, format);
76
+ } catch (error) {
77
+ if (error instanceof HttpError) {
78
+ throw error;
79
+ }
80
+
81
+ throw new HttpError(400, `Failed to normalize audio input as ${format}.`, error.message);
 
 
 
82
  }
83
  }
84
  };
85
+
86
+ async function transcodeAudioBuffer(audioBuffer, outputFormat) {
87
+ const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "oapix-audio-"));
88
+ const inputPath = path.join(tempDir, "input-media");
89
+ const outputPath = path.join(tempDir, `output.${outputFormat}`);
90
+
91
+ try {
92
+ await fs.writeFile(inputPath, audioBuffer);
93
+ await runFfmpeg(inputPath, outputPath, ffmpegOutputArgs(outputFormat));
94
+ const convertedBuffer = await fs.readFile(outputPath);
95
+
96
+ return {
97
+ data: convertedBuffer.toString("base64"),
98
+ format: outputFormat
99
+ };
100
+ } finally {
101
+ await fs.rm(tempDir, { force: true, recursive: true });
102
+ }
103
+ }
104
+ }
105
+
106
+ function ffmpegOutputArgs(outputFormat) {
107
+ if (outputFormat === "wav") {
108
+ return ["-acodec", "pcm_s16le"];
109
+ }
110
+
111
+ return ["-acodec", "libmp3lame"];
112
  }
src/services/requestNormalizationService.js CHANGED
@@ -88,7 +88,7 @@ export function createRequestNormalizationService({ audioConversionService }) {
88
 
89
  nextParts.push({
90
  type: "input_audio",
91
- input_audio: normalizeAudioBase64(audio)
92
  });
93
  continue;
94
  }
 
88
 
89
  nextParts.push({
90
  type: "input_audio",
91
+ input_audio: await audioConversionService.normalizeBase64Audio(normalizeAudioBase64(audio))
92
  });
93
  continue;
94
  }
test/requestNormalization.test.js CHANGED
@@ -11,6 +11,9 @@ test("normalizes audio URLs to mp3 base64 and raw image base64 to data URLs", as
11
  data: "ZmFrZS1tcDM=",
12
  format: "mp3"
13
  };
 
 
 
14
  }
15
  }
16
  });
@@ -51,3 +54,49 @@ test("normalizes audio URLs to mp3 base64 and raw image base64 to data URLs", as
51
  });
52
  assert.equal(responseContext.audioFormat, "wav");
53
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  data: "ZmFrZS1tcDM=",
12
  format: "mp3"
13
  };
14
+ },
15
+ async normalizeBase64Audio(audio) {
16
+ return audio;
17
  }
18
  }
19
  });
 
54
  });
55
  assert.equal(responseContext.audioFormat, "wav");
56
  });
57
+
58
+ test("normalizes uploaded wav audio before forwarding upstream", async () => {
59
+ const service = createRequestNormalizationService({
60
+ audioConversionService: {
61
+ async downloadAndConvertToMp3Base64() {
62
+ throw new Error("unexpected url conversion");
63
+ },
64
+ async normalizeBase64Audio(audio) {
65
+ assert.deepEqual(audio, {
66
+ data: "UklGRl8AAABXQVZFZm10",
67
+ format: "wav"
68
+ });
69
+
70
+ return {
71
+ data: "bm9ybWFsaXplZC13YXY=",
72
+ format: "wav"
73
+ };
74
+ }
75
+ }
76
+ });
77
+
78
+ const { normalizedBody } = await service.normalize({
79
+ messages: [
80
+ {
81
+ role: "user",
82
+ content: [
83
+ {
84
+ type: "input_audio",
85
+ input_audio: {
86
+ data: "UklGRl8AAABXQVZFZm10",
87
+ format: "wav"
88
+ }
89
+ }
90
+ ]
91
+ }
92
+ ]
93
+ });
94
+
95
+ assert.deepEqual(normalizedBody.messages[0].content[0], {
96
+ type: "input_audio",
97
+ input_audio: {
98
+ data: "bm9ybWFsaXplZC13YXY=",
99
+ format: "wav"
100
+ }
101
+ });
102
+ });