Upload project files
Browse files
src/app/api/consciousness/asr/route.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
+
|
| 3 |
+
/**
 * Speech-to-text route handler.
 *
 * Expects a JSON body of the form `{ audio: string }`, where `audio` is
 * base64-encoded audio, optionally prefixed with a data-URL header such as
 * `data:audio/webm;base64,`.
 *
 * Responses:
 * - 400 `{ error }` when the body is not valid JSON, `audio` is missing, or
 *   `audio` is not a string.
 * - 200 `{ text, success: true }` when transcription succeeds (with an extra
 *   `note` when the transcription came back empty).
 * - 200 `{ text: "", success: false, fallback: true, note }` when the
 *   server-side transcription call fails, so the client can switch to
 *   browser-based recognition.
 * - 500 `{ error }` on any other unexpected failure.
 *
 * @param req - Incoming request carrying the JSON payload.
 * @returns A JSON response as described above.
 */
export async function POST(req: NextRequest) {
  const badRequest = (message: string) =>
    NextResponse.json({ error: message }, { status: 400 });

  try {
    // A body that cannot be parsed is the caller's mistake, so report it as
    // a 400 instead of letting it fall through to the generic 500 handler.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return badRequest("Request body must be valid JSON");
    }

    // Guard against `null` and primitive bodies before reading `audio`.
    const audio =
      typeof body === "object" && body !== null
        ? (body as { audio?: unknown }).audio
        : undefined;

    if (!audio) {
      return badRequest("Audio data is required");
    }

    // Validate the payload type before touching the SDK so that bad input is
    // always answered with a 400, regardless of whether the SDK is available.
    if (typeof audio !== "string") {
      return badRequest("Audio data must be a base64 string");
    }

    // Try to use z-ai-web-dev-sdk for ASR
    try {
      const ZAI = (await import("z-ai-web-dev-sdk")).default;
      const zai = await ZAI.create();

      // Remove data URL prefix if present (e.g., "data:audio/webm;base64,")
      const [, afterComma] = audio.split(",");
      const base64Data = afterComma ?? audio;
      const audioBuffer = Buffer.from(base64Data, "base64");

      // The SDK is handed a file-like object rather than the raw base64 string.
      const mimeType = "audio/webm";
      const audioBlob = new Blob([audioBuffer], { type: mimeType });
      const audioFile = new File([audioBlob], "recording.webm", {
        type: mimeType,
        lastModified: Date.now(),
      });

      const result = await zai.audio.transcriptions.create({
        model: "whisper-1",
        file: audioFile,
      });

      // Only a non-blank string counts as a successful transcription.
      const rawText = (result as { text?: unknown }).text;
      const transcribedText = typeof rawText === "string" ? rawText.trim() : "";

      if (transcribedText) {
        return NextResponse.json({
          text: transcribedText,
          success: true,
        });
      }

      // If transcription returned empty, still return gracefully
      return NextResponse.json({
        text: "",
        success: true,
        note: "Transcription returned empty — please try again",
      });
    } catch (apiError) {
      console.error("ASR API error:", apiError);

      // Signal the client to fall back to browser-based recognition; this is
      // deliberately a non-error response carrying `fallback: true`.
      return NextResponse.json({
        text: "",
        success: false,
        fallback: true,
        note: "Server ASR unavailable — use browser SpeechRecognition API",
      });
    }
  } catch (error) {
    console.error("ASR route error:", error);
    return NextResponse.json(
      { error: "Speech recognition failed" },
      { status: 500 }
    );
  }
}
|