theNorms committed on
Commit
32ed110
·
verified ·
1 Parent(s): afaab50

Upload project files

Browse files
src/app/api/consciousness/asr/route.ts ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextRequest, NextResponse } from "next/server";
2
+
3
/**
 * POST handler that transcribes a base64-encoded audio clip.
 *
 * Expects a JSON body shaped like `{ audio: string }`, where `audio` is either
 * raw base64 or a data URL (e.g. "data:audio/webm;base64,...").
 *
 * Responses:
 * - 400 when the body is not valid JSON, or `audio` is missing or not a string.
 * - 200 `{ text, success: true }` when a non-empty transcription is produced.
 * - 200 `{ text: "", success: true, note }` when the transcription is empty.
 * - 200 `{ text: "", success: false, fallback: true, note }` when loading or
 *   calling the SDK throws, so the client can fall back to browser recognition.
 * - 500 for any other unexpected failure.
 *
 * @param req Incoming request carrying the JSON payload.
 * @returns A JSON response as described above.
 */
export async function POST(req: NextRequest) {
  try {
    // A body that cannot be parsed is a client error, not a server failure.
    let body: unknown;
    try {
      body = await req.json();
    } catch {
      return NextResponse.json(
        { error: "Request body must be valid JSON" },
        { status: 400 }
      );
    }

    // Guard against non-object payloads (e.g. JSON `null`) before reading `audio`.
    const audio: unknown =
      typeof body === "object" && body !== null
        ? (body as { audio?: unknown }).audio
        : undefined;

    if (!audio) {
      return NextResponse.json(
        { error: "Audio data is required" },
        { status: 400 }
      );
    }

    // Validate the payload type up front so bad input is rejected with a 400
    // regardless of whether the SDK can be loaded.
    if (typeof audio !== "string") {
      return NextResponse.json(
        { error: "Audio data must be a base64 string" },
        { status: 400 }
      );
    }

    // Try to use z-ai-web-dev-sdk for ASR
    try {
      const ZAI = (await import("z-ai-web-dev-sdk")).default;
      const zai = await ZAI.create();

      // Remove data URL prefix if present (e.g., "data:audio/webm;base64,")
      const base64Data = audio.includes(",")
        ? audio.split(",")[1] ?? ""
        : audio;
      const audioBuffer = Buffer.from(base64Data, "base64");

      // The transcription call is given a file-like object rather than the raw
      // base64 string, so wrap the decoded bytes in a File with a MIME type.
      const audioBlob = new Blob([audioBuffer], { type: "audio/webm" });
      const audioFile = new File([audioBlob], "recording.webm", {
        type: "audio/webm",
        lastModified: Date.now(),
      });

      const result = await zai.audio.transcriptions.create({
        model: "whisper-1",
        file: audioFile,
      });

      // The SDK result is untyped from this file's view; only accept a string.
      const rawText = (result as { text?: unknown }).text;
      const transcribedText = typeof rawText === "string" ? rawText.trim() : "";

      if (transcribedText) {
        return NextResponse.json({
          text: transcribedText,
          success: true,
        });
      }

      // If transcription returned empty, still return gracefully
      return NextResponse.json({
        text: "",
        success: true,
        note: "Transcription returned empty — please try again",
      });
    } catch (apiError) {
      console.error("ASR API error:", apiError);

      // Deliberate best-effort: answer 200 with a fallback hint so the
      // client side can use browser-based recognition as a backup.
      return NextResponse.json({
        text: "",
        success: false,
        fallback: true,
        note: "Server ASR unavailable — use browser SpeechRecognition API",
      });
    }
  } catch (error) {
    console.error("ASR route error:", error);
    return NextResponse.json(
      { error: "Speech recognition failed" },
      { status: 500 }
    );
  }
}