File size: 4,866 Bytes
ff0e173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import { NextResponse } from 'next/server';
import {
  readStore,
  addFileRecord,
  updateFileRecord,
  toPublicFile,
  type KBFileRecord,
  type Chunk,
} from '@/lib/kb-store';
import {
  getFileType,
  formatSize,
} from '@/lib/file-meta';

// Route-level configuration exports (this module is a Next.js route handler; it
// imports from 'next/server').
//
// pdf-parse / mammoth / xlsx require Node, not the edge runtime.
export const runtime = 'nodejs';
// NOTE(review): presumably opts this route out of static caching so every
// request is handled fresh — confirm against the Next.js route segment config docs.
export const dynamic = 'force-dynamic';
// NOTE(review): presumably the maximum execution time for this route, in
// seconds — confirm the unit and the hosting plan's limit.
export const maxDuration = 60;

/**
 * Builds an identifier for a newly uploaded file.
 *
 * The id has the form `file-<epoch millis>-<suffix>`, where the suffix is up to
 * six base-36 characters taken from `Math.random()`. The suffix is not
 * cryptographically random and may be shorter than six characters.
 */
function makeId() {
  const createdAt = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2, 8);
  return ['file', createdAt, randomSuffix].join('-');
}

/**
 * Drains a web `ReadableStream` of bytes into a single Node `Buffer`.
 *
 * @param stream - Byte stream to consume; it is fully read by this call.
 * @returns A Buffer holding every byte the stream produced, in order.
 */
async function streamToBuffer(stream: ReadableStream<Uint8Array>): Promise<Buffer> {
  // Wrapping the stream in a Response lets the runtime do the chunk collection.
  const wrapped = new Response(stream);
  const bytes = await wrapped.arrayBuffer();
  return Buffer.from(bytes);
}

/**
 * Registers an uploaded document in the knowledge-base store, then extracts,
 * chunks and embeds its text.
 *
 * Flow:
 * 1. Resolve the file type from the name; respond 400 if `getFileType` yields nothing.
 * 2. Persist a record with status `Processing` (a failure here rejects the
 *    returned promise — it is outside the try/catch).
 * 3. Extract text and split it into chunks. With zero chunks the record is
 *    marked `Failed` and the response is 422.
 * 4. Embed the chunks, mark the record `Ready`, and respond with the public file.
 * 5. Any error thrown during steps 3-4 marks the record `Failed` and yields a 500
 *    carrying the error message.
 *
 * @param fileName - Original file name; used for type detection and stored on the record.
 * @param fileSize - Size passed to `formatSize` for the stored `size` field.
 * @param buffer - Raw file contents handed to `extractText`.
 * @returns A JSON response describing the outcome.
 */
async function createFileRecord(
  fileName: string,
  fileSize: number,
  buffer: Buffer
): Promise<NextResponse> {
  const fileType = getFileType(fileName);
  if (!fileType) {
    return NextResponse.json(
      { error: 'Unsupported file type. Use PDF, DOCX, XLSX, XLS, or CSV.' },
      { status: 400 }
    );
  }

  const pending: KBFileRecord = {
    id: makeId(),
    name: fileName,
    type: fileType,
    size: formatSize(fileSize),
    status: 'Processing',
    uploadedAt: new Date().toISOString(),
    chunks: [],
  };
  await addFileRecord(pending);

  try {
    // Parser and embedding modules are loaded on demand, in parallel.
    const [parsers, cohere] = await Promise.all([
      import('@/lib/parsers'),
      import('@/lib/cohere'),
    ]);
    const { extractText, chunkText } = parsers;
    const { embedDocuments } = cohere;

    const extracted = await extractText(buffer, fileType);
    const pieces = chunkText(extracted);

    if (pieces.length === 0) {
      const failed = await updateFileRecord(pending.id, {
        status: 'Failed',
        error: 'No extractable text found in the document.',
      });
      // Fall back to the in-memory record when the store returns nothing.
      return NextResponse.json({ file: toPublicFile(failed ?? pending) }, { status: 422 });
    }

    const vectors = await embedDocuments(pieces);
    const chunks: Chunk[] = pieces.map((piece, index) => ({
      id: `${pending.id}-c${index}`,
      text: piece,
      // A missing vector is stored as an empty embedding.
      embedding: vectors[index] ?? [],
    }));

    const ready = await updateFileRecord(pending.id, { status: 'Ready', chunks });
    return NextResponse.json({ file: toPublicFile(ready ?? pending) });
  } catch (err) {
    let message = 'Processing failed.';
    if (err instanceof Error) {
      message = err.message;
    }
    const failed = await updateFileRecord(pending.id, { status: 'Failed', error: message });
    return NextResponse.json(
      { file: toPublicFile(failed ?? pending), error: message },
      { status: 500 }
    );
  }
}

/**
 * Lists every file in the knowledge-base store in its public form.
 *
 * Responds with `{ files }` on success. If reading the store throws, responds
 * 500 with the error message and an empty `files` array.
 */
export async function GET() {
  try {
    const { files } = await readStore();
    const publicFiles = files.map(toPublicFile);
    return NextResponse.json({ files: publicFiles });
  } catch (err) {
    let message = 'Failed to load documents.';
    if (err instanceof Error) {
      message = err.message;
    }
    return NextResponse.json({ error: message, files: [] }, { status: 500 });
  }
}

/**
 * Ingests a document into the knowledge base.
 *
 * Two request shapes are accepted:
 * - `application/json` with `{ blobPathname, name, size? }`: the file was
 *   already uploaded to blob storage; it is fetched, processed, and the blob is
 *   then deleted on a best-effort basis.
 * - Anything else is parsed as form data carrying the upload in a `file` field.
 *
 * Responses: 400 for malformed input, 404 when the referenced blob is missing,
 * 500 for unexpected failures, otherwise whatever `createFileRecord` returns.
 *
 * @param request - Incoming HTTP request.
 */
export async function POST(request: Request) {
  const contentType = request.headers.get('content-type') ?? '';

  if (contentType.includes('application/json')) {
    let parsed: unknown;
    try {
      parsed = await request.json();
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body.' }, { status: 400 });
    }

    // The parsed JSON is untrusted: `null`, arrays, primitives or wrongly typed
    // fields must result in a 400 rather than a TypeError on `.trim()`.
    const fields: Record<string, unknown> =
      typeof parsed === 'object' && parsed !== null
        ? (parsed as Record<string, unknown>)
        : {};
    const rawPathname = fields['blobPathname'];
    const rawName = fields['name'];
    const rawSize = fields['size'];

    const blobPathname = typeof rawPathname === 'string' ? rawPathname.trim() : '';
    const fileName = typeof rawName === 'string' ? rawName.trim() : '';
    if (!blobPathname || !fileName) {
      return NextResponse.json(
        { error: 'blobPathname and name are required.' },
        { status: 400 }
      );
    }

    // Only trust a client-reported size when it is a sane number; otherwise the
    // size reported by blob storage is used.
    const declaredSize =
      typeof rawSize === 'number' && Number.isFinite(rawSize) && rawSize >= 0
        ? rawSize
        : undefined;

    // NOTE(review): blobPathname is client-supplied and is both read and deleted
    // below — confirm that callers are authenticated and/or that pathnames are
    // restricted to the upload prefix.
    try {
      const { del, get } = await import('@vercel/blob');
      const blob = await get(blobPathname, { access: 'private', useCache: false });
      if (!blob || blob.statusCode !== 200) {
        return NextResponse.json({ error: 'Uploaded blob was not found.' }, { status: 404 });
      }

      const buffer = await streamToBuffer(blob.stream);
      const response = await createFileRecord(
        fileName,
        declaredSize ?? blob.blob.size,
        buffer
      );

      // The raw upload is only a handoff object. The indexed KB is stored separately.
      // Deletion is best-effort: a cleanup failure must not fail the request.
      await del(blobPathname).catch(() => undefined);
      return response;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Blob processing failed.';
      return NextResponse.json({ error: message }, { status: 500 });
    }
  }

  let form: FormData;
  try {
    form = await request.formData();
  } catch {
    return NextResponse.json({ error: 'Expected multipart/form-data.' }, { status: 400 });
  }

  const file = form.get('file');
  if (!(file instanceof File)) {
    return NextResponse.json({ error: 'No file provided.' }, { status: 400 });
  }

  try {
    const buffer = Buffer.from(await file.arrayBuffer());
    // `await` is required here: returning the bare promise would let a rejection
    // from createFileRecord escape this try/catch.
    return await createFileRecord(file.name, file.size, buffer);
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Processing failed.';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}