Spaces:
No application file
No application file
| import { NextRequest } from 'next/server'; | |
| import { parsePDF } from '@/lib/pdf/pdf-providers'; | |
| import { resolvePDFApiKey, resolvePDFBaseUrl } from '@/lib/server/provider-config'; | |
| import type { PDFProviderId } from '@/lib/pdf/types'; | |
| import type { ParsedPdfContent } from '@/lib/types/pdf'; | |
| import { createLogger } from '@/lib/logger'; | |
| import { apiError, apiSuccess } from '@/lib/server/api-response'; | |
| import { validateUrlForSSRF } from '@/lib/server/ssrf-guard'; | |
const log = createLogger('Parse PDF');

/**
 * Reads a text field from multipart form data.
 *
 * `FormData.get` yields `string | File | null`; this helper narrows at runtime
 * instead of asserting the type, so a file uploaded under a text field name is
 * ignored rather than passed on as if it were a string.
 *
 * @param formData - The parsed multipart body.
 * @param name - The form field to read.
 * @returns The field value, or `undefined` when the field is absent, empty,
 *   or not a string.
 */
function readTextField(formData: FormData, name: string): string | undefined {
  const value = formData.get(name);
  return typeof value === 'string' && value !== '' ? value : undefined;
}

/**
 * Handles a multipart PDF upload and returns the parsed content.
 *
 * Form fields read from the request:
 * - `pdf`: the uploaded file (required).
 * - `providerId`: the parser to use; defaults to `'unpdf'` when omitted.
 * - `apiKey`, `baseUrl`: optional client-supplied provider settings.
 *
 * @param req - The incoming request; its body must be `multipart/form-data`.
 * @returns `apiSuccess` with the parsed content plus `fileName`/`fileSize`
 *   metadata, or `apiError` with status 400 (wrong content type, missing or
 *   non-file `pdf` field), 403 (base URL rejected by the SSRF guard), or
 *   500 (any error thrown while reading or parsing).
 */
export async function POST(req: NextRequest) {
  try {
    const contentType = req.headers.get('content-type') || '';
    if (!contentType.includes('multipart/form-data')) {
      log.error('Invalid Content-Type for PDF upload:', contentType);
      return apiError(
        'INVALID_REQUEST',
        400,
        `Invalid Content-Type: expected multipart/form-data, got "${contentType}"`,
      );
    }

    const formData = await req.formData();

    // A text value under the `pdf` key is not an upload; reject it here rather
    // than failing later when the value is read as a file.
    const pdfFile = formData.get('pdf');
    if (pdfFile === null || typeof pdfFile === 'string') {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'No PDF file provided');
    }

    // providerId is optional: when the client omits it (or sends an empty
    // value) the request is handled by the 'unpdf' provider.
    const effectiveProviderId = (readTextField(formData, 'providerId') ??
      'unpdf') as PDFProviderId;
    const apiKey = readTextField(formData, 'apiKey');
    const clientBaseUrl = readTextField(formData, 'baseUrl');

    // The SSRF guard is applied only when NODE_ENV is 'production'.
    // NOTE(review): presumably so local development can target localhost — confirm.
    if (clientBaseUrl && process.env.NODE_ENV === 'production') {
      const ssrfError = validateUrlForSSRF(clientBaseUrl);
      if (ssrfError) {
        return apiError('INVALID_URL', 403, ssrfError);
      }
    }

    // With a client-supplied base URL, only the client-supplied key is used and
    // the resolve helpers are bypassed.
    // NOTE(review): presumably to avoid sending server-configured credentials
    // to a client-chosen endpoint — confirm.
    const config = {
      providerId: effectiveProviderId,
      apiKey: clientBaseUrl ? apiKey || '' : resolvePDFApiKey(effectiveProviderId, apiKey),
      baseUrl: clientBaseUrl
        ? clientBaseUrl
        : resolvePDFBaseUrl(effectiveProviderId, clientBaseUrl),
    };

    // Convert PDF to buffer
    const buffer = Buffer.from(await pdfFile.arrayBuffer());

    // Parse PDF using the provider system
    const result = await parsePDF(config, buffer);

    // Add file metadata. The provider metadata is spread first so that the
    // pageCount default below cannot be overwritten by an explicit
    // `pageCount: undefined` coming from the provider.
    const resultWithMetadata: ParsedPdfContent = {
      ...result,
      metadata: {
        ...result.metadata,
        pageCount: result.metadata?.pageCount ?? 0,
        fileName: pdfFile.name,
        fileSize: pdfFile.size,
      },
    };

    return apiSuccess({ data: resultWithMetadata });
  } catch (error) {
    log.error('Error parsing PDF:', error);
    return apiError('PARSE_FAILED', 500, error instanceof Error ? error.message : 'Unknown error');
  }
}