| | |
| | |
| | |
| |
|
| |
|
| | import { browser } from '$app/environment';
|
| | import { MimeTypeApplication, MimeTypeImage } from '$lib/enums';
|
| | import * as pdfjs from 'pdfjs-dist';
|
| |
|
/**
 * Minimal shape of a page's text content as consumed by this module:
 * a list of items, of which only the `str` field is read.
 */
interface TextContent {
	items: { str: string }[];
}
|
| |
|
// Configure the PDF.js worker, but only when running in the browser.
if (browser) {
	// The worker build is imported with the `?raw` suffix, wrapped in a Blob, and
	// exposed to PDF.js through an object URL assigned to `workerSrc`.
	import('pdfjs-dist/build/pdf.worker.min.mjs?raw')
		.then((workerModule) => {
			const workerBlob = new Blob([workerModule.default], { type: 'application/javascript' });
			pdfjs.GlobalWorkerOptions.workerSrc = URL.createObjectURL(workerBlob);
		})
		.catch((error: unknown) => {
			// Best-effort setup: keep the module usable, but surface the cause so a
			// failed worker load can actually be diagnosed.
			console.warn('Failed to load PDF.js worker, PDF processing may not work', error);
		});
}
|
| |
|
| | |
| | |
| | |
| | |
| |
|
/**
 * Reads a file into memory using a `FileReader`.
 *
 * @param file - The file to read.
 * @returns A promise resolving with the reader's result, treated as an `ArrayBuffer`.
 * @throws Rejects with `Error('Failed to read file.')` when the reader reports
 *   an error or finishes without a result.
 */
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
	return new Promise((resolve, reject) => {
		const fileReader = new FileReader();

		fileReader.onerror = () => {
			reject(new Error('Failed to read file.'));
		};

		fileReader.onload = (loadEvent) => {
			const contents = loadEvent.target?.result;

			// A load event without a result is treated as a failed read.
			if (!contents) {
				reject(new Error('Failed to read file.'));
				return;
			}

			resolve(contents as ArrayBuffer);
		};

		fileReader.readAsArrayBuffer(file);
	});
}
|
| |
|
| | |
| | |
| | |
| | |
| |
|
/**
 * Extracts the text of every page of a PDF file.
 *
 * All pages are requested concurrently. The `str` of each text item becomes
 * one line of the result (an item without `str` contributes an empty line),
 * and lines appear in page order.
 *
 * @param file - The PDF file to read.
 * @returns Every text item of the document joined with `\n`.
 * @throws Error if called outside the browser, or — prefixed with
 *   "Failed to convert PDF to text: " — if reading or parsing the file fails.
 */
export async function convertPDFToText(file: File): Promise<string> {
	if (!browser) {
		throw new Error('PDF processing is only available in the browser');
	}

	try {
		const buffer = await getFileAsBuffer(file);
		const loadingTask = pdfjs.getDocument(buffer);

		try {
			const pdf = await loadingTask.promise;
			const textContentPromises: Promise<TextContent>[] = [];

			for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
				textContentPromises.push(
					// `any` keeps the page proxy compatible with the narrower local TextContent shape.
					// eslint-disable-next-line @typescript-eslint/no-explicit-any
					pdf.getPage(pageNumber).then((page: any) => page.getTextContent())
				);
			}

			const textContents = await Promise.all(textContentPromises);
			const textItems = textContents.flatMap((textContent: TextContent) =>
				textContent.items.map((item) => item.str ?? '')
			);

			return textItems.join('\n');
		} finally {
			// Release the document and its worker-side resources once done. Cleanup is
			// best-effort: a teardown failure must not replace the extraction outcome.
			await loadingTask.destroy().catch((cleanupError: unknown) => {
				console.warn('Failed to release PDF.js resources:', cleanupError);
			});
		}
	} catch (error) {
		console.error('Error converting PDF to text:', error);
		throw new Error(
			`Failed to convert PDF to text: ${error instanceof Error ? error.message : 'Unknown error'}`
		);
	}
}
|
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
/**
 * Renders every page of a PDF file to an image.
 *
 * Each page is drawn onto its own off-document canvas sized to the page
 * viewport at the given scale, then serialized with `canvas.toDataURL`
 * using `MimeTypeImage.PNG`.
 *
 * @param file - The PDF file to render.
 * @param scale - Viewport scale factor passed to `page.getViewport` (default 1.5).
 * @returns One data URL per page, in page order.
 * @throws Error if called outside the browser, or — prefixed with
 *   "Failed to convert PDF to images: " — if reading, parsing or rendering fails.
 */
export async function convertPDFToImage(file: File, scale: number = 1.5): Promise<string[]> {
	if (!browser) {
		throw new Error('PDF processing is only available in the browser');
	}

	try {
		const buffer = await getFileAsBuffer(file);
		const loadingTask = pdfjs.getDocument(buffer);

		try {
			const doc = await loadingTask.promise;

			// Renders a single page (1-based) and resolves with its data URL.
			const renderPage = async (pageNumber: number): Promise<string> => {
				const page = await doc.getPage(pageNumber);
				const viewport = page.getViewport({ scale });
				const canvas = document.createElement('canvas');
				const ctx = canvas.getContext('2d');

				if (!ctx) {
					throw new Error('Failed to get 2D context from canvas');
				}

				canvas.width = viewport.width;
				canvas.height = viewport.height;

				await page.render({
					canvasContext: ctx,
					viewport: viewport,
					canvas: canvas
				}).promise;

				return canvas.toDataURL(MimeTypeImage.PNG);
			};

			// Every per-page promise is created up front and handed straight to
			// Promise.all, so a failure on one page never leaves another page's
			// rejection unobserved.
			const pages = Array.from({ length: doc.numPages }, (_, index) => renderPage(index + 1));

			return await Promise.all(pages);
		} finally {
			// Release the document and its worker-side resources once done. Cleanup is
			// best-effort: a teardown failure must not replace the rendering outcome.
			await loadingTask.destroy().catch((cleanupError: unknown) => {
				console.warn('Failed to release PDF.js resources:', cleanupError);
			});
		}
	} catch (error) {
		console.error('Error converting PDF to images:', error);
		throw new Error(
			`Failed to convert PDF to images: ${error instanceof Error ? error.message : 'Unknown error'}`
		);
	}
}
|
| |
|
| | |
| | |
| | |
| | |
| |
|
/**
 * Tells whether a file's reported MIME type is exactly `MimeTypeApplication.PDF`.
 *
 * @param file - The file whose `type` is inspected.
 * @returns `true` only on an exact match.
 */
export function isPdfFile(file: File): boolean {
	const { type: reportedType } = file;

	return reportedType === MimeTypeApplication.PDF;
}
|
| |
|
| | |
| | |
| | |
| | |
| |
|
/**
 * Tells whether a MIME type string is one of the application MIME types this
 * module recognizes — currently only `MimeTypeApplication.PDF`, matched exactly.
 *
 * @param mimeType - The MIME type string to check.
 * @returns `true` only on an exact match.
 */
export function isApplicationMimeType(mimeType: string): boolean {
	const recognizedTypes: readonly string[] = [MimeTypeApplication.PDF];

	return recognizedTypes.includes(mimeType);
}
|
| |
|