Reubencf committed on
Commit
94c2cc5
·
1 Parent(s): de1a703

feat: Add PDF text extraction and update MCP read_file to support documents

Browse files
app/api/documents/process/route.ts CHANGED
@@ -113,12 +113,24 @@ export async function POST(request: NextRequest) {
113
  break;
114
 
115
  case 'pdf':
116
- content = {
117
- type: 'pdf',
118
- fileName,
119
- size: fileBuffer.length,
120
- message: 'PDF processing requires additional libraries for text extraction'
121
- };
 
 
 
 
 
 
 
 
 
 
 
 
122
  break;
123
 
124
  case 'pptx':
 
113
  break;
114
 
115
  case 'pdf':
116
+ try {
117
+ const pdf = require('pdf-parse');
118
+ const data = await pdf(fileBuffer);
119
+ content = {
120
+ type: 'pdf',
121
+ text: data.text,
122
+ info: data.info,
123
+ metadata: data.metadata,
124
+ version: data.version,
125
+ numpages: data.numpages
126
+ };
127
+ } catch (error) {
128
+ content = {
129
+ type: 'pdf',
130
+ error: 'Failed to process PDF document',
131
+ details: error
132
+ };
133
+ }
134
  break;
135
 
136
  case 'pptx':
mcp-server.js CHANGED
@@ -143,8 +143,6 @@ class ReubenOSMCPServer {
143
  type: 'array',
144
  items: { type: 'string' }
145
  },
146
- correctAnswer: { type: ['string', 'number', 'boolean'] },
147
- explanation: { type: 'string' },
148
  points: { type: 'number' },
149
  },
150
  required: ['id', 'question', 'type'],
@@ -229,6 +227,50 @@ class ReubenOSMCPServer {
229
  };
230
  }
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  const url = new URL(API_ENDPOINT);
233
  if (passkey) url.searchParams.set('passkey', passkey);
234
  if (isPublic) url.searchParams.set('isPublic', 'true');
 
143
  type: 'array',
144
  items: { type: 'string' }
145
  },
 
 
146
  points: { type: 'number' },
147
  },
148
  required: ['id', 'question', 'type'],
 
227
  };
228
  }
229
 
230
+ const ext = fileName.split('.').pop().toLowerCase();
231
+ const isDocument = ['pdf', 'docx', 'xlsx', 'xls', 'pptx'].includes(ext);
232
+
233
+ if (isDocument) {
234
+ // Use document processing endpoint
235
+ const processUrl = `${BASE_URL}/api/documents/process`;
236
+ const response = await fetch(processUrl, {
237
+ method: 'POST',
238
+ headers: { 'Content-Type': 'application/json' },
239
+ body: JSON.stringify({
240
+ fileName,
241
+ key: passkey,
242
+ isPublic,
243
+ operation: 'read'
244
+ })
245
+ });
246
+
247
+ const data = await response.json();
248
+
249
+ if (response.ok && data.success) {
250
+ let textContent = '';
251
+ if (data.content.text) {
252
+ textContent = data.content.text;
253
+ } else if (data.content.sheets) {
254
+ textContent = JSON.stringify(data.content.sheets, null, 2);
255
+ } else {
256
+ textContent = JSON.stringify(data.content, null, 2);
257
+ }
258
+
259
+ return {
260
+ content: [
261
+ {
262
+ type: 'text',
263
+ text: `📄 Content of ${fileName} (${data.content.type}):\n\n${textContent}`,
264
+ },
265
+ ],
266
+ };
267
+ } else {
268
+ return {
269
+ content: [{ type: 'text', text: `❌ Failed to process document: ${data.error || 'Unknown error'}` }],
270
+ };
271
+ }
272
+ }
273
+
274
  const url = new URL(API_ENDPOINT);
275
  if (passkey) url.searchParams.set('passkey', passkey);
276
  if (isPublic) url.searchParams.set('isPublic', 'true');
package-lock.json CHANGED
@@ -26,6 +26,7 @@
26
  "next": "16.0.1",
27
  "node-fetch": "^3.3.2",
28
  "officegen": "^0.6.5",
 
29
  "pdfkit": "^0.17.2",
30
  "puppeteer-core": "^24.30.0",
31
  "react": "19.2.0",
@@ -9487,6 +9488,210 @@
9487
  "url": "https://opencollective.com/express"
9488
  }
9489
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9490
  "node_modules/pdfjs-dist": {
9491
  "version": "5.4.296",
9492
  "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz",
 
26
  "next": "16.0.1",
27
  "node-fetch": "^3.3.2",
28
  "officegen": "^0.6.5",
29
+ "pdf-parse": "^2.4.5",
30
  "pdfkit": "^0.17.2",
31
  "puppeteer-core": "^24.30.0",
32
  "react": "19.2.0",
 
9488
  "url": "https://opencollective.com/express"
9489
  }
9490
  },
9491
+ "node_modules/pdf-parse": {
9492
+ "version": "2.4.5",
9493
+ "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-2.4.5.tgz",
9494
+ "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==",
9495
+ "license": "Apache-2.0",
9496
+ "dependencies": {
9497
+ "@napi-rs/canvas": "0.1.80",
9498
+ "pdfjs-dist": "5.4.296"
9499
+ },
9500
+ "bin": {
9501
+ "pdf-parse": "bin/cli.mjs"
9502
+ },
9503
+ "engines": {
9504
+ "node": ">=20.16.0 <21 || >=22.3.0"
9505
+ },
9506
+ "funding": {
9507
+ "type": "github",
9508
+ "url": "https://github.com/sponsors/mehmet-kozan"
9509
+ }
9510
+ },
9511
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas": {
9512
+ "version": "0.1.80",
9513
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.80.tgz",
9514
+ "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==",
9515
+ "license": "MIT",
9516
+ "workspaces": [
9517
+ "e2e/*"
9518
+ ],
9519
+ "engines": {
9520
+ "node": ">= 10"
9521
+ },
9522
+ "optionalDependencies": {
9523
+ "@napi-rs/canvas-android-arm64": "0.1.80",
9524
+ "@napi-rs/canvas-darwin-arm64": "0.1.80",
9525
+ "@napi-rs/canvas-darwin-x64": "0.1.80",
9526
+ "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80",
9527
+ "@napi-rs/canvas-linux-arm64-gnu": "0.1.80",
9528
+ "@napi-rs/canvas-linux-arm64-musl": "0.1.80",
9529
+ "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80",
9530
+ "@napi-rs/canvas-linux-x64-gnu": "0.1.80",
9531
+ "@napi-rs/canvas-linux-x64-musl": "0.1.80",
9532
+ "@napi-rs/canvas-win32-x64-msvc": "0.1.80"
9533
+ }
9534
+ },
9535
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-android-arm64": {
9536
+ "version": "0.1.80",
9537
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz",
9538
+ "integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==",
9539
+ "cpu": [
9540
+ "arm64"
9541
+ ],
9542
+ "license": "MIT",
9543
+ "optional": true,
9544
+ "os": [
9545
+ "android"
9546
+ ],
9547
+ "engines": {
9548
+ "node": ">= 10"
9549
+ }
9550
+ },
9551
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-darwin-arm64": {
9552
+ "version": "0.1.80",
9553
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz",
9554
+ "integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==",
9555
+ "cpu": [
9556
+ "arm64"
9557
+ ],
9558
+ "license": "MIT",
9559
+ "optional": true,
9560
+ "os": [
9561
+ "darwin"
9562
+ ],
9563
+ "engines": {
9564
+ "node": ">= 10"
9565
+ }
9566
+ },
9567
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-darwin-x64": {
9568
+ "version": "0.1.80",
9569
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz",
9570
+ "integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==",
9571
+ "cpu": [
9572
+ "x64"
9573
+ ],
9574
+ "license": "MIT",
9575
+ "optional": true,
9576
+ "os": [
9577
+ "darwin"
9578
+ ],
9579
+ "engines": {
9580
+ "node": ">= 10"
9581
+ }
9582
+ },
9583
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
9584
+ "version": "0.1.80",
9585
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz",
9586
+ "integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==",
9587
+ "cpu": [
9588
+ "arm"
9589
+ ],
9590
+ "license": "MIT",
9591
+ "optional": true,
9592
+ "os": [
9593
+ "linux"
9594
+ ],
9595
+ "engines": {
9596
+ "node": ">= 10"
9597
+ }
9598
+ },
9599
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-arm64-gnu": {
9600
+ "version": "0.1.80",
9601
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz",
9602
+ "integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==",
9603
+ "cpu": [
9604
+ "arm64"
9605
+ ],
9606
+ "license": "MIT",
9607
+ "optional": true,
9608
+ "os": [
9609
+ "linux"
9610
+ ],
9611
+ "engines": {
9612
+ "node": ">= 10"
9613
+ }
9614
+ },
9615
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-arm64-musl": {
9616
+ "version": "0.1.80",
9617
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz",
9618
+ "integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==",
9619
+ "cpu": [
9620
+ "arm64"
9621
+ ],
9622
+ "license": "MIT",
9623
+ "optional": true,
9624
+ "os": [
9625
+ "linux"
9626
+ ],
9627
+ "engines": {
9628
+ "node": ">= 10"
9629
+ }
9630
+ },
9631
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
9632
+ "version": "0.1.80",
9633
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz",
9634
+ "integrity": "sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==",
9635
+ "cpu": [
9636
+ "riscv64"
9637
+ ],
9638
+ "license": "MIT",
9639
+ "optional": true,
9640
+ "os": [
9641
+ "linux"
9642
+ ],
9643
+ "engines": {
9644
+ "node": ">= 10"
9645
+ }
9646
+ },
9647
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-x64-gnu": {
9648
+ "version": "0.1.80",
9649
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz",
9650
+ "integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==",
9651
+ "cpu": [
9652
+ "x64"
9653
+ ],
9654
+ "license": "MIT",
9655
+ "optional": true,
9656
+ "os": [
9657
+ "linux"
9658
+ ],
9659
+ "engines": {
9660
+ "node": ">= 10"
9661
+ }
9662
+ },
9663
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-linux-x64-musl": {
9664
+ "version": "0.1.80",
9665
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz",
9666
+ "integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==",
9667
+ "cpu": [
9668
+ "x64"
9669
+ ],
9670
+ "license": "MIT",
9671
+ "optional": true,
9672
+ "os": [
9673
+ "linux"
9674
+ ],
9675
+ "engines": {
9676
+ "node": ">= 10"
9677
+ }
9678
+ },
9679
+ "node_modules/pdf-parse/node_modules/@napi-rs/canvas-win32-x64-msvc": {
9680
+ "version": "0.1.80",
9681
+ "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz",
9682
+ "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==",
9683
+ "cpu": [
9684
+ "x64"
9685
+ ],
9686
+ "license": "MIT",
9687
+ "optional": true,
9688
+ "os": [
9689
+ "win32"
9690
+ ],
9691
+ "engines": {
9692
+ "node": ">= 10"
9693
+ }
9694
+ },
9695
  "node_modules/pdfjs-dist": {
9696
  "version": "5.4.296",
9697
  "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz",
package.json CHANGED
@@ -30,6 +30,7 @@
30
  "next": "16.0.1",
31
  "node-fetch": "^3.3.2",
32
  "officegen": "^0.6.5",
 
33
  "pdfkit": "^0.17.2",
34
  "puppeteer-core": "^24.30.0",
35
  "react": "19.2.0",
 
30
  "next": "16.0.1",
31
  "node-fetch": "^3.3.2",
32
  "officegen": "^0.6.5",
33
+ "pdf-parse": "^2.4.5",
34
  "pdfkit": "^0.17.2",
35
  "puppeteer-core": "^24.30.0",
36
  "react": "19.2.0",