devusman committed on
Commit
036a6e1
·
1 Parent(s): afab07a

feat:fix the more detailed scraping

Browse files
Files changed (3) hide show
  1. package-lock.json +442 -5
  2. package.json +5 -3
  3. server.js +78 -53
package-lock.json CHANGED
@@ -11,7 +11,9 @@
11
  "axios": "^1.11.0",
12
  "cors": "^2.8.5",
13
  "express": "^5.1.0",
14
- "puppeteer": "^24.16.2"
 
 
15
  },
16
  "devDependencies": {
17
  "nodemon": "^3.1.10",
@@ -69,6 +71,21 @@
69
  "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==",
70
  "license": "MIT"
71
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  "node_modules/@types/node": {
73
  "version": "24.3.0",
74
  "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
@@ -155,6 +172,15 @@
155
  "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
156
  "license": "Python-2.0"
157
  },
 
 
 
 
 
 
 
 
 
158
  "node_modules/ast-types": {
159
  "version": "0.13.4",
160
  "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
@@ -194,7 +220,6 @@
194
  "version": "1.0.2",
195
  "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
196
  "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
197
- "dev": true,
198
  "license": "MIT"
199
  },
200
  "node_modules/bare-events": {
@@ -315,7 +340,6 @@
315
  "version": "1.1.12",
316
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
317
  "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
318
- "dev": true,
319
  "license": "MIT",
320
  "dependencies": {
321
  "balanced-match": "^1.0.0",
@@ -443,6 +467,22 @@
443
  "node": ">=12"
444
  }
445
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  "node_modules/color-convert": {
447
  "version": "2.0.1",
448
  "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -477,7 +517,6 @@
477
  "version": "0.0.1",
478
  "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
479
  "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
480
- "dev": true,
481
  "license": "MIT"
482
  },
483
  "node_modules/content-disposition": {
@@ -599,6 +638,15 @@
599
  }
600
  }
601
  },
 
 
 
 
 
 
 
 
 
602
  "node_modules/degenerator": {
603
  "version": "5.0.1",
604
  "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
@@ -970,6 +1018,27 @@
970
  }
971
  }
972
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973
  "node_modules/form-data": {
974
  "version": "4.0.4",
975
  "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
@@ -1032,6 +1101,26 @@
1032
  "dev": true,
1033
  "license": "MIT"
1034
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1035
  "node_modules/fsevents": {
1036
  "version": "2.3.3",
1037
  "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
@@ -1131,6 +1220,27 @@
1131
  "node": ">= 14"
1132
  }
1133
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1134
  "node_modules/glob-parent": {
1135
  "version": "5.1.2",
1136
  "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
@@ -1156,6 +1266,12 @@
1156
  "url": "https://github.com/sponsors/ljharb"
1157
  }
1158
  },
 
 
 
 
 
 
1159
  "node_modules/has-flag": {
1160
  "version": "3.0.0",
1161
  "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
@@ -1291,6 +1407,17 @@
1291
  "url": "https://github.com/sponsors/sindresorhus"
1292
  }
1293
  },
 
 
 
 
 
 
 
 
 
 
 
1294
  "node_modules/inherits": {
1295
  "version": "2.0.4",
1296
  "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
@@ -1334,6 +1461,21 @@
1334
  "node": ">=8"
1335
  }
1336
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1337
  "node_modules/is-extglob": {
1338
  "version": "2.1.1",
1339
  "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
@@ -1376,6 +1518,18 @@
1376
  "node": ">=0.12.0"
1377
  }
1378
  },
 
 
 
 
 
 
 
 
 
 
 
 
1379
  "node_modules/is-promise": {
1380
  "version": "4.0.0",
1381
  "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz",
@@ -1389,6 +1543,15 @@
1389
  "dev": true,
1390
  "license": "ISC"
1391
  },
 
 
 
 
 
 
 
 
 
1392
  "node_modules/js-tokens": {
1393
  "version": "4.0.0",
1394
  "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -1413,6 +1576,39 @@
1413
  "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==",
1414
  "license": "MIT"
1415
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1416
  "node_modules/lines-and-columns": {
1417
  "version": "1.2.4",
1418
  "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
@@ -1452,6 +1648,20 @@
1452
  "node": ">= 0.8"
1453
  }
1454
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1455
  "node_modules/merge-descriptors": {
1456
  "version": "2.0.0",
1457
  "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz",
@@ -1489,7 +1699,6 @@
1489
  "version": "3.1.2",
1490
  "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
1491
  "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
1492
- "dev": true,
1493
  "license": "ISC",
1494
  "dependencies": {
1495
  "brace-expansion": "^1.1.7"
@@ -1504,6 +1713,28 @@
1504
  "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==",
1505
  "license": "MIT"
1506
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1507
  "node_modules/ms": {
1508
  "version": "2.1.3",
1509
  "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -1687,6 +1918,15 @@
1687
  "node": ">= 0.8"
1688
  }
1689
  },
 
 
 
 
 
 
 
 
 
1690
  "node_modules/path-key": {
1691
  "version": "3.1.1",
1692
  "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
@@ -1862,6 +2102,142 @@
1862
  "node": ">=18"
1863
  }
1864
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1865
  "node_modules/qs": {
1866
  "version": "6.14.0",
1867
  "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz",
@@ -1932,6 +2308,22 @@
1932
  "node": ">=4"
1933
  }
1934
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1935
  "node_modules/router": {
1936
  "version": "2.2.0",
1937
  "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz",
@@ -2029,6 +2421,42 @@
2029
  "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
2030
  "license": "ISC"
2031
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2032
  "node_modules/shebang-command": {
2033
  "version": "2.0.0",
2034
  "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
@@ -2428,6 +2856,15 @@
2428
  "license": "MIT",
2429
  "optional": true
2430
  },
 
 
 
 
 
 
 
 
 
2431
  "node_modules/unpipe": {
2432
  "version": "1.0.0",
2433
  "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
 
11
  "axios": "^1.11.0",
12
  "cors": "^2.8.5",
13
  "express": "^5.1.0",
14
+ "puppeteer": "^24.16.2",
15
+ "puppeteer-extra": "^3.3.6",
16
+ "puppeteer-extra-plugin-stealth": "^2.11.2"
17
  },
18
  "devDependencies": {
19
  "nodemon": "^3.1.10",
 
71
  "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==",
72
  "license": "MIT"
73
  },
74
+ "node_modules/@types/debug": {
75
+ "version": "4.1.12",
76
+ "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
77
+ "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
78
+ "license": "MIT",
79
+ "dependencies": {
80
+ "@types/ms": "*"
81
+ }
82
+ },
83
+ "node_modules/@types/ms": {
84
+ "version": "2.1.0",
85
+ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz",
86
+ "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==",
87
+ "license": "MIT"
88
+ },
89
  "node_modules/@types/node": {
90
  "version": "24.3.0",
91
  "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
 
172
  "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
173
  "license": "Python-2.0"
174
  },
175
+ "node_modules/arr-union": {
176
+ "version": "3.1.0",
177
+ "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
178
+ "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==",
179
+ "license": "MIT",
180
+ "engines": {
181
+ "node": ">=0.10.0"
182
+ }
183
+ },
184
  "node_modules/ast-types": {
185
  "version": "0.13.4",
186
  "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
 
220
  "version": "1.0.2",
221
  "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
222
  "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
 
223
  "license": "MIT"
224
  },
225
  "node_modules/bare-events": {
 
340
  "version": "1.1.12",
341
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
342
  "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
 
343
  "license": "MIT",
344
  "dependencies": {
345
  "balanced-match": "^1.0.0",
 
467
  "node": ">=12"
468
  }
469
  },
470
+ "node_modules/clone-deep": {
471
+ "version": "0.2.4",
472
+ "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
473
+ "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
474
+ "license": "MIT",
475
+ "dependencies": {
476
+ "for-own": "^0.1.3",
477
+ "is-plain-object": "^2.0.1",
478
+ "kind-of": "^3.0.2",
479
+ "lazy-cache": "^1.0.3",
480
+ "shallow-clone": "^0.1.2"
481
+ },
482
+ "engines": {
483
+ "node": ">=0.10.0"
484
+ }
485
+ },
486
  "node_modules/color-convert": {
487
  "version": "2.0.1",
488
  "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
 
517
  "version": "0.0.1",
518
  "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
519
  "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
 
520
  "license": "MIT"
521
  },
522
  "node_modules/content-disposition": {
 
638
  }
639
  }
640
  },
641
+ "node_modules/deepmerge": {
642
+ "version": "4.3.1",
643
+ "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
644
+ "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
645
+ "license": "MIT",
646
+ "engines": {
647
+ "node": ">=0.10.0"
648
+ }
649
+ },
650
  "node_modules/degenerator": {
651
  "version": "5.0.1",
652
  "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
 
1018
  }
1019
  }
1020
  },
1021
+ "node_modules/for-in": {
1022
+ "version": "1.0.2",
1023
+ "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz",
1024
+ "integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==",
1025
+ "license": "MIT",
1026
+ "engines": {
1027
+ "node": ">=0.10.0"
1028
+ }
1029
+ },
1030
+ "node_modules/for-own": {
1031
+ "version": "0.1.5",
1032
+ "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
1033
+ "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
1034
+ "license": "MIT",
1035
+ "dependencies": {
1036
+ "for-in": "^1.0.1"
1037
+ },
1038
+ "engines": {
1039
+ "node": ">=0.10.0"
1040
+ }
1041
+ },
1042
  "node_modules/form-data": {
1043
  "version": "4.0.4",
1044
  "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
 
1101
  "dev": true,
1102
  "license": "MIT"
1103
  },
1104
+ "node_modules/fs-extra": {
1105
+ "version": "10.1.0",
1106
+ "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
1107
+ "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
1108
+ "license": "MIT",
1109
+ "dependencies": {
1110
+ "graceful-fs": "^4.2.0",
1111
+ "jsonfile": "^6.0.1",
1112
+ "universalify": "^2.0.0"
1113
+ },
1114
+ "engines": {
1115
+ "node": ">=12"
1116
+ }
1117
+ },
1118
+ "node_modules/fs.realpath": {
1119
+ "version": "1.0.0",
1120
+ "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
1121
+ "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
1122
+ "license": "ISC"
1123
+ },
1124
  "node_modules/fsevents": {
1125
  "version": "2.3.3",
1126
  "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
 
1220
  "node": ">= 14"
1221
  }
1222
  },
1223
+ "node_modules/glob": {
1224
+ "version": "7.2.3",
1225
+ "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
1226
+ "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
1227
+ "deprecated": "Glob versions prior to v9 are no longer supported",
1228
+ "license": "ISC",
1229
+ "dependencies": {
1230
+ "fs.realpath": "^1.0.0",
1231
+ "inflight": "^1.0.4",
1232
+ "inherits": "2",
1233
+ "minimatch": "^3.1.1",
1234
+ "once": "^1.3.0",
1235
+ "path-is-absolute": "^1.0.0"
1236
+ },
1237
+ "engines": {
1238
+ "node": "*"
1239
+ },
1240
+ "funding": {
1241
+ "url": "https://github.com/sponsors/isaacs"
1242
+ }
1243
+ },
1244
  "node_modules/glob-parent": {
1245
  "version": "5.1.2",
1246
  "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
 
1266
  "url": "https://github.com/sponsors/ljharb"
1267
  }
1268
  },
1269
+ "node_modules/graceful-fs": {
1270
+ "version": "4.2.11",
1271
+ "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
1272
+ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
1273
+ "license": "ISC"
1274
+ },
1275
  "node_modules/has-flag": {
1276
  "version": "3.0.0",
1277
  "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
 
1407
  "url": "https://github.com/sponsors/sindresorhus"
1408
  }
1409
  },
1410
+ "node_modules/inflight": {
1411
+ "version": "1.0.6",
1412
+ "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
1413
+ "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
1414
+ "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
1415
+ "license": "ISC",
1416
+ "dependencies": {
1417
+ "once": "^1.3.0",
1418
+ "wrappy": "1"
1419
+ }
1420
+ },
1421
  "node_modules/inherits": {
1422
  "version": "2.0.4",
1423
  "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
 
1461
  "node": ">=8"
1462
  }
1463
  },
1464
+ "node_modules/is-buffer": {
1465
+ "version": "1.1.6",
1466
+ "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
1467
+ "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==",
1468
+ "license": "MIT"
1469
+ },
1470
+ "node_modules/is-extendable": {
1471
+ "version": "0.1.1",
1472
+ "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
1473
+ "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
1474
+ "license": "MIT",
1475
+ "engines": {
1476
+ "node": ">=0.10.0"
1477
+ }
1478
+ },
1479
  "node_modules/is-extglob": {
1480
  "version": "2.1.1",
1481
  "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
 
1518
  "node": ">=0.12.0"
1519
  }
1520
  },
1521
+ "node_modules/is-plain-object": {
1522
+ "version": "2.0.4",
1523
+ "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
1524
+ "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
1525
+ "license": "MIT",
1526
+ "dependencies": {
1527
+ "isobject": "^3.0.1"
1528
+ },
1529
+ "engines": {
1530
+ "node": ">=0.10.0"
1531
+ }
1532
+ },
1533
  "node_modules/is-promise": {
1534
  "version": "4.0.0",
1535
  "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz",
 
1543
  "dev": true,
1544
  "license": "ISC"
1545
  },
1546
+ "node_modules/isobject": {
1547
+ "version": "3.0.1",
1548
+ "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
1549
+ "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
1550
+ "license": "MIT",
1551
+ "engines": {
1552
+ "node": ">=0.10.0"
1553
+ }
1554
+ },
1555
  "node_modules/js-tokens": {
1556
  "version": "4.0.0",
1557
  "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
 
1576
  "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==",
1577
  "license": "MIT"
1578
  },
1579
+ "node_modules/jsonfile": {
1580
+ "version": "6.2.0",
1581
+ "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz",
1582
+ "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==",
1583
+ "license": "MIT",
1584
+ "dependencies": {
1585
+ "universalify": "^2.0.0"
1586
+ },
1587
+ "optionalDependencies": {
1588
+ "graceful-fs": "^4.1.6"
1589
+ }
1590
+ },
1591
+ "node_modules/kind-of": {
1592
+ "version": "3.2.2",
1593
+ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
1594
+ "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
1595
+ "license": "MIT",
1596
+ "dependencies": {
1597
+ "is-buffer": "^1.1.5"
1598
+ },
1599
+ "engines": {
1600
+ "node": ">=0.10.0"
1601
+ }
1602
+ },
1603
+ "node_modules/lazy-cache": {
1604
+ "version": "1.0.4",
1605
+ "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
1606
+ "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
1607
+ "license": "MIT",
1608
+ "engines": {
1609
+ "node": ">=0.10.0"
1610
+ }
1611
+ },
1612
  "node_modules/lines-and-columns": {
1613
  "version": "1.2.4",
1614
  "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
 
1648
  "node": ">= 0.8"
1649
  }
1650
  },
1651
+ "node_modules/merge-deep": {
1652
+ "version": "3.0.3",
1653
+ "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
1654
+ "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
1655
+ "license": "MIT",
1656
+ "dependencies": {
1657
+ "arr-union": "^3.1.0",
1658
+ "clone-deep": "^0.2.4",
1659
+ "kind-of": "^3.0.2"
1660
+ },
1661
+ "engines": {
1662
+ "node": ">=0.10.0"
1663
+ }
1664
+ },
1665
  "node_modules/merge-descriptors": {
1666
  "version": "2.0.0",
1667
  "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz",
 
1699
  "version": "3.1.2",
1700
  "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
1701
  "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
 
1702
  "license": "ISC",
1703
  "dependencies": {
1704
  "brace-expansion": "^1.1.7"
 
1713
  "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==",
1714
  "license": "MIT"
1715
  },
1716
+ "node_modules/mixin-object": {
1717
+ "version": "2.0.1",
1718
+ "resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
1719
+ "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
1720
+ "license": "MIT",
1721
+ "dependencies": {
1722
+ "for-in": "^0.1.3",
1723
+ "is-extendable": "^0.1.1"
1724
+ },
1725
+ "engines": {
1726
+ "node": ">=0.10.0"
1727
+ }
1728
+ },
1729
+ "node_modules/mixin-object/node_modules/for-in": {
1730
+ "version": "0.1.8",
1731
+ "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
1732
+ "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
1733
+ "license": "MIT",
1734
+ "engines": {
1735
+ "node": ">=0.10.0"
1736
+ }
1737
+ },
1738
  "node_modules/ms": {
1739
  "version": "2.1.3",
1740
  "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
 
1918
  "node": ">= 0.8"
1919
  }
1920
  },
1921
+ "node_modules/path-is-absolute": {
1922
+ "version": "1.0.1",
1923
+ "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
1924
+ "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
1925
+ "license": "MIT",
1926
+ "engines": {
1927
+ "node": ">=0.10.0"
1928
+ }
1929
+ },
1930
  "node_modules/path-key": {
1931
  "version": "3.1.1",
1932
  "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
 
2102
  "node": ">=18"
2103
  }
2104
  },
2105
+ "node_modules/puppeteer-extra": {
2106
+ "version": "3.3.6",
2107
+ "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
2108
+ "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
2109
+ "license": "MIT",
2110
+ "dependencies": {
2111
+ "@types/debug": "^4.1.0",
2112
+ "debug": "^4.1.1",
2113
+ "deepmerge": "^4.2.2"
2114
+ },
2115
+ "engines": {
2116
+ "node": ">=8"
2117
+ },
2118
+ "peerDependencies": {
2119
+ "@types/puppeteer": "*",
2120
+ "puppeteer": "*",
2121
+ "puppeteer-core": "*"
2122
+ },
2123
+ "peerDependenciesMeta": {
2124
+ "@types/puppeteer": {
2125
+ "optional": true
2126
+ },
2127
+ "puppeteer": {
2128
+ "optional": true
2129
+ },
2130
+ "puppeteer-core": {
2131
+ "optional": true
2132
+ }
2133
+ }
2134
+ },
2135
+ "node_modules/puppeteer-extra-plugin": {
2136
+ "version": "3.2.3",
2137
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz",
2138
+ "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==",
2139
+ "license": "MIT",
2140
+ "dependencies": {
2141
+ "@types/debug": "^4.1.0",
2142
+ "debug": "^4.1.1",
2143
+ "merge-deep": "^3.0.1"
2144
+ },
2145
+ "engines": {
2146
+ "node": ">=9.11.2"
2147
+ },
2148
+ "peerDependencies": {
2149
+ "playwright-extra": "*",
2150
+ "puppeteer-extra": "*"
2151
+ },
2152
+ "peerDependenciesMeta": {
2153
+ "playwright-extra": {
2154
+ "optional": true
2155
+ },
2156
+ "puppeteer-extra": {
2157
+ "optional": true
2158
+ }
2159
+ }
2160
+ },
2161
+ "node_modules/puppeteer-extra-plugin-stealth": {
2162
+ "version": "2.11.2",
2163
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz",
2164
+ "integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==",
2165
+ "license": "MIT",
2166
+ "dependencies": {
2167
+ "debug": "^4.1.1",
2168
+ "puppeteer-extra-plugin": "^3.2.3",
2169
+ "puppeteer-extra-plugin-user-preferences": "^2.4.1"
2170
+ },
2171
+ "engines": {
2172
+ "node": ">=8"
2173
+ },
2174
+ "peerDependencies": {
2175
+ "playwright-extra": "*",
2176
+ "puppeteer-extra": "*"
2177
+ },
2178
+ "peerDependenciesMeta": {
2179
+ "playwright-extra": {
2180
+ "optional": true
2181
+ },
2182
+ "puppeteer-extra": {
2183
+ "optional": true
2184
+ }
2185
+ }
2186
+ },
2187
+ "node_modules/puppeteer-extra-plugin-user-data-dir": {
2188
+ "version": "2.4.1",
2189
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz",
2190
+ "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==",
2191
+ "license": "MIT",
2192
+ "dependencies": {
2193
+ "debug": "^4.1.1",
2194
+ "fs-extra": "^10.0.0",
2195
+ "puppeteer-extra-plugin": "^3.2.3",
2196
+ "rimraf": "^3.0.2"
2197
+ },
2198
+ "engines": {
2199
+ "node": ">=8"
2200
+ },
2201
+ "peerDependencies": {
2202
+ "playwright-extra": "*",
2203
+ "puppeteer-extra": "*"
2204
+ },
2205
+ "peerDependenciesMeta": {
2206
+ "playwright-extra": {
2207
+ "optional": true
2208
+ },
2209
+ "puppeteer-extra": {
2210
+ "optional": true
2211
+ }
2212
+ }
2213
+ },
2214
+ "node_modules/puppeteer-extra-plugin-user-preferences": {
2215
+ "version": "2.4.1",
2216
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz",
2217
+ "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==",
2218
+ "license": "MIT",
2219
+ "dependencies": {
2220
+ "debug": "^4.1.1",
2221
+ "deepmerge": "^4.2.2",
2222
+ "puppeteer-extra-plugin": "^3.2.3",
2223
+ "puppeteer-extra-plugin-user-data-dir": "^2.4.1"
2224
+ },
2225
+ "engines": {
2226
+ "node": ">=8"
2227
+ },
2228
+ "peerDependencies": {
2229
+ "playwright-extra": "*",
2230
+ "puppeteer-extra": "*"
2231
+ },
2232
+ "peerDependenciesMeta": {
2233
+ "playwright-extra": {
2234
+ "optional": true
2235
+ },
2236
+ "puppeteer-extra": {
2237
+ "optional": true
2238
+ }
2239
+ }
2240
+ },
2241
  "node_modules/qs": {
2242
  "version": "6.14.0",
2243
  "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz",
 
2308
  "node": ">=4"
2309
  }
2310
  },
2311
+ "node_modules/rimraf": {
2312
+ "version": "3.0.2",
2313
+ "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
2314
+ "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
2315
+ "deprecated": "Rimraf versions prior to v4 are no longer supported",
2316
+ "license": "ISC",
2317
+ "dependencies": {
2318
+ "glob": "^7.1.3"
2319
+ },
2320
+ "bin": {
2321
+ "rimraf": "bin.js"
2322
+ },
2323
+ "funding": {
2324
+ "url": "https://github.com/sponsors/isaacs"
2325
+ }
2326
+ },
2327
  "node_modules/router": {
2328
  "version": "2.2.0",
2329
  "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz",
 
2421
  "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
2422
  "license": "ISC"
2423
  },
2424
+ "node_modules/shallow-clone": {
2425
+ "version": "0.1.2",
2426
+ "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
2427
+ "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
2428
+ "license": "MIT",
2429
+ "dependencies": {
2430
+ "is-extendable": "^0.1.1",
2431
+ "kind-of": "^2.0.1",
2432
+ "lazy-cache": "^0.2.3",
2433
+ "mixin-object": "^2.0.1"
2434
+ },
2435
+ "engines": {
2436
+ "node": ">=0.10.0"
2437
+ }
2438
+ },
2439
+ "node_modules/shallow-clone/node_modules/kind-of": {
2440
+ "version": "2.0.1",
2441
+ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
2442
+ "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
2443
+ "license": "MIT",
2444
+ "dependencies": {
2445
+ "is-buffer": "^1.0.2"
2446
+ },
2447
+ "engines": {
2448
+ "node": ">=0.10.0"
2449
+ }
2450
+ },
2451
+ "node_modules/shallow-clone/node_modules/lazy-cache": {
2452
+ "version": "0.2.7",
2453
+ "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
2454
+ "integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
2455
+ "license": "MIT",
2456
+ "engines": {
2457
+ "node": ">=0.10.0"
2458
+ }
2459
+ },
2460
  "node_modules/shebang-command": {
2461
  "version": "2.0.0",
2462
  "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
 
2856
  "license": "MIT",
2857
  "optional": true
2858
  },
2859
+ "node_modules/universalify": {
2860
+ "version": "2.0.1",
2861
+ "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
2862
+ "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
2863
+ "license": "MIT",
2864
+ "engines": {
2865
+ "node": ">= 10.0.0"
2866
+ }
2867
+ },
2868
  "node_modules/unpipe": {
2869
  "version": "1.0.0",
2870
  "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
package.json CHANGED
@@ -7,12 +7,14 @@
7
  "axios": "^1.11.0",
8
  "cors": "^2.8.5",
9
  "express": "^5.1.0",
10
- "puppeteer": "^24.16.2"
 
 
11
  },
12
  "devDependencies": {
13
  "nodemon": "^3.1.10",
14
- "typescript": "^5.4.0",
15
- "tsc-watch": "^6.2.0"
16
  },
17
  "scripts": {
18
  "dev": "nodemon server.js",
 
7
  "axios": "^1.11.0",
8
  "cors": "^2.8.5",
9
  "express": "^5.1.0",
10
+ "puppeteer": "^24.16.2",
11
+ "puppeteer-extra": "^3.3.6",
12
+ "puppeteer-extra-plugin-stealth": "^2.11.2"
13
  },
14
  "devDependencies": {
15
  "nodemon": "^3.1.10",
16
+ "tsc-watch": "^6.2.0",
17
+ "typescript": "^5.4.0"
18
  },
19
  "scripts": {
20
  "dev": "nodemon server.js",
server.js CHANGED
@@ -1,19 +1,19 @@
1
  const express = require('express');
2
- const puppeteer = require('puppeteer');
 
3
  const cors = require('cors');
4
  const { EventEmitter } = require('events');
5
 
 
 
6
  const app = express();
7
  const port = 7860;
8
 
9
  app.use(cors());
10
  app.use(express.json());
11
 
12
- // --- Progress Tracking and Job Storage ---
13
-
14
- // Stores the real-time progress of active jobs
15
  const progressTrackers = new Map();
16
- // Stores the final state and result (PDF buffer or error) of jobs
17
  const downloadJobs = new Map();
18
 
19
  class ProgressTracker extends EventEmitter {
@@ -41,8 +41,7 @@ class ProgressTracker extends EventEmitter {
41
  }
42
  }
43
 
44
-
45
- // --- Puppeteer Logic (Unchanged) ---
46
  const bypassCookiesAndRestrictions = async (page, progressTracker) => {
47
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
48
 
@@ -66,7 +65,7 @@ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
66
  }
67
  }
68
 
69
- // Step 2: Inject CSS to hide cookie banners immediately
70
  await page.addStyleTag({
71
  content: `
72
  /* Hide all possible cookie banners */
@@ -107,7 +106,7 @@ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
107
  `
108
  });
109
 
110
- // Step 3: Inject JavaScript to handle dynamic cookie banners
111
  await page.evaluateOnNewDocument(() => {
112
  // Override common cookie consent functions
113
  window.cookieConsent = { accepted: true };
@@ -161,6 +160,7 @@ const bypassCookiesAndRestrictions = async (page, progressTracker) => {
161
  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
162
  return true;
163
  };
 
164
  const unblurContent = async (page, progressTracker) => {
165
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
166
 
@@ -219,6 +219,7 @@ const unblurContent = async (page, progressTracker) => {
219
 
220
  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
221
  };
 
222
  const applyPrintStyles = async (page, progressTracker) => {
223
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
224
 
@@ -228,16 +229,25 @@ const applyPrintStyles = async (page, progressTracker) => {
228
  style.id = "print-style-extension";
229
  style.innerHTML = `
230
  @page {
 
231
  size: A4 portrait;
232
- margin: 5mm;
233
  }
234
  @media print {
235
  html, body {
 
 
 
236
  margin: 0 !important;
237
  padding: 0 !important;
238
  overflow: visible !important;
 
 
239
  }
 
 
240
  header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
 
241
  .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
242
  .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
243
  .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
@@ -245,21 +255,32 @@ const applyPrintStyles = async (page, progressTracker) => {
245
  .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
246
  display: none !important;
247
  }
248
- body {
249
- background: white !important;
250
- color: black !important;
251
- }
252
  * {
253
  box-shadow: none !important;
254
  background: transparent !important;
 
255
  }
256
- .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ, .Viewer_document-wrapper__XsO4j, .page-content {
257
- display: flex !important;
258
- flex-direction: column !important;
 
 
 
 
 
 
 
259
  width: 100% !important;
260
- max-width: 210mm !important;
261
- margin: 0 auto !important;
 
 
 
262
  }
 
 
263
  [data-page], .page, .document-page, img {
264
  page-break-after: always !important;
265
  page-break-inside: avoid !important;
@@ -267,6 +288,7 @@ const applyPrintStyles = async (page, progressTracker) => {
267
  width: 100% !important;
268
  max-width: 100% !important;
269
  height: auto !important;
 
270
  }
271
  }
272
  `;
@@ -275,13 +297,14 @@ const applyPrintStyles = async (page, progressTracker) => {
275
 
276
  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
277
  };
 
278
  const studocuDownloader = async (url, options = {}, progressTracker = null) => {
279
  let browser;
280
  try {
281
  progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
282
 
283
- console.log("🚀 Launching browser with stealth configuration...");
284
- browser = await puppeteer.launch({
285
  headless: true,
286
  args: [
287
  '--no-sandbox',
@@ -300,8 +323,10 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
300
  '--disable-web-security',
301
  '--disable-features=site-per-process',
302
  '--disable-blink-features=AutomationControlled',
303
- '--disable-extensions'
 
304
  ],
 
305
  timeout: 300000,
306
  });
307
 
@@ -309,9 +334,10 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
309
 
310
  progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
311
 
312
- await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
313
- await page.setViewport({ width: 794, height: 1122 });
314
 
 
315
  await page.evaluateOnNewDocument(() => {
316
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
317
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
@@ -321,11 +347,17 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
321
  // Set up cookie and content bypass
322
  await bypassCookiesAndRestrictions(page, progressTracker);
323
 
324
- // Block unnecessary resources
325
  await page.setRequestInterception(true);
326
  page.on('request', (req) => {
327
  const resourceType = req.resourceType();
328
- const reqUrl = req.url();
 
 
 
 
 
 
329
  if (
330
  reqUrl.includes('doubleclick') ||
331
  reqUrl.includes('googletagmanager') ||
@@ -337,7 +369,7 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
337
  reqUrl.includes('mixpanel') ||
338
  reqUrl.includes('onetrust') ||
339
  reqUrl.includes('cookielaw') ||
340
- (resourceType === 'other' && reqUrl.includes('track'))
341
  ) {
342
  req.abort();
343
  } else {
@@ -350,7 +382,7 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
350
  progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
351
 
352
  console.log("🔑 Logging in to StuDocu...");
353
- await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
354
  await page.waitForSelector('#email', { timeout: 15000 });
355
  await page.type('#email', options.email);
356
  await page.type('#password', options.password);
@@ -366,23 +398,28 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
366
  }
367
  }
368
 
369
- progressTracker?.updateProgress(25, 'navigating', 'Navigating to document...');
 
 
 
 
 
370
  console.log(`📄 Navigating to ${url}...`);
371
 
372
  let navigationSuccess = false;
373
  let attempts = 0;
374
- const maxAttempts = 3;
375
  while (!navigationSuccess && attempts < maxAttempts) {
376
  try {
377
  attempts++;
378
- progressTracker?.updateProgress(25 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
379
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
380
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
381
  navigationSuccess = true;
382
  } catch (e) {
383
  console.log(`Navigation attempt ${attempts} failed:`, e.message);
384
  if (attempts >= maxAttempts) throw e;
385
- await new Promise(resolve => setTimeout(resolve, 5000));
386
  }
387
  }
388
 
@@ -512,7 +549,7 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
512
 
513
  const pdfBuffer = await page.pdf({
514
  printBackground: true,
515
- preferCSSPageSize: true,
516
  displayHeaderFooter: false,
517
  timeout: 180000,
518
  scale: 1,
@@ -539,11 +576,7 @@ const studocuDownloader = async (url, options = {}, progressTracker = null) => {
539
  }
540
  };
541
 
542
- // --- API Routes ---
543
-
544
- /**
545
- * [NEW] Endpoint to start the download process and get a session ID.
546
- */
547
  app.post('/api/request-download', (req, res) => {
548
  const { url, email, password } = req.body;
549
  if (!url || !url.includes('studocu.com')) {
@@ -575,10 +608,6 @@ app.post('/api/request-download', (req, res) => {
575
  });
576
  });
577
 
578
-
579
- /**
580
- * [EXISTING] Endpoint for the client to poll for progress updates.
581
- */
582
  app.get('/api/progress/:sessionId', (req, res) => {
583
  const { sessionId } = req.params;
584
  const tracker = progressTrackers.get(sessionId);
@@ -608,10 +637,6 @@ app.get('/api/progress/:sessionId', (req, res) => {
608
  return res.status(404).json({ error: 'Session not found' });
609
  });
610
 
611
-
612
- /**
613
- * [NEW] Endpoint to download the final PDF file.
614
- */
615
  app.get('/api/download/:sessionId', (req, res) => {
616
  const { sessionId } = req.params;
617
  const job = downloadJobs.get(sessionId);
@@ -639,7 +664,6 @@ app.get('/api/download/:sessionId', (req, res) => {
639
  }
640
  });
641
 
642
-
643
  // --- Health and Info Endpoints (Unchanged) ---
644
  app.get('/health', (req, res) => {
645
  res.json({
@@ -652,14 +676,15 @@ app.get('/health', (req, res) => {
652
 
653
  app.get('/', (req, res) => {
654
  res.json({
655
- message: '🚀 Enhanced StuDocu Downloader API v5.1 - Real-time Progress Tracking',
656
- version: '5.1.0',
657
  features: [
658
  '🍪 Advanced cookie banner bypass',
659
  '🔓 Premium content unblurring',
660
  '🔑 Login support for full access',
661
  '📊 Real-time progress tracking via polling',
662
- '📄 Clean PDF generation with print styles'
 
663
  ],
664
  endpoints: {
665
  request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
@@ -681,6 +706,6 @@ process.on('SIGINT', () => {
681
  });
682
 
683
  app.listen(port, () => {
684
- console.log(`🚀 Enhanced StuDocu Downloader v5.1.0 running on http://localhost:${port}`);
685
- console.log(`✨ Features: Real-time progress tracking and enhanced user feedback`);
686
  });
 
1
  const express = require('express');
2
+ const puppeteerExtra = require('puppeteer-extra'); // NEW: For stealth
3
+ const StealthPlugin = require('puppeteer-extra-plugin-stealth'); // NEW: Stealth plugin
4
  const cors = require('cors');
5
  const { EventEmitter } = require('events');
6
 
7
+ puppeteerExtra.use(StealthPlugin()); // NEW: Enable stealth plugin
8
+
9
  const app = express();
10
  const port = 7860;
11
 
12
  app.use(cors());
13
  app.use(express.json());
14
 
15
+ // --- Progress Tracking and Job Storage --- (Unchanged)
 
 
16
  const progressTrackers = new Map();
 
17
  const downloadJobs = new Map();
18
 
19
  class ProgressTracker extends EventEmitter {
 
41
  }
42
  }
43
 
44
+ // --- Puppeteer Logic (Updated for Stealth and Reliability) ---
 
45
  const bypassCookiesAndRestrictions = async (page, progressTracker) => {
46
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
47
 
 
65
  }
66
  }
67
 
68
+ // Step 2: Inject CSS to hide cookie banners immediately (Unchanged)
69
  await page.addStyleTag({
70
  content: `
71
  /* Hide all possible cookie banners */
 
106
  `
107
  });
108
 
109
+ // Step 3: Inject JavaScript to handle dynamic cookie banners (Unchanged)
110
  await page.evaluateOnNewDocument(() => {
111
  // Override common cookie consent functions
112
  window.cookieConsent = { accepted: true };
 
160
  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
161
  return true;
162
  };
163
+
164
  const unblurContent = async (page, progressTracker) => {
165
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
166
 
 
219
 
220
  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
221
  };
222
+
223
  const applyPrintStyles = async (page, progressTracker) => {
224
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
225
 
 
229
  style.id = "print-style-extension";
230
  style.innerHTML = `
231
  @page {
232
+ /* Set page size to A4 and remove default margins */
233
  size: A4 portrait;
234
+ margin: 0mm;
235
  }
236
  @media print {
237
  html, body {
238
+ /* Ensure the body takes the full width and has no extra padding/margin */
239
+ width: 210mm !important;
240
+ height: auto !important;
241
  margin: 0 !important;
242
  padding: 0 !important;
243
  overflow: visible !important;
244
+ background: white !important;
245
+ color: black !important;
246
  }
247
+
248
+ /* Remove all unwanted elements like headers, footers, sidebars, etc. */
249
  header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
250
+ [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
251
  .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
252
  .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
253
  .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
 
255
  .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
256
  display: none !important;
257
  }
258
+
259
+ /* Force all elements to have a transparent background and no shadow */
 
 
260
  * {
261
  box-shadow: none !important;
262
  background: transparent !important;
263
+ color: inherit !important;
264
  }
265
+
266
+ /*
267
+ * KEY FIX: Target the main document container.
268
+ * Force it to be a block element, remove any transforms or max-widths,
269
+ * and center it perfectly within the page.
270
+ */
271
+ .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
272
+ .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
273
+ position: static !important;
274
+ display: block !important;
275
  width: 100% !important;
276
+ max-width: none !important;
277
+ margin: 0 auto !important; /* Center the content */
278
+ padding: 5mm !important; /* Add a small margin for readability */
279
+ box-sizing: border-box; /* Include padding in width calculation */
280
+ transform: none !important;
281
  }
282
+
283
+ /* Ensure individual pages and images within the document use the full width */
284
  [data-page], .page, .document-page, img {
285
  page-break-after: always !important;
286
  page-break-inside: avoid !important;
 
288
  width: 100% !important;
289
  max-width: 100% !important;
290
  height: auto !important;
291
+ display: block !important;
292
  }
293
  }
294
  `;
 
297
 
298
  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
299
  };
300
+
301
  const studocuDownloader = async (url, options = {}, progressTracker = null) => {
302
  let browser;
303
  try {
304
  progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
305
 
306
+ console.log("🚀 Launching browser with enhanced stealth configuration...");
307
+ browser = await puppeteerExtra.launch({ // UPDATED: Use puppeteerExtra
308
  headless: true,
309
  args: [
310
  '--no-sandbox',
 
323
  '--disable-web-security',
324
  '--disable-features=site-per-process',
325
  '--disable-blink-features=AutomationControlled',
326
+ '--disable-extensions',
327
+ '--ignore-certificate-errors'
328
  ],
329
+ ignoreHTTPSErrors: true,
330
  timeout: 300000,
331
  });
332
 
 
334
 
335
  progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
336
 
337
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36');
338
+ await page.setViewport({ width: 794, height: 1122 }); // A4 size in pixels at 96 DPI
339
 
340
+ // NOTE: Stealth plugin handles most of this, but keeping for extra safety
341
  await page.evaluateOnNewDocument(() => {
342
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
343
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
 
347
  // Set up cookie and content bypass
348
  await bypassCookiesAndRestrictions(page, progressTracker);
349
 
350
+ // Block unnecessary resources (UPDATED: Always continue for 'document' to prevent navigation failures)
351
  await page.setRequestInterception(true);
352
  page.on('request', (req) => {
353
  const resourceType = req.resourceType();
354
+ const reqUrl = req.url().toLowerCase();
355
+
356
+ if (resourceType === 'document') {
357
+ req.continue();
358
+ return;
359
+ }
360
+
361
  if (
362
  reqUrl.includes('doubleclick') ||
363
  reqUrl.includes('googletagmanager') ||
 
369
  reqUrl.includes('mixpanel') ||
370
  reqUrl.includes('onetrust') ||
371
  reqUrl.includes('cookielaw') ||
372
+ (resourceType === 'other' && reqUrl.includes('/track/')) // UPDATED: More specific to avoid over-blocking
373
  ) {
374
  req.abort();
375
  } else {
 
382
  progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
383
 
384
  console.log("🔑 Logging in to StuDocu...");
385
+ await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 120000 });
386
  await page.waitForSelector('#email', { timeout: 15000 });
387
  await page.type('#email', options.email);
388
  await page.type('#password', options.password);
 
398
  }
399
  }
400
 
401
+ progressTracker?.updateProgress(25, 'navigating', 'Navigating to homepage first for session setup...');
402
+ console.log(`📄 Navigating to homepage to simulate natural session...`);
403
+ await page.goto('https://www.studocu.com/en-us', { waitUntil: 'domcontentloaded', timeout: 150000 }); // NEW: Preliminary homepage visit
404
+ await new Promise(resolve => setTimeout(resolve, 3000)); // Short delay for session stabilization
405
+
406
+ progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
407
  console.log(`📄 Navigating to ${url}...`);
408
 
409
  let navigationSuccess = false;
410
  let attempts = 0;
411
+ const maxAttempts = 5;
412
  while (!navigationSuccess && attempts < maxAttempts) {
413
  try {
414
  attempts++;
415
+ progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
416
  console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
417
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 150000 }); // UPDATED: Increased timeout to 2.5 min
418
  navigationSuccess = true;
419
  } catch (e) {
420
  console.log(`Navigation attempt ${attempts} failed:`, e.message);
421
  if (attempts >= maxAttempts) throw e;
422
+ await new Promise(resolve => setTimeout(resolve, 15000)); // UPDATED: Increased retry delay to 15s
423
  }
424
  }
425
 
 
549
 
550
  const pdfBuffer = await page.pdf({
551
  printBackground: true,
552
+ preferCSSPageSize: true, // Use the @page size
553
  displayHeaderFooter: false,
554
  timeout: 180000,
555
  scale: 1,
 
576
  }
577
  };
578
 
579
+ // --- API Routes --- (Unchanged)
 
 
 
 
580
  app.post('/api/request-download', (req, res) => {
581
  const { url, email, password } = req.body;
582
  if (!url || !url.includes('studocu.com')) {
 
608
  });
609
  });
610
 
 
 
 
 
611
  app.get('/api/progress/:sessionId', (req, res) => {
612
  const { sessionId } = req.params;
613
  const tracker = progressTrackers.get(sessionId);
 
637
  return res.status(404).json({ error: 'Session not found' });
638
  });
639
 
 
 
 
 
640
  app.get('/api/download/:sessionId', (req, res) => {
641
  const { sessionId } = req.params;
642
  const job = downloadJobs.get(sessionId);
 
664
  }
665
  });
666
 
 
667
  // --- Health and Info Endpoints (Unchanged) ---
668
  app.get('/health', (req, res) => {
669
  res.json({
 
676
 
677
  app.get('/', (req, res) => {
678
  res.json({
679
+ message: '🚀 Enhanced StuDocu Downloader API v5.2 - Real-time Progress Tracking with Stealth',
680
+ version: '5.2.0',
681
  features: [
682
  '🍪 Advanced cookie banner bypass',
683
  '🔓 Premium content unblurring',
684
  '🔑 Login support for full access',
685
  '📊 Real-time progress tracking via polling',
686
+ '📄 Clean PDF generation with print styles',
687
+ '🕵️ Enhanced stealth to evade bot detection'
688
  ],
689
  endpoints: {
690
  request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
 
706
  });
707
 
708
  app.listen(port, () => {
709
+ console.log(`🚀 Enhanced StuDocu Downloader v5.2.0 running on http://localhost:${port}`);
710
+ console.log(`✨ Features: Real-time progress tracking, enhanced stealth, and user feedback`);
711
  });