| | import cliProgress from "cli-progress" |
| | import { puppeteerSg } from "../utils/request/PuppeteerSg.js"; |
| | import { pdfGenerator } from "../utils/io/PdfGenerator.js"; |
| | import { configLoader } from "../utils/io/ConfigLoader.js"; |
| | import { directoryIo } from "../utils/io/DirectoryIo.js" |
| | import * as scribdRegex from "../const/ScribdRegex.js" |
| | import * as scribdFlag from '../const/ScribdFlag.js' |
| | import { Image } from "../object/Image.js" |
| | import sharp from "sharp"; |
| | import path from 'path' |
| |
|
| |
|
// Base output directory: final PDFs are written to `${output}/<id>.pdf` and,
// in image mode, temporary page screenshots go to `${output}/<id>/`.
const output = configLoader.load("DIRECTORY", "output");

// Delay (milliseconds, passed to setTimeout) to wait after scrolling each page
// into view in default mode, before moving on to the next page.
// An explicit radix keeps the parse decimal regardless of how the value is written.
const rendertime = Number.parseInt(configLoader.load("SCRIBD", "rendertime"), 10);
| |
|
/** Selector matching every page element inside the embedded document viewer. */
const PAGE_SELECTOR = "div.outer_page_container div[id^='outer_page_']";

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Extract pixel dimensions from an inline `style` attribute value.
 *
 * The text following the first `width:` / `height:` occurrence, up to the next
 * `px`, is parsed as a decimal integer.
 *
 * @param {?string} style - Value of the element's `style` attribute (may be null).
 * @returns {?{width: number, height: number}} The parsed size, or `null` when
 *   the attribute is missing, lacks either property, or does not yield two
 *   positive numbers.
 */
function parsePageSize(style) {
    if (typeof style !== "string" || !style.includes("width:") || !style.includes("height:")) {
        return null;
    }
    const readPx = (property) =>
        Number.parseInt(style.split(`${property}:`)[1].split("px")[0].trim(), 10);
    const width = readPx("width");
    const height = readPx("height");
    // NaN fails both comparisons, so unparsable values are rejected here too.
    if (!(width > 0) || !(height > 0)) {
        return null;
    }
    return { width, height };
}

/**
 * Downloads a Scribd document as a PDF through its embed viewer.
 *
 * The constructor always returns the first instance ever created, so the
 * class behaves as a singleton.
 */
class ScribdDownloader {
    constructor() {
        if (!ScribdDownloader.instance) {
            ScribdDownloader.instance = this;
        }
        return ScribdDownloader.instance;
    }

    /**
     * Download the document at `url` using the mode selected by `flag`.
     *
     * @param {string} url - A URL matching `scribdRegex.DOCUMENT` or `scribdRegex.EMBED`.
     * @param {*} flag - `scribdFlag.IMAGE` selects image mode; anything else selects default mode.
     * @returns {Promise<void>}
     * @throws {Error} If `url` matches neither supported pattern.
     */
    async execute(url, flag) {
        const imageMode = flag === scribdFlag.IMAGE;
        console.log(`Mode: ${imageMode ? "IMAGE" : "DEFAULT"}`);

        // Call through `this` so the selected method keeps its receiver.
        const download = (embedUrl) =>
            imageMode ? this.embeds_image(embedUrl) : this.embeds_default(embedUrl);

        if (url.match(scribdRegex.DOCUMENT)) {
            // Capture group 2 of the DOCUMENT pattern is used as the document id.
            const documentId = scribdRegex.DOCUMENT.exec(url)[2];
            await download(`https://www.scribd.com/embeds/${documentId}/content`);
        } else if (url.match(scribdRegex.EMBED)) {
            await download(url);
        } else {
            throw new Error(`Unsupported URL: ${url}`);
        }
    }

    /**
     * Default mode: scroll through every page of the embed viewer, then print
     * the page container to `${output}/<id>.pdf`.
     *
     * @param {string} url - Embed URL; capture group 1 of `scribdRegex.EMBED` is the id.
     * @returns {Promise<void>}
     * @throws {Error} If `url` is not an embed URL or the viewer contains no pages.
     */
    async embeds_default(url) {
        const match = scribdRegex.EMBED.exec(url);
        if (!match) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = match[1];

        const page = await puppeteerSg.getPage(url);
        try {
            // Fixed pause after opening the page, before querying the DOM.
            await sleep(1000);

            const docPages = await page.$$(PAGE_SELECTOR);
            if (docPages.length === 0) {
                throw new Error(`No pages found: ${url}`);
            }

            // Visit each page in turn, waiting `rendertime` ms after each scroll.
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            for (let i = 0; i < docPages.length; i++) {
                await page.evaluate((index) => {
                    const pageElement = document.getElementById(`outer_page_${index + 1}`);
                    pageElement.scrollIntoView();
                    pageElement.style.margin = 0;
                }, i);
                await sleep(rendertime);
                bar.update(i + 1);
            }
            bar.stop();

            const options = {
                path: `${output}/${id}.pdf`,
                printBackground: true,
            };

            // Use the first page's inline size as the PDF page size when available.
            const firstPage = await page.$(PAGE_SELECTOR);
            const style = await firstPage.evaluate((el) => el.getAttribute("style"));
            const size = parsePageSize(style);
            if (size) {
                options.height = size.height;
                options.width = size.width;
            }

            // Replace the body with just the page container's content before printing.
            await page.evaluate(() => {
                document.body.innerHTML = document.querySelector("div.outer_page_container").innerHTML;
            });

            await directoryIo.create(path.dirname(options.path));
            await page.pdf(options);
            console.log(`Generated: ${options.path}`);
        } finally {
            // Release the page and browser even when a step above fails.
            try {
                await page.close();
            } finally {
                await puppeteerSg.close();
            }
        }
    }

    /**
     * Image mode: screenshot every page to `${output}/<id>/NNNN.png`, combine
     * the screenshots into `${output}/<id>.pdf`, then remove the image directory.
     *
     * @param {string} url - Embed URL; capture group 1 of `scribdRegex.EMBED` is the id.
     * @returns {Promise<void>}
     * @throws {Error} If `url` is not an embed URL.
     */
    async embeds_image(url) {
        const deviceScaleFactor = 2;
        const match = scribdRegex.EMBED.exec(url);
        if (!match) {
            throw new Error(`Unsupported URL: ${url}`);
        }
        const id = match[1];

        const dir = `${output}/${id}`;
        await directoryIo.create(dir);

        const page = await puppeteerSg.getPage(url);
        try {
            // Fixed pause after opening the page, before querying the DOM.
            await sleep(1000);

            // Reset the scroller's offsets and hide the toolbar.
            const scroller = await page.$("div.document_scroller");
            await scroller.evaluate((el) => {
                el["style"]["bottom"] = "0px";
                el["style"]["margin-top"] = "0px";
            });
            const toolbar = await page.$("div.toolbar_drop");
            await toolbar.evaluate((el) => el["style"]["display"] = "none");

            const docPages = await page.$$(PAGE_SELECTOR);
            const images = [];
            const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
            bar.start(docPages.length, 0);
            for (let i = 0; i < docPages.length; i++) {
                await page.evaluate((index) => {
                    document.getElementById(`outer_page_${index + 1}`).scrollIntoView();
                }, i);

                // Fixed viewport width; height follows the page's aspect ratio
                // when its inline style provides one, else the default is used.
                const width = 1191;
                let height = 1684;
                const style = await docPages[i].evaluate((el) => el.getAttribute("style"));
                const size = parsePageSize(style);
                if (size) {
                    height = Math.ceil((width * size.height) / size.width);
                }
                await page.setViewport({ width, height, deviceScaleFactor });

                // Named imagePath so the imported `path` module is not shadowed.
                const imagePath = `${dir}/${(i + 1).toString().padStart(4, "0")}.png`;
                await docPages[i].screenshot({ path: imagePath });

                // Read the actual pixel size back from the written file.
                const metadata = await sharp(imagePath).metadata();
                images.push(new Image(imagePath, metadata.width, metadata.height));
                bar.update(i + 1);
            }
            bar.stop();

            await pdfGenerator.generate(images, `${output}/${id}.pdf`);

            // Awaited so a removal failure surfaces here instead of being lost.
            await directoryIo.remove(dir);
        } finally {
            // Release the page and browser even when a step above fails.
            try {
                await page.close();
            } finally {
                await puppeteerSg.close();
            }
        }
    }
}

export const scribdDownloader = new ScribdDownloader();