Spaces:

cINAWGD
/

skribd

Sleeping

App Files Files Community

rkwyu committed on May 13, 2024

Commit

58293b7

1 Parent(s): 3a25a20

Support everand podcast

Browse files

Files changed (5) hide show

README.md +21 -7
run.js +1 -1
src/App.js +4 -0
src/const/EverandRegex.js +6 -0
src/service/EverandDownloader.js +112 -0

README.md CHANGED Viewed

@@ -5,7 +5,9 @@
 </a>
 ## About ##
-Scribd-dl helps downloading documents on [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without membership / sign-in.
 ## Prerequisites ##
 To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
@@ -39,40 +41,52 @@ rendertime=100
 [DIRECTORY]
 output=output
 ```
-`rendertime` is the waiting time in millisecond for single page rendering in [scribd.com](https://www.scribd.com/), it is only applicable for `default` mode.
 `output` is the output directory for generated .pdf files.
 ## Usage (CLI) ##
 ```console
 Usage: npm start [options] url
 Options:
-  /i        image-based: generated by image snapshots taken for pages in scribd.com
 ```
-#### Example 1: Download 《The Minds of Billy Milligan》 ####
 ```console
 npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
 ```
-#### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method ####
 ```console
 npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
 ```
-#### Example 3: Download 《Everything You Need To Know About ChatGPT》 ####
 ```console
 npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
 ```
 ## Support URL Format ##
 - https://www.scribd.com/doc/**
 - https://www.scribd.com/embeds/**
 - https://www.slideshare.net/**
 - https://www.slideshare.net/slideshow/**
 ## Development Plan ##
-- Support [everand.com](https://www.everand.com/)
 - Scribd obfuscates the .pdf files, so text copied from the documents might turn into garbled characters. De-obfuscation is one of the future plans.
 ## License ##

 </a>
 ## About ##
+Scribd-dl helps download:
+- documents on [scribd.com](https://www.scribd.com/) and [slideshare.net](https://www.slideshare.net/) without membership / sign-in
+- podcast audios on [everand.com](https://www.everand.com/podcasts)
 ## Prerequisites ##
 To use Scribd-dl, you need to install [Node.js](https://nodejs.org/en/download/). It is recommended that you use the latest LTS version available.
 [DIRECTORY]
 output=output
 ```
+`rendertime` is the waiting time in milliseconds for single page rendering on [scribd.com](https://www.scribd.com/); it is only applicable for `default` mode.
 `output` is the output directory for generated .pdf files.
 ## Usage (CLI) ##
 ```console
 Usage: npm start [options] url
 Options:
+  /i        image-based: generated by image snapshots taken for pages on scribd.com
 ```
+#### Example 1: Download 《The Minds of Billy Milligan》 on scribd.com ####
 ```console
 npm start "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
 ```
+#### Example 2: Download 《The Minds of Billy Milligan》 using `image-based` method on scribd.com ####
 ```console
 npm start /i "https://www.scribd.com/doc/249398282/The-Minds-of-Billy-Milligan-Daniel-Keyes"
 ```
+#### Example 3: Download 《Everything You Need To Know About ChatGPT》 on slideshare.net ####
 ```console
 npm start "https://www.slideshare.net/slideshow/everything-you-need-to-know-about-chatgpt-8ba3/266783915"
 ```
+#### Example 4: Download all 《TED Talks Daily》 episodes on everand.com ####
+```console
+npm start "https://www.everand.com/podcast-show/414106971/TED-Talks-Daily"
+```
+#### Example 5: Download 《Sunday Pick: How to care for the people who take care of us (w/ Ai-jen Poo)》 on everand.com ####
+```console
+npm start "https://www.everand.com/listen/podcast/731670963"
+```
 ## Support URL Format ##
 - https://www.scribd.com/doc/**
 - https://www.scribd.com/embeds/**
 - https://www.slideshare.net/**
 - https://www.slideshare.net/slideshow/**
+- https://www.everand.com/podcast-show/**
+- https://www.everand.com/podcast/**
+- https://www.everand.com/listen/podcast/**
 ## Development Plan ##
 - Scribd obfuscates the .pdf files, so text copied from the documents might turn into garbled characters. De-obfuscation is one of the future plans.
 ## License ##

run.js CHANGED Viewed

@@ -18,6 +18,6 @@ if (process.argv.length >= 3) {
     console.error(`
 Usage: npm start [options] url
 Options:
-  /i        image-based: generated by image snapshots taken for pages
     `)
 }

     console.error(`
 Usage: npm start [options] url
 Options:
+  /i        image-based: generated by image snapshots taken for pages on scribd.com
     `)
 }

src/App.js CHANGED Viewed

@@ -1,7 +1,9 @@
 import { scribdDownloader } from "./service/ScribdDownloader.js"
 import { slideshareDownloader } from "./service/SlideshareDownloader.js"
 import * as scribdRegex from "./const/ScribdRegex.js"
 import * as slideshareRegex from "./const/SlideshareRegex.js"
 class App {
     constructor() {
@@ -16,6 +18,8 @@ class App {
             await scribdDownloader.execute(url, flag)
         } else if (url.match(slideshareRegex.DOMAIN)) {
             await slideshareDownloader.execute(url)
         } else {
             throw new Error(`Unsupported URL: ${url}`)
         }

 import { scribdDownloader } from "./service/ScribdDownloader.js"
 import { slideshareDownloader } from "./service/SlideshareDownloader.js"
+import { everandDownloader } from "./service/EverandDownloader.js"
 import * as scribdRegex from "./const/ScribdRegex.js"
 import * as slideshareRegex from "./const/SlideshareRegex.js"
+import * as everandRegex from "./const/EverandRegex.js"
 class App {
     constructor() {
             await scribdDownloader.execute(url, flag)
         } else if (url.match(slideshareRegex.DOMAIN)) {
             await slideshareDownloader.execute(url)
+        } else if (url.match(everandRegex.DOMAIN)) {
+            await everandDownloader.execute(url)
         } else {
             throw new Error(`Unsupported URL: ${url}`)
         }

src/const/EverandRegex.js ADDED Viewed

	@@ -0,0 +1,6 @@

+const DOMAIN = /^https:\/\/www\.everand\.com/
+const PODCAST_SERIES = /^https:\/\/www\.everand\.com\/podcast-show\/([0-9]+)\/([a-zA-z0-9_-]+)/
+const PODCAST_EPISODE = /^https:\/\/www\.everand\.com\/podcast\/([0-9]+)\/([a-zA-z0-9_-]+)/
+const PODCAST_LISTEN = /^https:\/\/www\.everand\.com\/listen\/podcast\/([0-9]+)/
+export { DOMAIN, PODCAST_SERIES, PODCAST_EPISODE, PODCAST_LISTEN }

src/service/EverandDownloader.js ADDED Viewed

	@@ -0,0 +1,112 @@

+import cliProgress from "cli-progress"
+import { puppeteerSg } from "../utils/request/PuppeteerSg.js";
+import { pdfGenerator } from "../utils/io/PdfGenerator.js";
+import { configLoader } from "../utils/io/ConfigLoader.js";
+import { directoryIo } from "../utils/io/DirectoryIo.js"
+import * as everandRegex from "../const/EverandRegex.js"
+import { Image } from "../object/Image.js"
+import sharp from "sharp";
+import axios from "axios";
+import fs from "fs"
+const output = configLoader.load("DIRECTORY", "output")
+class EverandDownloader {
+    constructor() {
+        if (!EverandDownloader.instance) {
+            EverandDownloader.instance = this
+        }
+        return EverandDownloader.instance
+    }
+    async execute(url) {
+        if (url.match(everandRegex.PODCAST_SERIES)) {
+            await this.series(url, )
+        } else if (url.match(everandRegex.PODCAST_EPISODE)) {
+            await this.listen(`https://www.everand.com/listen/podcast/${everandRegex.PODCAST_EPISODE.exec(url)[1]}`)
+        } else if (url.match(everandRegex.PODCAST_LISTEN)) {
+            await this.listen(url)
+        } else {
+            throw new Error(`Unsupported URL: ${url}`)
+        }
+    }
+    async listen(url, isEpisode) {
+        if (typeof isEpisode === "undefined") {
+            isEpisode = true
+        }
+        const episodeId = everandRegex.PODCAST_LISTEN.exec(url)[1]
+        // navigate to everand
+        let page = await puppeteerSg.getPage(url)
+        // wait rendering
+        await new Promise(resolve => setTimeout(resolve, 1000))
+        // get title, audio-url, series-url
+        const title = await page.evaluate(() => eval('Scribd.current_doc.short_title'))
+        const audioUrl = await page.evaluate(() => document.querySelector('audio#audioplayer').src)
+        const seriesUrl = await page.evaluate(() => document.querySelector('a[href^="https://www.everand.com/podcast-show/"]').href)
+        // prepare output dir
+        let seriesId = everandRegex.PODCAST_SERIES.exec(seriesUrl)[1]
+        let dir = `${output}/${seriesId}`
+        await directoryIo.create(dir)
+        // download audio
+        const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
+        if (isEpisode) {
+            bar.start(1, 0)
+        }
+        let path = `${dir}/${episodeId}_${title}.mp3`
+        const resp = await axios.get(audioUrl, { responseType: 'stream' })
+        resp.data.pipe(fs.createWriteStream(path))
+        if (isEpisode) {
+            bar.update(1)
+            bar.stop()
+        }
+        await page.close()
+        if (isEpisode) {
+            await puppeteerSg.close()
+        }
+    }
+    async series(url) {
+        const seriesId = everandRegex.PODCAST_SERIES.exec(url)[1]
+        // navigate to everand
+        let page = await puppeteerSg.getPage(url)
+        // wait rendering
+        await new Promise(resolve => setTimeout(resolve, 1000))
+        // get number-of-episodes
+        const totalEpisode = await page.evaluate(() => parseInt(document.querySelector('span[data-e2e="podcast-series-header-total-episodes"]').textContent.replace("episodes", "").trim()))
+        // get pages
+        const totalPage = await page.evaluate(() => [...document.querySelectorAll('div[data-e2e="pagination"] a[aria-label^="Page"]')].at(-1).textContent)
+        const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
+        bar.start(totalEpisode, 0)
+        xx:
+        for (let i = 1; i <= totalPage; i++) {
+            await page.goto(`${url}?page=${i}&sort=desc`, { waitUntil: "load" })
+            await new Promise(resolve => setTimeout(resolve, 1000))
+            let episodes = await page.evaluate(() => [...document.querySelectorAll('div.breakpoint_hide.below a[data-e2e="podcast-episode-player-button"]')].map(x => x.href))
+            for (let j = 0; j < episodes.length; j++ ) {
+                await this.listen(episodes[j], false)
+                bar.update(((i - 1) * 10) + (j + 1))
+                break xx
+            }
+        }
+        bar.stop()
+        await page.close()
+        await puppeteerSg.close()
+    }
+}
+export const everandDownloader = new EverandDownloader()