| | import Parser from '@jocmp/mercury-parser'; |
| | import type { CheerioAPI } from 'cheerio'; |
| | import { load } from 'cheerio'; |
| | import type { Element } from 'domhandler'; |
| | import * as entities from 'entities'; |
| | import type { MiddlewareHandler } from 'hono'; |
| | import { convert } from 'html-to-text'; |
| | import markdownit from 'markdown-it'; |
| | import { RE2JS } from 're2js'; |
| | import sanitizeHtml from 'sanitize-html'; |
| | import { simplecc } from 'simplecc-wasm'; |
| |
|
| | import { config } from '@/config'; |
| | import type { Data, DataItem } from '@/types'; |
| | import cache from '@/utils/cache'; |
| | import ofetch from '@/utils/ofetch'; |
| |
|
/**
 * Shared markdown-it renderer, constructed with the `html: true` option.
 * The middleware below uses it to turn AI-generated Markdown into HTML.
 */
const md = markdownit({ html: true });
| |
|
/**
 * Rewrites a single attribute of `elem` to an absolute URL, in place.
 *
 * Nothing happens when `baseUrl` is missing, when the attribute is absent or
 * empty, or when the value cannot be resolved against `baseUrl` (the URL
 * parsing error is swallowed and the attribute is left untouched).
 *
 * @param $ - Cheerio instance that owns `elem`.
 * @param elem - Element whose attribute should be rewritten.
 * @param attr - Attribute name, e.g. `href`, `src` or `poster`.
 * @param baseUrl - Absolute URL that relative values are resolved against.
 */
const resolveRelativeLink = ($: CheerioAPI, elem: Element, attr: string, baseUrl?: string) => {
    const node = $(elem);

    if (!baseUrl) {
        return;
    }

    try {
        const current = node.attr(attr);
        if (!current) {
            return;
        }
        const absolute = new URL(current, baseUrl).href;
        node.attr(attr, absolute);
    } catch {
        // Best effort: an unparsable value keeps its original form.
    }
};
| |
|
/**
 * Sends a two-message chat request (system prompt + user text) to the
 * `/chat/completions` path under `config.openai.endpoint` and returns the
 * content of the first choice in the response.
 *
 * Model, token limit, temperature and API key are all read from
 * `config.openai`.
 *
 * @param prompt - Content of the `system` message.
 * @param text - Content of the `user` message.
 * @returns `choices[0].message.content` of the response.
 */
const getAiCompletion = async (prompt: string, text: string) => {
    const { endpoint, model, maxTokens, temperature, apiKey } = config.openai;

    const messages = [
        { role: 'system', content: prompt },
        { role: 'user', content: text },
    ];

    const completion = await ofetch(`${endpoint}/chat/completions`, {
        method: 'POST',
        body: {
            model,
            max_tokens: maxTokens,
            messages,
            temperature,
        },
        headers: {
            Authorization: `Bearer ${apiKey}`,
        },
    });

    return completion.choices[0].message.content;
};
| |
|
/**
 * Flattens an item's `author` field into a single string.
 *
 * A string author is returned unchanged; otherwise the `name` of every entry
 * is joined with a single space. A missing or empty author yields `''`.
 */
const getAuthorString = (item) => {
    const rawAuthor = item.author;

    if (!rawAuthor) {
        return '';
    }
    if (typeof rawAuthor === 'string') {
        return rawAuthor;
    }

    const names = rawAuthor.map((entry) => entry.name);
    return names.join(' ');
};
| |
|
/** Substrings that mark an `<img>` attribute value as a probable image URL. */
const IMAGE_SUFFIXES = ['.gif', '.png', '.jpg', '.webp'];

/**
 * Makes a base URL usable with `new URL(relative, base)`: a value that does
 * not start with `http://` or `https://` gets `http:` (protocol-relative
 * `//host`) or `http://` prepended. Empty/undefined input is returned as-is.
 */
const ensureHttpScheme = (url?: string) => {
    if (url && !/^https?:\/\//.test(url)) {
        return /^\/\//.test(url) ? 'http:' + url : 'http://' + url;
    }
    return url;
};

/** Tests `text` against a pattern produced by either supported regex engine. */
const matchesRegex = (regex: RegExp | RE2JS, text: string): boolean => (regex instanceof RE2JS ? regex.matcher(text).find() : regex.test(text));

/**
 * Post-processes the feed stored under the `data` context key after the
 * downstream handlers have run.
 *
 * Steps, in order:
 *  1. reject empty feeds unless `data.allowEmpty` is set;
 *  2. decode XML entities in feed/item titles and the feed description;
 *  3. sort items by `pubDate`, newest first (skipped with `sorted=false`);
 *  4. normalise every item (date format, absolute links, cleaned description
 *     HTML, string-only categories);
 *  5. apply the query-driven transforms: `filter`, `filter_title`,
 *     `filter_description`, `filter_author`, `filter_category`,
 *     `filter_case_sensitive`, `filterout*`, `filter_time`, `limit`, `tgiv`,
 *     `mode=fulltext`, `chatgpt`, `scihub`, `opencc`, `brief`.
 *
 * Nothing is done when no `data` was set on the context.
 *
 * @throws Error when the feed is empty and `allowEmpty` is not set, when the
 *   configured regex engine is unknown, or when `brief` is not a number >= 100.
 */
const middleware: MiddlewareHandler = async (ctx, next) => {
    await next();

    const data = ctx.get('data') as Data;
    if (!data) {
        return;
    }

    if ((!data.item || data.item.length === 0) && !data.allowEmpty) {
        throw new Error('this route is empty, please check the original site or <a href="https://github.com/DIYgod/RSSHub/issues/new/choose">create an issue</a>');
    }

    // From here on `data.item` is always an array.
    data.item = data.item || [];

    // Decode XML entities in the feed-level text fields.
    if (data.title) {
        data.title = entities.decodeXML(data.title + '');
    }
    if (data.description) {
        data.description = entities.decodeXML(data.description + '');
    }

    // Newest first; items without a pubDate are treated as epoch 0.
    if (ctx.req.query('sorted') !== 'false') {
        data.item = data.item.toSorted((a: DataItem, b: DataItem) => +new Date(b.pubDate || 0) - +new Date(a.pubDate || 0));
    }

    /** Normalises a single item in place and returns it. */
    const handleItem = (item: DataItem) => {
        if (item.title) {
            item.title = entities.decodeXML(item.title + '');
        }

        if (item.pubDate) {
            item.pubDate = new Date(item.pubDate).toUTCString();
        }

        // Resolve the item link against the (scheme-normalised) feed link.
        if (item.link) {
            item.link = new URL(item.link, ensureHttpScheme(data.link)).href;
        }

        if (item.description) {
            const $ = load(item.description);
            const baseUrl = ensureHttpScheme(item.link || data.link);

            $('script').remove();

            $('img').each((_, ele) => {
                const $ele = $(ele);

                // Recover a src for lazy-loaded images: prefer the common
                // data attributes, otherwise take the first attribute whose
                // value looks like an image URL.
                if (!$ele.attr('src')) {
                    const lazySrc = $ele.attr('data-src') || $ele.attr('data-original');
                    if (lazySrc) {
                        $ele.attr('src', lazySrc);
                    } else {
                        for (const key in ele.attribs) {
                            const value = ele.attribs[key].trim();
                            if (IMAGE_SUFFIXES.some((suffix) => value.includes(suffix))) {
                                $ele.attr('src', value);
                                break;
                            }
                        }
                    }
                }

                // Strip inline event handlers from images.
                for (const handler of ['onclick', 'onerror', 'onload']) {
                    $ele.removeAttr(handler);
                }
            });

            // Make relative URLs absolute.
            $('a, area').each((_, elem) => {
                resolveRelativeLink($, elem, 'href', baseUrl);
            });
            $('img, video, audio, source, iframe, embed, track').each((_, elem) => {
                resolveRelativeLink($, elem, 'src', baseUrl);
            });
            $('video[poster]').each((_, elem) => {
                resolveRelativeLink($, elem, 'poster', baseUrl);
            });

            // Default referrer policy for embedded images and iframes.
            $('img, iframe').each((_, elem) => {
                if (!$(elem).attr('referrerpolicy')) {
                    $(elem).attr('referrerpolicy', 'no-referrer');
                }
            });

            item.description = $('body').html() + '' + (config.suffix || '');

            // Copy the quoted block's HTML onto every extra link.
            const $quote = $('.rsshub-quote');
            if (item._extra?.links && $quote.length) {
                const quoteHtml = $.html($quote);
                for (const extraLink of item._extra.links) {
                    extraLink.content_html = quoteHtml;
                }
            }
        }

        // Categories are always an array containing only strings.
        if (item.category) {
            const categories = Array.isArray(item.category) ? item.category : [item.category];
            item.category = categories.filter((e) => typeof e === 'string');
        }
        return item;
    };

    data.item = await Promise.all(data.item.map((itm) => handleItem(itm)));

    // --- regex filters -----------------------------------------------------
    const engine = config.feature.filter_regex_engine;
    const insensitive = ctx.req.query('filter_case_sensitive') === 'false';
    /** Compiles a user-supplied pattern with the configured engine. */
    const makeRegex = (str: string): RegExp | RE2JS => {
        switch (engine) {
            case 'regexp':
                return new RegExp(str, insensitive ? 'i' : '');
            case 're2':
                return RE2JS.compile(str, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
            default:
                throw new Error(`Invalid Engine Value: ${engine}, please check your config.`);
        }
    };
    const compileOptional = (pattern?: string) => (pattern ? makeRegex(pattern) : undefined);

    // `filter`: keep items where ANY of title/description/author/category matches.
    const filter = ctx.req.query('filter');
    if (filter) {
        const regex = makeRegex(filter);

        data.item = data.item.filter((item) => {
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return matchesRegex(regex, title) || matchesRegex(regex, description) || matchesRegex(regex, author) || category.some((c) => matchesRegex(regex, c));
        });
    }

    // `filter_*`: only honoured without `filter`; ALL supplied patterns must match.
    const filterTitle = ctx.req.query('filter_title');
    const filterDescription = ctx.req.query('filter_description');
    const filterAuthor = ctx.req.query('filter_author');
    const filterCategory = ctx.req.query('filter_category');
    if (!filter && (filterTitle || filterDescription || filterAuthor || filterCategory)) {
        // Compile once rather than once per item.
        const titleRegex = compileOptional(filterTitle);
        const descriptionRegex = compileOptional(filterDescription);
        const authorRegex = compileOptional(filterAuthor);
        const categoryRegex = compileOptional(filterCategory);

        data.item = data.item.filter((item) => {
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return (
                (!titleRegex || matchesRegex(titleRegex, title)) &&
                (!descriptionRegex || matchesRegex(descriptionRegex, description)) &&
                (!authorRegex || matchesRegex(authorRegex, author)) &&
                (!categoryRegex || category.some((c) => matchesRegex(categoryRegex, c)))
            );
        });
    }

    // `filterout*`: drop items where any supplied pattern matches. A bare
    // `filterout` applies to title and description unless a field-specific
    // pattern overrides it.
    const filterout = ctx.req.query('filterout');
    const filteroutTitle = ctx.req.query('filterout_title');
    const filteroutDescription = ctx.req.query('filterout_description');
    const filteroutAuthor = ctx.req.query('filterout_author');
    const filteroutCategory = ctx.req.query('filterout_category');
    if (filterout || filteroutTitle || filteroutDescription || filteroutAuthor || filteroutCategory) {
        const titleRegex = compileOptional(filteroutTitle || filterout);
        const descriptionRegex = compileOptional(filteroutDescription || filterout);
        const authorRegex = compileOptional(filteroutAuthor);
        const categoryRegex = compileOptional(filteroutCategory);

        data.item = data.item.filter((item) => {
            // Fall back to '' so a missing title is never matched as "undefined".
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return (
                !(titleRegex && matchesRegex(titleRegex, title)) &&
                !(descriptionRegex && matchesRegex(descriptionRegex, description)) &&
                !(authorRegex && matchesRegex(authorRegex, author)) &&
                !(categoryRegex && category.some((c) => matchesRegex(categoryRegex, c)))
            );
        });
    }

    // `filter_time`: keep items published within the last N seconds; items
    // without a pubDate are always kept.
    const filterTime = ctx.req.query('filter_time');
    if (filterTime) {
        const now = Date.now();
        const maxAgeMs = Number.parseInt(filterTime) * 1000;
        data.item = data.item.filter(({ pubDate }) => {
            try {
                return !pubDate || now - new Date(pubDate).getTime() <= maxAgeMs;
            } catch {
                // Best effort: keep the item if its date cannot be evaluated.
                return true;
            }
        });
    }

    // `limit`: keep only the first N items.
    const limit = ctx.req.query('limit');
    if (limit) {
        data.item = data.item.slice(0, Number.parseInt(limit));
    }

    // `tgiv`: wrap every item link in a t.me/iv URL carrying the given rhash.
    const tgiv = ctx.req.query('tgiv');
    if (tgiv) {
        for (const item of data.item) {
            if (item.link) {
                const encodedlink = encodeURIComponent(item.link);
                item.link = `https://t.me/iv?url=${encodedlink}&rhash=${tgiv}`;
            }
        }
    }

    // `mode=fulltext`: fetch each item's page and replace the description
    // with the parser's extracted content when it is long enough.
    if (ctx.req.query('mode')?.toLowerCase() === 'fulltext') {
        const tasks = data.item.map(async (item) => {
            const { link, author, description } = item;
            const parsedResult: any = await cache.tryGet(`mercury-cache-${link}`, async () => {
                if (link) {
                    try {
                        const res = await ofetch(link);
                        const $ = load(res);
                        const result = await Parser.parse(link, {
                            html: $.html(),
                        });
                        return result;
                    } catch {
                        // Fetch/parse failures fall back to the original description.
                    }
                }
            });

            item.author = author || parsedResult?.author;
            // Guard against a parse result without content instead of
            // throwing and failing the whole feed.
            const content = parsedResult?.content;
            item.description = typeof content === 'string' && content.length > 40 ? entities.decodeXML(content) : description;
        });
        await Promise.all(tasks);
    }

    // `chatgpt`: rewrite titles and/or prepend an AI summary, depending on
    // `config.openai.inputOption`. Results are cached per item link.
    if (ctx.req.query('chatgpt') && config.openai.apiKey) {
        const { inputOption } = config.openai;

        /** Replaces the item title with the AI completion unless it is empty. */
        const rewriteTitle = async (item: DataItem) => {
            const title = await cache.tryGet(`openai:title:${item.link}`, async () => {
                const plainTitle = convert(item.title!);
                return await getAiCompletion(config.openai.promptTitle, plainTitle);
            });
            if (title !== '') {
                item.title = title + '';
            }
        };

        /** Prepends the rendered AI completion to the item description unless it is empty. */
        const prependSummary = async (item: DataItem) => {
            const summary = await cache.tryGet(`openai:description:${item.link}`, async () => {
                const plainDescription = convert(item.description!);
                const descriptionMd = await getAiCompletion(config.openai.promptDescription, plainDescription);
                return md.render(descriptionMd);
            });
            if (summary !== '') {
                item.description = summary + '<hr/><br/>' + item.description;
            }
        };

        data.item = await Promise.all(
            data.item.map(async (item) => {
                try {
                    if (inputOption === 'description' && item.description) {
                        await prependSummary(item);
                    } else if (inputOption === 'title' && item.title) {
                        await rewriteTitle(item);
                    } else if (inputOption === 'both' && item.title && item.description) {
                        await rewriteTitle(item);
                        await prependSummary(item);
                    }
                } catch {
                    // Best effort: an AI failure leaves the item as it was.
                }
                return item;
            })
        );
    }

    // `scihub`: point links at the configured Sci-Hub host, preferring the DOI.
    if (ctx.req.query('scihub')) {
        for (const item of data.item) {
            item.link = item.doi ? `${config.scihub.host}${item.doi}` : `${config.scihub.host}${item.link}`;
        }
    }

    // `opencc`: run title and description through simplecc with the given
    // conversion name.
    const opencc = ctx.req.query('opencc');
    if (opencc) {
        for (const item of data.item) {
            item.title = simplecc(item.title ?? item.link, opencc);
            item.description = simplecc(item.description ?? item.title ?? item.link, opencc);
        }
    }

    // `brief`: reduce descriptions to plain text truncated to N characters.
    const briefParam = ctx.req.query('brief');
    if (briefParam) {
        // Validate the parsed number itself; a substring check would let
        // values such as `abc100` through and then truncate with NaN.
        const brief = Number.parseInt(briefParam, 10);
        if (Number.isNaN(brief) || brief < 100) {
            throw new Error(`Invalid parameter brief. Please check the doc https://docs.rsshub.app/guide/parameters#shu-chu-jian-xun`);
        }
        for (const item of data.item) {
            if (item.description) {
                const text = sanitizeHtml(item.description, { allowedTags: [], allowedAttributes: {} });
                item.description = text.length > brief ? `<p>${text.slice(0, brief)}…</p>` : `<p>${text}</p>`;
            }
        }
    }

    ctx.set('data', data);
};

export default middleware;
| |
|