Spaces:
Sleeping
Sleeping
| import { AxiosInstance, AxiosHeaders } from "axios"; | |
| import { createAxiosInstance } from "../utils/axiosInstance"; | |
| import GlobalSetting from "../models/GlobalSetting"; | |
| import { GlobalSettingAttributes } from "../models/GlobalSetting"; | |
| import * as cheerio from "cheerio"; | |
| import { config } from "../config"; | |
| import { logger } from "../utils/logger"; | |
| import { injectable } from "inversify"; | |
| interface sourceItem { | |
| messageId?: string; | |
| title?: string; | |
| completeTitle?: string; | |
| link?: string; | |
| pubDate?: string; | |
| content?: string; | |
| description?: string; | |
| image?: string; | |
| cloudLinks?: string[]; | |
| tags?: string[]; | |
| cloudType?: string; | |
| } | |
| () | |
| export class Searcher { | |
| private static instance: Searcher; | |
| private api: AxiosInstance | null = null; | |
| constructor() { | |
| this.initAxiosInstance(); | |
| Searcher.instance = this; | |
| } | |
| private async initAxiosInstance(isUpdate: boolean = false) { | |
| let globalSetting = {} as GlobalSettingAttributes; | |
| if (isUpdate) { | |
| const settings = await GlobalSetting.findOne(); | |
| globalSetting = settings?.dataValues || ({} as GlobalSettingAttributes); | |
| } | |
| this.api = createAxiosInstance( | |
| config.telegram.baseUrl, | |
| AxiosHeaders.from({ | |
| accept: | |
| "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", | |
| "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", | |
| "cache-control": "max-age=0", | |
| priority: "u=0, i", | |
| "sec-ch-ua": '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"', | |
| "sec-ch-ua-mobile": "?0", | |
| "sec-ch-ua-platform": '"macOS"', | |
| "sec-fetch-dest": "document", | |
| "sec-fetch-mode": "navigate", | |
| "sec-fetch-site": "none", | |
| "sec-fetch-user": "?1", | |
| "upgrade-insecure-requests": "1", | |
| }), | |
| globalSetting?.isProxyEnabled, | |
| globalSetting?.isProxyEnabled | |
| ? { host: globalSetting?.httpProxyHost, port: globalSetting?.httpProxyPort } | |
| : undefined | |
| ); | |
| } | |
| public static async updateAxiosInstance(): Promise<void> { | |
| await Searcher.instance.initAxiosInstance(true); | |
| } | |
| private extractCloudLinks(text: string): { links: string[]; cloudType: string } { | |
| const links: string[] = []; | |
| let cloudType = ""; | |
| Object.values(config.cloudPatterns).forEach((pattern, index) => { | |
| const matches = text.match(pattern); | |
| if (matches) { | |
| links.push(...matches); | |
| if (!cloudType) cloudType = Object.keys(config.cloudPatterns)[index]; | |
| } | |
| }); | |
| return { | |
| links: [...new Set(links)], | |
| cloudType, | |
| }; | |
| } | |
| async searchAll(keyword: string, channelId?: string, messageId?: string) { | |
| const allResults: any[] = []; | |
| const channelList: any[] = channelId | |
| ? config.telegram.channels.filter((channel: any) => channel.id === channelId) | |
| : config.telegram.channels; | |
| // 使用Promise.all进行并行请求 | |
| const searchPromises = channelList.map(async (channel) => { | |
| try { | |
| const messageIdparams = messageId ? `before=${messageId}` : ""; | |
| const url = `/${channel.id}${keyword ? `?q=${encodeURIComponent(keyword)}&${messageIdparams}` : `?${messageIdparams}`}`; | |
| console.log(`Searching in channel ${channel.name} with URL: ${url}`); | |
| return this.searchInWeb(url).then((results) => { | |
| console.log(`Found ${results.items.length} items in channel ${channel.name}`); | |
| if (results.items.length > 0) { | |
| const channelResults = results.items | |
| .filter((item: sourceItem) => item.cloudLinks && item.cloudLinks.length > 0) | |
| .map((item: sourceItem) => ({ | |
| ...item, | |
| channel: channel.name, | |
| channelId: channel.id, | |
| })); | |
| allResults.push({ | |
| list: channelResults, | |
| channelInfo: { | |
| ...channel, | |
| channelLogo: results.channelLogo, | |
| }, | |
| id: channel.id, | |
| }); | |
| } | |
| }); | |
| } catch (error) { | |
| logger.error(`搜索频道 ${channel.name} 失败:`, error); | |
| } | |
| }); | |
| // 等待所有请求完成 | |
| await Promise.all(searchPromises); | |
| return { | |
| data: allResults, | |
| }; | |
| } | |
| async searchInWeb(url: string) { | |
| try { | |
| if (!this.api) { | |
| throw new Error("Axios instance is not initialized"); | |
| } | |
| const response = await this.api.get(url); | |
| const html = response.data; | |
| const $ = cheerio.load(html); | |
| const items: sourceItem[] = []; | |
| let channelLogo = ""; | |
| $(".tgme_header_link").each((_, element) => { | |
| channelLogo = $(element).find("img").attr("src") || ""; | |
| }); | |
| // 遍历每个消息容器 | |
| $(".tgme_widget_message_wrap").each((_, element) => { | |
| const messageEl = $(element); | |
| // 通过 data-post 属性来获取消息的链接 去除channelId 获得消息id | |
| const messageId = messageEl | |
| .find(".tgme_widget_message") | |
| .data("post") | |
| ?.toString() | |
| .split("/")[1]; | |
| // 提取标题 (第一个<br>标签前的内容) | |
| const title = | |
| messageEl | |
| .find(".js-message_text") | |
| .html() | |
| ?.split("<br>")[0] | |
| .replace(/<[^>]+>/g, "") | |
| .replace(/\n/g, "") || ""; | |
| // 提取描述 (第一个<a>标签前面的内容,不包含标题) | |
| const content = | |
| messageEl | |
| .find(".js-message_text") | |
| .html() | |
| ?.replace(title, "") | |
| .split("<a")[0] | |
| .replace(/<br>/g, "") | |
| .trim() || ""; | |
| // 提取链接 (消息中的链接) | |
| // const link = messageEl.find('.tgme_widget_message').data('post'); | |
| // 提取发布时间 | |
| const pubDate = messageEl.find("time").attr("datetime"); | |
| // 提取图片 | |
| const image = messageEl | |
| .find(".tgme_widget_message_photo_wrap") | |
| .attr("style") | |
| ?.match(/url\('(.+?)'\)/)?.[1]; | |
| const tags: string[] = []; | |
| // 提取云盘链接 | |
| const links = messageEl | |
| .find(".tgme_widget_message_text a") | |
| .map((_, el) => $(el).attr("href")) | |
| .get(); | |
| messageEl.find(".tgme_widget_message_text a").each((index, element) => { | |
| const tagText = $(element).text(); | |
| if (tagText && tagText.startsWith("#")) { | |
| tags.push(tagText); | |
| } | |
| }); | |
| const cloudInfo = this.extractCloudLinks(links.join(" ")); | |
| // 添加到数组第一位 | |
| items.unshift({ | |
| messageId, | |
| title, | |
| pubDate, | |
| content, | |
| image, | |
| cloudLinks: cloudInfo.links, | |
| cloudType: cloudInfo.cloudType, | |
| tags, | |
| }); | |
| }); | |
| return { items: items, channelLogo }; | |
| } catch (error) { | |
| logger.error(`搜索错误: ${url}`, error); | |
| return { | |
| items: [], | |
| channelLogo: "", | |
| }; | |
| } | |
| } | |
| } | |
| export default new Searcher(); | |