| import express from 'express'; | |
| import axios from 'axios'; | |
| import { Readability } from '@mozilla/readability'; | |
| import { JSDOM } from 'jsdom'; | |
| import TurndownService from 'turndown'; | |
// Shared HTML -> Markdown converter: "#"-style (ATX) headings and
// triple-backtick fenced code blocks.
const turndownOptions = {
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
};
const turndownService = new TurndownService(turndownOptions);
/**
 * Downloads a web page, extracts its main article with Readability and
 * converts the article HTML to Markdown.
 */
class WebsiteParser {
  /**
   * @param {object} [options]
   * @param {number} [options.timeoutMs=15000] - Upper bound, in milliseconds,
   *   for the outbound HTTP request, so a stalled remote host cannot hold the
   *   caller forever.
   */
  constructor({ timeoutMs = 15000 } = {}) {
    this.timeoutMs = timeoutMs;
  }

  /**
   * Fetches `url` and returns the extracted article.
   *
   * @param {string} url - Absolute http(s) URL of the page to parse.
   * @returns {Promise<{title: string, content: string, excerpt: string, byline: string, siteName: string}>}
   *   Article fields reported by Readability; `content` is Markdown.
   * @throws {Error} When the URL is invalid or not http(s), the request fails,
   *   or Readability cannot extract an article. The message keeps the
   *   "ошибка парсинга или получения страницы: " prefix and the underlying
   *   error is attached as `cause`.
   */
  async fetchAndParse(url) {
    try {
      // Validate before any network I/O: `new URL` throws on malformed input,
      // and only web schemes are meaningful for this fetcher.
      const { protocol } = new URL(url);
      if (protocol !== 'http:' && protocol !== 'https:') {
        throw new Error('поддерживаются только протоколы http и https');
      }

      const response = await axios.get(url, {
        timeout: this.timeoutMs,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; MCPBot/1.0)',
        },
      });

      // Passing `url` to JSDOM lets relative links in the page resolve
      // against the page address.
      const dom = new JSDOM(response.data, { url });
      const article = new Readability(dom.window.document).parse();
      if (!article) {
        throw new Error('не удалось спарсить страницу');
      }

      return {
        title: article.title,
        content: turndownService.turndown(article.content),
        excerpt: article.excerpt,
        byline: article.byline,
        siteName: article.siteName,
      };
    } catch (error) {
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(
        `ошибка парсинга или получения страницы: ${error.message}`,
        { cause: error },
      );
    }
  }
}
// Listening port: the PORT environment variable when set and non-empty,
// otherwise 7860.
const PORT = process.env.PORT || 7860;
// Single parser instance shared by all requests.
const parser = new WebsiteParser();
const app = express();
// Decode JSON request bodies into req.body.
app.use(express.json());
// POST /parse
// Request body: JSON `{ "url": "<page address>" }`.
// 200 -> `{ title, content, metadata: { excerpt, byline, siteName } }`
// 400 -> `{ error }` when `url` is missing, not a string, or blank
// 500 -> `{ error }` when fetching or parsing the page fails
app.post('/parse', async (req, res) => {
  // req.body may be undefined when no body was parsed (e.g. Express 5 with no
  // JSON payload); fall back to {} so that case is answered with 400, not 500.
  const { url } = req.body || {};
  if (typeof url !== 'string' || url.trim() === '') {
    return res.status(400).json({ error: 'необходимо указать URL!' });
  }

  try {
    const { title, content, excerpt, byline, siteName } =
      await parser.fetchAndParse(url);
    res.json({
      title,
      content,
      metadata: { excerpt, byline, siteName },
    });
  } catch (error) {
    res.status(500).json({
      error: error.message,
    });
  }
});
// GET / — replies with a ready-to-run curl command showing how to call /parse.
const usageExample = `curl -X POST https://prolapse-read.hf.space/parse -H "Content-Type: application/json" -d '{"url": "https://habr.com/ru/companies/serverspace/articles/869252/"}'`;
app.get('/', (_req, res) => {
  res.send(usageExample);
});
// Start accepting connections and log the port once the server is listening.
app.listen(PORT, function onListening() {
  console.log(`Server running on port ${PORT}`);
});