nkw committed on
Commit
d456a76
·
verified ·
1 Parent(s): fad61c7

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +59 -183
index.js CHANGED
@@ -1,201 +1,77 @@
1
- const express = require("express")
2
  const app = express();
3
- const puppeteer = require("puppeteer");
 
4
  const bodyParser = require("body-parser");
5
  const mutler = require("multer");
6
 
7
- // parse application/json
8
  app.use(bodyParser.json());
9
-
10
- // parse application/x-www-form-urlencoded
11
  app.use(bodyParser.urlencoded({ extended: false }));
12
-
13
- // app.use(express.urlencoded());
14
  app.use(mutler().array(""));
15
 
 
16
 
17
  const initBrowser = puppeteer.launch({
18
- executablePath: process.env.CHROME_BIN || null,
19
- defaultViewport: null,
20
- headless: true,
21
  });
22
 
23
- async function addRequestFilter(page) {
24
-
25
- const resourcetypes = ['document'];
26
-
27
- await page.setRequestInterception(true);
28
- // page.
29
- page.on("request", (request) => {
30
- // return request.continue();
31
- if (resourcetypes.includes(request.resourceType())) {
32
- request.continue();
33
- }
34
- else {
35
- request.abort();
36
- }
37
- });
38
-
39
- return page;
40
- }
41
-
42
- const getData = async (url) => {
43
-
44
- // Open a new page
45
- const browser = await initBrowser;
46
- var page = await browser.newPage();
47
- page = await addRequestFilter(page);
48
-
49
- await page.goto(url, {
50
- // waitUntil: "domcontentloaded",
51
- });
52
- // console.log(await page.content());
53
-
54
- const title = await page.evaluate(() => {
55
- const elem = document.querySelector("span.B_NuCI");
56
- if (elem)
57
- return elem.textContent;
58
- });
59
- const price = await page.evaluate(() => {
60
- const elem = document.querySelector("div._30jeq3._16Jk6d");
61
- if (elem)
62
- return elem.textContent;
63
- });
64
-
65
-
66
- const image = await page.evaluate(() => {
67
- const elem = document.querySelector("div.CXW8mj._3nMexc>img");
68
- if (elem)
69
- return elem.src;
70
- });
71
-
72
-
73
- page.close();
74
- return {
75
- title: title,
76
- price: price,
77
- image: image,
78
- };
79
- };
80
-
81
- const getHTML = async (url, headers = null) => {
82
-
83
- const browser = await initBrowser;
84
- var page = await browser.newPage();
85
- page = await addRequestFilter(page);
86
- if (headers) {
87
- // console.log("headers",headers);
88
- await page.setExtraHTTPHeaders(headers);
89
- }
90
-
91
- await page.goto(url, {
92
- // waitUntil: "domcontentloaded",
93
- });
94
-
95
- return await page.content();
96
-
97
- }
98
- const getScreenshot = async (url, headers = null) => {
99
-
100
- const browser = await initBrowser;
101
- var page = await browser.newPage();
102
-
103
- if (headers) {
104
- // console.log("headers",headers);
105
- await page.setExtraHTTPHeaders(headers);
106
- }
107
-
108
- await page.goto(url, {
109
- waitUntil: "domcontentloaded",
110
- });
111
-
112
- const image = await page.screenshot({
113
- type: "png",
114
- });
115
-
116
- const html = await page.content();
117
-
118
- page.close();
119
- return { image, html };
120
-
121
  }
122
 
123
  app.get("/", async (req, res) => {
124
- res.send('go to /test');
125
- })
126
-
127
- url = "https://www.flipkart.com/apple-iphone-15-blue-128-gb/p/itmbf14ef54f645d";
128
-
129
-
130
- app.get("/test", async (req, res) => {
131
- const data = await getData(url);
132
- res.type("json");
133
- res.send(JSON.stringify(data));
134
- })
135
-
136
- app.get("/testhtml", async (req, res) => {
137
- const html = await getHTML(url);
138
- res.type("json");
139
- res.send(JSON.stringify({
140
- html: html
141
- }));
142
- })
143
- app.get("/testscreenshot", async (req, res) => {
144
- const image = await getScreenshot(url);
145
- // convert buffer to base64 string
146
- const base64Image = await image.toString('base64');
147
-
148
- res.type("json");
149
- return res.send(JSON.stringify({
150
- "screenshot": "data:image/png;base64," + base64Image
151
- }));
152
- })
153
- app.post("/html", async (req, res) => {
154
- const data = req.body;
155
- if (!("url" in data)) {
156
- res.type("json");
157
- return res.send(JSON.stringify({
158
- "error": "no url parameter in request",
159
- }));
160
- }
161
- const { url, headers } = data;
162
-
163
- try {
164
- const html = await getHTML(url, headers);
165
- res.type("json").send(JSON.stringify({
166
- html: html
167
- }));
168
- }
169
- catch (e) {
170
- return res.type("json").send(JSON.stringify({
171
- "error": "can't open page",
172
- }));
173
- }
174
- })
175
- app.post("/screenshot", async (req, res) => {
176
- const data = req.body;
177
- if (!("url" in data)) {
178
- return res.type("json").send(JSON.stringify({
179
- "error": "no url parameter in request",
180
- }));
181
- }
182
- const url = data['url'];
183
-
184
- try {
185
- const { image, html } = await getScreenshot(url);
186
- // convert buffer to base64 string
187
- const base64Image = await image.toString('base64');
188
 
189
- return res.type("json").send(JSON.stringify({
190
- "base64": "data:image/png;base64," + base64Image,
191
- "html":html,
192
- }));
193
- }
194
- catch (e) {
195
- return res.type("json").send(JSON.stringify({
196
- "error": "can't open page",
197
- }));
198
- }
199
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  app.listen(8080, () => console.log("Server running at port 8080"));
 
1
+ const express = require("express");
2
  const app = express();
3
+ const puppeteer = require("puppeteer-extra");
4
+ const StealthPlugin = require("puppeteer-extra-plugin-stealth");
5
  const bodyParser = require("body-parser");
6
  const mutler = require("multer");
7
 
 
8
  app.use(bodyParser.json());
 
 
9
  app.use(bodyParser.urlencoded({ extended: false }));
 
 
10
  app.use(mutler().array(""));
11
 
12
+ puppeteer.use(StealthPlugin());
13
 
14
  const initBrowser = puppeteer.launch({
15
+ executablePath: process.env.CHROME_BIN || null,
16
+ defaultViewport: null,
17
+ headless: true,
18
  });
19
 
20
+ async function getDataWebsite(url) {
21
+ const browser = await initBrowser;
22
+ const page = await browser.newPage();
23
+ await page.setViewport({width: 1920, height: 1080});
24
+
25
+ const response = await page.goto(url, {
26
+ timeout: 0,
27
+ waitUntil: 'networkidle0',
28
+ });
29
+
30
+ const headers = response.headers();
31
+ const cookies = await page.cookies();
32
+ const cookieString = cookies?.map(cookie => `${cookie.name}=${cookie.value}`).join('; ');
33
+ const content_html = await page.evaluate(() => {
34
+ return new XMLSerializer().serializeToString(document);
35
+ });
36
+ const screenData = await page.screenshot({encoding: 'binary', type: 'jpeg', quality: 100});
37
+ await page.close();
38
+ await browser.close();
39
+ return {
40
+ headers,
41
+ cookies,
42
+ cookieString,
43
+ content_html,
44
+ screenData,
45
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  app.get("/", async (req, res) => {
49
+ res.send("Hello World");
50
+ });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ app.post("/getDataWebsite", async (req, res) => {
53
+ const data = req.body;
54
+ if (!("url" in data)) {
55
+ return res.type("json").send(
56
+ JSON.stringify({
57
+ error: "no url parameter in request",
58
+ })
59
+ );
60
+ }
61
+ const url = data["url"];
62
+
63
+ try {
64
+ const response = await getDataWebsite(url);
65
+ return res.type("json").send(
66
+ JSON.stringify(response)
67
+ );
68
+ } catch (e) {
69
+ return res.type("json").send(
70
+ JSON.stringify({
71
+ error: "can't open page",
72
+ })
73
+ );
74
+ }
75
+ });
76
 
77
  app.listen(8080, () => console.log("Server running at port 8080"));