| const fs = require('fs'); |
| const db = require('./db'); |
| const config = require('./config'); |
| const { getName } = require('country-list'); |
| const YAML = require('yaml'); |
| const js2xml = require('xml-js'); |
|
|
| describe('Scrapers', function () { |
|
|
// Jasmine's default timeout interval for this suite: 600000 ms (ten minutes).
jasmine.DEFAULT_TIMEOUT_INTERVAL = 10 * 60 * 1000;

// Shared store: scraper specs append to `bySource` / `byType`, the 'Save' spec reads it.
const proxies = {
  lastUpdated: new Date(),
  bySource: {},
  byType: {
    socks5: [],
    socks4: [],
    http: [],
  },
};
|
|
// Scrapes the per-country proxy tables on proxynova.com into the shared `proxies` store.
// Pages are visited one at a time. Every driver promise is returned, so the spec only
// completes once all rows have been read, and a page that fails to load or parse is
// logged and skipped instead of silently ending the whole run.
it('Proxynova Scraper', function () {
  const scraperId = 'proxynova';
  const pages = [
    'https://www.proxynova.com/proxy-server-list/country-bd/ ',
    'https://www.proxynova.com/proxy-server-list/country-br/ ',
    'https://www.proxynova.com/proxy-server-list/country-cl/ ',
    'https://www.proxynova.com/proxy-server-list/country-cn/ ',
    'https://www.proxynova.com/proxy-server-list/country-co/ ',
    'https://www.proxynova.com/proxy-server-list/country-fr/ ',
    'https://www.proxynova.com/proxy-server-list/country-de/ ',
    'https://www.proxynova.com/proxy-server-list/country-hk/ ',
    'https://www.proxynova.com/proxy-server-list/country-in/ ',
    'https://www.proxynova.com/proxy-server-list/country-id/ ',
    'https://www.proxynova.com/proxy-server-list/country-jp/ ',
    'https://www.proxynova.com/proxy-server-list/country-ke/ ',
    'https://www.proxynova.com/proxy-server-list/country-nl/ ',
    'https://www.proxynova.com/proxy-server-list/country-pl/ ',
    'https://www.proxynova.com/proxy-server-list/country-ru/ ',
    'https://www.proxynova.com/proxy-server-list/country-rs/ ',
    'https://www.proxynova.com/proxy-server-list/country-kr/ ',
    'https://www.proxynova.com/proxy-server-list/country-tw/ ',
    'https://www.proxynova.com/proxy-server-list/country-th/ ',
    'https://www.proxynova.com/proxy-server-list/country-ua/ ',
    'https://www.proxynova.com/proxy-server-list/country-gb/ ',
    'https://www.proxynova.com/proxy-server-list/country-us/ ',
    'https://www.proxynova.com/proxy-server-list/country-ve/ ',
    'https://www.proxynova.com/proxy-server-list/country-ir/ ',
    'https://www.proxynova.com/proxy-server-list/country-tr/ ',
    'https://www.proxynova.com/proxy-server-list/country-na/ ',
    'https://www.proxynova.com/proxy-server-list/country-mz/ ',
    'https://www.proxynova.com/proxy-server-list/country-it/ ',
    'https://www.proxynova.com/proxy-server-list/country-eg/ ',
    'https://www.proxynova.com/proxy-server-list/country-bg/',
  ];
  let pageIndex = 0;
  let proxyFound = 0;
  proxies.bySource[scraperId] = [];

  // Files a completed proxy under its source and under its mapped type bucket.
  function record(proxy) {
    const bucket = getTypeMapping(proxy.type);
    proxies.bySource[scraperId].push(proxy);
    if (proxies.byType[bucket] === undefined) proxies.byType[bucket] = [];
    proxies.byType[bucket].push(proxy);
    proxyFound++;
  }

  // Reads one table row. Resolves once the row was recorded, skipped, or its error logged.
  function readRow(row) {
    return row.findElements(by.tagName('td')).then((cols) => {
      // Columns 0, 1, 5 and 6 are read below; rows that lack them are skipped.
      if (cols.length < 7) return undefined;
      const proxy = {};
      return cols[0].getText()
        .then((text) => {
          proxy.ip = text;
          return cols[1].getText();
        })
        .then((text) => {
          proxy.port = text;
          return cols[5].getText();
        })
        .then((text) => {
          proxy.country = text;
          return cols[6].getText();
        })
        .then((text) => {
          proxy.anonymity = text;
          proxy.type = 'HTTP/HTTPS';
          record(proxy);
        });
    }).catch((err) => {
      console.log("Exception Occured! in " + scraperId, err.stack);
    });
  }

  // Processes the rows strictly in order, starting at `index`.
  function readRows(rows, index) {
    if (index >= rows.length) return undefined;
    return readRow(rows[index]).then(() => readRows(rows, index + 1));
  }

  // Loads the next page, scrapes its table, then continues until `pages` is exhausted.
  function loadPage() {
    // Some URL literals carry a trailing space; trim before handing them to the driver.
    const page = pages[pageIndex++].trim();
    console.log(scraperId + " Visiting... " + page);
    return browser.driver.get(page)
      .then(() => browser.driver.findElement(by.tagName('tbody')).findElements(by.tagName('tr')))
      .then((rows) => readRows(rows, 0))
      .catch((err) => {
        // A broken page must not prevent the remaining pages from being visited.
        console.log("Exception Occured! in " + scraperId, err.stack);
      })
      .then(() => {
        if (pageIndex < pages.length) return loadPage();
        console.log(`Got ${proxyFound} proxies from ${scraperId}`);
        return undefined;
      });
  }

  return loadPage();
});
|
|
// Scrapes the proxy tables of the free-proxy-list.net family of sites into `proxies`.
// Pages are visited one at a time. Every driver promise is returned, so the spec only
// completes once all rows have been read, and a page that fails to load or parse is
// logged and skipped instead of silently ending the whole run.
it('US-proxy.org Scraper', function () {
  const scraperId = 'usproxy';
  const pages = [
    'https://free-proxy-list.net/ ',
    'https://www.socks-proxy.net/',
    'https://www.us-proxy.org/',
    'https://free-proxy-list.net/uk-proxy.html',
    'https://www.sslproxies.org/',
    'https://free-proxy-list.net/anonymous-proxy.html',
  ];
  let pageIndex = 0;
  let proxyFound = 0;
  proxies.bySource[scraperId] = [];

  // Files a completed proxy under its source and under its mapped type bucket.
  function record(proxy) {
    const bucket = getTypeMapping(proxy.type);
    proxies.bySource[scraperId].push(proxy);
    if (proxies.byType[bucket] === undefined) proxies.byType[bucket] = [];
    proxies.byType[bucket].push(proxy);
    proxyFound++;
  }

  // Reads one table row. Resolves once the row was recorded, skipped, or its error logged.
  function readRow(row) {
    return row.findElements(by.tagName('td')).then((cols) => {
      // Columns 0, 1 and 4 are read below; rows that lack them (e.g. rows with no <td>) are skipped.
      if (cols.length < 5) return undefined;
      const proxy = {};
      return cols[0].getText()
        .then((text) => {
          proxy.ip = text;
          return cols[1].getText();
        })
        .then((text) => {
          proxy.port = text;
          return cols[4].getText();
        })
        .then((text) => {
          // NOTE(review): country and anonymity are both filled from column 4, exactly as
          // before. That looks like a copy/paste slip — confirm the intended column indexes
          // against the live tables before changing them.
          proxy.country = text;
          proxy.anonymity = text;
          // NOTE(review): the type is hard-coded for every page of this scraper, including
          // socks-proxy.net — confirm that is intended.
          proxy.type = 'HTTP/HTTPS';
          record(proxy);
        });
    }).catch((err) => {
      console.log("Exception Occured! in " + scraperId, err);
    });
  }

  // Processes the rows strictly in order, starting at `index`.
  function readRows(rows, index) {
    if (index >= rows.length) return undefined;
    return readRow(rows[index]).then(() => readRows(rows, index + 1));
  }

  // Loads the next page, scrapes its table, then continues until `pages` is exhausted.
  function loadPage() {
    // One URL literal carries a trailing space; trim before handing it to the driver.
    const page = pages[pageIndex++].trim();
    console.log(scraperId + " Visiting... " + page);
    return browser.driver.get(page)
      .then(() => browser.driver.findElement(by.className('table-responsive fpl-list')).findElements(by.tagName('tr')))
      .then((rows) => readRows(rows, 0))
      .catch((err) => {
        // A broken page must not prevent the remaining pages from being visited.
        console.log("Exception Occured! in " + scraperId, err.stack);
      })
      .then(() => {
        if (pageIndex < pages.length) return loadPage();
        console.log(`Got ${proxyFound} proxies from ${scraperId}`);
        return undefined;
      });
  }

  return loadPage();
});
|
|
// Scrapes openproxy.space by reading the page's `__NUXT__` state object instead of the DOM.
// `scraperId` is declared at spec scope so the error handler can reference it (it used to
// be scoped to a `try` block and was out of scope in the matching `catch`), and the driver
// promise is returned so the spec waits for the scrape to finish.
it('openproxy.space Scraper', function () {
  const scraperId = 'openproxy';
  const page = 'https://openproxy.space/list/http';
  let proxyFound = 0;
  proxies.bySource[scraperId] = [];

  // Files a completed proxy under its source and under its mapped type bucket.
  function record(proxy) {
    const bucket = getTypeMapping(proxy.type);
    proxies.bySource[scraperId].push(proxy);
    if (proxies.byType[bucket] === undefined) proxies.byType[bucket] = [];
    proxies.byType[bucket].push(proxy);
    proxyFound++;
  }

  console.log(scraperId + " Visiting... " + page);
  return browser.driver.get(page)
    .then(() => browser.driver.executeScript('return __NUXT__'))
    .then((state) => {
      // As read here: data[0].data is a list of { code, items }, each item an "ip:port" string.
      state.data[0].data.forEach((country) => {
        country.items.forEach((proxyItem) => {
          const parts = proxyItem.split(':');
          record({
            country: getName(country.code),
            ip: parts[0],
            port: parts[1],
            // The page does not expose these, so they are recorded as 'unknown'.
            type: 'unknown',
            anonymity: 'unknown',
          });
        });
      });
      console.log(`Got ${proxyFound} proxies from ${scraperId}`);
    })
    .catch((error) => {
      console.log("Exception Occured! in " + scraperId, error.stack);
    });
});
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
// Scrapes the paginated proxy table on freeproxy.world (page=0 … page=50) into `proxies`.
// Pages are visited one at a time. Every driver promise is returned, so the spec only
// completes once all rows have been read, and a page that fails to load or parse is
// logged and skipped instead of silently ending the whole run. The proxy counter is
// incremented once per recorded proxy (it used to be bumped a second time per table row).
it('freeproxy.world Scraper', function () {
  const scraperId = 'freeproxy';
  const pages = [];
  for (let i = 0; i <= 50; i++) {
    pages.push("https://www.freeproxy.world/?type=&anonymity=&country=&speed=&port=&page=" + i);
  }
  let pageIndex = 0;
  let proxyFound = 0;
  proxies.bySource[scraperId] = [];

  // Files a completed proxy under its source and under its mapped type bucket.
  function record(proxy) {
    const bucket = getTypeMapping(proxy.type);
    proxies.bySource[scraperId].push(proxy);
    if (proxies.byType[bucket] === undefined) proxies.byType[bucket] = [];
    proxies.byType[bucket].push(proxy);
    proxyFound++;
  }

  // Reads one table row. Resolves once the row was recorded, skipped, or its error logged.
  function readRow(row) {
    return row.findElements(by.tagName('td')).then((cols) => {
      // Columns 0, 1, 2, 5 and 6 are read below; rows that lack them are skipped.
      if (cols.length < 7) return undefined;
      const proxy = {};
      return cols[0].getText()
        .then((text) => {
          proxy.ip = text;
          return cols[1].getText();
        })
        .then((text) => {
          proxy.port = text;
          return cols[2].getText();
        })
        .then((text) => {
          proxy.country = text;
          return cols[5].getText();
        })
        .then((text) => {
          proxy.type = text;
          return cols[6].getText();
        })
        .then((text) => {
          proxy.anonymity = text;
          record(proxy);
        });
    }).catch((err) => {
      console.log("Exception Occured! in " + scraperId, err);
    });
  }

  // Processes the rows strictly in order, starting at `index`.
  function readRows(rows, index) {
    if (index >= rows.length) return undefined;
    return readRow(rows[index]).then(() => readRows(rows, index + 1));
  }

  // Loads the next page, scrapes its table, then continues until `pages` is exhausted.
  function loadPage() {
    const page = pages[pageIndex++];
    console.log(scraperId + " Visiting... " + page);
    return browser.driver.get(page)
      .then(() => browser.driver.findElement(by.tagName('tbody')).findElements(by.tagName('tr')))
      .then((rows) => readRows(rows, 0))
      .catch((err) => {
        // A broken page must not prevent the remaining pages from being visited.
        console.log("Exception Occured! in " + scraperId, err.stack);
      })
      .then(() => {
        if (pageIndex < pages.length) return loadPage();
        console.log(`Got ${proxyFound} proxies from ${scraperId}`);
        return undefined;
      });
  }

  return loadPage();
});
|
|
// Fetches the geonode.com proxy-list API through the browser and parses the JSON body.
// The driver promise is returned so the spec waits for the scrape, failures of the page
// load or script execution are logged (they used to be unhandled rejections), and the
// number of proxies found is reported like the other scrapers do.
it('geonode.com Scraper', function () {
  const scraperId = 'geonode';
  const page = 'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&filterLastChecked=60';
  let proxyFound = 0;
  proxies.bySource[scraperId] = [];

  // Files a completed proxy under its source and under its mapped type bucket.
  function record(proxy) {
    const bucket = getTypeMapping(proxy.type);
    proxies.bySource[scraperId].push(proxy);
    if (proxies.byType[bucket] === undefined) proxies.byType[bucket] = [];
    proxies.byType[bucket].push(proxy);
    proxyFound++;
  }

  console.log(scraperId + " Visiting... " + page);
  return browser.driver.get(page)
    .then(() => browser.driver.executeScript('return document.getElementsByTagName("body")[0].innerText'))
    .then((body) => {
      // The response body is expected to be JSON with a `data` array of proxy records.
      JSON.parse(body).data.forEach((proxyItem) => {
        record({
          country: getName(proxyItem.country),
          ip: proxyItem.ip,
          port: proxyItem.port,
          // Several protocols are flattened into one label, e.g. "a | b".
          type: proxyItem.protocols.join(" | "),
          anonymity: proxyItem.anonymityLevel,
        });
      });
    })
    .catch((error) => {
      console.log('Error occured in ' + scraperId, error);
    })
    .then(() => {
      console.log(`Got ${proxyFound} proxies from ${scraperId}`);
    });
});
|
|
// Persists everything the scraper specs collected:
//   - generated/raw.json              the full `proxies` store
//   - proxies.{csv,json,txt,yaml,xml} all proxies from every source, flattened
//   - generated/<type>_proxies.*      the socks5, socks4 and http buckets
//   - README.md                       the proxy-count line between the dynamic-count markers
// Each flattened proxy is also handed to saveIntoDb().
// Files are written synchronously so they are complete when the spec ends, and a failed
// write is reported as a failure instead of being logged as a success.
it('Save', function () {
  // Writes one output file and logs the outcome; a failure does not stop the other files.
  function writeOutput(filePath, content) {
    const fileName = filePath.split('/').pop();
    try {
      fs.writeFileSync(filePath, content);
      console.log("Successfully saved " + fileName);
    } catch (err) {
      console.log("Failed to save " + fileName, err);
    }
  }

  // Extension -> serializer, in the order the files are written.
  const serializers = [
    ['csv', (list) => getCSV(list).join('\n')],
    ['json', (list) => JSON.stringify(list, null, 2)],
    ['txt', (list) => getTXT(list).join('\n')],
    ['yaml', (list) => YAML.stringify(list)],
    ['xml', (list) => js2xml.js2xml(list, { compact: true, ignoreComment: true, spaces: 4 })],
  ];

  // Writes `list` once per supported format as `<basePath>.<extension>`.
  function writeAllFormats(basePath, list) {
    serializers.forEach(([extension, serialize]) => {
      writeOutput(basePath + '.' + extension, serialize(list));
    });
  }

  // The per-type and raw outputs live in generated/; create it on a fresh checkout.
  try {
    if (!fs.existsSync('generated')) fs.mkdirSync('generated');
  } catch (err) {
    console.log("Failed to create generated directory", err);
  }

  writeOutput('generated/raw.json', JSON.stringify(proxies, null, 2));

  // Flatten every source into one list, persisting each proxy to the database on the way.
  const jsonArray = [];
  Object.keys(proxies.bySource).forEach((source) => {
    const list = proxies.bySource[source];
    if (!Array.isArray(list)) return;
    list.forEach((item) => {
      jsonArray.push(item);
      saveIntoDb(item);
    });
  });

  writeAllFormats('proxies', jsonArray);

  ['socks5', 'socks4', 'http'].forEach((type) => {
    writeAllFormats('generated/' + type + '_proxies', proxies.byType[type]);
  });

  // Only advertise a count once a meaningful number of proxies was collected.
  if (jsonArray.length > 100) {
    const startMarker = '<!-- dynamic-count-start -->';
    const endMarker = '<!-- dynamic-count-end -->';
    const README = fs.readFileSync('README.md', 'utf-8');
    const startIndex = README.indexOf(startMarker);
    const endIndex = README.indexOf(endMarker);

    if (startIndex === -1 || endIndex < startIndex + startMarker.length) {
      // Without both markers (in order) there is no safe place to write the count.
      console.log("README.md dynamic-count markers not found; proxy count not updated");
    } else {
      // Count is rounded down to the nearest hundred.
      const countLine = '## Current Proxy Count: ' + (Math.floor(jsonArray.length / 100) * 100) + '+ 🚀';
      // Replace exactly what sits between the markers, keeping the markers on their own lines.
      const updated = README.slice(0, startIndex + startMarker.length)
        + '\n' + countLine + '\n'
        + README.slice(endIndex);
      fs.writeFileSync('README.md', updated);
    }
  }
});
|
|
/**
 * Maps a scraped proxy type label to the bucket key it is filed under.
 *
 * The SOCKS keys are returned without leading whitespace so they match the
 * `socks5` / `socks4` bucket names exactly.
 *
 * @param {string} proxyType - Type label as scraped, e.g. 'HTTP/HTTPS' or 'SOCKS5'.
 * @returns {string} 'http', 'socks4' or 'socks5' for the known labels; any other
 *   label is returned unchanged and therefore becomes its own bucket.
 */
function getTypeMapping(proxyType) {
  switch (proxyType) {
    case 'HTTP/HTTPS':
    case 'unknown':
    case 'HTTPS':
    case 'HTTP':
      return 'http';
    case 'SOCKS5,SOCKS4':
    case 'SOCKS5':
      return 'socks5';
    case 'SOCKS4':
      return 'socks4';
    default:
      return proxyType;
  }
}
|
|
/**
 * Renders proxies as CSV rows in the column order ip, port, country, anonymity, type.
 *
 * A field containing a comma, double quote or line break is wrapped in double quotes
 * with embedded quotes doubled, so such values (e.g. a country name with a comma) no
 * longer shift the columns. Fields without those characters are emitted unchanged.
 *
 * @param {Array<Object>} jsonArray - Proxy records with ip, port, country, anonymity, type.
 * @returns {string[]} One CSV line per record, without a header row.
 */
function getCSV(jsonArray) {
  const escapeField = (value) => {
    // Missing values become empty fields, as Array.prototype.join would render them.
    if (value === undefined || value === null) return '';
    const text = String(value);
    if (!/[",\r\n]/.test(text)) return text;
    return '"' + text.replace(/"/g, '""') + '"';
  };

  return jsonArray.map((item) => {
    const fields = [item.ip, item.port, item.country, item.anonymity, item.type];
    return fields.map(escapeField).join(',');
  });
}
|
|
/**
 * Renders proxies as plain "ip:port" lines.
 *
 * @param {Array<Object>} jsonArray - Proxy records exposing `ip` and `port`.
 * @returns {string[]} One "ip:port" string per record, in input order.
 */
function getTXT(jsonArray) {
  return jsonArray.map((entry) => {
    const address = [entry.ip, entry.port];
    return address.join(':');
  });
}
|
|
/**
 * Inserts one proxy into `proxies_tb`; does nothing unless `config.SAVE_TO_DB` is truthy.
 *
 * The values come from scraped web pages, so they are passed as bound parameters
 * instead of being concatenated into the SQL text.
 * NOTE(review): assumes `db.query` accepts (sql, values, callback) with `?` placeholders,
 * as the mysql/mysql2 drivers do — confirm against ./db.
 *
 * @param {Object} proxy - Record with ip, port, country, type and anonymity.
 * @returns {void} Query errors are logged, not thrown.
 */
function saveIntoDb(proxy) {
  if (!config.SAVE_TO_DB) return;
  const sql = "INSERT into proxies_tb (`proxy`,`port`,`country`,`type`,`anonymity`) VALUES(?,?,?,?,?)";
  const values = [proxy.ip, proxy.port, proxy.country, proxy.type, proxy.anonymity];
  db.query(sql, values, (err) => {
    if (err) console.log(err);
  });
}
| }); |