CrowBotForDumps
This commit is contained in:
89
crowlBot.js
Normal file
89
crowlBot.js
Normal file
@@ -0,0 +1,89 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Entry-point URLs for the crawler, one per storefront domain.
 * Every entry is a bare origin (scheme + host, no trailing slash).
 */
const startUrl = [
  'https://backyard-design.co.uk',
  'https://backyard-design.com',
  'https://backyarddesign.be',
  'https://backyarddesign.co.nz',
  'https://backyarddesign.co.uk',
  'https://backyarddesign.co.za',
  'https://backyarddesign.es',
  'https://backyarddesign.fr',
  'https://backyarddesign.it',
  'https://backyarddesign.mx',
  'https://backyarddesign.nl',
  'https://backyarddesign.se',
  'https://backyarddesignaus.com',
  'https://backyarddesignusa.com',
  'https://backyardstreet.com',
  'https://backyardstreet.de',
];
|
||||
// URLs that crawl() has already processed; used to skip repeat visits.
const visitedUrls = new Set();

// Work list of page URLs, seeded with a copy of the start URLs so that
// appending discovered links never mutates `startUrl` itself.
const pageUrls = Array.from(startUrl);

// Directory next to this script into which cached pages are written.
const cacheFolder = path.join(__dirname, 'cache');
|
||||
|
||||
/**
 * Turns a raw `href` into an absolute, fragment-free URL if it points at one
 * of the allowed origins.
 *
 * @param {string|undefined} href - Raw `href` attribute value; may be relative.
 * @param {string} baseUrl - URL of the page the link was found on.
 * @param {Set<string>} allowedOrigins - Origins the crawler is allowed to follow.
 * @returns {string|null} The normalized absolute URL, or `null` when the link
 *   is missing, malformed, or points outside the allowed origins.
 */
function resolveInternalLink(href, baseUrl, allowedOrigins) {
  if (!href) {
    return null;
  }

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch (error) {
    return null; // malformed href: nothing to follow
  }

  // `mailto:`, `tel:`, `javascript:` etc. have the opaque origin "null" and
  // are rejected here together with links to foreign sites.
  if (!allowedOrigins.has(resolved.origin)) {
    return null;
  }

  resolved.hash = ''; // "#section" variants are the same document
  return resolved.href;
}

/**
 * Downloads `url`, queues every not-yet-visited link that stays on one of the
 * `startUrl` origins into `pageUrls`, and hands the page to `cachePage`.
 *
 * Does nothing if `url` was already visited. Never rejects: download and
 * parsing errors are logged and swallowed so one bad page cannot stop a crawl.
 *
 * @param {string} url - Absolute URL of the page to crawl.
 * @returns {Promise<void>}
 */
async function crawl(url) {
  if (visitedUrls.has(url)) {
    return;
  }

  // Mark as visited before the first `await` so that concurrent crawl() calls
  // for the same URL cannot both download it, and a failing URL is not
  // retried every time another page links to it.
  visitedUrls.add(url);
  console.log('Visid URL:', url);

  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);

    // `startUrl` is an array, so links are matched against the set of its
    // origins (passing the array itself to String#startsWith never matches).
    const allowedOrigins = new Set(startUrl.map((start) => new URL(start).origin));

    // A Set de-duplicates links that appear several times on the same page.
    const links = new Set();
    $('a').each((index, element) => {
      const link = resolveInternalLink($(element).attr('href'), url, allowedOrigins);
      if (link !== null) {
        links.add(link);
      }
    });

    console.log('Links found:', [...links]);

    for (const link of links) {
      if (!visitedUrls.has(link)) {
        pageUrls.push(link);
      }
    }

    // The already-downloaded body is passed as a second argument so that a
    // cachePage accepting content can skip a second download; a cachePage
    // that only takes `url` simply ignores it.
    await cachePage(url, response.data);
  } catch (error) {
    console.error('Error from:', url, error.message);
  }
}
|
||||
|
||||
/**
 * Writes the body of `url` to a file inside `cacheFolder`.
 *
 * The file name is the lower-cased URL with every character outside
 * `[a-z0-9]` replaced by `_`, plus a `.html` suffix.
 *
 * Never rejects: download and write errors are logged and swallowed.
 *
 * @param {string} url - Absolute URL of the page to cache.
 * @param {string|Buffer|object} [content] - Already-downloaded page body.
 *   When omitted, the page is fetched with `axios.get(url)`.
 * @returns {Promise<void>}
 */
async function cachePage(url, content) {
  try {
    let pageContent = content;
    if (pageContent === undefined) {
      // No body supplied by the caller: download it here.
      const response = await axios.get(url);
      pageContent = response.data;
    }

    // axios parses JSON responses into objects, which cannot be written
    // directly; serialize anything that is not text or binary data.
    if (typeof pageContent !== 'string' && !ArrayBuffer.isView(pageContent)) {
      pageContent = pageContent == null ? '' : JSON.stringify(pageContent);
    }

    const fileName = `${url.replace(/[^a-z0-9]/gi, '_').toLowerCase()}.html`;
    const filePath = path.join(cacheFolder, fileName);

    // Asynchronous write so other in-flight requests are not blocked.
    await fs.promises.writeFile(filePath, pageContent);

    console.log(`Visited ${url} scanned.`);
  } catch (error) {
    console.error(`Chache Error ${url}:`, error);
  }
}
|
||||
|
||||
// Make sure the cache directory exists before any page is written.
// `recursive: true` makes this a no-op when the directory is already there,
// which avoids the race between a separate existence check and the create.
fs.mkdirSync(cacheFolder, { recursive: true });
|
||||
|
||||
// pageUrls.forEach((url) => {
|
||||
// cachePage(url);
|
||||
// });
|
||||
|
||||
/**
 * Script entry point: crawls every URL in the `pageUrls` queue, one at a time.
 *
 * `pageUrls` starts as a copy of `startUrl`, and crawl() may append newly
 * discovered links to it while the loop runs, so the length is re-read on
 * every iteration instead of iterating over a snapshot.
 *
 * @returns {Promise<void>}
 */
async function main() {
  for (let index = 0; index < pageUrls.length; index += 1) {
    // Awaited so that no promise is left floating and pages are requested
    // sequentially rather than all at once.
    await crawl(pageUrls[index]);
  }
}

main().catch((error) => {
  console.error('Crawler aborted:', error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user