CrowBotForDumps

This commit is contained in:
2023-10-02 18:23:14 +02:00
commit 1f42a5d445
750 changed files with 135955 additions and 0 deletions

89
crowlBot.js Normal file
View File

@@ -0,0 +1,89 @@
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');
// Entry-point URLs of the crawl. Each entry is handed to crawl() at startup,
// and crawl() also consults this list when deciding which links to keep.
const startUrl = [
'https://backyard-design.co.uk',
'https://backyard-design.com',
'https://backyarddesign.be',
'https://backyarddesign.co.nz',
'https://backyarddesign.co.uk',
'https://backyarddesign.co.za',
'https://backyarddesign.es',
'https://backyarddesign.fr',
'https://backyarddesign.it',
'https://backyarddesign.mx',
'https://backyarddesign.nl',
'https://backyarddesign.se',
'https://backyarddesignaus.com',
'https://backyarddesignusa.com',
'https://backyardstreet.com',
'https://backyardstreet.de',
];
// Directory (next to this script) into which cachePage() writes the downloaded pages.
const cacheFolder = path.join(__dirname, 'cache');
// URLs that crawl() has already fetched; used to skip repeated work.
const visitedUrls = new Set();
// List of page URLs, seeded with a copy of the start URLs. crawl() appends the
// links it discovers; no active code in the visible part of the file reads it back.
const pageUrls = [...startUrl];
/**
 * Turns an anchor's `href` into an absolute URL and keeps it only when it
 * belongs to one of the given sites.
 *
 * @param {string|undefined} href - Raw `href` attribute value; may be missing or relative.
 * @param {string} pageUrl - URL of the page the anchor was found on (base for relative hrefs).
 * @param {string[]} bases - Site URL prefixes that count as internal.
 * @returns {string|null} Absolute URL without its fragment, or `null` when the
 *   href is missing, cannot be parsed, or lies outside every base.
 */
function resolveInternalLink(href, pageUrl, bases) {
  if (!href) {
    return null;
  }
  let resolved;
  try {
    // Relative hrefs ("/shop", "item.html") are resolved against the page URL.
    resolved = new URL(href, pageUrl);
  } catch (error) {
    // An href that cannot be parsed cannot be fetched either.
    return null;
  }
  // A fragment only addresses a position inside the same document.
  resolved.hash = '';
  const absolute = resolved.href;
  const isInternal = bases.some((base) => {
    // Require a path boundary after the base so that a look-alike host such as
    // "https://site.example.evil.test" is not mistaken for "https://site.example".
    const prefix = base.endsWith('/') ? base : `${base}/`;
    return absolute === base || absolute.startsWith(prefix);
  });
  return isInternal ? absolute : null;
}

/**
 * Downloads a page, collects the links on it that point into one of the
 * `startUrl` sites, appends the not-yet-visited ones to `pageUrls`, and then
 * hands the page to `cachePage` for storage.
 *
 * URLs already contained in `visitedUrls` are skipped. Failures are logged
 * and never propagate to the caller.
 *
 * @param {string} url - Absolute URL of the page to fetch.
 * @returns {Promise<void>} Settles once the page has been processed, skipped or has failed.
 */
async function crawl(url) {
  if (visitedUrls.has(url)) {
    return;
  }
  console.log('Visid URL:', url);
  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);
    // A Set collapses hrefs that occur several times on the same page.
    const found = new Set();
    $('a').each((index, element) => {
      // `startUrl` is an array, so every base has to be tested individually;
      // passing the array itself to String.prototype.startsWith never matches.
      const link = resolveInternalLink($(element).attr('href'), url, startUrl);
      if (link) {
        found.add(link);
      }
    });
    const links = [...found];
    console.log('Links found:', links);
    visitedUrls.add(url);
    for (const link of links) {
      if (!visitedUrls.has(link)) {
        pageUrls.push(link);
      }
    }
    await cachePage(url);
  } catch (error) {
    // Include the reason so a failed page can be diagnosed from the log.
    console.error('Error from:', url, error instanceof Error ? error.message : error);
  }
}
/**
 * Downloads `url` and writes the response body to an `.html` file inside
 * `cacheFolder`.
 *
 * The file name is the lower-cased URL with every character other than a
 * letter or digit replaced by `_`. Any failure (download or write) is logged
 * and swallowed, so the returned promise does not reject because of it.
 *
 * @param {string} url - Absolute URL of the page to store.
 * @returns {Promise<void>} Settles after the file was written or the error was logged.
 */
async function cachePage(url) {
  try {
    const { data: html } = await axios.get(url);
    // Sanitise the URL into something usable as a single file name.
    const safeName = `${url.replace(/[^a-z0-9]/gi, '_').toLowerCase()}.html`;
    fs.writeFileSync(path.join(cacheFolder, safeName), html);
    console.log(`Visited ${url} scanned.`);
  } catch (err) {
    console.error(`Chache Error ${url}:`, err);
  }
}
// Make sure the cache directory exists before any page is written. With
// `recursive: true` the call is a no-op when the directory is already there,
// which avoids a separate check-then-create step.
fs.mkdirSync(cacheFolder, { recursive: true });

// Crawl all start URLs concurrently. Only these entry pages are fetched here;
// links that crawl() appends to `pageUrls` are not followed by this script.
// crawl() reports its own failures, the handler below is a last-resort net so
// that no rejection goes unobserved.
Promise.all(startUrl.map((url) => crawl(url))).catch((error) => {
  console.error('Crawl failed:', error);
});