// Crawler script: fetches each start URL, collects links to the start sites, and caches each fetched page as an .html file in ./cache.
// Node.js built-in modules.
const fs = require('fs');
const path = require('path');

// Third-party modules: HTTP client and HTML parser.
const axios = require('axios');
const cheerio = require('cheerio');
/**
 * Entry points of the crawl, one per site.
 * Note: despite the singular name this is an ARRAY of URLs, so it cannot be
 * passed directly to string APIs such as `String.prototype.startsWith`.
 */
const startUrl = [
  'https://backyard-design.co.uk',
  'https://backyard-design.com',
  'https://backyarddesign.be',
  'https://backyarddesign.co.nz',
  'https://backyarddesign.co.uk',
  'https://backyarddesign.co.za',
  'https://backyarddesign.es',
  'https://backyarddesign.fr',
  'https://backyarddesign.it',
  'https://backyarddesign.mx',
  'https://backyarddesign.nl',
  'https://backyarddesign.se',
  'https://backyarddesignaus.com',
  'https://backyarddesignusa.com',
  'https://backyardstreet.com',
  'https://backyardstreet.de',
];

/** Directory (next to this script) that cached pages are written to. */
const cacheFolder = path.join(__dirname, 'cache');

/** URLs that have already been fetched and parsed. */
const visitedUrls = new Set();

/** Known page URLs: a copy of the start URLs, extended as links are discovered. */
const pageUrls = [...startUrl];
/**
 * Resolves an anchor's `href` against the page it was found on and returns the
 * absolute URL (without fragment) when it points at one of the allowed origins.
 *
 * @param {string|undefined} href - Raw `href` attribute value, possibly relative or missing.
 * @param {string} pageUrl - Absolute URL of the page containing the anchor.
 * @param {Set<string>} allowedOrigins - Origins that count as "our" sites.
 * @returns {string|null} Normalized absolute URL, or null when the link is missing,
 *   malformed, or points at another origin (including mailto:/javascript: links).
 */
function resolveSiteLink(href, pageUrl, allowedOrigins) {
  if (!href) {
    return null;
  }

  let target;
  try {
    target = new URL(href, pageUrl);
  } catch (error) {
    // Malformed href: not a link we can follow.
    return null;
  }

  if (!allowedOrigins.has(target.origin)) {
    return null;
  }

  // "#section" variants address the same document; drop the fragment so they de-duplicate.
  target.hash = '';
  return target.href;
}

/**
 * Fetches `url`, collects the links on that page which point at one of the
 * start sites, records the not-yet-visited ones in `pageUrls`, marks `url` as
 * visited and caches the page via `cachePage`.
 *
 * Does nothing when `url` is already in `visitedUrls`. Discovered links are
 * only queued in `pageUrls`; this function does not follow them itself.
 * Request or parsing failures are logged and swallowed, so the returned
 * promise does not reject because of them.
 *
 * @param {string} url - Absolute URL of the page to fetch.
 * @returns {Promise<void>}
 */
async function crawl(url) {
  if (visitedUrls.has(url)) {
    return;
  }

  console.log('Visid URL:', url);

  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);

    // `startUrl` is an array, so `link.startsWith(startUrl)` would compare against
    // the comma-joined list and never match. Compare origins instead, which also
    // rejects look-alike hosts such as "https://site.tld.evil.example".
    const allowedOrigins = new Set(startUrl.map((base) => new URL(base).origin));

    // A Set keeps insertion order and removes links repeated on the same page.
    const found = new Set();
    $('a').each((index, element) => {
      const target = resolveSiteLink($(element).attr('href'), url, allowedOrigins);
      if (target) {
        found.add(target);
      }
    });
    const links = [...found];

    console.log('Links found:', links);

    visitedUrls.add(url);

    for (const link of links) {
      if (!visitedUrls.has(link)) {
        pageUrls.push(link);
      }
    }

    // NOTE: cachePage issues its own GET for `url`, so the page is downloaded a second time here.
    await cachePage(url);
  } catch (error) {
    // Include the reason; logging the URL alone makes failures impossible to diagnose.
    console.error('Error from:', url, error.message);
  }
}
/**
 * Downloads `url` and stores the response body as a file in `cacheFolder`.
 *
 * The file name is the URL with every character outside [a-z0-9]
 * (case-insensitive) replaced by `_`, lower-cased, plus the `.html` suffix.
 * Download and write errors are logged and swallowed, so the returned promise
 * does not reject because of them.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<void>}
 */
async function cachePage(url) {
  try {
    const response = await axios.get(url);
    const pageContent = response.data;

    const fileName = `${url.replace(/[^a-z0-9]/gi, '_').toLowerCase()}.html`;
    const filePath = path.join(cacheFolder, fileName);

    // Asynchronous write: a synchronous write here would block the event loop
    // while other crawls are in flight.
    await fs.promises.writeFile(filePath, pageContent);

    console.log(`Visited ${url} scanned.`);
  } catch (error) {
    console.error(`Chache Error ${url}:`, error);
  }
}
// Make sure the cache directory exists before any page is written to it.
// `recursive: true` makes this a no-op when the directory is already there,
// which avoids the check-then-create race of existsSync + mkdirSync.
fs.mkdirSync(cacheFolder, { recursive: true });

// TODO: `pageUrls` collects discovered links, but nothing visible in this file reads it yet.

// Start one crawl per start URL. The crawls run concurrently and are not
// awaited here; crawl() logs its own request and parsing failures.
startUrl.forEach((url) => {
  crawl(url);
});