'use strict';

const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');

// Entry points of the crawl. Only links whose origin matches one of these
// sites are followed.
const startUrl = [
  'https://backyard-design.co.uk',
  'https://backyard-design.com',
  'https://backyarddesign.be',
  'https://backyarddesign.co.nz',
  'https://backyarddesign.co.uk',
  'https://backyarddesign.co.za',
  'https://backyarddesign.es',
  'https://backyarddesign.fr',
  'https://backyarddesign.it',
  'https://backyarddesign.mx',
  'https://backyarddesign.nl',
  'https://backyarddesign.se',
  'https://backyarddesignaus.com',
  'https://backyarddesignusa.com',
  'https://backyardstreet.com',
  'https://backyardstreet.de',
];

// Directory (next to this script) where fetched pages are written.
const cacheFolder = path.join(__dirname, 'cache');

// Origins (scheme + host + port) of the start sites, used to filter links.
const allowedOrigins = new Set(startUrl.map((url) => new URL(url).origin));

// Normalized URLs that crawl() has already started processing.
const visitedUrls = new Set();

// Work queue of URLs to crawl; crawl() appends newly discovered links to it.
const pageUrls = [...startUrl];

// Normalized URLs that have ever been put on the queue, so the same link
// found on many pages is only enqueued once.
const queuedUrls = new Set(startUrl.map(normalizeUrl));

/**
 * Returns a canonical form of `url` for de-duplication: the parsed href with
 * any `#fragment` removed. Strings that cannot be parsed as an absolute URL
 * are returned unchanged.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} Canonical URL string.
 */
function normalizeUrl(url) {
  try {
    const parsed = new URL(url);
    parsed.hash = '';
    return parsed.href;
  } catch (error) {
    return url;
  }
}

/**
 * Collects the links on a page that point at one of the start sites.
 *
 * Each `href` is resolved against `baseUrl`, so relative links are included.
 * Fragments are stripped and duplicates removed.
 *
 * @param {Function} $ - Cheerio instance loaded with the page HTML.
 * @param {string} baseUrl - URL the page was requested from.
 * @returns {string[]} Absolute, de-duplicated same-site URLs.
 */
function extractLinks($, baseUrl) {
  const found = new Set();
  $('a').each((index, element) => {
    const href = $(element).attr('href');
    if (!href) {
      return;
    }
    let resolved;
    try {
      resolved = new URL(href, baseUrl);
    } catch (error) {
      // Malformed href: skip this anchor and keep going.
      return;
    }
    // Non-web schemes (mailto:, tel:, javascript:) have an opaque origin and
    // are rejected here together with links to other sites.
    if (!allowedOrigins.has(resolved.origin)) {
      return;
    }
    resolved.hash = '';
    found.add(resolved.href);
  });
  return [...found];
}

/**
 * Fetches one page, enqueues the same-site links found on it into `pageUrls`,
 * and writes the page into the cache folder. Does nothing when the URL has
 * already been visited. Request and parse failures are logged, not thrown.
 *
 * @param {string} url - Absolute URL of the page to crawl.
 * @returns {Promise<void>}
 */
async function crawl(url) {
  const key = normalizeUrl(url);
  if (visitedUrls.has(key)) {
    return;
  }
  // Mark before the request so a failing URL, or one requested again while
  // the fetch is in flight, is not processed a second time.
  visitedUrls.add(key);

  console.log('Visid URL:', url);
  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);
    const links = extractLinks($, url);
    console.log('Links found:', links);

    for (const link of links) {
      if (!visitedUrls.has(link) && !queuedUrls.has(link)) {
        queuedUrls.add(link);
        pageUrls.push(link);
      }
    }

    // Reuse the body that was just downloaded instead of fetching it again.
    await cachePage(url, response.data);
  } catch (error) {
    console.error('Error from:', url, error.message);
  }
}

/**
 * Writes a page into the cache folder. The file name is the lower-cased URL
 * with every non-alphanumeric character replaced by `_`, plus `.html`.
 * Failures are logged, not thrown.
 *
 * @param {string} url - URL of the page; also determines the file name.
 * @param {string} [pageContent] - Already-downloaded page body. When omitted
 *   the page is fetched from `url`.
 * @returns {Promise<void>}
 */
async function cachePage(url, pageContent) {
  try {
    let content = pageContent;
    if (content === undefined) {
      const response = await axios.get(url);
      content = response.data;
    }
    const fileName = url.replace(/[^a-z0-9]/gi, '_').toLowerCase() + '.html';
    const filePath = path.join(cacheFolder, fileName);
    await fs.promises.writeFile(filePath, content);
    console.log(`Visited ${url} scanned.`);
  } catch (error) {
    console.error(`Chache Error ${url}:`, error);
  }
}

/**
 * Drains the `pageUrls` queue one page at a time. The queue grows while it
 * is being processed, so the length is re-read on every iteration.
 *
 * @returns {Promise<void>}
 */
async function main() {
  for (let index = 0; index < pageUrls.length; index += 1) {
    await crawl(pageUrls[index]);
  }
}

// `recursive: true` makes this a no-op when the folder already exists.
fs.mkdirSync(cacheFolder, { recursive: true });

main().catch((error) => {
  console.error('Crawl failed:', error);
  process.exitCode = 1;
});