Sunday, 10 May 2020

request inside loop

Hi everyone, I have a problem where I request a url and then for each Url on that page that meets a specification I then call a function to request that Url and search the html. The problem is it does everything else I ask before doing all the requests together at the end. I tried using the async package for the forEach loops but to be honest I don't really know what I'm doing. Any help would be greatly appreciated​const request = require('request') const cheerio = require('cheerio') var async = require("async"); const countryRegions = require('./scrapeData') let states = ((Object.values(countryRegions.states2))) const addNewObjects = () => { request('https://www.usbg.org/help/covid-19-state-by-state', (error, response, html) => { if (!error && response.statusCode == 200) { const $ = cheerio.load(html) links = $('a'); // get all hyperlinks from page let arrayLinks = ($(links)) //store all hyperlinks in array async.forEachOf(arrayLinks, function (link, i) { //for each item in array check if the link has a defined href if (link.attribs.href === undefined) { console.log("undefined here") //console.log(link) } else if ((link.attribs.href).includes("https")) { // if the href is defined and includes "https" set some values let linkName = ($(link).text()); let linkUrl = (link.attribs.href); let fullLink = (linkName + " " + linkUrl); async.forEachOf(states, function (state, e) { //for each US state if ((fullLink.includes(state.name))) { // check if fullLink contains the state name console.log(linkName) //if it does console.log the state name verifyUrl(linkUrl) // and verify the URL } }) } }) } }) } function verifyUrl(linkUrl) { // verifyUrl actually requests the current Link is supposed to check things inside the html request(linkUrl, (error, response, html) => { if (!error && response.statusCode == 200) { console.log(html) } }) } addNewObjects()

Submitted May 10, 2020 at 09:32PM by cazzuey

No comments:

Post a Comment