// Original question (preserved from the post):
// "I'm trying to build a web scraper using crawlera. I'm a bit new to this and
//  so I tried creating a wrapper function. [...] The operation above sometimes
//  gets a proxy error timeout. So when that happens I want to pretty much retry
//  the request again until a success happens and then continue down the .then
//  chaining, but the retry line is not achieving this."

const request = require('request');

// Simple Crawlera proxy wrapper with a concurrency cap.
// `count` tracks in-flight requests; `limit` is the maximum allowed at once.
const crawlera = {
  apikey: 'asdfasdfasdfasdf',
  limit: 10,
  count: 0,
};

/**
 * Fetch `url` through the Crawlera proxy.
 *
 * @param {string} url - The URL to fetch.
 * @returns {Promise<{html: string, url: string}|string>} Resolves with the
 *   page HTML and the requested URL, retrying transparently on proxy errors.
 *   Resolves with the string 'concurrency count maxed' when `limit` in-flight
 *   requests are already active (kept for backward compatibility with callers).
 */
crawlera.get = (url) => {
  console.log(`count: ${crawlera.count} limit: ${crawlera.limit}`);

  // Guard clause: refuse new work once the concurrency cap is reached.
  if (crawlera.count >= crawlera.limit) {
    return Promise.resolve('concurrency count maxed');
  }

  // Reserve a slot BEFORE issuing the request, and release it in the
  // callback. (The original incremented after calling request(), which only
  // worked because the Promise executor runs synchronously.)
  crawlera.count += 1;

  return new Promise((resolve) => {
    const options = {
      url,
      proxy: `http://${crawlera.apikey}:@proxy.crawlera.com:8010`,
      rejectUnauthorized: false,
    };

    request(options, (err, res, html) => {
      crawlera.count -= 1;

      if (err) {
        console.log('Proxy Error. Retrying..');
        // KEY FIX: resolving with the retry's promise makes this promise
        // adopt its eventual outcome, so the caller's .then() chain fires
        // once a retry finally succeeds. The original called
        // crawlera.get(url) and threw the returned promise away, leaving
        // the outer promise pending forever.
        resolve(crawlera.get(url));
      } else {
        console.log('Crawlera: ' + url);
        // Use a local object — the original assigned to an implicit
        // global `results`.
        resolve({ html, url: options.url });
      }
    });
  });
};

module.exports = crawlera;

// Usage in the main app:
//   crawlera.get('http://someurl').then(res => console.log('do something with the result.'))
Submitted May 02, 2017 at 09:53PM by Midicide
No comments:
Post a Comment