Detta kanske ger lite inspiration; jag använder något liknande själv. Helt otestat. Lycka till!
Kod:
var request = require('request');
var url = require('url');
var iconv = require('iconv-lite');
var charset = require('charset');
var jschardet = require("jschardet")
(function(){
var links = ['http://www.aftonbladet.se/', 'http://wn.se/', 'https://wordpress.org/news/', 'http://expressen.se/'];
for(var i = 0; i < links.length - 1; i++){
var info = {
url: links[i];
};
requestGET(info, function(html){
if(html.indexOf('/wp-content/') > -1)
{
console.log(links[i] + " innehåller /wp-content/");
}
});
}
})();
/**
 * Probes `info.url` with a HEAD request and invokes `callback(info)` only when
 * the response has status 200 and a `content-type` header containing
 * `text/html`.
 *
 * On any other outcome (request error, other status, missing or non-HTML
 * content type) the function returns silently and `callback` is never called.
 *
 * Side effects on `info`:
 *  - `info.cookieJar` is set to a new `request.jar()` if it was not already
 *    set, and is used for the HEAD request.
 *  - `info.uri` is set to `response.request.uri` on success.
 *
 * @param {{url: string, cookieJar?: Object}} info - Request description.
 * @param {function(Object): void} callback - Called with the same `info` object.
 */
function requestHEAD(info, callback) {
    // The jar has to exist BEFORE the request is sent; otherwise the HEAD runs
    // without cookie handling and a jar supplied by the caller would be lost.
    if (!info.cookieJar) {
        info.cookieJar = request.jar();
    }

    var options = {
        uri: info.url,
        method: 'HEAD',
        jar: info.cookieJar
    };

    request(options, function (error, response) {
        if (error || response.statusCode !== 200) {
            return;
        }

        var contentType = response.headers['content-type'];
        if (!contentType || contentType.toLowerCase().indexOf('text/html') === -1) {
            return;
        }

        info.uri = response.request.uri;
        callback(info);
    });
}
/**
 * Downloads `info.url` as HTML and passes the decoded text to `callback`.
 *
 * The URL is first checked with requestHEAD; the GET is only issued if that
 * check invokes its callback. The body is fetched as raw bytes
 * (`encoding: null`) and always decoded to a string, using the encoding
 * reported by `charset(...)` or, failing that, by `jschardet.detect(...)`.
 * If no encoding is detected, or iconv-lite does not support the detected
 * name, UTF-8 is used.
 *
 * On a request error, an empty body, a non-200 status, or a missing/non-HTML
 * content type the function returns silently and `callback` is never called.
 *
 * @param {{url: string, cookieJar?: Object}} info - Request description.
 * @param {function(Object, string): void} callback - Called as
 *     `callback(info, html)` with the decoded page text.
 */
function requestGET(info, callback) {
    requestHEAD(info, function (checkedInfo) {
        var options = {
            uri: checkedInfo.url,
            method: 'GET',
            jar: checkedInfo.cookieJar,
            // null makes `request` hand back the raw bytes so the charset can
            // be detected before decoding.
            encoding: null
        };

        request(options, function (error, response, html) {
            if (error || !html || response.statusCode !== 200) {
                return;
            }

            var contentType = response.headers['content-type'];
            if (!contentType || contentType.toLowerCase().indexOf('text/html') === -1) {
                return;
            }

            // Declared locally: assigning to an undeclared `enc` would create
            // a global (and throws in strict mode).
            var enc = charset(response.headers, html) || jschardet.detect(html).encoding;
            enc = enc ? enc.toLowerCase() : 'utf-8';

            // An unsupported name would make iconv.decode throw inside this
            // asynchronous callback, so fall back to UTF-8 instead.
            if (!iconv.encodingExists(enc)) {
                enc = 'utf-8';
            }

            // Always decode, so the callback receives a string in every case.
            callback(checkedInfo, iconv.decode(html, enc));
        });
    });
}