There is a memory leak when reusing a PhantomJS instance, averaging about 1.4 MB per page load on Debian 7 (for example, loading 200 pages results in 280 MB of memory use). I have reproduced this on Debian 7 x64 and Windows 8 x64.
I have included a PhantomJS script that can be used to reproduce the memory leak. I am following what seems to be the "best practice" of invoking page.close() after the load is done, but that does not resolve the issue.
FYI, in addition to my repro script, I have tried various combinations of tests, such as:
- close after page.onLoadFinished() has completed (not during)
- reusing the same page object multiple times
- not calling page.stop()
None of those had any positive impact on the memory leak problem.
Here's my repro script; paste it into a file such as memoryleak.js and run it from the command prompt.
NSFW: The test domains are the top 100 domains by traffic, and that includes porn sites, so if you have some corporate IT network traffic monitoring going on, remove the naughty sites from the testUrls array.
/// memoryleak.js
/**
 * Substitutes positional placeholders in a template string.
 *
 * Each "{N}" in `toFormat` (N = decimal digits) is replaced by the string form
 * of the N-th extra argument passed after the template. A placeholder whose
 * argument is missing or `undefined` is left in the output unchanged.
 *
 * @param {string} toFormat - Template containing "{0}", "{1}", ... placeholders.
 * @param {...*} values - Substitution values, addressed by zero-based position.
 * @returns {string} The template with every resolvable placeholder replaced.
 */
function format(toFormat) {
    // Everything after the template itself is a substitution value.
    var args = Array.prototype.slice.call(arguments, 1);
    return toFormat.replace(/\{(\d+)\}/g, function (match, number) {
        // String() instead of .toString(): a null value must print as "null"
        // rather than throw a TypeError in the middle of a log statement.
        return typeof args[number] !== "undefined" ? String(args[number]) : match;
    });
}
;
var system = require("system");
/**
 * Global PhantomJS error handler: logs the error message together with a
 * readable stack trace, then terminates the process with exit code -1.
 *
 * @param {string} msg - The error message.
 * @param {Array} trace - Stack frames; each frame is an object carrying
 *     `file` (or `sourceURL`), `line` and optionally `function`, so it has to
 *     be formatted field by field (plain concatenation prints "[object Object]").
 */
phantom.onError = function (msg, trace) {
    try {
        var frames = [];
        if (trace && trace.length) {
            for (var i = 0; i < trace.length; i++) {
                var frame = trace[i];
                frames.push((frame.file || frame.sourceURL) + ":" + frame.line +
                    (frame["function"] ? " (in function " + frame["function"] + ")" : ""));
            }
        }
        console.log("phantom encountered an error. exiting... " + "msg=\"" + msg + "\"" + " trace=\"" + frames.join(" -> ") + "\"/>");
    } finally {
        // Exit even if building or printing the message itself fails.
        phantom.exit(-1);
    }
};
/**
 * Bare host names that the test loop loads, one page per test.
 *
 * _tryNextText() prefixes the chosen entry with "http://www." and picks it by
 * `_testsRemaining % testUrls.length`. The startup code shortens this array
 * when fewer tests than entries are configured.
 *
 * NSFW: per the author, these are the top 100 domains by traffic, and that
 * includes porn sites, so if you have some corporate IT network traffic
 * monitoring going on, remove the naughty sites from the testUrls array.
 */
var testUrls = [
"google.com",
"facebook.com",
"youtube.com",
"yahoo.com",
"amazon.com",
"bing.com",
"ebay.com",
"wikipedia.org",
"craigslist.org",
"linkedin.com",
"live.com",
"twitter.com",
"blogspot.com",
"aol.com",
"go.com",
"pinterest.com",
"msn.com",
"tumblr.com",
"cnn.com",
"ask.com",
"huffingtonpost.com",
"netflix.com",
"paypal.com",
"weather.com",
"conduit.com",
"espn.go.com",
"instagram.com",
"wordpress.com",
"bankofamerica.com",
"akamihd.net",
"imdb.com",
"chase.com",
"microsoft.com",
"about.com",
"avg.com",
"pornhub.com",
"comcast.net",
"foxnews.com",
"apple.com",
"walmart.com",
"xhamster.com",
"mywebsearch.com",
"wellsfargo.com",
"xvideos.com",
"yelp.com",
"imgur.com",
"nytimes.com",
"nbcnews.com",
"cnet.com",
"reddit.com",
"adobe.com",
"ehow.com",
"pandora.com",
"pch.com",
"hulu.com",
"zedo.com",
"etsy.com",
"flickr.com",
"outbrain.com",
"optmd.com",
"indeed.com",
"livejasmin.com",
"zillow.com",
"target.com",
"xnxx.com",
"homedepot.com",
"redtube.com",
"answers.com",
"thepiratebay.sx",
"att.com",
"shopathome.com",
"wikia.com",
"dailymail.co.uk",
"usps.com",
"babylon.com",
"ups.com",
"bestbuy.com",
"youporn.com",
"reference.com",
"godaddy.com",
"groupon.com",
"deviantart.com",
"usatoday.com",
"pof.com",
"capitalone.com",
"bbc.co.uk",
"washingtonpost.com",
"match.com",
"drudgereport.com",
"mlb.com",
"tripadvisor.com",
"pogo.com",
"verizonwireless.com",
"blogger.com",
"buzzfeed.com",
"doublepimp.com",
"inksr.com",
"delta-search.com",
"fedex.com",
"inksdata.com",
"oyodomo.com",
"aweber.com",
"abcnews.go.com",
"vimeo.com",
"hootsuite.com",
"bleacherreport.com",
"lowes.com",
"yellowpages.com",
"americanexpress.com",
"tube8.com",
"yieldmanager.com",
"salesforce.com"
];
// Page whose load is currently in flight. Unset until the first page is
// created, and reset to null by disposePage() once a page has been closed.
var openRequest;
// The page seen by the previous polling tick, and the Date.now() timestamp
// (milliseconds) of the tick that first saw it; together they drive the
// load-timeout check in _tryNextText().
var lastSeen;
var lastSeenStartTime;
// Countdown of page loads that still have to be started.
var _testsRemaining = 1000;
// Reset by disposePage(); the only code that sets it is commented out.
var readyToDispose = false;
/**
 * Closes the page that is currently in flight and clears the module state
 * that tracks it, so the next polling tick is free to start a new page.
 *
 * Expects `openRequest` to hold a page; there is no guard for the empty case.
 */
function disposePage() {
    var finishedPage = openRequest;
    finishedPage.close();
    // Bookkeeping is reset only after close() has returned.
    readyToDispose = false;
    openRequest = null;
}
function _tryNextText() {
if(openRequest != null) {
//if (readyToDispose) {
// disposePage();
// return false;
//}
if(lastSeen == openRequest) {
var elapsed = Date.now() - lastSeenStartTime;
if(elapsed > 10000) {
console.log(format("PAGE LOAD TIMEOUT! aborting url={0}, pageLen={1}", openRequest.url, openRequest.content.length));
//timed out, next loop will reacquire
openRequest.stop();
//openRequest.close();
//openRequest = null;
//lastSeen = null;
//lastSeenStartTime = null;
} else {
//not yet timed out
return false;
}
} else {
//set our last seen then wait for next loop
lastSeen = openRequest;
lastSeenStartTime = Date.now();
}
return false;
}
var index = _testsRemaining % testUrls.length;
var targetUrl = "http://www." + testUrls[index];
_testsRemaining--;
if(_testsRemaining <= 0) {
console.log("TESTS COMPLETE! check your memory usage");
clearInterval(loopHandle);
return false;
}
console.log(format("[{0}] = {1} starting...", _testsRemaining, testUrls[index]));
var thisPage = require("webpage").create();
openRequest = thisPage;
openRequest.onLoadFinished = function (status) {
if(openRequest == thisPage) {
console.log(format("got page, url ={0}, targetUrl={1} pageLen={2}", openRequest.url, targetUrl, openRequest.content.length));
//readyToDispose = true;
openRequest.stop();
disposePage();
} else if(openRequest == null) {
console.log("ERROR? page opened while openRequest==null, pageurl=" + targetUrl);
//phantom.exit(-1);
} else {
console.log(format("ERROR! WRONG PAGE OPEN! got page, openPageurl ={0}, targetUrl={1} pageLen={2}", openRequest.url, targetUrl, openRequest.content.length));
phantom.exit(-1);
}
};
openRequest.open(targetUrl);
return true;
}
// Never cycle through more URLs than there are tests to run: shorten the list
// to the configured test count when that count is the smaller number.
testUrls.length = Math.min(testUrls.length, _testsRemaining);
console.log("running tests, count = " + _testsRemaining);
console.log("setting interval ");
// Poll every 500 ms; each tick either supervises the in-flight page or starts
// the next one. The handle is kept so the loop can be cancelled when done.
var loopHandle = setInterval(_tryNextText, 500);
PS: Run that script for long enough (usually around 400 page loads) and the phantomjs executable crashes, sometimes silently, sometimes with a segfault.
Bug Qt/Webkit