/**
 * Minimal crawler that walks a queue of URLs one step at a time.
 *
 * State is an ordered URL list plus a cursor pointing at the next entry to
 * visit. Each call to doIteration() consumes at most one URL, so the class can
 * be driven from a loop or from a scheduled job.
 *
 * Relies on two functions that are not defined in this class:
 * robotsDisallow($url) and scrapeHyperlinks($url).
 */
class Crawler
{
    /** @var array Ordered list of URLs known to the crawler. */
    private $URLList;

    /** @var int Position in $URLList of the next URL to crawl. */
    private $nextIndex;

    /**
     * Start with the cursor at the beginning of a one-entry seed list.
     */
    public function __construct()
    {
        $this->nextIndex = 0;
        $this->URLList = array("http://SEEDWEBSITE/");
    }

    /**
     * Return the URL under the cursor and advance the cursor.
     *
     * @return string|null The next URL, or null when every known URL has
     *                     already been handed out (cursor is left untouched).
     */
    private function getNextURLToCrawl()
    {
        // Guard against running past the end of the list: without this the
        // lookup raises an undefined-offset warning and the cursor keeps
        // growing on every call.
        if (!isset($this->URLList[$this->nextIndex])) {
            return null;
        }

        return $this->URLList[$this->nextIndex++];
    }

    /**
     * Print the number of known links, the cursor position, and every link,
     * one per line.
     */
    private function printSummary()
    {
        echo count($this->URLList) . " links. Index:" . $this->nextIndex . "\n";

        foreach ($this->URLList as $link) {
            echo $link . "\n";
        }
    }

    /**
     * Process a single URL from the queue.
     *
     * This can be called from a loop or from cron. Does nothing when the
     * queue is exhausted or when robotsDisallow() reports the URL as
     * disallowed; note that a disallowed URL is still consumed from the queue.
     */
    public function doIteration()
    {
        // Must be $this: PHP has no $self variable, so `$self->...` is a call
        // on null and fails at runtime.
        $url = $this->getNextURLToCrawl();

        // Nothing left to crawl.
        if ($url === null) {
            return;
        }

        // Do not crawl if not allowed.
        if (robotsDisallow($url)) {
            return;
        }

        echo "Crawling " . $url . "\n";

        // This function finds the links.
        // NOTE(review): its return value is discarded and nothing visible here
        // appends to $URLList — confirm how discovered links are meant to be
        // queued.
        scrapeHyperlinks($url);

        $this->printSummary();
    }
}