load_curl($url); } elseif(ini_get('allow_url_fopen')) { return $this->load_fopen($url); } else { error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini'); } } return null; } /** * cURL implementation of load */ private function load_curl($url) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_ENCODING, ''); // There is no guarantee this request will be fulfilled // -- https://www.php.net/manual/en/function.curl-setopt.php curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE); // There is no guarantee this request will be fulfilled $header = array( 'Accept: text/html', // Prefer HTML format 'Accept-Charset: utf-8', // Prefer UTF-8 encoding ); curl_setopt($ch, CURLOPT_HTTPHEADER, $header); $doc = curl_exec($ch); if(curl_getinfo($ch, CURLINFO_RESPONSE_CODE) !== 200) { return null; } curl_close($ch); if(strlen($doc) > MAX_FILE_SIZE) { return null; } return new HtmlDocument($doc); } /** * fopen implementation of load */ private function load_fopen($url) { // There is no guarantee this request will be fulfilled $context = stream_context_create(array('http' => array( 'header' => array( 'Accept: text/html', // Prefer HTML format 'Accept-Charset: utf-8', // Prefer UTF-8 encoding ), 'ignore_errors' => true // Always fetch content ))); $doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1); if(isset($http_response_header)) { foreach($http_response_header as $rh) { // https://stackoverflow.com/a/1442526 $parts = explode(' ', $rh, 3); if(preg_match('/HTTP\/\d\.\d/', $parts[0])) { $code = $parts[1]; } } // Last code is final status if(!isset($code) || $code !== '200') { return null; } } if(strlen($doc) > MAX_FILE_SIZE) { return null; } return new HtmlDocument($doc); } }