我为我的 MediaWiki 字数统计项目创建了这个类。
// Copyright PHPExperts.pro
// License: Any user on Stack Overflow may use this code under the BSD License.
/**
 * Web page datatype that holds all the various parts
 * and info about a web page.
 *
 * Typical use: construct with a URL, call fetchURL(), then read
 * $headers, $body and $text.
 */
class WebPage
{
    /** @var string The URL passed to the constructor (already validated). */
    public $url;

    /**
     * @var array|null Raw response header lines, in the order cURL delivered
     *                 them during the most recent fetchURL() call. Null until
     *                 fetchURL() has been called.
     */
    public $headers;

    /** @var string|null Raw response body stored by fetchURL(). */
    public $body;

    /** @var mixed Result of remove_HTML() applied to the response body. */
    public $text;

    /**
     * Validate and store the URL. No network access happens here.
     *
     * @param string $url The URL of the page to fetch later via fetchURL().
     * @throws Exception If the cURL extension is not loaded.
     * @throws WebPageException If $url is not a valid URL.
     */
    public function __construct($url)
    {
        // 1. Bail out now if the CURL extension is not loaded.
        if (!extension_loaded('curl'))
        {
            throw new Exception(WebPageException::MISSING_CURL);
        }

        // 2. Make sure the URL is valid.
        self::ensureValidURL($url);

        // 3. Store the URL.
        $this->url = $url;
    }

    /**
     * Determine if a URL is valid.
     *
     * @param mixed $url The value to check.
     * @return bool True if $url is a string and passes FILTER_VALIDATE_URL.
     *              False, otherwise.
     */
    public static function isURLValid($url)
    {
        return (is_string($url) &&
                filter_var($url, FILTER_VALIDATE_URL) !== false);
    }

    /**
     * Guard version of isURLValid(): does nothing for a valid URL and
     * throws for anything else.
     *
     * @param mixed $url The value to check.
     * @throws WebPageException With WebPageException::INVALID_URL when
     *                          isURLValid($url) is false.
     */
    public static function ensureValidURL($url)
    {
        if (!self::isURLValid($url))
        {
            throw new WebPageException(WebPageException::INVALID_URL, array($url));
        }
    }

    // captureHeader() donated by bendavis78@gmail.com,
    // via http://us.php.net/curl_setopt_array
    /**
     * CURLOPT_HEADERFUNCTION callback: records one raw header line.
     *
     * @param mixed  $ch     The cURL handle the header belongs to (unused).
     * @param string $header One raw header line as handed over by cURL.
     * @return int The length of $header; cURL expects the callback to report
     *             how many bytes it consumed.
     */
    private function captureHeader($ch, $header)
    {
        $this->headers[] = $header;

        return strlen($header);
    }

    /**
     * Fetch $this->url with cURL (5 second timeout) and populate
     * $headers, $body and $text.
     *
     * @throws WebPageException With WebPageException::BLANK_URL when the
     *                          transfer fails or returns an empty body.
     */
    public function fetchURL()
    {
        // Start from a clean slate so that calling fetchURL() more than once
        // does not append this response's headers to the previous response's.
        $this->headers = array();

        $ch = curl_init();
        curl_setopt_array($ch, array(
            CURLOPT_URL            => $this->url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_HEADERFUNCTION => array($this, 'captureHeader'),
            CURLOPT_TIMEOUT        => 5,
        ));
        $data = curl_exec($ch);
        curl_close($ch);

        // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body as a
        // string, or false on failure; an empty body is treated as a failure.
        if ($data === false || $data === null || $data === '')
        {
            throw new WebPageException(WebPageException::BLANK_URL, array($this->url));
        }

        // TODO: Need to handle HTTP error messages, such as 404 and 502.
        $this->body = $data;

        // Uses code from php@wizap.dom
        // NOTE(review): remove_HTML() is not defined in this class; it must be
        // provided elsewhere before fetchURL() is called.
        $this->text = remove_HTML($data);
    }
}
在 fetchURL() 执行期间,cURL 会自动调用
WebPage::captureHeader()
之后你只需用 foreach 遍历
$this->headers
如果没有发现 "HTTP/1.0 403 Forbidden",就说明请求没有被拒绝,可以继续处理了。
这完全回答了你的问题,所以我希望得到赞扬。