最近写个程序需要抓取多个远程页面。按照正常的 php 写法,需要一个页面抓完再抓下一个页面,但单个页面的抓取时间又比较长,如果能像 js 的 ajax 那样并发请求就好了。查了下,发现可以使用 curl 的批处理接口(curl_multi)来模拟多线程并发访问,从而加快整体抓取速度。
/**
 * Fetch several URLs concurrently using the curl_multi API.
 *
 * Every URL gets its own easy handle (1 second total timeout, body returned
 * as a string, no response headers). As each transfer finishes, its body is
 * passed to callback() together with $delay and the outcome is recorded.
 *
 * NOTE(review): callback() is not defined in this block; it must be provided
 * elsewhere and accept (string $content, mixed $delay) -- confirm.
 *
 * @param array $urls  List of URLs to request.
 * @param mixed $delay Forwarded untouched to callback(); its meaning is
 *                     decided by callback().
 *
 * @return array Map of URL => array('info' => curl_getinfo() data,
 *               'error' => curl_error() text, 'results' => callback() return
 *               value). If the same URL is listed twice, the transfer that
 *               finishes last wins. If curl_multi_exec() reports an error,
 *               only the responses collected so far are returned.
 */
function rolling_curl($urls, $delay) {
    $queue   = curl_multi_init();
    $handles = array(); // id => easy handle still attached to $queue
    $urlOf   = array(); // id => URL that was requested on that handle
    $nextId  = 0;

    foreach ($urls as $url) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOSIGNAL, true);
        // Tag the handle with our own id. Casting the handle itself to a
        // string only works while handles are resources; on PHP 8+ they are
        // CurlHandle objects and the cast throws.
        curl_setopt($ch, CURLOPT_PRIVATE, (string) $nextId);
        curl_multi_add_handle($queue, $ch);

        $handles[$nextId] = $ch;
        $urlOf[$nextId]   = $url;
        $nextId++;
    }

    $responses = array();
    $active    = 0;

    do {
        // Drive the transfers; older libcurl asks to be called again at once.
        do {
            $code = curl_multi_exec($queue, $active);
        } while ($code == CURLM_CALL_MULTI_PERFORM);

        if ($code != CURLM_OK) {
            break;
        }

        // Collect every transfer that has completed since the last pass.
        while ($done = curl_multi_info_read($queue)) {
            $handle = $done['handle'];
            $id     = curl_getinfo($handle, CURLINFO_PRIVATE);

            $info    = curl_getinfo($handle);
            $error   = curl_error($handle);
            $results = callback(curl_multi_getcontent($handle), $delay);
            $responses[$urlOf[$id]] = compact('info', 'error', 'results');

            // Detach and release the finished handle.
            curl_multi_remove_handle($queue, $handle);
            curl_close($handle);
            unset($handles[$id]);
        }

        // Wait for socket activity instead of spinning.
        if ($active > 0) {
            if (curl_multi_select($queue, 0.5) === -1) {
                // select() failed (nothing to wait on yet); back off briefly
                // so the outer loop does not burn a full CPU core.
                usleep(1000);
            }
        }
    } while ($active);

    // Only non-empty when the loop was left early on a multi-stack error:
    // release the transfers that never completed.
    foreach ($handles as $handle) {
        curl_multi_remove_handle($queue, $handle);
        curl_close($handle);
    }

    curl_multi_close($queue);
    return $responses;
}
这个函数可以直接使用,但需要先自行定义 callback($content, $delay) 函数来处理每个页面抓回的内容(上面的代码中没有给出它的定义)。$delay 建议设成 5,单位是毫秒。
// Example: fetch three sites concurrently, forwarding 5 as $delay.
$urls = array(
    "http://www.cnn.com/",
    "http://www.canada.com/",
    "http://www.yahoo.com/",
);
rolling_curl($urls, 5);
91云(91yun.co)
你判断是否有货的条件是怎么写的?
kimsufi是判断购买页面的源代码是否有提交按钮(没货的时候是没有提交按钮的)