一般 CURL 抓網頁的方法, 是一頁一頁依序抓. 假設要抓 4 頁, 所費時間分別是 5, 10, 7, 5 秒, 那全部所花的時間總和就是 5 + 10 + 7 + 5 = 27 秒.
  
若能同時間去抓取多個網頁, 各頁所花費的時間同樣是 5, 10, 7, 5 秒, 但全部所花的時間總和只有 10 秒 (也就是花費最多時間的那一頁的秒數).
  
於 JavaScript 可使用 AJAX 的非同步請求 (例如 YAHOO.util.Connect.asyncRequest) 來達成; 於 PHP 則可以用 CURL 的 curl_multi 系列函式同時進行多個連線, 來達成類似 Multi-Threading 的效果 (實際上是單一執行緒的非同步 I/O, 並非真正的多執行緒).
  
  
 
    - 官方文件: PHP: curl_multi_init - Manual (http://www.php.net/manual/en/function.curl-multi-init.php)
  
  
程式 (async.php)
<?php
/**
 * Fetch several URLs concurrently through the curl_multi interface.
 *
 * Every transfer runs in parallel, so the total wall-clock time is roughly
 * that of the slowest request instead of the sum of all requests.
 *
 * @param array $url_array List of URLs to fetch. Array keys are ignored; the
 *                         result is indexed 0..n-1 in iteration order.
 * @param int   $wait_usec Back-off, in microseconds, applied when
 *                         curl_multi_select() reports that it could not wait
 *                         on the sockets (returns -1). Values <= 0 select a
 *                         small built-in pause.
 *
 * @return array|false false when $url_array is not an array; otherwise an
 *                     array with one entry per URL holding the response body,
 *                     or false for each URL whose transfer failed.
 */
function async_get_url($url_array, $wait_usec = 0)
{
    if (!is_array($url_array)) {
        return false;
    }

    // Pause used when curl_multi_select() fails; keeps the loop from spinning.
    $wait_usec = intval($wait_usec);
    $pause     = ($wait_usec > 0) ? $wait_usec : 100;

    $data   = array();
    $handle = array();

    $mh = curl_multi_init(); // multi curl handler

    $i = 0;
    foreach ($url_array as $url) {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body, don't print it
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow 302 redirects
        curl_setopt($ch, CURLOPT_MAXREDIRS, 7);

        curl_multi_add_handle($mh, $ch); // register the easy handle with the multi handler

        $handle[$i++] = $ch;
    }

    /*
     * Run all transfers.
     *
     * Calling curl_multi_exec() in a tight loop pins the CPU at 100%, so the
     * loop blocks in curl_multi_select() between rounds. When select returns
     * -1 (it could not wait on any socket) we sleep briefly and still call
     * curl_multi_exec() again; otherwise $active would never change and the
     * loop would never terminate.
     */
    $active = 0;
    do {
        do {
            $mrc = curl_multi_exec($mh, $active);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM);

        if ($active && $mrc === CURLM_OK) {
            if (curl_multi_select($mh) === -1) {
                usleep($pause);
            }
        }
    } while ($active && $mrc === CURLM_OK);

    /*
     * Collect the per-transfer result codes. curl_errno() is not reliable for
     * handles driven by the multi interface, so the completion messages from
     * curl_multi_info_read() are used instead.
     */
    $result = array();
    while (($info = curl_multi_info_read($mh)) !== false) {
        $idx = array_search($info['handle'], $handle, true);
        if ($idx !== false) {
            $result[$idx] = $info['result'];
        }
    }

    /* Read the data and detach each handle. */
    foreach ($handle as $i => $ch) {
        // A handle without a completion message never finished: treat as failed.
        $ok       = isset($result[$i]) && $result[$i] === CURLE_OK;
        $data[$i] = $ok ? curl_multi_getcontent($ch) : false;

        curl_multi_remove_handle($mh, $ch);
    }

    curl_multi_close($mh);

    return $data;
}
  
?>
  
使用
<?php
// Fetch two pages in parallel; results come back in the same order as the URLs.
$urls = array('http://example1.com', 'http://example2.com');

print_r(async_get_url($urls)); // [0] => example1, [1] => example2
  
?>
  
測試 
sleep.php # 用來觀察延長回應時間後的抓取效果
<?php
  
  
// Test endpoint: waits ?time=N seconds, then prints N.
// The value comes from the query string, so default it when absent and clamp
// it to 0..60 to avoid a negative argument to sleep() or an unbounded stall.
$seconds = isset($_GET['time']) ? intval($_GET['time']) : 0;
$seconds = max(0, min($seconds, 60));

sleep($seconds);

echo $seconds;
  
?>

<?php
// Four requests that take 5, 10, 7 and 5 seconds respectively on the server.
$url_array = array(
    'http://example.com/sleep.php?time=5',
    'http://example.com/sleep.php?time=10',
    'http://example.com/sleep.php?time=7',
    'http://example.com/sleep.php?time=5',
);

print_r(async_get_url($url_array));

// Total elapsed time is about 10 seconds (the slowest request), and the
// output is [0] => 5, [1] => 10, [2] => 7, [3] => 5
  
?>
 
 
  |