對 wget 這個 tool 不熟,平常使用 wget 下載一些資料時,可以輕易地使用 --referer 來偽造 HTTP Header 資料,因此能夠通過對方 Server 檢查  
wget --referer="REFERER_URL" "TARGET_URL"  
然而,上述的 REFERER_URL 和 TARGET_URL 都是固定的位置,如果它們會根據 session / cookie 而改變的話,不曉得還有沒有辦法?對我而言,寫 PHP 比去看 manpage 來得快 XD 所以我就寫成 PHP 囉!或許 wget 也有更方便的下法吧,改天再努力看 manpage  
程式碼:  
<?php
  
// Fetch SOURCE_URL, extract the download link from the returned page, then
// download that link while sending the source page as Referer and reusing the
// cookies collected during the first request.

$output_file = 'result.file';  // where the downloaded payload is stored

$cookie_file = 'cookie.tmp';  // cookie file read before and written after the transfers

$source_url = 'SOURCE_URL';  // page holding the link; becomes REFERER_URL later

$pattern = '/class="download" href="(.*?)"/';  // example pattern used to capture TARGET_URL


$ch = curl_init();

curl_setopt( $ch , CURLOPT_URL , $source_url );
curl_setopt( $ch , CURLOPT_COOKIEFILE , $cookie_file );
curl_setopt( $ch , CURLOPT_COOKIEJAR , $cookie_file );
curl_setopt( $ch , CURLOPT_RETURNTRANSFER , true );

$result = curl_exec( $ch );

if( $result === false )
{
        // The source page could not be fetched, so there is nothing to scan.
        echo "ERROR: Cannot fetch the source url: ".curl_error( $ch )."\n";
}
else if( preg_match_all( $pattern , $result , $match ) && isset( $match[1][1] ) )
{
        $target_url = $match[1][1];  // adjust the indexes to fit the pattern

        $referer_url = $source_url;

        $fp = fopen( $output_file , 'wb' );

        if( $fp === false )
        {
                echo "ERROR: Cannot open the output file to write:$output_file\n";
        }
        else
        {
                // The cookie options set above stay in effect on this handle,
                // so only the URL, Referer and output target change here.
                curl_setopt( $ch , CURLOPT_URL , $target_url );
                curl_setopt( $ch , CURLOPT_REFERER , $referer_url );

                // Set after CURLOPT_RETURNTRANSFER so the body goes to the file.
                curl_setopt( $ch , CURLOPT_FILE , $fp );

                echo "GO...\n";

                $downloaded = curl_exec( $ch );

                fclose( $fp );

                if( $downloaded === false )
                        echo "ERROR: Download failed: ".curl_error( $ch )."\n";
                else
                        echo "Finish..\n";
        }
}
else
{
        echo "ERROR: Cannot find the target url in the source page\n";
}

curl_close( $ch );
  
?>  
以上是要從 SOURCE_URL 上頭, 找到下載位置(target_url), 然而, 那個位置卻每次都不一樣, 最重要的是跟 session 有關係並且下載 target_url 時還必須奉上 cookie 資訊, 所以, 先收集一下 cookie 囉!(上述程式並不謹慎, 例如儲存結果的檔案有可能開檔失敗)  
後記,無聊又改寫成 tool mode:  
<?php
  
  
// Command-line option table.
// Keys use getopt() short-option syntax: a trailing ':' means the option takes
// a value. 'value' holds the default and, after parsing, the effective value;
// a NULL default marks the option as required. 'text' is the help line.
$shortopt = array(
    'h'  => array(
        'value' => '' ,
        'text'  => '-h, help' ),
    'c:' => array(
        'value' => '' ,
        'text'  => "-c '/tmp/cookie_file' , tmp file for cookie" ),
    'o:' => array(
        'value' => '' ,
        'text'  => "-o '/tmp/output_file' , path for result file. default use stdout" ),
    'u:' => array(
        'value' => NULL ,
        'text'  => "-u 'http://www.google.com' , source url" ),
    'e:' => array(
        'value' => NULL ,
        'text'  => "-e '/class=\"normal-down\" href=\"(.*?)\"/is' , regexp pattern for extract the target url" ),
    'm:' => array(
        'value' => '' ,
        'text'  => "-m '1,1' , choose the result matched to be used. e.g. use the match[5][2] is '5,2'" ),
    'd'  => array(
        'value' => 'true' ,
        'text'  => "-d , disable test mode for showing the target matched by regexp pattern" ),
);


// check function
if( !function_exists( 'getopt' ) )
{
    echo "'getopt' is not supported in current PHP version.\n";
    exit( 1 );
}


// Build the getopt() option string and the help menu from the table, and
// remember which options are required before parsing overwrites the defaults.
$shortopt_list = '';
$shortopt_help = '';
$required_opt = array();
foreach( $shortopt as $k => $v )
{
    $shortopt_list .= $k;
    $shortopt_help .= "\t".$v['text']."\n";
    if( !isset( $v['value'] ) )
        $required_opt[] = $k;
}


// start to parse...
$parse_arg = getopt( $shortopt_list );

// getopt() returns FALSE on failure; treat that as "no options given" so the
// required-option check below reports the problem instead of foreach failing.
if( !is_array( $parse_arg ) )
    $parse_arg = array();


// show help
if( isset( $parse_arg['h'] ) )
{
    echo "Usage> php ".$argv[0]." -h\n";
    echo $shortopt_help;
    exit;
}


// set the value
foreach( $parse_arg as $k => $v )
{
    // An option passed several times arrives as an array; keep the last one.
    if( is_array( $v ) )
        $v = end( $v );

    if( isset( $shortopt[$k] ) )
    {
        // Flag option: when present, the value becomes TRUE only if the
        // default was the string 'false', otherwise FALSE (so -d turns the
        // default 'true' test mode off).
        $shortopt[$k]['value'] = ( strcasecmp( $shortopt[$k]['value'] , 'false' ) === 0 );
    }
    else if( isset( $shortopt[$k.':'] ) )
    {
        $shortopt[$k.':']['value'] = $v;
    }
}


// check value (required options must be given and must not be empty)
$check_out = '';
foreach( $required_opt as $k )
{
    $value = $shortopt[$k]['value'];
    if( !is_string( $value ) || $value === '' )
        $check_out .= "\t".$shortopt[$k]['text']."\n";
}

if( !empty( $check_out ) )
{
    echo "Usage> php ".$argv[0]." -h\n";
    echo "Must Set:\n$check_out\n";
    exit( 1 );
}


// Effective settings used by the rest of the script.
$cookie_file = !empty( $shortopt['c:']['value'] ) ? $shortopt['c:']['value'] : NULL ;

$source_url = $shortopt['u:']['value'];

$output_file = !empty( $shortopt['o:']['value'] ) ? $shortopt['o:']['value'] : NULL ;

$regexp_pattern = $shortopt['e:']['value'];


if( !empty( $shortopt['m:']['value'] ) )
    $shortopt['m:']['value'] = trim( $shortopt['m:']['value'] );

// Split '5,2' into array( '5' , '2' ); each index is trimmed so that
// '5, 2' selects the same element.
$choose_match = !empty( $shortopt['m:']['value'] )
    ? array_map( 'trim' , explode( ',' , $shortopt['m:']['value'] ) )
    : NULL;

// Test mode stays on unless a match is chosen with -m AND -d is given.
$test_mode = empty( $choose_match ) || $shortopt['d']['value'];
  
  
// Fetch the source page, extract the target URL with the user's pattern and,
// unless test mode is active, download the target with the source page as
// Referer. Errors go to STDERR because STDOUT may carry the downloaded body.

$exit_code = 0;

$ch = curl_init();

curl_setopt( $ch , CURLOPT_URL , $source_url );

if( !empty( $cookie_file ) )
{
    curl_setopt( $ch , CURLOPT_COOKIEFILE , $cookie_file );
    curl_setopt( $ch , CURLOPT_COOKIEJAR , $cookie_file );
}

curl_setopt( $ch , CURLOPT_RETURNTRANSFER , true );

$result = curl_exec( $ch );

if( $result === false )
{
    fwrite( STDERR , "ERROR: Cannot fetch the source url: ".curl_error( $ch )."\n" );
    $exit_code = 1;
}
else if( !preg_match_all( $regexp_pattern , $result , $matches ) )
{
    // preg_match_all() gives 0 for "no match" and FALSE for a pattern error.
    fwrite( STDERR , "ERROR: The regexp pattern does not match anything in the source url\n" );
    $exit_code = 1;
}
else
{
    $target_url = getTargetURL( $matches , $choose_match );

    if( $test_mode || empty( $target_url ) )
    {
        // Show every match so the user can pick the right -m indexes.
        echo "Matched Target URL: \n";
        print_r( $matches );
        echo "Choose option(Cannot be empty):".$shortopt['m:']['value']."\n";
        echo "Target(Cannot be empty):$target_url\n";
    }
    else
    {
        // The cookie options set above stay in effect on this handle, so only
        // the URL, Referer and output target change for the second request.
        curl_setopt( $ch , CURLOPT_URL , $target_url );
        curl_setopt( $ch , CURLOPT_REFERER , $source_url );

        if( !empty( $output_file ) )
        {
            echo "Target URL:$target_url\n";
            echo "Referer URL:$source_url\n";

            $fp = fopen( $output_file , 'wb' );

            if( $fp === false )
            {
                fwrite( STDERR , "ERROR: Cannot open the output file to write:$output_file\n" );
                $exit_code = 1;
            }
            else
            {
                // Set after CURLOPT_RETURNTRANSFER so the body goes to the file.
                curl_setopt( $ch , CURLOPT_FILE , $fp );

                echo "Begin...\n";

                $downloaded = curl_exec( $ch );

                fclose( $fp );

                if( $downloaded === false )
                {
                    fwrite( STDERR , "ERROR: Download failed: ".curl_error( $ch )."\n" );
                    $exit_code = 1;
                }
                else
                {
                    echo "...Finish\n";
                }
            }
        }
        else
        {
            // CURLOPT_RETURNTRANSFER is still on from the first request and
            // would make curl_exec() return the body instead of printing it.
            // Switch it off so the download really goes to stdout.
            curl_setopt( $ch , CURLOPT_RETURNTRANSFER , false );

            if( curl_exec( $ch ) === false )
            {
                fwrite( STDERR , "ERROR: Download failed: ".curl_error( $ch )."\n" );
                $exit_code = 1;
            }
        }
    }
}

curl_close( $ch );

exit( $exit_code );
  
  
/**
 * Pick one value out of a (possibly nested) match array.
 *
 * When $choose is a list of indexes, the list is followed level by level,
 * e.g. array( '1' , '1' ) selects $matches[1][1]. When $choose is a single
 * scalar key, it is looked up directly in $matches.
 *
 * @param mixed $matches Nested array of matches, or an already resolved value.
 * @param mixed $choose  List of indexes to follow, or a single key.
 *
 * @return mixed The selected value. A non-array $matches is returned as is.
 *               NULL when $matches is not set, when an index does not exist,
 *               or when the index list ends while the value is still an array.
 */
function getTargetURL( $matches , $choose )
{
    if( !isset( $matches ) )
        return NULL;

    if( is_array( $choose ) )
    {
        foreach( $choose as $index )
        {
            // A resolved (non-array) value ends the walk; leftover indexes
            // are ignored.
            if( !is_array( $matches ) )
                return $matches;

            if( !isset( $matches[ $index ] ) )
                return NULL;

            $matches = $matches[ $index ];
        }

        // Index list exhausted: a still-nested array means the selection was
        // incomplete, so report "nothing selected" rather than using the
        // (now empty) index list itself as an array offset.
        return is_array( $matches ) ? NULL : $matches;
    }

    if( !is_array( $matches ) )
        return $matches;

    return isset( $matches[ $choose ] ) ? $matches[ $choose ] : NULL;
}
  
?>  
用法:  
單純以抓 Yahoo! News 為例  
尚未指定 -m 
  
# php my_wget.php -u 'http://tw.yahoo.com' -e '/<h3><a href="([^"]+)" title="([^"]+)"/is'  
Matched Target URL:
  
Array
  
(
  
    [0] => Array
  
        (
  
            [0] => <h3><a href="news/a/h1/t/*http://tw.news.yahoo.com/article/url/d/a/100628/5/289yr.html" title="莫拉克風災 學者:無關暖化"
  
            [1] => <h3><a href="news/a/h2/t/*http://tw.news.yahoo.com/article/url/d/a/100628/69/289tr.html" title="立院藏七寶 總價數億元"
  
        )
  
  
    [1] => Array
  
        (
  
            [0] => news/a/h1/t/*http://tw.news.yahoo.com/article/url/d/a/100628/5/289yr.html
  
            [1] => news/a/h2/t/*http://tw.news.yahoo.com/article/url/d/a/100628/69/289tr.html
  
        )
  
  
    [2] => Array
  
        (
  
            [0] => 莫拉克風災 學者:無關暖化
  
            [1] => 立院藏七寶 總價數億元
  
        )
  
  
)
  
Choose option(Cannot be empty):
  
Target(Cannot be empty):  
指定 -m '1,1'
  
# php my_wget.php -u 'http://tw.yahoo.com' -e '/<h3><a href="([^"]+)" title="([^"]+)"/is' -m '1,1'  
Matched Target URL:
  
Array
  
(
  
    [0] => Array
  
        (
  
            [0] => <h3><a href="news/a/h1/t/*http://tw.news.yahoo.com/article/url/d/a/100628/5/289yr.html" title="莫拉克風災 學者:無關暖化"
  
            [1] => <h3><a href="news/a/h2/t/*http://tw.news.yahoo.com/article/url/d/a/100628/69/289tr.html" title="立院藏七寶 總價數億元"
  
        )
  
  
    [1] => Array
  
        (
  
            [0] => news/a/h1/t/*http://tw.news.yahoo.com/article/url/d/a/100628/5/289yr.html
  
            [1] => news/a/h2/t/*http://tw.news.yahoo.com/article/url/d/a/100628/69/289tr.html
  
        )
  
  
    [2] => Array
  
        (
  
            [0] => 莫拉克風災 學者:無關暖化
  
            [1] => 立院藏七寶 總價數億元
  
        )
  
  
)
  
Choose option(Cannot be empty):1,1
  
Target(Cannot be empty):news/a/h2/t/*http://tw.news.yahoo.com/article/url/d/a/100628/69/289tr.html  
正式要下載請記得加 -d (disable test) , 但此例不適用, 因為抓出來的 url 並不完整, 開頭只是 "news/a/h2/t/*....."
  
# php my_wget.php -u 'http://tw.yahoo.com' -e '/<h3><a href="([^"]+)" title="([^"]+)"/is' -m '1,1' -d  
輸出到檔案
  
# php my_wget.php -u 'http://tw.yahoo.com' -e '/<h3><a href="([^"]+)" title="([^"]+)"/is' -m '1,1' -d -o '/tmp/output'  
需要 cookie
  
# php my_wget.php -u 'http://tw.yahoo.com' -e '/<h3><a href="([^"]+)" title="([^"]+)"/is' -m '1,1' -d -o '/tmp/output' -c '/tmp/cookie' 
  |