<?
/**
 * Turn a link found on a page into an absolute URL.
 *
 * Resolution rules, in order:
 *  - a link that already carries a scheme ("http://...", "https://...", ...)
 *    is returned unchanged;
 *  - a protocol-relative link ("//host/path") gets the scheme of $host;
 *  - a root-relative link ("/path") is appended to the scheme and host name
 *    of $host;
 *  - anything else is resolved against the directory of $host, i.e. $host
 *    with its last path segment removed.
 *
 * @param string $url  Link to resolve.
 * @param string $host Base address the link was found on. May be given with
 *                     or without a scheme; "http" is assumed when absent.
 * @return string Absolute URL. For an empty $url this is the base directory
 *                followed by "/".
 */
function full_url ($url,$host)
{
    $url = (string) $url;

    // Links with their own scheme need no resolution. (Checking only for a
    // ':' at offset 4 would miss "https://".)
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url)) {
        return $url;
    }

    // Split the base into scheme and "host/path"; keep https when it is given.
    $scheme = 'http';
    $base = (string) $host;
    if (preg_match('#^(https?)://(.*)$#is', $base, $m)) {
        $scheme = strtolower($m[1]);
        $base = $m[2];
    }

    // A query string or fragment on the base is not part of its directory
    // and may itself contain slashes.
    $base = preg_replace('/[?#].*$/s', '', $base);

    $segments = explode('/', $base);

    // Protocol-relative link: only the scheme is inherited.
    if (substr($url, 0, 2) === '//') {
        return $scheme . ':' . $url;
    }

    // Root-relative link: scheme and host name come from the base.
    if (substr($url, 0, 1) === '/') {
        return $scheme . '://' . $segments[0] . $url;
    }

    // Relative link: drop the last path segment (the "file" part) of the
    // base, but never the host name itself when the base has no path.
    if (count($segments) > 1) {
        array_pop($segments);
    }

    return $scheme . '://' . implode('/', $segments) . '/' . $url;
}
/**
 * Ask the radikal.ru image host to fetch a remote picture and return the
 * link it reports back.
 *
 * Posts the upload form to http://radikal.ru/action.aspx with $urlf in the
 * "URLF" field, then reads the value of the input whose id is
 * "input_link_1" from the response page.
 *
 * @param string $urlf Address of the image to hand to the service;
 *                     surrounding whitespace is trimmed.
 * @return string|null The captured link, or null when the request fails or
 *                     the response does not contain the expected field.
 */
function uploading ($urlf){
    $postdata = array(
        'upload' => 'yes',
        'im'     => '7',
        'JQ'     => '85',
        'URLF'   => trim($urlf),
        'j'      => 'yes',
    );
    $agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322)";

    $ch = curl_init("http://radikal.ru/action.aspx");
    if ($ch === false) {
        return null;
    }

    // CURLOPT_HTTPHEADER is intentionally not set: it requires an array, and
    // passing false is rejected by cURL (an error on current PHP versions).
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);
    curl_setopt($ch, CURLOPT_ENCODING, "gzip,deflate");
    curl_setopt($ch, CURLOPT_REFERER, "");
    curl_setopt($ch, CURLOPT_POST, 1);
    // Passing an array makes cURL send the fields as multipart/form-data.
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postdata);

    $tmp = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on transport errors and timeouts.
    if (!is_string($tmp)) {
        return null;
    }

    // Only read the capture group when the field is actually present.
    if (!preg_match('|id="input_link_1" value="(.*?)"|is', $tmp, $out)) {
        return null;
    }

    return $out[1];
}
/**
 * Download an article page and extract its body text, headline and photo
 * address.
 *
 * The body is the markup between the "midArticle_start" marker span and the
 * next "<div>", cut so that it starts at its first "-" when one is present.
 * The headline is the tag-stripped content of the first <h1>. The photo
 * address is the src of the <img> marked alt="Photo", made absolute against
 * ru.reuters.com via full_url().
 *
 * The photo match and the raw page are still written to the output, as the
 * function has always done; that is its only effect for callers that ignore
 * the return value.
 *
 * @param string $url2  Address of the article page.
 * @param string $theme Not used by the current implementation.
 * @return array|null array('title' => ..., 'text' => ..., 'imgurl' => ...)
 *                    with '' for parts that were not found, or null when the
 *                    page could not be downloaded.
 */
function parsing ($url2,$theme){
    $html = file_get_contents($url2);
    if ($html === false) {
        return null;
    }

    // Article body: the match has to be stored before it can be trimmed.
    $text = '';
    if (preg_match('|<span id="midArticle_start"></span>(.*?)<div>|is', $html, $preg)) {
        $text = $preg[1];
        $pos = strpos($text, "-");
        // Keep everything from the first dash on; leave the text whole when
        // there is no dash instead of using false as an offset.
        if ($pos !== false) {
            $text = substr($text, $pos);
        }
    }

    // Headline.
    $title = '';
    if (preg_match('|<h1>(.*?)</h1>|is', $html, $preg)) {
        $title = strip_tags($preg[1]);
    }

    // Photo: [^"]+ keeps the capture inside a single src attribute, where a
    // greedy ".+" could run across several attributes or tags on one line.
    preg_match('#<img src="([^"]+)" border="0" alt="Photo"#', $html, $preg);

    // Diagnostic output kept from the original implementation.
    var_dump($preg);
    echo $html;

    $imgurl = '';
    if (isset($preg[1])) {
        $imgurl = full_url($preg[1], 'ru.reuters.com');
    }

    return array(
        'title'  => $title,
        'text'   => $text,
        'imgurl' => $imgurl,
    );
}
// Open the local "parser" database. The mysql_* functions were removed in
// PHP 7, so the mysqli equivalents are used. Error reporting is switched to
// "off" so a failed connection yields false (plus PHP's own warning) instead
// of an exception, and the script carries on as it did before.
mysqli_report(MYSQLI_REPORT_OFF);
$db = mysqli_connect("localhost", "root", "", "parser");
if ($db !== false) {
    // Sets the connection character set; a separate "SET NAMES" query is
    // not needed on top of this.
    mysqli_set_charset($db, 'utf8');
}

// Fetch the archive listing and capture the first article link. The capture
// is not used further down yet.
$html = file_get_contents("http://ru.reuters.com/news/archive/topNews?date=03242010");
preg_match('|<a href="/article/(.*?)"|is', $html, $preg);

// Parse one fixed article.
parsing("http://ru.reuters.com/article/topNews/idRUMSE62N1LM20100324", 'hot');
?>