Ewsen
Гуру форума
- Регистрация
- 26 Июл 2008
- Сообщения
- 163
- Реакции
- 59
- Автор темы
- #1
Для просмотра скрытого содержимого вы должны войти или зарегистрироваться.
Follow along with the video below to see how to install our site as a web app on your home screen.
Примечание: This feature may not be available in some browsers.
<?php
/////////////////////////////////////////////////////////////////////////////////////////////
// HTML parser
////////////////////////////////////////////////////////////////////////////////////////////
// The full "<?php" open tag is used because the short "<?" form is only
// recognised when short_open_tag is enabled in php.ini.
// NOTE(review): error_reporting(0) silences every diagnostic for the rest of
// the request, including those raised by code outside this file - confirm
// this is still wanted.
error_reporting(0);
/**
 * Tolerant HTML tokenizer and tree builder.
 *
 * GetWord() splits the input into words, Parse() drives a small state
 * machine over those words and stores the result in $content as nested
 * arrays. Every container array holds its nodes under numeric keys plus a
 * "contentpos" key with the index of the last node written.
 * getarr(), getmytext() and maxstr() then pick the longest text found
 * inside a <td> or <div>.
 *
 * The constructor additionally assigns $sc, $slevel and $slevelpos, which
 * are not declared in the list below.
 */
class HtmlParser {
var $pos, // read offset into $data
$tagpos, // length of $text at the moment the current "<" was met, i.e. where the tag starts inside $text
$length, // strlen($data)
$data, // raw input being parsed
$stacktag, // stack of open tags; each entry has "tag" (reference to the container holding the tag) and "num"
$stacktagpos, // index of the top entry of $stacktag
$name, // label of the input: the $name argument, or the file path when a file is read
$quotstate, // -1 outside a quoted value, 1 inside one (toggled by multiplying by -1)
$quottype, // quote character that opened the current value
$parname, // name of the attribute being read
$pars, // attributes collected for the tag being read, keyed by attribute name
$tagname, // lowercase name of the tag being read
$content, // root container of the parsed tree
$contentpos, // declared but not referenced by any method of this class
$allreadyparsed, // set to 1 by the constructor when it adopts a ready-made tree
$pg, // reference to the grammar array passed to the constructor
$dc, // characters GetWord() treats as delimiters
$nc, // structural characters returned as words of their own: < > = /
$qc, // quote characters
$prevstate, // last state and word recorded by Parse()
$processtag, // non-zero while the reader is inside "<...>"
$processpar, // non-zero once an attribute name has been read
$processparvalue, // non-zero after "=", while an attribute value is expected
$c, // reference to the container currently being filled
$cp, // reference to the "contentpos" entry of $c
$text, // raw characters read since the last node was emitted
$incomment, // set to 1 by GetWord() when it meets "<!--"
$skipto, // tag name whose closing tag is awaited while everything else is ignored
$tagreg, // per tag name counter of currently registered open tags
$arr, // results gathered by getarr(): parallel lists "arr", "text", "tag", "len"
$wasquot; // set to 1 by GetWord() when the word just read started with a quote
/**
 * Resets the parser state and loads the markup to parse.
 *
 * @param mixed  $data     The markup itself; a file path when $datatype is
 *                         non-zero; or an array, which is adopted as an
 *                         already built content tree.
 * @param mixed  $grammar  Per-tag rules keyed by lowercase tag name. This
 *                         class reads the keys "endtag", "closeon" and
 *                         "nohavetags".
 * @param string $name     Label stored in $this->name when $data is markup.
 * @param int    $datatype 0 - $data is markup; anything else - $data is a
 *                         path whose contents are read.
 */
function __construct($data,$grammar,$name="",$datatype=0) {
    // Character classes used by GetWord(): delimiters, structural
    // characters, quotes and whitespace.
    $this->dc=array(" ","\t","\r","\n","<",">","\"","'","=","/");
    $this->nc=array("<",">","=","/");
    $this->qc=array("\"","'");
    $this->sc=array("\r","\n"," ","\t");
    $this->prevstate=array("state"=>0,"word"=>"");
    $this->pg=&$grammar;
    $this->pos=0;
    $this->stacktag=array();
    $this->stacktagpos=-1;
    $this->content=array();
    $this->content["contentpos"]=-1;
    $this->c=&$this->content;
    $this->cp=-1;
    $this->quotstate=-1;
    $this->allreadyparsed=0;
    $this->text="";
    $this->processtag=0;
    $this->processpar=0;
    $this->processparvalue=0;
    $this->slevel=array(0);
    $this->slevelpos=0;
    $this->quottype="";
    $this->skipto="";
    $this->incomment=0;
    $this->tagreg=array();
    $this->arr=array();
    $this->wasquot=0;
    // AddNewTag() counts $this->pars before Parse() has ever assigned it.
    $this->pars=array();
    $this->data="";
    $this->length=0;
    // The check has to look at the argument: $this->data is not an array
    // at this point, so testing it made this branch unreachable.
    if (is_array($data)) {
        $this->content=&$data;
        $this->c=&$this->content;
        $this->allreadyparsed=1;
        return;
    }
    if (!$datatype) {
        $this->name=$name;
        $this->data=(string)$data;
        $this->length=strlen($this->data);
        return;
    }
    $this->name=$data;
    $fp=fopen($this->name,"rb");
    if (!$fp) {
        // The class has no SetError() method, so calling it here ended the
        // script with a fatal error. Report the failure and leave the
        // parser empty; Parse() returns immediately on empty data.
        trigger_error("Can't open file $this->name.",E_USER_WARNING);
        return;
    }
    flock($fp,LOCK_SH);
    // stream_get_contents() also copes with empty files, for which
    // fread($fp, filesize(...)) is called with a zero length.
    $contents=stream_get_contents($fp);
    flock($fp,LOCK_UN);
    fclose($fp);
    if ($contents!==false) {
        $this->data=$contents;
        $this->length=strlen($contents);
    }
}
/**
 * PHP 4 style constructor name. PHP 8 no longer treats it as a constructor,
 * so the real work lives in __construct(); this method stays so that
 * explicit $parser->HtmlParser(...) calls keep working.
 */
function HtmlParser($data,$grammar,$name="",$datatype=0) {
    $this->__construct($data,$grammar,$name,$datatype);
}
/**
 * Reads the next word from $this->data starting at $this->pos.
 *
 * Outside a tag every character is appended to $this->text until "<" is
 * met. Inside a tag a word is a run of characters ended by whitespace, a
 * structural character (< > = /) or a closing quote; each structural
 * character is returned as a word of its own. A "<!-- ... -->" comment is
 * returned as a single word with $this->incomment set to 1.
 *
 * The word is handed back through the reference parameter. The "&" lives in
 * the signature because call-time pass-by-reference, GetWord(&$word), is a
 * compile error since PHP 5.4.
 *
 * @param string $word Receives the word read (may be empty at end of input).
 * @return bool false once the input is exhausted, true otherwise.
 */
function GetWord(&$word) {
    $word="";
    $this->wasquot=0;
    if ($this->pos>$this->length) return false;
    while (1) {
        if ($this->pos>$this->length) return false;
        if ($this->pos==$this->length) {
            // Step past the end so that the next call returns false.
            $this->pos++;
            return true;
        }
        if ($this->data[$this->pos]=="<") {
            // isset() guards the look-ahead when "<" is the last character.
            if (isset($this->data[$this->pos+1]) && $this->data[$this->pos+1]=="!")
                if ($this->length>6 && $this->length-$this->pos+1>6) {
                    if (substr($this->data,$this->pos,4)=="<!--") {
                        $this->incomment=1;
                        // "<=" so that a "-->" sitting exactly at the end
                        // of the input is still recognised as the closer.
                        while($this->pos<=$this->length-3) {
                            if (substr($this->data,$this->pos,3)=="-->") {
                                $word.="-->";
                                $this->pos+=3;
                                break;
                            } else
                                $word.=$this->data[$this->pos++];
                        }
                        if ($this->incomment) break;
                    }
                }
        }
        if (!$this->processtag) {
            if ($this->data[$this->pos]=="<") {
                $this->processtag=1;
                // Remember where the tag starts inside the collected text.
                $this->tagpos=strlen($this->text);
            } else {
                $this->text.=$this->data[$this->pos++];
                continue;
            }
        }
        if (in_array($this->data[$this->pos],$this->dc)) {
            // An unquoted attribute value ends at "<" or ">".
            if (($this->data[$this->pos]=="<" || $this->data[$this->pos]==">") && $this->quotstate==-1 && $this->processparvalue) {
                $this->processparvalue=0;
                return true;
            }
            // Whitespace outside quotes ends the current word, if any.
            if (in_array($this->data[$this->pos],$this->sc) && $this->quotstate==-1) {
                $this->text.=$this->data[$this->pos++];
                if (strlen($word)) {
                    if ($this->processparvalue) $this->processparvalue=0;
                    return true;
                } else
                    continue;
            }
            if (!strlen($word)) {
                if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
                    if ($this->quotstate==-1) {
                        // Opening quote of an attribute value.
                        $this->wasquot=1;
                        $this->quotstate*=-1;
                        $this->quottype=$this->data[$this->pos];
                        $this->text.=$this->data[$this->pos++];
                        continue;
                    } elseif ($this->quottype==$this->data[$this->pos]) {
                        // Closing quote right after the opening one: empty value.
                        $this->quotstate*=-1;
                        $this->quottype=$this->data[$this->pos];
                        $this->processpar=$this->processparvalue=0;
                        $this->text.=$this->data[$this->pos++];
                        return true;
                    }
                } elseif (in_array($this->data[$this->pos],$this->nc)) {
                    $word.=$this->data[$this->pos];
                    $this->text.=$this->data[$this->pos++];
                    if ($this->processparvalue)
                        continue;
                    else
                        return true;
                }
            } else {
                if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) {
                    if ($this->quotstate==1) {
                        if ($this->data[$this->pos]==$this->quottype && $this->processparvalue) {
                            // Closing quote of the attribute value.
                            $this->quotstate*=-1;
                            $this->quottype=$this->data[$this->pos];
                            $this->processpar=$this->processparvalue=0;
                            $this->text.=$this->data[$this->pos++];
                        } else {
                            if ($this->data[$this->pos]==$this->quottype) {
                                $this->quotstate*=-1;
                                $this->quottype="";
                            }
                            $word.=$this->data[$this->pos];
                            $this->text.=$this->data[$this->pos++];
                            continue;
                        }
                    }
                    return true;
                } else {
                    if (in_array($this->data[$this->pos],$this->nc)) {
                        if ($this->quotstate==-1) {
                            if ($this->processparvalue) {
                                // "/" and "=" stay part of an unquoted value.
                                if($this->data[$this->pos]!="/" && $this->data[$this->pos]!="=") return true;
                                $word.=$this->data[$this->pos];
                                $this->text.=$this->data[$this->pos++];
                                continue;
                            }
                        } else {
                            // Inside quotes structural characters are plain text.
                            $word.=$this->data[$this->pos];
                            $this->text.=$this->data[$this->pos++];
                            continue;
                        }
                        return true;
                    } elseif ($this->quotstate==-1 && $this->processparvalue && strlen($word)) {
                        if ($this->data[$this->pos]==" ") {
                            $this->text.=$this->data[$this->pos++];
                            $this->processparvalue=0;
                            return true;
                        }
                    }
                }
            }
        }
        $word.=$this->data[$this->pos];
        $this->text.=$this->data[$this->pos++];
    }
    return true;
}
/**
 * Parses $this->data into the tree stored in $this->content.
 *
 * Each word returned by GetWord() is classified as "<", "/", "=", ">" or
 * "any word" and fed to the transition table below. Negative states are
 * actions: -1 malformed input (also used to emit comments), -2 an opening
 * tag is complete, -3 a closing tag is complete. Does nothing when the
 * input is empty.
 */
function Parse() {
    // Rows are input classes, columns are the current state.
    $automat=array(
    // states   0   1   2   3   4   5   6   7   8
    "0"=>array( 1, -1, -1, -1, -1, -1, -1, -1, -1),// <
    "1"=>array(-1,  7,  6,  6,  6,  6, -1, -1, -1),// /
    "2"=>array(-1, -1, -1,  4, -1, -1, -1, -1, -1),// =
    "3"=>array(-1, -1, -2, -2, -2, -2, -2, -1, -3),// >
    "4"=>array(-1,  2,  3,  3,  5,  3, -1,  8, -1) // any word
    );
    if (!strlen($this->data)) return;
    $instates=array("<"=>0,"/"=>1,"="=>2,">"=>3);
    $parcount=0;
    $state=0;
    $xmlclose=0;
    $word="";
    // AddNewTag() counts $this->pars, so it has to be an array from the start.
    if (!is_array($this->pars)) $this->pars=array();
    $this->c=&$this->content;
    $this->cp=&$this->content["contentpos"];
    // Stack entry 0 stands for the document root.
    $this->stacktag[0]["tag"]=&$this->c;
    $this->stacktag[0]["level"]=&$this->slevel;
    $this->stacktag[0]["levelpos"]=0;
    $this->stacktagpos=0;
    while(1) {
        if (!$isword=$this->GetWord($word)) break;
        $w=strtolower($word);
        if (!isset($instates[$w]))
            $instate=4;
        else
            $instate=$instates[$w];
        $state=$automat[$instate][$state];
        // A quoted word that follows "=" is a value even when it reads "/".
        if ($this->wasquot && $state==6) $state=5;
        switch($state) {
            case -3:// end parse close tag
                if (strlen($this->skipto) && $this->tagname!=$this->skipto) {
                    $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                    $this->pars=array();
                    break;
                } else
                    $this->skipto="";
                $script=($this->tagname=="script") ? 1:0;
                // Text collected before the tag goes out first.
                $this->AddNewText(substr($this->text,0,$this->tagpos),$script);
                $this->AddNewTag(0);
                $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                $this->quottype="";
                $this->quotstate=-1;
                $this->text="";
                $this->pars=array();
                $this->tagpos=0;
                break;
            case -2:// end parse open tag
                if (strlen($this->skipto)) {
                    $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                    $this->pars=array();
                    break;
                }
                $this->AddNewText(substr($this->text,0,$this->tagpos));
                $this->AddNewTag(1,$xmlclose);
                $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                $this->quottype="";
                $this->quotstate=-1;
                $this->text="";
                $this->pars=array();
                $this->tagpos=0;
                // Tags flagged "nohavetags" in the grammar: ignore everything
                // up to their own closing tag.
                if (isset($this->pg[$this->tagname]["nohavetags"]) && !strlen($this->skipto)) $this->skipto=$this->tagname;
                break;
            case -1:// Error found
                $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                $this->pars=array();
                if ($this->incomment) {
                    if (strlen($this->text)) {
                        $this->AddNewText($this->text);
                        $this->text="";
                        $this->tagpos=0;
                    }
                    $this->AddNewText($word,0,1);
                    $this->incomment=0;
                    break;
                }
                if ($word=="<") {
                    // A new "<" inside a broken tag restarts tag parsing.
                    $state=1;
                    $this->processtag=1;
                    $this->processparvalue=0;
                    $this->tagpos=strlen($this->text)-1;
                    $this->quottype="";
                    $this->quotstate=-1;
                }
                break;
            case 2:// got any word as tagname, waiting '/' or '>' or any word as parameter name
                $this->tagname=$w;
                $xmlclose=0;
                // preg_match() replaces ereg(), removed in PHP 7; the D
                // modifier keeps "$" from matching before a trailing newline.
                if (!preg_match('/^[a-zA-Z0-9!_-]+$/D',$this->tagname) || strlen($this->skipto)) {
                    $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                    $this->quottype="";
                    $this->quotstate=-1;
                    $this->pars=array();
                    break;
                }
                break;
            case 3:// got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name
                $this->parname=$w;
                if (!preg_match('/^[a-zA-Z0-9!_-]+$/D',$this->parname) || strlen($this->skipto)) {
                    $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0;
                    $this->quottype="";
                    $this->quotstate=-1;
                    $this->pars=array();
                    break;
                }
                $this->processpar=1;
                if ($w!="/") {
                    $parcount++;
                    // Marked as value-less until a value arrives in state 5.
                    $this->pars[$this->parname]["single"]=1;
                } else
                    $xmlclose=1;
                break;
            case 4:// got '=' waiting '/' or '>' or any word as parameter value
                $this->processparvalue=1;
                break;
            case 5:// got any word as parameter value, waiting '/' or '>' or any word as parameter name
                if ($this->parname!="/") {
                    unset($this->pars[$this->parname]["single"]);
                    $this->pars[$this->parname]["value"]=$word;
                    $this->pars[$this->parname]["quot"]=$this->quottype;
                }
                $this->quottype="";
                $this->processpar=$this->processparvalue=0;
                break;
            case 6:// got '/' waiting '>'
                $xmlclose=1;
                break;
            case 8:// got any word as close tag name, waiting '>'
                $this->tagname=$w;
                break;
        }
        // NOTE(review): the constructor seeds the key "state" while this
        // writes "states"; nothing in this class reads either one.
        $this->prevstate["states"]=$state;
        $this->prevstate["word"]=$word;
    }
    // Whatever text is left after the last tag becomes a final text node.
    if (strlen($this->text)) $this->AddNewText($this->text);
}
/**
 * Appends the tag named $this->tagname to the tree and maintains the stack
 * of open tags.
 *
 * Closing tag that matches an open tag on the stack: a "close" node is
 * written as the last child of that open tag, the stack is unwound to it
 * and the method returns. Opening tag that the grammar rule "closeon" of
 * the tag on top of the stack reacts to: the current container is moved
 * back to the container holding that tag before the new node is added.
 * In every other case the node is appended to the current container; an
 * opening tag that is neither self-closed nor declared with
 * "endtag" => "absent" becomes the new current container.
 *
 * @param int $open     Non-zero for an opening tag, zero for a closing tag.
 * @param int $xmlclose Non-zero when the tag was written as <name ... />.
 */
function AddNewTag($open,$xmlclose=0) {
    $name=$this->tagname;
    // A tag is expected to be closed unless the grammar says "absent".
    $hasendtag=!(isset($this->pg[$name]["endtag"]) && $this->pg[$name]["endtag"]=="absent");
    $actionclose=0;
    $i=-1;
    if (!$open && $hasendtag) $actionclose=1;
    if ($open && $this->stacktagpos>0) {
        // Only the tag on top of the stack is consulted for "closeon".
        $top=$this->stacktagpos;
        $container=isset($this->stacktag[$top]["tag"]) ? $this->stacktag[$top]["tag"] : null;
        $toptag=null;
        if (is_array($container) && isset($container["contentpos"]) && isset($container[$container["contentpos"]]["data"]["name"]))
            $toptag=$container[$container["contentpos"]]["data"]["name"];
        if ($toptag!==null && isset($this->pg[$toptag]["closeon"])) {
            $rule=$this->pg[$toptag]["closeon"];
            $in=(isset($rule["in"]) && is_array($rule["in"])) ? $rule["in"] : array();
            $notin=(isset($rule["notin"]) && is_array($rule["notin"])) ? $rule["notin"] : array();
            if ((sizeof($in) && in_array($name,$in)) || (sizeof($notin) && !in_array($name,$notin))) {
                $actionclose=2;
                $i=$top;
            }
        }
    }
    if ($actionclose==1) {
        $i=$this->FindTag($name);
        if ($i>0) {
            $registered=isset($this->tagreg[$name]) ? $this->tagreg[$name] : null;
            $num=isset($this->stacktag[$i]["num"]) ? $this->stacktag[$i]["num"] : null;
            if ($registered!=$num) $i=-1;
        } else {
            // Stack entry 0 is the document root, not an open tag; a stray
            // closing tag must never pop it.
            $i=-1;
        }
    }
    if ($actionclose && $i>-1) {
        $this->c=&$this->stacktag[$i]["tag"];
        $this->cp=&$this->c["contentpos"];
        $this->stacktagpos=$i;
        if ($actionclose==1) {
            // The close node is stored as the last child of the tag it closes.
            $c=&$this->c[$this->c["contentpos"]]["content"];
            $cp=&$c["contentpos"];
            $cp++;
            $c[$cp]["type"]="tag";
            $c[$cp]["data"]["name"]=$name;
            $c[$cp]["data"]["type"]="close";
            if (!empty($this->tagreg[$name])) $this->tagreg[$name]--;
            $this->stacktag[$this->stacktagpos]["num"]=isset($this->tagreg[$name]) ? $this->tagreg[$name] : null;
            $this->stacktagpos--;
        }
        // Drop the stack entries above the new top. The keys are collected
        // first so that removing entries does not cut the walk short.
        foreach (array_keys($this->stacktag) as $k)
            if ($k>$this->stacktagpos) unset($this->stacktag[$k]);
        if ($actionclose==1) return;
    }
    $this->cp++;
    $this->c[$this->cp]["type"]="tag";
    $this->c[$this->cp]["data"]["name"]=$name;
    $this->c[$this->cp]["data"]["type"]=($open) ? "open" : "close";
    if (!$open && !empty($this->tagreg[$name])) $this->tagreg[$name]--;
    if ($xmlclose) $this->c[$this->cp]["xmlclose"]=1;
    // !empty() instead of sizeof(): $this->pars may not be an array yet.
    if (!empty($this->pars)) $this->c[$this->cp]["pars"]=$this->pars;
    if ($open && !$xmlclose && $hasendtag) {
        if (!isset($this->tagreg[$name])) $this->tagreg[$name]=0;
        $this->tagreg[$name]++;
        $this->stacktagpos++;
        // Remember the container holding the tag, then descend into the
        // tag's own content.
        $this->stacktag[$this->stacktagpos]["tag"]=&$this->c;
        $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$name];
        $this->c[$this->cp]["content"]=array();
        $this->c[$this->cp]["content"]["contentpos"]=-1;
        $this->c=&$this->c[$this->cp]["content"];
        $this->cp=&$this->c["contentpos"];
    }
}
/**
 * Appends a text or comment node to the container currently being filled
 * and clears the pending text buffer. Empty strings are ignored.
 *
 * @param string $text    Node contents.
 * @param int    $script  Non-zero to run the frame-target rewrites below
 *                        over $text before it is stored.
 * @param int    $comment Non-zero to store the node as "comment"; otherwise
 *                        it is stored as "text" together with the name of
 *                        the tag read last ("ot").
 */
function AddNewText($text,$script=0,$comment=0) {
    if (strlen($text)==0) {
        return;
    }
    $this->cp++;
    $slot=$this->cp;
    if ($comment) {
        $this->c[$slot]["type"]="comment";
    } else {
        $this->c[$slot]["type"]="text";
        $this->c[$slot]["ot"]=$this->tagname;
    }
    if ($script) {
        $patterns=array(
            "/_top/",
            "/top.location.href/",
            "/([ \n]+)?window\.name/",
            "/parent.location/"
        );
        $replacements=array(
            "_echoserver_file_space",
            "parent.frames('_echoserver_file_space').src",
            "//window.name",
            "parent.frames('_echoserver_file_space').src"
        );
        $text=preg_replace($patterns,$replacements,$text);
    }
    $this->c[$slot]["data"]=$text;
    $this->text="";
}
/**
 * Searches the stack of open tags, from the top down, for a tag with the
 * given name.
 *
 * Entry 0 is skipped: it stands for the document root, and the last node
 * of the root container may be a text node or an already closed tag.
 * Nodes whose "data" is not an array (text, comments) are skipped as well,
 * because indexing a string with "name" compares only its first character
 * on PHP 7 and throws on PHP 8.
 *
 * @param string $tagname Lowercase tag name to look for.
 * @return int Stack index of the matching open tag, or -1 when none matches.
 */
function FindTag($tagname) {
    for ($i=$this->stacktagpos;$i>0;$i--) {
        if (!isset($this->stacktag[$i]["tag"])) continue;
        $container=$this->stacktag[$i]["tag"];
        if (!is_array($container) || !isset($container["contentpos"])) continue;
        $last=$container["contentpos"];
        if (!isset($container[$last]["data"]) || !is_array($container[$last]["data"])) continue;
        if (isset($container[$last]["data"]["name"]) && $container[$last]["data"]["name"]==$tagname)
            return $i;
    }
    return -1;
}
/**
 * Walks a parsed container recursively and records every opening <td> and
 * <div> in $this->arr.
 *
 * Four parallel lists are appended to: "arr" (the tag's content container,
 * or null when it has none), "text" (the text from getmytext() with every
 * tag except <img> stripped), "tag" (the tag name) and "len" (the length of
 * the unstripped text).
 *
 * @param array $arr Container with numeric node keys and a "contentpos" key.
 */
function getarr($arr){
    // Tags without children carry no "content" key, so anything that is
    // not a proper container is ignored.
    if (!is_array($arr) || !isset($arr['contentpos'])) return;
    $count=$arr['contentpos']+1;
    for ($i=0; $i<$count; $i++) {
        // Only tag nodes keep an array under "data".
        if (!isset($arr[$i]['data']) || !is_array($arr[$i]['data'])) continue;
        $node=$arr[$i];
        $name=isset($node['data']['name']) ? $node['data']['name'] : '';
        $type=isset($node['data']['type']) ? $node['data']['type'] : '';
        $content=isset($node['content']) ? $node['content'] : null;
        $lower=strtolower($name);
        if (($lower=="td" || $lower=="div") && $type=="open") {
            $str=is_array($content) ? $this->getmytext($content) : '';
            $this->arr['arr'][]=$content;
            $this->arr['text'][]=strip_tags($str,"<img>");
            $this->arr['tag'][]=$name;
            $this->arr['len'][]=strlen($str);
        }
        if (is_array($content)) $this->getarr($content);
    }
}
/**
 * Collects the readable text of a parsed container.
 *
 * A text node that contains one of . ! ? , is emitted wrapped in the tag
 * name stored with it ("ot") and followed by CRLF, or after a single space
 * when it has no such name. A text node without punctuation is kept only
 * when its "ot" is one of the inline tags accepted by issettt(). Tag nodes
 * are descended into unless issetm() rejects their name.
 *
 * @param array $arr Container with numeric node keys and a "contentpos" key.
 * @return string Collected text; empty when $arr is not a container.
 */
function getmytext($arr){
    $text='';
    if (!is_array($arr) || !isset($arr['contentpos'])) return $text;
    $count=$arr['contentpos']+1;
    for ($i=0; $i<$count; $i++) {
        if (!isset($arr[$i])) continue;
        $node=$arr[$i];
        $data=isset($node['data']) ? $node['data'] : '';
        if (is_array($data)) {
            // Tag node: descend unless the tag is on the issetm() block list.
            $name=isset($node['data']['name']) ? $node['data']['name'] : '';
            if ($this->issetm($name) && isset($node['content']))
                $text.=$this->getmytext($node['content']);
            continue;
        }
        // Text and comment nodes both keep a plain string under "data";
        // comment nodes carry no "ot".
        $ot=isset($node['ot']) ? (string)$node['ot'] : '';
        $hastext=strlen(trim($data))>0;
        $clean=trim(strip_tags($data,"<img>"));
        if (preg_match("/[.!?,]/i",$data) && $hastext) {
            if (strlen($ot)>0) {
                $text.="<".$ot."> ".$clean."</".$ot.">\r\n";
            } else {
                $text.=" ".$clean;
            }
        } elseif ($this->issettt($ot) && $hastext) {
            // This length check used to call strlen() with a stray second
            // argument, so the branch could never be taken.
            $text.=" ".$clean;
        }
    }
    return $text;
}
/**
 * Tells whether a tag name is one of the inline tags b, i, strong, em or a.
 * The comparison ignores case.
 *
 * @param string $str   Tag name.
 * @param string $param Not used by this method.
 * @return bool
 */
function issettt($str,$param="none"){
    $inline=array("b","i","strong","em","a");
    return in_array(strtolower($str),$inline,true);
}
/**
 * Tells whether getmytext() may descend into a tag: false for td, tr,
 * table, div, a and script, true for every other name. The comparison
 * ignores case.
 *
 * @param string $str   Tag name.
 * @param string $param Not used by this method.
 * @return bool
 */
function issetm($str,$param="none"){
    $blocked=array("td","tr","table","div","a","script");
    return !in_array(strtolower($str),$blocked,true);
}
/**
 * Returns the stripped text of the longest block recorded by getarr().
 * When several blocks share the greatest length the first one wins.
 *
 * @return string The text, or an empty string when getarr() recorded no
 *                block (counting the missing list is a TypeError on PHP 8).
 */
function maxstr(){
    if (empty($this->arr['len']) || !is_array($this->arr['len'])) return '';
    $max=0;
    $ch=0;
    foreach ($this->arr['len'] as $i=>$len) {
        if ($max < $len) {
            $max=$len;
            $ch=$i;
        }
    }
    return isset($this->arr['text'][$ch]) ? $this->arr['text'][$ch] : '';
}
}
/**
 * Extracts the main text of an HTML document: the document is parsed and
 * the text of the longest <td> or <div> found is returned.
 *
 * The tag grammar is read from the serialized file "ff1.php", resolved
 * against the current working directory. When that file is missing or does
 * not hold an array, an empty grammar is used instead of passing false to
 * the parser.
 *
 * @param string $text HTML markup.
 * @return string Text of the longest block, as returned by maxstr().
 */
function getcontent($text){
    $grammar=array();
    $raw=is_readable("ff1.php") ? file_get_contents("ff1.php") : false;
    if ($raw!==false) {
        // NOTE(review): unserialize() can instantiate arbitrary classes;
        // this is only safe while ff1.php is a trusted local file.
        $loaded=unserialize($raw);
        if (is_array($loaded)) $grammar=$loaded;
    }
    $p=new HtmlParser($text,$grammar);
    $p->Parse();
    $p->getarr($p->content);
    return $p->maxstr();
}
?>
Рекомендую посмотреть в сторону *** скрытое содержание ***:
офигенный функционал + куча примеров на все случаи жизни.