<?php
include_once('simple_html_dom.php');
// Download and parse the page. In simple_html_dom, file_get_html() gives back
// false when the document could not be loaded, so stop with a readable message
// instead of failing later on a method call against a non-object.
$html = file_get_html('http://google.com');
if (!$html) {
    exit('Unable to load or parse http://google.com');
}

// Locate the <body> element. If several were found the last one wins (this is
// what the original loop did); if none is found $e stays null and $body empty.
$e = null;
$body = '';
foreach ($html->find('body') as $e) {
    $body = $e->innertext . '<br>'; //inner HTML of <body>
}
echo '<p><b><u>This is the body: </u></b>'.$body.'</p>{end body}<br>';

//all common <body> attributes
$body_attr_arr = array(
    'class','dir','id','lang','style','title','xml:lang',
    'alink','background','bgcolor','link','text','vlink'
);

$array = array();  // direct children of <body>
$return = array(); // attribute name => attribute value, for attributes that are present
if ($e !== null) {
    $array = $e->children();
    foreach ($array as $key => $val) {
        // Interpolating the node converts it to a string (its markup).
        echo "<br><b>Body's child #$key:</b> <p>$val</p>";
    }

    foreach ($body_attr_arr as $attr) {
        if (isset($e->$attr)) {
            // Key by attribute name so the report below can say which attribute it is.
            $return[$attr] = $e->$attr;
        }
    }
}

// NOTE(review): values come from a remote page and are echoed unescaped, as before.
foreach ($return as $key => $val) {
    echo "<br>Body's $key attribute: <u><i>$val</i></u><br>";
}
//body's parts, which are not in tags:
// Building blocks for the pattern below (all used with the s, i and x flags):
//   $attrs     - zero or more name=value attributes, optional whitespace and an optional "/"
//   $singletag - elements written without a closing tag
//   $blocktag  - block-level elements; seeing one of these implicitly ends an open <p>
$attrs='\b(?: \s*\b\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^<>\s"\']+) )*+ \s*+ /?+';
$singletag='(?:BASEFONT|BR|AREA|LINK|IMG|PARAM|HR|INPUT|COL|FRAME|ISINDEX|BASE|META)';
$blocktag='(?:P|DL|DIV|CENTER|NOSCRIPT|NOFRAMES|BLOCKQUOTE|FORM|ISINDEX|HR|TABLE|FIELDSET|ADDRESS|UL|OL|DIR|MENU|PRE|H[1-6])';

// Each match consumes "some text, then one element (or end of input)".
//   group 1 - the text in front of the element; this is what gets reported
//   group 2 - the tag name of a generic element
//   group 3 - a nested element with the same tag name, matched recursively via (?-1)
// The generic-element branch must interpolate $attrs. The previous version used
// $attr, a leftover loop variable from the attribute scan earlier in the script,
// which turned the branch into "<name" followed by the literal text of that variable.
// Leading whitespace inside the pattern is insignificant because of the x flag.
$re="{
    ((?:[^<]++|.)*?)
    (?:
        <{$singletag}{$attrs}>
        |
        <p{$attrs}>.*?(?: </p\s*> | (?= <{$blocktag}{$attrs}>) | $)
        |
        <(\w+){$attrs}>
        (?:
            [^<]++|
            (
                <\g{2}{$attrs}>
                (?:
                    [^<]++|
                    (?-1)|
                    . )*?
                (?:</\g{2}\s*>|$)
            )|
            . )*?
        (?:</\g{2}\s*>|$)
        |
        $
    )
}six";

// $body is produced by the body section above; fall back to an empty string so
// this section does not depend on a <body> having been found.
$body_html = isset($body) ? $body : '';
$text = "<body>$body_html</body>";
$text=preg_replace("{<body$attrs>(.*)</body>}six", '$1', $text);

// preg_replace() yields null and preg_match_all() yields false when PCRE gives
// up (for example on its backtracking limit); report that instead of printing
// an undefined result.
$m = array();
if ($text === null || preg_match_all($re, $text, $m) === false) {
    echo '<p>Could not split the body text (PCRE error code '.preg_last_error().').</p>';
    $m = array(1 => array());
}
echo "<p><b>Body's parts, which are not in tags:</b></p>";
print_r($m[1]);
echo "<br>********************************************************";
//****************Paragraphs********************
// For every <p>: print its markup, its direct children and the attributes it carries.
// common <p> attributes to look for (the list is the same for every paragraph)
$prg_attr_arr = array('align','class','dir','id','lang','style','title','xml:lang');

foreach ($html->find('p') as $p) {
    $prg = $p->outertext . '<br>';
    echo '<br><br><p><b><u>This is the paragraph: </u></b>'.$prg .'</p>{end paragraph}<br>';

    $array_p = $p->children();
    foreach ($array_p as $key => $val) {
        echo "<br><b>Paragraph's child #$key:</b> <p>$val</p>";
    }

    // Start from an empty list for each paragraph: without the reset, every
    // paragraph would also re-print the attributes of all earlier paragraphs,
    // and the loop below would run on an undefined variable when none were found.
    $return_p = array(); // attribute name => attribute value
    foreach ($prg_attr_arr as $attr_p) {
        if (isset($p->$attr_p)) {
            $return_p[$attr_p] = $p->$attr_p;
        }
    }

    // NOTE(review): values come from a remote page and are echoed unescaped, as before.
    foreach ($return_p as $pkey => $pval) {
        echo "<br>Paragraph's $pkey attribute: <u><i>$pval</i></u><br>";
    }
}
echo "********************************************************";
//****************Links********************
// For every <a>: print its markup, its direct children and the attributes it carries.
// common <a> attributes to look for (the list is the same for every link)
$lnk_attr_arr = array(
    'accesskey','class','dir','id','lang','style','title','xml:lang','tabindex',
    'charset','coords','href','hreflang','name','rel','rev','shape','target'
);

foreach ($html->find('a') as $a) {
    $lnk = $a->outertext . '<br>';
    echo '<br><br><p><b><u>This is the links: </u></b>'.$lnk.'</p>{end link}<br>';

    $array_a = $a->children();
    foreach ($array_a as $key => $val) {
        echo "<br><b>Link's child #$key:</b> <p>$val</p>";
    }

    // Reset per link so attributes of earlier links are not repeated and the
    // loop below never runs on an undefined variable.
    $return_a = array(); // attribute name => attribute value
    foreach ($lnk_attr_arr as $attr_a) {
        if (isset($a->$attr_a)) {
            $return_a[$attr_a] = $a->$attr_a;
        }
    }

    // NOTE(review): values come from a remote page and are echoed unescaped, as before.
    foreach ($return_a as $key_a => $val_a) {
        echo "<br>Link's $key_a attribute: <u><i>$val_a</i></u><br>";
    }
}
echo "********************************************************";
//****************Images********************
// For every <img>: print its markup and the attributes it carries.
// common <img> attributes to look for (the list is the same for every image)
$img_attr_arr = array(
    'alt','src','align','border','height','hspace','ismap','longdesc','usemap',
    'vspace','width','class','dir','id','lang','style','title','xml:lang'
);

foreach ($html->find('img') as $g) {
    $img = $g->outertext . '<br>';
    // The closing marker used to read "{end link}", copied from the links section.
    echo '<br><br><p><b><u>This is the images: </u></b>'.$img.'</p>{end image}<br>';

    // Reset per image so attributes of earlier images are not repeated and the
    // loop below never runs on an undefined variable.
    $return_img = array(); // attribute name => attribute value
    foreach ($img_attr_arr as $attr_img) {
        if (isset($g->$attr_img)) {
            $return_img[$attr_img] = $g->$attr_img;
        }
    }

    // NOTE(review): values come from a remote page and are echoed unescaped, as before.
    foreach ($return_img as $key_img => $val_img) {
        echo "<br>Image's $key_img attribute: <u><i>$val_img</i></u><br>";
    }
}
?>