PHP/HTML/tidy parse file
Contents
Beautifying HTML Using Tidy
<?php
// Pretty-printing configuration: indent nested markup by four spaces and
// set the wrap margin to 4096 columns.
$options = array(
    "indent" => true,
    "indent-spaces" => 4,
    "wrap" => 4096,
);

// tidy_parse_file() returns false when the document cannot be loaded or
// parsed; stop with a clear error instead of handing false to the calls below.
$tidy = tidy_parse_file("http://www.php.net/", $options);
if ($tidy === false) {
    throw new RuntimeException("Unable to load or parse http://www.php.net/");
}

tidy_clean_repair($tidy);

// Echoing the tidy object prints the repaired, re-indented markup.
echo $tidy;
?>
Extracting URLs Using Tidy
<?php
/**
 * Recursively collect the href values of all <a> elements under a node.
 *
 * Anchors that carry no href attribute (for example <a name="...">) are
 * skipped instead of contributing a null entry.
 *
 * @param tidyNode   $node Node at which the walk starts.
 * @param array|null $urls Accumulator, passed by reference; anything that is
 *                         not an array is replaced by an empty array.
 *
 * @return array The collected href values (same contents as $urls).
 */
function dump_urls(tidyNode $node, &$urls = NULL) {
    if (!is_array($urls)) {
        $urls = array();
    }

    // The id property may be unset on some nodes, and attribute is not an
    // array when the element has no attributes, so both are probed via isset().
    if (isset($node->id)
        && $node->id === TIDY_TAG_A
        && isset($node->attribute["href"])
    ) {
        $urls[] = $node->attribute["href"];
    }

    if ($node->hasChildren()) {
        foreach ($node->child as $child) {
            // $urls is shared by reference, so nested calls append to it.
            dump_urls($child, $urls);
        }
    }

    return $urls;
}
// tidy_parse_file() returns false on failure; calling ->body() on that value
// would be a fatal error, so fail early with an explicit message.
$tidy = tidy_parse_file("http://www.php.net/");
if ($tidy === false) {
    throw new RuntimeException("Unable to load or parse http://www.php.net/");
}

// body() can return null when the document has no <body>; treat that as
// "no links" rather than passing null to dump_urls().
$body = $tidy->body();
$urls = ($body === null) ? array() : dump_urls($body);
print_r($urls);
?>
Reducing Bandwidth Usage Using Tidy
<?php
// Tidy configuration intended to shrink the emitted markup.
// NOTE(review): "drop-font-tags" is reportedly deprecated or removed in newer
// HTML Tidy releases - confirm it is accepted by the installed libtidy.
$options = array(
    "clean" => true,
    "drop-proprietary-attributes" => true,
    "drop-font-tags" => true,
    "drop-empty-paras" => true,
    "hide-comments" => true,
    "join-classes" => true,
    "join-styles" => true,
);

// tidy_parse_file() returns false when the document cannot be loaded or
// parsed; stop with a clear error instead of handing false to the calls below.
$tidy = tidy_parse_file("http://www.php.net/", $options);
if ($tidy === false) {
    throw new RuntimeException("Unable to load or parse http://www.php.net/");
}

tidy_clean_repair($tidy);

// Echoing the tidy object prints the cleaned markup.
echo $tidy;
?>
Retrieving the Root Node Using Tidy
<?php
// tidy_parse_file() returns false on failure; calling ->root() on that value
// would be a fatal error, so fail early with an explicit message.
$tidy = tidy_parse_file("http://www.php.net/");
if ($tidy === false) {
    throw new RuntimeException("Unable to load or parse http://www.php.net/");
}

// root() yields the top-most node of the parsed document tree.
$root = $tidy->root();
?>