manual 手册文件夹,重点看懂这手册即可 include "simple_html_dom.php" ;
// Create DOM from URL or file
// Create DOM from URL
// Create DOM from string __construct ( [string $filename] ) plaintext clear () load ( string $content ) save ( [string $filename] ) load_file ( string $filename ) set_callback ( string $function_name ) find ( string $selector [, int $index] ) $e->children ( [int $index] ) $e->parent () $e->first_child () $e->last_child () $e->next_sibling () $e->prev_sibling ()<?php
// Build a DOM object from the remote page.
$html = file_get_html('http://www.google.com/');

// Print the src attribute of every <img> element, one per line.
foreach ($html->find('img') as $element) {
    echo "{$element->src}<br>";
}

// Print the href attribute of every <a> element, one per line.
foreach ($html->find('a') as $element) {
    echo "{$element->href}<br>";
}
// Build a DOM object from the remote page.
$html = file_get_html('http://slashdot.org/');

// Collect title/intro/details text for every article block.
// Start from an empty list so print_r() below still receives an array
// (instead of an undefined variable) when no div.article is found.
$articles = [];
foreach ($html->find('div.article') as $article) {
    // A fresh array per article: nothing carries over between iterations.
    $articles[] = [
        'title'   => $article->find('div.title', 0)->plaintext,
        'intro'   => $article->find('div.intro', 0)->plaintext,
        'details' => $article->find('div.details', 0)->plaintext,
    ];
}

print_r($articles);
// Build a DOM object from an HTML string instead of a URL.
$html = str_get_html('<div id="hello">Hello</div><div id="world">World</div>');
// Set the class attribute on the <div> at index 1.
$html->find('div', 1)->class = 'bar';
// Replace the inner HTML of the first <div> whose id is "hello".
$html->find('div[id=hello]', 0)->innertext = 'foo';
// Echoing the DOM object prints the modified HTML.
echo $html; // Output: <div id="hello">foo</div><div id="world" class="bar">World</div>
//$html 所拥有的方法如下表所示
& properties
Name
Description
Constructor. If the filename parameter is set, the contents are loaded automatically, either from text or from a file/URL.
Returns the contents extracted from HTML.
Clean up memory.
Load contents from a string.
Dumps the internal DOM tree back into a string. If $filename is set, the resulting string is also saved to that file.
Load contents from a file or a URL.
Set a callback function.
Find elements by the CSS selector. Returns the Nth element object if index is set, otherwise returns an array of objects.
// Find all anchors, returns an array of element objects
$ret = $html->find('a');
// Find (N)th anchor, returns element object or null if not found (zero based)
$ret = $html->find('a', 0);
// Find last anchor, returns element object or null if not found (zero based)
$ret = $html->find('a', -1);
// Find all <div> with the id attribute
$ret = $html->find('div[id]');
// Find all <div> whose attribute id=foo
$ret = $html->find('div[id=foo]');
// Find all elements whose id=foo
$ret = $html->find('#foo');
// Find all elements whose class=foo
$ret = $html->find('.foo');
// Find all elements that have an id attribute
$ret = $html->find('*[id]');
// Find all anchors and images
$ret = $html->find('a, img');
// Find all anchors and images with the "title" attribute
$ret = $html->find('a[title], img[title]');
// Find all <li> in <ul>
$es = $html->find('ul li');
// Find nested <div> tags
$es = $html->find('div div div');
// Find all <td> in <table> whose class=hello
$es = $html->find('table.hello td');
// Find all <td> tags with attribute align=center inside <table> tags
$es = $html->find('table td[align=center]');
Element 的方法
//$e 所拥有的方法如下表所示
Attribute Name
Usage
$e->tag
Read or write the tag name of element.
$e->outertext
Read or write the outer HTML text of element.
$e->innertext
Read or write the inner HTML text of element.
$e->plaintext
Read or write the plain text of element.
// Parse a small HTML string and grab its first <div> element.
$html = str_get_html("<div>foo <b>bar</b></div>");
$e = $html->find("div", 0);
// Each property below exposes a different view of the same element.
echo $e->tag; // Returns: "div"
echo $e->outertext; // Returns: "<div>foo <b>bar</b></div>"
echo $e->innertext; // Returns: "foo <b>bar</b>"
echo $e->plaintext; // Returns: "foo bar"
6.DOM traversing
方法
Method
Description
Returns the Nth child object if index is set, otherwise return an array of children.
Returns the parent of element.
Returns the first child of element, or null if not found.
Returns the last child of element, or null if not found.
Returns the next sibling of element, or null if not found.
Returns the previous sibling of element, or null if not found.
// Starting from the first element matching "#div1", descend three levels
// via children(1) -> children(1) -> children(2) and print that node's id.
echo $html->find("#div1",
0)->children(1)->children(1)->children(2)->id;
// or: the same chain written with getElementById / childNodes / getAttribute
echo $html->getElementById("div1")->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id');
使用方法
/**
 * Element callback that hides every <b> tag.
 *
 * Blanks the outer HTML of the given element when its tag name is "b";
 * any other element is left untouched.
 *
 * @param object $element Element exposing writable `tag` and `outertext` properties.
 */
function my_callback($element)
{
    // Hide all <b> tags
    if ($element->tag === 'b') {
        $element->outertext = '';
    }
}
// Register the callback function by its function name
$html->set_callback('my_callback');
// Callback function will be invoked while dumping
echo $html;
1.文件夹结构如下:
2.简单范例
3.DOM的方法
// Build a DOM object from the remote page.
$html = file_get_html('http://www.google.com/');
$html->clear() ; // call a DOM method on the object (here: clear())
DOM methods
void
string
void
void
string
void
void
mixed
4.find 方法详细介绍
find ( string $selector [, int $index] )
// Find all anchors, returns an array of element objects
// Find all element which id=foo
// Find all <li> in <ul>
5.
$e = $html->find("div", 0);
// Example
mixed
element
element
element
element
element
// Example
附带: DOM方法 set_callback('my_callback')
// Write a function with parameter "$element"