1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
| <?php
// Fetch a remote page, collect the text of every <p> and <span> element and
// print that text as UTF-8. After the script runs, $content holds the fetched
// body (normalised to UTF-8), $dom the parsed document and $tab the list of
// extracted text fragments (all <p> first, then all <span>).
echo '<meta charset="UTF-8">';

$tab = array();
$user_agent = "Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0";

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => 'http://www.taobao.com',
    CURLOPT_USERAGENT      => $user_agent,
    CURLOPT_HEADER         => 0,
    CURLOPT_FOLLOWLOCATION => 1,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT        => 50,
    CURLOPT_MAXREDIRS      => 2,
));
$content = curl_exec($ch);
// Read the error text before the handle is released.
$curl_error = ($content === false) ? curl_error($ch) : '';
// Dropping the last reference releases the handle on every PHP version.
unset($ch);

if ($content === false || $content === '') {
    // Nothing to parse: report the failure instead of feeding a boolean or an
    // empty string to DOMDocument::loadHTML().
    trigger_error(
        'Could not fetch page: ' . ($curl_error !== '' ? $curl_error : 'empty response'),
        E_USER_WARNING
    );
} else {
    // Normalise the body to UTF-8. Bytes that are not valid UTF-8 are matched
    // (strict mode) against a short candidate list; ISO-8859-1 accepts any
    // byte sequence, so it serves as the last-resort fallback.
    if (!mb_check_encoding($content, 'UTF-8')) {
        $source_encoding = mb_detect_encoding(
            $content,
            array('GB18030', 'BIG-5', 'ISO-8859-1'),
            true
        );
        if ($source_encoding === false) {
            $source_encoding = 'ISO-8859-1';
        }
        $content = mb_convert_encoding($content, 'UTF-8', $source_encoding);
    }

    // loadHTML() assumes ISO-8859-1 when the markup does not declare a
    // charset. Turning every non-ASCII character into a numeric entity makes
    // the input pure ASCII, so the parser cannot misread it. This replaces the
    // deprecated 'HTML-ENTITIES' conversion target.
    $ascii_html = mb_encode_numericentity(
        $content,
        array(0x80, 0x10FFFF, 0, 0x1FFFFF),
        'UTF-8'
    );

    // Real-world markup is rarely valid; collect libxml complaints internally
    // rather than emitting one PHP warning per parse error.
    $previous_libxml_state = libxml_use_internal_errors(true);
    $dom = new DOMDocument;
    $dom->loadHTML($ascii_html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_state);

    foreach (array('p', 'span') as $tag_name) {
        foreach ($dom->getElementsByTagName($tag_name) as $node) {
            // nodeValue is already plain text; strip_tags() is kept from the
            // original so entity-encoded markup inside the text is dropped too.
            $tab[] = strip_tags($node->nodeValue);
        }
    }
}

foreach ($tab as $line) {
    // DOM text is always UTF-8, so no per-line re-encoding is needed. The text
    // comes from a remote site, so escape it before writing it into HTML.
    echo htmlspecialchars($line, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
?> |
Partager