View Single Post
Old 03-16-2007, 05:03 PM  
teksonline
So Fucking Banned
 
Join Date: Jan 2005
Location: At My Desk
Posts: 2,904
<?php
// Spider step: download one gallery page, parse its tags, and walk the links.
include('config.php');

// NOTE(review): $row is not defined in this snippet; presumably it is a gallery
// record with 'ID' and 'URL' keys supplied by config.php or an enclosing loop
// -- confirm against the caller.
$url = $row['URL']; // gallery url

$ch = curl_init();
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // give up connecting after 5 seconds
curl_setopt($ch, CURLOPT_USERAGENT, "teksonline spider");
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
$output = curl_exec($ch);

$response_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);

// With CURLOPT_RETURNTRANSFER set, curl_exec() returns false when the transfer
// fails. Treat that as an empty page so the string functions below never
// receive a boolean; the status line still gets printed.
if ($output === false) {
    $output = '';
}

// Collapse runs of spaces: the tag parser splits on whitespace, and sloppy
// HTML often has several spaces between attributes.
$output = preg_replace('/ {2,}/is', ' ', $output);

$array = parseHtml($output);
echo "{$row['ID']} $response_code: $url\n";

// Example of the parsed data: $array['A'][0]['HREF'] is the href of the first
// link on the page (the value still carries its surrounding quotes).
echo"\n";

// A page without any <a ...> tags produces no 'A' key at all, so default to
// an empty list rather than iterating over an undefined index.
$links = isset($array['A']) ? $array['A'] : array();

foreach ($links as $href) {
    /* $href holds the attributes of one link found on the page. Do something
       with it here -- it is probably a picture link or a movie link. */
}



/**
 * Collect the attributes of every tag in an HTML string.
 *
 * The string is scanned for "<...>" spans. The text inside each span is split
 * on whitespace: the first token is the tag name, every following token is
 * treated as one attribute of the form name=value. Tags without attributes
 * (e.g. <br>, </font>) are ignored.
 *
 * Limitations kept from the original implementation:
 *  - attribute values are returned exactly as written, including any
 *    surrounding quotes;
 *  - a quoted value that contains whitespace is split into several tokens.
 *
 * @param string $s_str HTML source to scan.
 * @return array Map of upper-cased tag name => list of occurrences, where each
 *               occurrence is a map of upper-cased attribute name => raw value
 *               ('' when the attribute has no "=value" part).
 */
function parseHtml($s_str) {
    $a_html = array();
    $i_offset = 0;

    // Search for the next tag in the string.
    while (($i_open = strpos($s_str, '<', $i_offset)) !== false) {
        $i_start = $i_open + 1;
        $i_close = strpos($s_str, '>', $i_start);
        if ($i_close === false) {
            // Unterminated tag at the end of the input. Stop here: reusing the
            // failed search result as the next offset would restart the scan
            // from the beginning and never finish.
            break;
        }
        $i_offset = $i_close;

        // Split on any whitespace so tags wrapped over several lines, or
        // padded with tabs or repeated spaces, are handled as well.
        $a_tokens = preg_split(
            '/\s+/',
            substr($s_str, $i_start, $i_close - $i_start),
            -1,
            PREG_SPLIT_NO_EMPTY
        );
        if ($a_tokens === false || count($a_tokens) < 2) {
            // Nothing but a tag name (or nothing at all): not interesting.
            continue;
        }

        $s_tagName = strtoupper(array_shift($a_tokens));

        // Turn tokens like src="http://..." into 'SRC' => '"http://..."'.
        $a_options = array();
        foreach ($a_tokens as $s_token) {
            // Split on the first "=" only, so values that contain "=" (query
            // strings in URLs) are kept whole.
            $a_pair = explode('=', $s_token, 2);
            $a_options[strtoupper($a_pair[0])] = isset($a_pair[1]) ? trim($a_pair[1]) : '';
        }

        // Appending gives each occurrence of the tag the next numeric index.
        $a_html[$s_tagName][] = $a_options;
    }

    return $a_html;
}

?>

There you are, halfway home.
teksonline is offline   Share thread on Digg Share thread on Twitter Share thread on Reddit Share thread on Facebook Reply With Quote