| fünkmaster |
01-29-2005 08:08 PM |
... use this to harvest domains that have reflogs or blogs that show reflogs.
$query_string = 'XXX'; -> replace XXX with your keyword, e.g. "recent referrers" works great.
Code:
<?php
require("google.class.php");
require("db.php");
// Search phrase sent to Google for every TLD; replace the 'XXX' placeholder before running.
$query_string = 'XXX';
// Upper bound on the result offset requested per TLD (the crawl loop pages in steps of 100).
$max = 1000;
/**
 * Reduce a host name to a shorter "base" domain.
 *
 * Only the leading run of letters, digits, dots and hyphens is considered
 * (so "example.com/path" is treated as "example.com"). The labels are then
 * walked from right to left and prepended to the result; the walk stops after
 * the first label, other than the rightmost one, that is longer than two
 * characters and is not one of the generic suffix labels in $tld.
 *
 * Examples: "www.example.com" -> "example.com",
 *           "blog.example.co.uk" -> "example.co.uk".
 *
 * @param string $domain Host name, optionally followed by other characters.
 * @return string The reduced domain, or "" when $domain does not start with
 *                a host-name character.
 */
function get_domain($domain) {
    // Labels that are treated as part of the suffix rather than as the site name.
    $tld = array("co", "com", "net", "org", "id", "ac", "edu", "gov", "mil");

    // preg_match replaces eregi(): the ereg family was removed in PHP 7.
    // Bail out explicitly instead of reading $regs[1] from a failed match.
    if (!preg_match('/^([a-zA-Z0-9.-]+)/', (string) $domain, $regs)) {
        return "";
    }

    $parts = explode('.', $regs[1]);
    $last = count($parts) - 1;
    $result = "";
    for ($i = $last; $i >= 0; $i--) {
        // Compare against "" rather than relying on truthiness, so an
        // accumulated "0" still gets its separating dot.
        $result = $parts[$i].($result !== "" ? '.' : '').$result;
        if ($i != $last
            && strlen($parts[$i]) > 2
            && !in_array($parts[$i], $tld, true)
        ) break;
    }
    return $result;
}
/**
 * Decide whether a host name is acceptable for harvesting.
 *
 * A host is rejected when it contains anything other than letters, digits,
 * dots and hyphens, or when any of its dot-separated labels is "gov", "mil"
 * or "edu" (compared case-insensitively, since host names are not
 * case-sensitive).
 *
 * @param string $domain Host name to check.
 * @return int 1 when the host is acceptable, 0 otherwise.
 */
function valid($domain) {
    $bad = array("gov", "mil", "edu");

    // preg_match replaces ereg()/eregi(), which were removed in PHP 7.
    // The D modifier stops "$" from matching before a trailing newline.
    // Because the pattern is anchored at both ends, the whole string is the
    // host, so no second extraction pass is needed.
    $domain = (string) $domain;
    if (!preg_match('/^[a-zA-Z0-9.-]+$/D', $domain)) return 0;

    foreach (explode('.', strtolower($domain)) as $part) {
        if (in_array($part, $bad, true)) return 0;
    }
    return 1;
}
// Crawl Google once per row of the TLD table and store every host that passes
// valid() in the Sites table.
// NOTE(review): this still uses the legacy mysql_* API because the connection
// is presumably opened that way in db.php (not visible here) — confirm before
// porting to mysqli/PDO.
$google = new google_search;
$query = mysql_query("SELECT * FROM TLD");
if ($query === false) {
    // Nothing can be crawled without the TLD list; report instead of hiding it.
    die('Could not read the TLD table: '.mysql_error());
}
while ($row = mysql_fetch_array($query)) {
    // First host of the most recently fetched page for this TLD. Reset per
    // TLD so the repeated-page check never compares against another TLD.
    $first_url = "";
    for ($start = 0; $start < $max; $start += 100) {
        $google->query($query_string.' site:'.$row['TLD'].' ', $start);
        if (count($google->results) == 0) break;
        $old_url = $first_url;
        $first_url = "";
        foreach ($google->results as $result) {
            // Prefer a dotted host named in the title after " for "; otherwise
            // fall back to the host part of the result URL. Each result is
            // matched on its own, so a failed match cannot reuse the previous
            // result's captures.
            $url = "";
            if (preg_match('/ for ([^ ]+)/i', $result['title'], $regs)
                && strpos($regs[1], '.') !== false
            ) {
                $url = $regs[1];
            } elseif (preg_match('#^http://([^/]+)#i', $result['url'], $regs2)) {
                $url = $regs2[1];
            }
            $url = preg_replace('#http://#i', '', $url);
            if ($url === "") continue; // no host could be derived from this result

            if ($first_url === "") {
                $first_url = $url;
                // Same first host as the previous page: treat the page as a
                // repeat and move on to the next TLD.
                if ($old_url !== "" && $old_url == $first_url) break 2;
            }
            if (!valid($url)) continue;

            // Scraped strings are untrusted: escape every value before it is
            // placed in the statement. The insert result is not checked, as
            // in the original.
            mysql_query("INSERT INTO Sites VALUES (
                '".mysql_real_escape_string(get_domain($url))."',
                '".mysql_real_escape_string($url)."',
                '".mysql_real_escape_string($result['url'])."',
                0, 0)");
        }
    }
}
?>
here is google.class.php:
Code:
<?php
/**
 * Minimal scraper for Google's HTML result pages.
 *
 * After query() returns, $results holds one array per matched result link,
 * each with the keys 'url' and 'title'.
 */
class google_search {
    // Base search URL; the URL-encoded query, the start offset and "&sa=N"
    // are appended to it by query().
    var $url = 'http://www.google.com/search?num=100&hl=en&as_qdr=m3&ie=UTF-8&oe=UTF-8&q=';
    // Results of the most recent query(); an empty array until then so that
    // count() is always safe.
    var $results = array();

    /**
     * Fetch one result page and fill $this->results from it.
     *
     * If the page cannot be opened, $this->results is left empty.
     *
     * @param string $query Search terms (URL-encoded here).
     * @param int    $start Offset of the first result to request.
     */
    function query($query, $start = 0) {
        // Clear first so a failed fetch never leaves the previous page's results behind.
        $this->results = array();

        $fd = fopen($this->url.urlencode($query)."&start=".$start."&sa=N", "r");
        if ($fd === false) {
            // fopen() has already raised a warning; do not pass false to fgets()/fclose().
            return;
        }

        // Compare with false explicitly so a falsy line such as "0" does not end the read early.
        while (($line = fgets($fd)) !== false) {
            $count = preg_match_all("/<p class=g><a href=(.+?)>(.+?)<\/a>/", $line, $matches, PREG_SET_ORDER);
            if (!$count) continue;
            foreach ($matches as $match) {
                $this->search_callback($match);
            }
        }
        fclose($fd);
    }

    /**
     * Append one regex match to $this->results.
     *
     * @param array $match Match set from query(): index 1 is the href value,
     *                     index 2 is the link text.
     */
    function search_callback($match) {
        $this->results[] = array(
            'url' => $match[1],
            'title' => $match[2]);
    }
}
?>
... now put all the domains you want to show up in reflogs in a file (domains) and run import.sh (./import.sh domains).
Code:
#!/usr/local/bin/bash
# Replace the contents of the Referrers table (database "hitter") with the
# whitespace-separated entries found in the file given as the first argument.
#
# Usage: ./import.sh <domains-file>

# Refuse to run without a readable input file, so the table is never emptied
# when there is nothing to load.
if [ "$#" -ne 1 ] || [ ! -r "$1" ]; then
    echo "usage: $0 <domains-file>" >&2
    exit 1
fi

echo "DELETE FROM Referrers;" | mysql -u root hitter

# Split on any whitespace (as the original word-splitting loop did) but without
# glob expansion, and escape backslashes and single quotes so an entry cannot
# break out of the SQL string literal.
tr -s '[:space:]' '\n' < "$1" | while IFS= read -r domain; do
    [ -n "$domain" ] || continue
    domain=${domain//\\/\\\\}
    domain=${domain//\'/\'\'}
    printf "INSERT INTO Referrers VALUES ('%s');\n" "$domain"
done | mysql -u root hitter
... that's it and NO, I will not give any installation support, if you can't get it working you don't deserve it to be working.
enjoy
|