Парсер Overture — скрипт для поиска популярных ключевых слов (кейвордов).
Введите ключевые слова в форму, по одному в строке; скрипт обработает их. Если получится, отчёт отправляется на указанный e-mail, в противном случае результат сохраняется локально в формате CSV (открывается в Excel). Для отправки отчёта требуется SMTP-сервер без авторизации.
Скриншот|Screenshot
Скриншот|Screenshot
Скриншот|Screenshot
Введите ключевые слова в форму, по одному в строке; скрипт обработает их. Если получится, отчёт отправляется на указанный e-mail, в противном случае результат сохраняется локально в формате CSV (открывается в Excel). Для отправки отчёта требуется SMTP-сервер без авторизации.
Код:
#!perl -w
use strict;
use LWP::UserAgent;
use HTTP::Request::Common;
use Net::SMTP;
# Unbuffer STDOUT so output reaches the browser as soon as it is printed.
$|=1;
# CGI response header; the empty line ends the header section.
print "Content-Type: text/html; charset=windows-1251\n\n";
# SCRIPT_NAME is echoed into the page and into the form's action attribute.
# Default it when absent (avoids an "uninitialized" warning under -w) and
# HTML-escape it so it cannot break out of the surrounding markup.
my $scriptName=defined($ENV{SCRIPT_NAME}) ? $ENV{SCRIPT_NAME} : "";
$scriptName=~s/&/&amp;/g;
$scriptName=~s/</&lt;/g;
$scriptName=~s/>/&gt;/g;
$scriptName=~s/"/&quot;/g;
# Page head, styles and the input form. The <center> opened on the last line
# is closed by the END block at the bottom of the script.
print<<HTML;
<html><head>
<title>Overture parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
<meta name="description" content="overture parser">
<style type="text/css">
body { scrollbar-face-color: #999999; scrollbar-highlight-color: white;
scrollbar-shadow-color: white; scrollbar-3dlight-color: #C0C0C0;
scrollbar-arrow-color: white; scrollbar-track-color: #354560;
scrollbar-darkshadow-color: #999999; background-color: #1E303C; color: #959FA5; }
.text { border: 1px solid #0B1D28; background-color: #354550; color: #FFFFFF;
font-family: Arial; font-size: 11px; padding: 2px; }
#textarea { border: 1px solid #0B1D28; background-color: #354550; color: white;
font-family: Arial; font-size: 11px; padding: 2px; height: 100px; width: 165px; }
#button { border: 1px solid #0B1D28; background-color: #354560; color: #FFFFFF;
font-family: Tahoma; font-size: 12px; padding: 3px; }
a:link { color: #959FA5; }
a:visited { color: #959FA5; }
a:hover,a:active { color: #FFFFFF; }
</style></head><body>
<table width="350" align="center">
<tr><td align="center"><strong>
# $scriptName
# ©oded by .:[KSURi]:.
# <a href="http://cup.su/">http://cup.su/</a>
</strong></td></tr>
<form action="$scriptName" method="post">
<tr><td align="center">
Keywords:
<textarea name="keywords" id="textarea"></textarea></td></tr>
<tr><td align="center">
Report to: <input type="text" name="report_to" size="15" class="text">
</td></tr>
<tr><td align="center">
SMTP: <input type="text" name="smtp" size="20" class="text">
</td></tr>
<tr><td align="center">
<input type="submit" value="Check keywords" id="button">
</td></tr></form></table>
<center>
HTML
# Request state: the raw POST body, a scratch copy of the "keywords" field,
# the report recipient, the SMTP host, and the keywords to check.
my($_POST,$words,$reportMail,$reportSmtp,@keywords );
# A request without a body (e.g. the first GET that only shows the form) stops
# here. Checking CONTENT_LENGTH first avoids read() with an undefined length.
my $postLength=$ENV{CONTENT_LENGTH};
exit(1) if(!defined($postLength)||$postLength!~/^\d+$/||$postLength==0);
read(STDIN,$_POST,$postLength,0)||exit(1) ;
my @pairs=split('&',$_POST);
foreach my $pair (@pairs)
{
    # Limit 2 keeps any literal "=" inside the value.
    my($k,$v)=split('=',$pair,2);
    next if(!defined($k));
    $v="" if(!defined($v));
    # application/x-www-form-urlencoded: "+" is a space, %XX is one byte.
    # Hex digits are 0-9A-F in either case (the old class [0-9A-H] was wrong).
    $v=~tr/+/ /;
    $v=~s/%([0-9A-Fa-f]{2})/pack('C',hex($1))/ge;
    if($k eq "keywords")
    {
        $words=$v;
        # Textarea line breaks arrive as CRLF; split on any line ending so no
        # keyword keeps a trailing "\r", and drop blank lines.
        foreach(split(/\r\n|\r|\n/,$words))
        {
            my $word=$_;
            $word=~s/^\s+//;
            $word=~s/\s+$//;
            push(@keywords,$word) if(length($word));
        }
    }
    elsif($k eq "report_to") { $reportMail=$v }
    elsif($k eq "smtp") { $reportSmtp=$v }
}
# One keyword is enough to run; the old test ($#keywords<1) demanded two.
exit(1) if(!@keywords||!defined($reportMail)||!defined($reportSmtp));
# Release the raw request data; only the parsed values are used from here on.
undef $_POST;
undef @pairs;
undef $words;
# Endpoints queried for every keyword.
my $overtureEngine = 'http://inventory.overture.com/d/searchinventory/suggestion/';
my $googleEngine   = 'http://www.google.com/search?hl=en&btnG=Search&q=';

# Pool of User-Agent strings used when building the HTTP client.
my @userAgents = (
    'Mozilla/5.0 (Windows NT 5.1; U; ru) Opera 9.01',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;)',
    'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.8) Gecko/20051107 Camino/1.0b1',
    'Mozilla/4.8 [en] (Windows NT 5.0; U)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0',
);

# Report file name, stamped with the current epoch time.
my $reportName = join('', 'report_', time(), '.csv');

# Scratch record for the keyword currently being processed.
my %reportContent = (
    keyword => '',
    google  => '',
    queries => {
        query => [],
        count => [],
    },
);

# Look up each keyword, then append its row to the CSV report.
for my $kw (@keywords)
{
    checkKw($kw);
    prepeareCsvReport();
}

# Mail the report; if that fails, link to the locally saved file instead.
if (sendReport() ne 'sent')
{
    print qq{<font color="red">Report sending failed! Saved in <a href="$reportName" target="_new">$reportName</a></font>\n};
}
else
{
    print qq{<font color="green">Report sent (} . localtime() . qq{)</font>\n};
}
# checkKw($keyword)
#
# Looks one keyword up on Overture and on Google and stores the findings in
# the file-level %reportContent:
#   keyword          - the keyword itself
#   queries{query}   - suggested queries scraped from the Overture page
#   queries{count}   - the matching counts scraped from the Overture page
#   google           - Google's "of about N" result figure, commas removed
#
# Prints an error line and returns early when a service cannot be reached;
# a failed Overture request also skips the Google lookup.
sub checkKw
{
    my $keyword=shift;
    $reportContent{keyword}=$keyword;
    # rand(@userAgents) covers every index; rand($#userAgents) could never
    # select the last entry.
    my $ua=LWP::UserAgent->new(agent=>$userAgents[rand(@userAgents)],
        timeout=>60);
    my $response=$ua->request(POST($overtureEngine,
        Content_Type=>"application/x-www-form-urlencoded",
        Content=>[mkt=>"us",
            lang=>"en_US",
            term=>$keyword]));
    # Written as a block on purpose: `print "..." && return` evaluates the
    # return first, so the message was never printed.
    if(!$response->is_success)
    {
        print "<font color=\"red\">Overture is unreachable!</font>";
        return;
    }
    foreach(split("\n",$response->content))
    {
        if(/<td><font face=\"verdana,sans-serif\"\s*size=1> (.*)<\/td>/) { push(@{$reportContent{queries}{count}},$1) }
        elsif(/<td> <a href=\".*\"><font face=\"verdana,sans-serif\"\s*size=1\s*color=\#000000>(.*)<\/a><\/td>/) { push(@{$reportContent{queries}{query}},$1) }
    }
    # Percent-encode the keyword so spaces, "&", "#" and the like cannot
    # truncate or alter the query string.
    (my $query=$keyword)=~s/([^A-Za-z0-9_.~-])/sprintf("%%%02X",ord($1))/ge;
    $response=$ua->request(GET($googleEngine.$query,
        Referer=>"http://www.google.com/"));
    if(!$response->is_success)
    {
        print "<font color=\"red\">Google is unreachable!</font>";
        return;
    }
    foreach(split("\n",$response->content))
    {
        # \Q..\E matches the keyword literally even if it contains regex
        # metacharacters.
        if(/of about\s*<b>(.*)<\/b>\s*for\s*<b>\s*\Q$keyword\E\s*<\/b>/)
        {
            $reportContent{google}=$1;
            $reportContent{google}=~s/,//g;
        }
    }
    return;
}
# prepeareCsvReport()
#
# Appends one ";"-separated row for the keyword held in %reportContent to
# the file named by $reportName, then resets %reportContent for the next
# keyword. Row layout: keyword; google figure; ten "query-count" cells.
# Missing values are written as "n/a".
# If the file cannot be opened or written, a warning goes to STDERR and the
# row is dropped; the state is reset either way.
sub prepeareCsvReport
{
    # A value counts as missing only when undefined or empty, so a real "0"
    # is kept instead of becoming "n/a".
    my $orNa=sub
    {
        my $value=shift;
        return (defined($value)&&length($value)) ? $value : "n/a";
    };
    my @cells=($reportContent{keyword},$orNa->($reportContent{google}));
    for my $i (0..9)
    {
        push(@cells,$orNa->($reportContent{queries}{query}[$i])."-".$orNa->($reportContent{queries}{count}[$i]));
    }
    # Three-argument open on a lexical handle: the file name can never be
    # read as a mode, and failures are reported instead of ignored.
    if(open(my $csv,'>>',$reportName))
    {
        print $csv join(";",@cells),"\n";
        close($csv) or warn "prepeareCsvReport: cannot write $reportName: $!\n";
    }
    else
    {
        warn "prepeareCsvReport: cannot open $reportName: $!\n";
    }
    $reportContent{keyword}="";
    $reportContent{google}="";
    @{$reportContent{queries}{query}}=();
    @{$reportContent{queries}{count}}=();
    return;
}
# sendReport()
#
# Mails the file named by $reportName as an attachment to $reportMail via
# the SMTP server $reportSmtp (no authentication is attempted).
# Returns "sent" only when the server accepted every SMTP step; returns
# "failed" otherwise, including when the recipient or server is empty or the
# report file cannot be read.
sub sendReport
{
    # Nothing to try without a recipient and a server.
    return "failed" if(!defined($reportMail)||!length($reportMail));
    return "failed" if(!defined($reportSmtp)||!length($reportSmtp));
    # The address is copied into a header line; a line break in it would let
    # the submitter inject extra headers.
    return "failed" if($reportMail=~/[\r\n]/);

    # Read the attachment before connecting, so a missing report never
    # leaves an SMTP session open.
    open(my $report,'<',$reportName) or return "failed";
    my @attachment=<$report>;
    close($report);

    my $smtp=Net::SMTP->new($reportSmtp,
        Timeout=>7,
        Debug=>1) or return "failed";
    my $sender="overture\@parser.cgi";
    # Each header needs its own line and a blank line must separate headers
    # from the body; without the "\n"s everything ran together on one line.
    my @message=(
        "To: ".$reportMail."\n",
        "From: ".$sender."\n",
        "Subject: Report from overture_parser.cgi\n",
        "MIME-Version: 1.0\n",
        "Content-Type: multipart/mixed; boundary=\"splitter\"\n",
        "\n",
        "--splitter\n",
        # The attachment is CSV, so it is labelled text/csv, not text/html.
        "Content-Type: text/csv; name=\"".$reportName."\"\n",
        "Content-Transfer-Encoding: 7bit\n",
        "Content-Disposition: attachment; filename=\"".$reportName."\"\n",
        "\n",
        @attachment,
        "--splitter--\n",
    );
    # Every step reports success or failure; stop at the first rejection
    # instead of claiming "sent" regardless.
    my $accepted=$smtp->mail($sender)
        && $smtp->recipient($reportMail)
        && $smtp->data()
        && $smtp->datasend(@message)
        && $smtp->dataend();
    $smtp->quit();
    return $accepted ? "sent" : "failed";
}
# Runs when the interpreter shuts down, including after exit(): closes the
# <center>, <body> and <html> elements opened by the page header.
END
{
    my $closingMarkup="</center></body></html>";
    print $closingMarkup;
}
Скриншот|Screenshot
Скриншот|Screenshot
Скриншот|Screenshot