Perl/ウェブサーバのアクセスログからクエリの解析
をテンプレートにして作成
[
トップ
] [
新規
|
一覧
|
単語検索
|
最終更新
|
ヘルプ
]
開始行:
[[Perl]]
# Tally search-engine queries found in web-server access logs.
#
# Input : every file in the current directory matching access_log.200*
# Output: result.dat, one line per distinct query in the form
#         "LABEL decoded-query count ", grouped as G, G_UTF8, Y, Y_UTF8 and
#         ordered by descending count inside each group.
#         result_utf8.dat is then generated from result.dat with `nkf -w`.
use strict;
use warnings;

# Tallies per group, keyed by the still URL-encoded query string.
my %count_of = (
    G      => {},    # Google URL containing no '%' at all
    G_UTF8 => {},    # Google URL containing a '%' escape
                     # NOTE(review): a '%' alone does not prove the bytes are
                     # UTF-8; the label follows the original heuristic.
    Y      => {},    # Yahoo URL without ei=UTF-8
    Y_UTF8 => {},    # Yahoo URL carrying ei=UTF-8
);

# glob() returns the matching names sorted, without spawning a shell.
for my $file (glob 'access_log.200*') {
    print "$file\n";

    my $in;
    unless (open $in, '<', $file) {
        # An unreadable log is reported and skipped; the others still count.
        warn "cannot open $file: $!\n";
        next;
    }

    while (my $line = <$in>) {
        chomp $line;

        # Take the first quoted field that starts with http: a whitespace,
        # a double quote, the URL, a double quote and a whitespace
        # (presumably the Referer field of the log line).
        next unless $line =~ /\s"(http.*?)"\s/;
        my $url = $1;

        # The parameter name is anchored on '?' or '&' so that parameters
        # merely ending in the same letter (oq=, aq=, sp=, ...) are not
        # mistaken for the query itself.
        if ($url =~ /google/ && $url =~ /[?&]q=([^&]*)/) {
            my $query = $1;    # saved before the next match resets $1
            my $label = $url =~ /%/ ? 'G_UTF8' : 'G';
            $count_of{$label}{$query}++;
        }
        elsif ($url =~ /yahoo/ && $url =~ /[?&]p=([^&]*)/) {
            my $query = $1;
            my $label = $url =~ /ei=UTF-8/ ? 'Y_UTF8' : 'Y';
            $count_of{$label}{$query}++;
        }
    }
    close $in;
}

open my $out, '>', 'result.dat' or die "cannot write result.dat: $!\n";

# Every group, including plain Y, is ranked and written the same way.
for my $label (qw(G G_UTF8 Y Y_UTF8)) {
    my $tally  = $count_of{$label};
    my @ranked = sort { $tally->{$b} <=> $tally->{$a} or $a cmp $b }
                 keys %$tally;    # ties broken by key for a stable order

    for my $query (@ranked) {
        # Undo URL encoding: '+' is a space, %XX is one raw byte.
        (my $text = $query) =~ tr/+/ /;
        $text =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;
        print {$out} "$label $text $tally->{$query} \n";
    }
}

# Buffered write errors only surface when the handle is closed.
close $out or die "cannot finish writing result.dat: $!\n";

# Convert the mixed-encoding result to UTF-8 with the external nkf tool.
# A failure is only reported, because result.dat itself is already complete.
system("nkf -w result.dat > result_utf8.dat") == 0
    or warn "nkf conversion failed (status $?)\n";
終了行:
[[Perl]]
# Tally search-engine queries found in web-server access logs.
#
# Input : every file in the current directory matching access_log.200*
# Output: result.dat, one line per distinct query in the form
#         "LABEL decoded-query count ", grouped as G, G_UTF8, Y, Y_UTF8 and
#         ordered by descending count inside each group.
#         result_utf8.dat is then generated from result.dat with `nkf -w`.
use strict;
use warnings;

# Tallies per group, keyed by the still URL-encoded query string.
my %count_of = (
    G      => {},    # Google URL containing no '%' at all
    G_UTF8 => {},    # Google URL containing a '%' escape
                     # NOTE(review): a '%' alone does not prove the bytes are
                     # UTF-8; the label follows the original heuristic.
    Y      => {},    # Yahoo URL without ei=UTF-8
    Y_UTF8 => {},    # Yahoo URL carrying ei=UTF-8
);

# glob() returns the matching names sorted, without spawning a shell.
for my $file (glob 'access_log.200*') {
    print "$file\n";

    my $in;
    unless (open $in, '<', $file) {
        # An unreadable log is reported and skipped; the others still count.
        warn "cannot open $file: $!\n";
        next;
    }

    while (my $line = <$in>) {
        chomp $line;

        # Take the first quoted field that starts with http: a whitespace,
        # a double quote, the URL, a double quote and a whitespace
        # (presumably the Referer field of the log line).
        next unless $line =~ /\s"(http.*?)"\s/;
        my $url = $1;

        # The parameter name is anchored on '?' or '&' so that parameters
        # merely ending in the same letter (oq=, aq=, sp=, ...) are not
        # mistaken for the query itself.
        if ($url =~ /google/ && $url =~ /[?&]q=([^&]*)/) {
            my $query = $1;    # saved before the next match resets $1
            my $label = $url =~ /%/ ? 'G_UTF8' : 'G';
            $count_of{$label}{$query}++;
        }
        elsif ($url =~ /yahoo/ && $url =~ /[?&]p=([^&]*)/) {
            my $query = $1;
            my $label = $url =~ /ei=UTF-8/ ? 'Y_UTF8' : 'Y';
            $count_of{$label}{$query}++;
        }
    }
    close $in;
}

open my $out, '>', 'result.dat' or die "cannot write result.dat: $!\n";

# Every group, including plain Y, is ranked and written the same way.
for my $label (qw(G G_UTF8 Y Y_UTF8)) {
    my $tally  = $count_of{$label};
    my @ranked = sort { $tally->{$b} <=> $tally->{$a} or $a cmp $b }
                 keys %$tally;    # ties broken by key for a stable order

    for my $query (@ranked) {
        # Undo URL encoding: '+' is a space, %XX is one raw byte.
        (my $text = $query) =~ tr/+/ /;
        $text =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;
        print {$out} "$label $text $tally->{$query} \n";
    }
}

# Buffered write errors only surface when the handle is closed.
close $out or die "cannot finish writing result.dat: $!\n";

# Convert the mixed-encoding result to UTF-8 with the external nkf tool.
# A failure is only reported, because result.dat itself is already complete.
system("nkf -w result.dat > result_utf8.dat") == 0
    or warn "nkf conversion failed (status $?)\n";
ページ名: