Perl
2010ǯ03·î15Æü»ÃÄêÈÇ †
sub AnoB()
{
my $Hline=$_[0];
$MecabMaxLength=1000;
#ʸ¤òû¤¯¤¹¤ë¡¥
@lines = split(/¡£|¡¢|\n/, $Hline);
foreach $line (@lines){
#ʸ»ú¿ôÀ©¸Â
if(length $line > $MecabMaxLength){
print "ʸ»ú¿ô¤Î·Ù¹ð Skip $line\n";
next;
}
#²ñµÄÏ¿¤ÎÆâÍÆ
open(OUT,">tmp.dat");
print OUT "$line\n";
close(OUT);
#mecab
$mecab = `mecab --node-format="%m:%H\t" tmp.dat`;
@mecabs= split(/\t/,$mecab);
undef $AB;
undef $previous_word;
undef $previous_pos;
for($i=0;$i<@mecabs;$i++){
#½é´ü²½
undef $word;
undef $pos;
#ñ¸ì¡¢Éʻ졡Ãê½Ð
if($mecabs[$i] =~/^(.*?):(.*?),/){
$word = $1;
$pos = $2;
}
#A¤ÎB¡¡¤òÃê½Ð
#1.̾»ì¤Î¾ì¹ç¡¡
#2.¡Ö¤Î¡×¤Î¾ì¹ç
#3.¤½¤ì°Ê³°
if($pos =~ /̾»ì/){
$AB .= $word;
$B .= $word;
if($previous_word eq "¤Î"){$ABcount++;}
}
elsif($word eq "¤Î" && $AB){#¡¡$AB¤Ï¡Ö¤Î¡×¤«¤é»Ï¤Þ¤ë¤Î¤òËɤ°
$A = $AB;
$AB .= $word;
undef $B;
if($previous_pos =~ /̾»ì/){$ABcount++;}
}
else{
if($ABcount == 2){
$A =~ s/\(/¡Ê/g;
$A =~ s/\)/¡Ë/g;
$B =~ s/\(/¡Ê/g;
$B =~ s/\(/¡Ê/g;
$AB =~ s/\)/¡Ë/g;
$AB =~ s/\)/¡Ë/g;
$AnoB{"$AB"}="$A:$B";
$AnoB_Pattern{"$AB"}++;
}
undef $AB;
$ABcount=0;
}
$previous_word = $word;
$previous_pos = $pos;
}
}
#A¤ÎB¥ê¥¹¥È
@AnoB_Pattern = sort{$AnoB_Pattern{$b} <=> $AnoB_Pattern{$a}} keys %AnoB_Pattern;
}