Perl/複数ファイルの単語出現頻度を求める
をテンプレートにして作成
[
トップ
] [
新規
|
一覧
|
単語検索
|
最終更新
|
ヘルプ
]
開始行:
[[Perl]]
# Count word frequencies across every file matching *txt in the current
# directory.
#
# Each file is tokenized by `mecab -O wakati` (space-separated words).
# Two tab-separated reports are written, most frequent word first:
#   result.txt   - word <TAB> total number of occurrences in all files
#   DFresult.txt - word <TAB> number of files in which the word occurs (DF)
use strict;
use warnings;

my %Words;    # word => total occurrences over all files
my %DF;       # word => number of files containing the word

# glob() rather than `ls`: no shell is involved and the names carry no
# trailing newline.
foreach my $file (glob('*txt')) {
    # Tokenize with mecab. The list form of a pipe open bypasses the shell,
    # so file names with spaces or metacharacters are passed through intact.
    open(my $mecab, '-|', 'mecab', '-O', 'wakati', $file)
        or die "cannot run mecab on $file: $!";

    # Words seen in this file only; used to update the document frequency.
    my %TmpWords;

    # Count every word on every tokenized line.
    while (my $line = <$mecab>) {
        chomp $line;
        # split ' ' skips leading whitespace and collapses runs of it, so
        # no empty-string "word" is ever counted.
        foreach my $word (split ' ', $line) {
            $Words{$word}++;
            $TmpWords{$word}++;
        }
    }

    # A failed close on a pipe means mecab itself reported an error; the
    # counts gathered so far are kept, but the problem is made visible.
    close($mecab)
        or warn "mecab failed on $file: "
              . ($! ? $! : 'exit status ' . ($? >> 8)) . "\n";

    # Each distinct word in this file contributes exactly one to its DF.
    $DF{$_}++ foreach keys %TmpWords;
}

# Write the total occurrence counts.
write_counts('result.txt', \%Words);

# Write the document frequencies.
write_counts('DFresult.txt', \%DF);

# Write "word<TAB>count" lines to $path, highest count first. Words with
# equal counts are ordered by name so the output is reproducible.
sub write_counts {
    my ($path, $count_of) = @_;

    open(my $out, '>', $path) or die "cannot open $path for writing: $!";
    foreach my $word (sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
                      keys %{$count_of}) {
        print {$out} "$word\t$count_of->{$word}\n";
    }
    # Buffered write errors only surface at close time.
    close($out) or die "cannot close $path: $!";
    return;
}
終了行:
[[Perl]]
# Count word frequencies across every file matching *txt in the current
# directory.
#
# Each file is tokenized by `mecab -O wakati` (space-separated words).
# Two tab-separated reports are written, most frequent word first:
#   result.txt   - word <TAB> total number of occurrences in all files
#   DFresult.txt - word <TAB> number of files in which the word occurs (DF)
use strict;
use warnings;

my %Words;    # word => total occurrences over all files
my %DF;       # word => number of files containing the word

# glob() rather than `ls`: no shell is involved and the names carry no
# trailing newline.
foreach my $file (glob('*txt')) {
    # Tokenize with mecab. The list form of a pipe open bypasses the shell,
    # so file names with spaces or metacharacters are passed through intact.
    open(my $mecab, '-|', 'mecab', '-O', 'wakati', $file)
        or die "cannot run mecab on $file: $!";

    # Words seen in this file only; used to update the document frequency.
    my %TmpWords;

    # Count every word on every tokenized line.
    while (my $line = <$mecab>) {
        chomp $line;
        # split ' ' skips leading whitespace and collapses runs of it, so
        # no empty-string "word" is ever counted.
        foreach my $word (split ' ', $line) {
            $Words{$word}++;
            $TmpWords{$word}++;
        }
    }

    # A failed close on a pipe means mecab itself reported an error; the
    # counts gathered so far are kept, but the problem is made visible.
    close($mecab)
        or warn "mecab failed on $file: "
              . ($! ? $! : 'exit status ' . ($? >> 8)) . "\n";

    # Each distinct word in this file contributes exactly one to its DF.
    $DF{$_}++ foreach keys %TmpWords;
}

# Write the total occurrence counts.
write_counts('result.txt', \%Words);

# Write the document frequencies.
write_counts('DFresult.txt', \%DF);

# Write "word<TAB>count" lines to $path, highest count first. Words with
# equal counts are ordered by name so the output is reproducible.
sub write_counts {
    my ($path, $count_of) = @_;

    open(my $out, '>', $path) or die "cannot open $path for writing: $!";
    foreach my $word (sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
                      keys %{$count_of}) {
        print {$out} "$word\t$count_of->{$word}\n";
    }
    # Buffered write errors only surface at close time.
    close($out) or die "cannot close $path: $!";
    return;
}
ページ名: