CRF++

  • 暫定版 11/09
    	# Convert text annotated with inline <NAME>...</NAME> tags (one sentence
    	# per line) into token-per-line training data for CRF++.
    	#
    	# For every input line the script:
    	#   1. strips the tags and tokenizes the plain text with MeCab,
    	#   2. remembers which surface strings were wrapped in which tag,
    	#   3. labels each MeCab token B-NAME / I-NAME when a run of tokens
    	#      concatenates to a tagged string, and O otherwise.
    	#
    	# Labeling scheme reference (only B, I and O are produced here):
    	#   B: first token of a chunk
    	#   E: last token of a chunk
    	#   I: token inside a chunk
    	#   S: single token forming a chunk on its own
    	#   O: token outside any chunk
    	use strict;
    	use warnings;
    	
    	my $file = "";    # tagged input data
    	my $fileOut = ""; # output file in CRF++ format
    	
    	# Three-argument open with explicit checks: a missing or empty path must
    	# stop the run instead of silently producing no output.
    	open(my $in, '<', $file)
    		or die "cannot open input file '$file': $!";
    	open(my $out, '>', $fileOut)
    		or die "cannot open output file '$fileOut': $!";
    	
    	while (my $line = <$in>) {
    		chomp $line;
    	
    		# Remove the tags to obtain the plain sentence.
    		my $before = $line;
    		my $after  = $line;
    		print "[Á°]$before\n";
    		$after =~ s/<.*?>//g;
    		print "[¸å]$after\n";
    	
    		# Record every tagged string together with its tag name.
    		# Splitting on "</" leaves each "<NAME>value" pair at the end of a
    		# piece, where the pattern below picks it up.
    		my %NE;
    		foreach my $piece (split(/<\//, $line)) {
    			if ($piece =~ /^.*<(.*?)>(.*?)$/) {
    				my ($name, $value) = ($1, $2);
    				# An empty element carries nothing to match against.
    				next if $value eq '';
    				# Plain assignment: a string tagged twice in one line must
    				# not end up with its tag name concatenated.
    				$NE{$value} = $name;
    			}
    		}
    	
    		# Hand the tag-free sentence to MeCab through a scratch file.
    		open(my $mecab, '>', 'mecab.input')
    			or die "cannot open 'mecab.input' for writing: $!";
    		print {$mecab} $after;
    		# The file must be fully flushed before MeCab reads it.
    		close($mecab)
    			or die "cannot close 'mecab.input': $!";
    	
    		# Run MeCab; one output line per token.
    		my @M = `/usr/local/bin/mecab -Ocrf < mecab.input`;
    		die "mecab failed (status $?)" if $? != 0;
    		chomp @M;
    	
    		my @surfaces; # first whitespace-separated field of each MeCab line
    		my @Mp;       # label assigned to each MeCab line, if any
    		for my $i (0 .. $#M) {
    			my ($surface) = split(/\s/, $M[$i]);
    			$surfaces[$i] = defined $surface ? $surface : '';
    	
    			# Grow a suffix backwards from token $i and label the run as
    			# soon as it equals a tagged string (provisional approach).
    			my $suffix = '';
    			for (my $j = $i; $j >= 0; $j--) {
    				$suffix = $surfaces[$j] . $suffix;
    				next unless exists $NE{$suffix};
    	
    				# The first token of the run gets B, the rest get I.
    				$Mp[$j] = "B-$NE{$suffix}";
    				for my $k ($j + 1 .. $i) {
    					$Mp[$k] = "I-$NE{$suffix}";
    				}
    				last;
    			}
    		}
    	
    		#print {$out} "\n$before\n";
    		for my $i (0 .. $#M) {
    			# Tokens that matched no tagged string are labeled O.
    			my $label = defined $Mp[$i] ? $Mp[$i] : 'O';
    			print {$out} "$M[$i] $label\n";
    		}
    	}
    	
    	# Buffered write errors only surface at close time.
    	close($out) or die "cannot close output file '$fileOut': $!";
    	close($in);

木村ゼミ生限定

編集画面
ゼミ生
2021-2022年度生(14期)
°¤ÉôÍÚÂçÆâñ¥
²¬ÅÄ°¼²»¶½À±ÍÛ
³á±ï²ÏÌîͳÌï
º´¡¹ÌÚô¥º´¡¹ÌÚÈþÇÈ
ßÀÅÄϵ®×¢µÈϵ®
Æ£°æ°ì»ÖÆ£ÅĽ¡¿¿
2020-2021年度生(13期)
¾®ß·¿¿ô¥³Þ¸¶Í­¿¿
²ÃÆ£ÀµÃè³÷ÅÄÌöÅÍ
ºä¼Íã½»µÈ¿¿Æà
¹âÌîÂç²ÏÃæ°æÍÕ·î
±ÊÞ¼·ÊÍ´Ê¿´ÛºÚ¡¹»Ò
2019-2020年度生(12期)
Âç°²¶³Ê¿--
¶áÆ£ÂÀͺÀ¶¿åÈþΤ
Ãæ¼²ÄÎçÊ¡»³³èµ¯
Ê¡²ÈÍ´µªÁ¥±ÛÅ·ºÌ
Æ°áΤ»³²¼²À·î
ºäËÜÎÃÂÀÅÚ²°ºÌ²Æ
2018-2019年度生(不在)
SEA-NA代表取締役
Ê¿²ìľµ±²£»³è½²Ö
½»µÈ¼Âµ§¼¼¶¶ÏºÈ
2017-2018年度生(11期)
ÀйõÛÙÆà°ìµÜÂó³¤
µµ°æ³¤½®º´Æ£ÛÙ
º´Æ£Í­´õÉ°¿¹Âó¿¿
Æ£Ëܼë²Æ¥Û¥ï¥¤¥È¥¸¥Ë¡¼
ÁýÅÄÍ¥ºîëÆâ·òÂÀ
2016-2017年度生(10期)
°ËÆ£¤ß¤­²¬Åç·ò¸ç
¾®À¾ÀãÍÕÍ´ÀîÂÙµ±
ÎëÌÚͤºÚÂçÌçÂó»Ë
ÅÄƬ¤ï¤«¤Ð¸ÍÅèºéÊæ
Ãæ¼ÃÒµ®À¾ÌîůÀ¸
²£»³Í´²ÌÀî´ßÍ´²Ì
2015-2016年度生(09期)
Àõ²ì¼·³¤¾®ÎÓ¿¿ºÚ
À¾Â¼°Ë±ûËÙ¹¾ÃÎ̤
¿ËÀ¸°Ô´õ¼¾å¹ÀÂÀ
2014-2015年度生(08期)
ÂçÀÐÀ¿ÂçÌÂÀϯ
²Ãƣ͵¼ùº´¡¹ÌÚº¸¶á
¹â¶¶Íýº»ÉðÅÄè½Êæ
»ûÅçÉñ»ÒȪ²ìÂç
»³ÅĽ¤À¤
2013-2014年度生(07期)
²ÃÆ£»Ë¿¥¹©Æ£ÃÒ»Ò
º´¡¹ÌÚÍÕ»Ò»Ö³ù¼þ
¹â¶¶¸¼Î¶üâ¾¾æÆ
ÃæÈøÀéºéÃæÀîÎèºÚ
Ãæé®Âçµ®
2012-2013年度生(06期)
±óÆ£À±ÃÏÂçÌî¼Óµ¨
³ùÅĤᤰ¤ßÌÚ²¼ÏÂÂç
ã·ÌÚÎò𺴡¹ÌÚÍÚ
º´Æ£Í¥»Ò¾Â߷ʸ¹á
¸Å²°¿¿ÍýµÈÅÄÃÒ¹°
2010-2011年度生(05期)
°±¸¶»ËÉÒ°ËÆ£Â絯
°ËÆ£¤ß¤É¤ê±Êºäʸǵ
Æ£ÅĹҺÈÁ°Â¿ÂçÊå
¾¾ËÜÎÍͤµÜÄÅÍ­º»
»³ÅÄ°¡µ¨
2009-2010年度生(04期)
´ßËÜδ»Ö·¦ÃÏͳ·Ã
»Ö²ìÀéÄáÄÅÅÄÍ­»Ò
»°±º¹©Ìï
2008-2009年度生(03期)
°ÀÄŹ¯Í¤°æ¾å¤µ¤æ¤ê
Ë̺êͤ¼ù¹©Æ£Ï´²
¸ÅËóÍ¥²Öº´Àî¾´¹¨
º´Æ£Ä÷ÍÎÎëÌÚ°¡°á
Ãݸ¶´õÈþÆ£°æÍ¥ºî
ËÙ¸ø°ìËÙÆâ¾®¿¥
ÊÆß·¹¨»Ë
2007-2008年度生(02期)
º´Æ£·òÂÀ¾å¼²Â¹°
±üÅÄ·¼µ®¾®ÌîÀ¿
Çò°æ¤«¤º¤ß¹â°æÍDzð
¿¹Ã«Î¼²ðÏ»ÅÏÍ­Íü·Ã
¼ãËÜůʿ
2006-2007年度生(01期)
¿û°æ°´ÅÏÉô¸¬ÂÀϺ
Áêºä¿¿Â缲¿µ
±üÅí»Ò³Þ°æÌÔ
¾®ÎÓϹ¬óîÆ£¤¤¤Ä¤³
óîƣͺµªº´¡¹ÌÚËã̤
º´Æ£Æü²ÃÍùëËܵ®Ç·
ÆÁ¹¾Í¤²ðĹÎææûÊ¿
À¾Ëܤߤ椭ÎÓ³¨Î¤»Ò
ß·ÅÄÂçµ±