- 投稿者: みゅ
- カテゴリ: mecab
- 優先度: 普通
- 状態: 完了
- 日時: 2011年11月13日 10時24分35秒
- mecabのユーティリティ・コマンドの使い方がいまいちわからなかったので、「make check」を参考に使い方をまとめてみる
- 特にパラメータ推定について知りたい
#!/bin/sh
# Dictionary regression test.
#
# For every sample directory listed in DIR:
#   1. compile the dictionary in place with mecab-dict-index (euc-jp options),
#   2. run mecab on the file "test" using that directory as the dictionary,
#   3. compare the output with the expected file test.gld (diff -b),
#   4. remove the generated *.bin / *.dic files and test.out.
#
# Exits 0 when every directory matches, 1 as soon as one directory fails.
DIR="shiin t9 latin katakana autolink chartype ngram"
for dir in $DIR
do
  # Each case runs in a subshell so the cd does not leak into the next
  # iteration. An exit inside the subshell only ends the subshell, so its
  # status is propagated explicitly with "|| exit 1" below.
  (
    # Never build or delete files in the wrong directory.
    cd "$dir" || exit 1
    ../../src/mecab-dict-index -f euc-jp -c euc-jp
    ../../src/mecab -r /dev/null -d . test > test.out
    if ! diff -b test.gld test.out
    then
      echo "runtests faild in $dir"
      exit 1
    fi
    rm -f *.bin *.dic test.out
  ) || exit 1
done
exit 0
#!/bin/sh
#
# Regression test for mecab-system-eval.
#
# Runs the evaluator on the files "system" and "answer" in the eval
# directory with levels "0 1 2 3 4" and compares the report with the
# expected file test.gld.
#
# Exits 0 on a match, 1 on a mismatch or when the eval directory is missing.
cd eval || exit 1
../../src/mecab-system-eval --level="0 1 2 3 4" system answer > test.out
if ! diff test.gld test.out
then
  # No loop variable exists in this script, so name the directory literally.
  echo "runtests faild in eval"
  exit 1
fi
rm -f test.out
exit 0
#!/bin/sh
# Regression test for parameter estimation (cost training).
#
# For each algorithm (crf, hmm):
#   1. select the algorithm-specific rewrite.def and expected outputs,
#   2. compile the seed dictionary (mecab-dict-index),
#   3. train a model from the training corpus (mecab-cost-train),
#   4. generate a dictionary with costs from the model (mecab-dict-gen),
#   5. compile the generated dictionary (mecab-dict-index),
#   6. analyse the test corpus and score it (mecab-test-gen | mecab,
#      mecab-system-eval),
#   7. compare the analysis result and the generated dic.csv with the
#      expected files, then remove the generated files.
#
# Exits 0 when both algorithms match, 1 on the first mismatch. Generated
# files are left in place on failure.
cd cost-train || exit 1

CORPUS=ipa.train
TEST=ipa.test
MODEL=model-ipadic
SEEDDIR=seed
FREQ=1
C=1.0
EVAL="0 1 2 4"
DIR=../../src
#DIR=/usr/local/libexec/mecab
RMODEL=${MODEL}.c${C}.f${FREQ}
DICDIR=${RMODEL}.dic

for algo in crf hmm
do
  # -p: a previous failed run leaves the directory behind.
  mkdir -p "${DICDIR}"
  cp -f "${SEEDDIR}/rewrite.def.${algo}" "${SEEDDIR}/rewrite.def"
  cp -f "test.gld.${algo}" test.gld
  cp -f "dic.gld.${algo}" dic.gld

  "${DIR}/mecab-dict-index" -d "${SEEDDIR}" -o "${SEEDDIR}"
  "${DIR}/mecab-cost-train" -a "${algo}" -c "${C}" -d "${SEEDDIR}" -f "${FREQ}" "${CORPUS}" "${RMODEL}.model"
  "${DIR}/mecab-dict-gen" -a "${algo}" -d "${SEEDDIR}" -m "${RMODEL}.model" -o "${DICDIR}"
  "${DIR}/mecab-dict-index" -d "${DICDIR}" -o "${DICDIR}"
  "${DIR}/mecab-test-gen" < "${TEST}" | "${DIR}/mecab" -r /dev/null -d "${DICDIR}" > "${RMODEL}.result"
  # The level list is passed to -l as a single argument.
  "${DIR}/mecab-system-eval" -l "${EVAL}" "${RMODEL}.result" "${TEST}" | tee "${RMODEL}.score"

  if ! diff test.gld "${RMODEL}.result"
  then
    echo "runtests faild in cost-train (${algo})"
    exit 1
  fi

  if ! diff dic.gld "${DICDIR}/dic.csv"
  then
    echo "runtests faild in cost-train (${algo})"
    exit 1
  fi

  rm -fr "${DICDIR}"
  rm -fr "${RMODEL}"*
  rm -fr "${SEEDDIR}"/*.dic
  rm -fr "${SEEDDIR}"/*.bin
  rm -fr test.gld
done
exit 0
cd cost-train
mkdir model-ipadic.c1.0.f1.dic
cp -f seed/rewrite.def.crf seed/rewrite.def
cp -f test.gld.crf test.gld
cp -f dic.gld.crf dic.gld
../../src/mecab-dict-index -d seed -o seed
- seedディレクトリにchar.bin、matrix.bin、sys.dic、unk.dicが作成される
../../src/mecab-cost-train -a crf -c 1.0 -d seed -f 1 ipa.train model-ipadic.c1.0.f1.model
- model-ipadic.c1.0.f1.modelとmodel-ipadic.c1.0.f1.model.txtが作成される
../../src/mecab-dict-gen -a crf -d seed -m model-ipadic.c1.0.f1.model -o model-ipadic.c1.0.f1.dic
- model-ipadic.c1.0.f1.dicにファイルが作成される
- 配布用ユーザー辞書 >>> dic.csv
- コストが入っている
$ ls model-ipadic.c1.0.f1.dic/
char.def dic.csv dicrc feature.def left-id.def matrix.def rewrite.def right-id.def unk.def
../../src/mecab-dict-index -d model-ipadic.c1.0.f1.dic -o model-ipadic.c1.0.f1.dic
$ ls model-ipadic.c1.0.f1.dic/
char.bin char.def dic.csv dicrc feature.def left-id.def matrix.bin matrix.def rewrite.def right-id.def sys.dic unk.def unk.dic
../../src/mecab-test-gen < ipa.test | ../../src/mecab -r /dev/null -d model-ipadic.c1.0.f1.dic > model-ipadic.c1.0.f1.result
# The level list must reach -l as one quoted argument.
../../src/mecab-system-eval -l "0 1 2 4" model-ipadic.c1.0.f1.result ipa.test | tee model-ipadic.c1.0.f1.score
diff test.gld model-ipadic.c1.0.f1.result
diff dic.gld model-ipadic.c1.0.f1.dic/dic.csv
rm -fr model-ipadic.c1.0.f1.dic
rm -fr model-ipadic.c1.0.f1*
rm -fr seed/*.dic
rm -fr seed/*.bin
rm -fr seed/*.dic
rm -fr test.gld
R備忘録 /状態空間モデリング/donlp2/その他のメモ