#!/bin/bash -u

# 05/12/05 Changes
#
# * merged parser and language modeler training scripts
# * replaced copied code with loops
# * enabled running this script from outside TRAIN directory
# * made the final "/" on the path argument optional (not required)
# * no longer assumes "." is in $PATH
# * specified shell at top (make script invariant to user shell)
# * outputs program exit codes for better error detection
# * added a usage statement

#-----------------------------------------------------------------

# Absolute, symlink-resolved directory containing this script. The training
# programs are invoked from here, so the script can be run from any directory.
# Both expansions are quoted (and xargs is avoided) so a script path that
# contains whitespace or quotes is handled; the assignment is kept separate
# from typeset so that a realpath failure is not masked.
HERE=$(realpath -- "$(dirname -- "$0")") || {
    echo "Cannot resolve the directory of $0" >&2
    exit 1
}
typeset -r HERE

# 05/16/05: Currently the revised script requires the caller to
# always pass <lm/parser> as the first argument to say which mode to
# run in. Here's a nicer version that assumes parser mode unless the
# caller specifies "-lm":
#
#function usage () {
#    echo "Usage: `basename $0` [-lm] DATA_dir train.txt tune.txt"
#    exit 1
#}
# if [ $1 = -lm ]; then
#     typeset -r MODE=lm
#     shift;
# else
#     typeset -r MODE=parser
# fi
# 
# if [ $# -ne 3 ]; then usage; fi
# typeset -r DATA=`realpath $1`
# typeset -r TRAIN="`realpath $2` $DATA/bugFix.txt"
# typeset -r TUNE="`realpath $3` $DATA/bugFix.txt" 
#
# Prints the command-line synopsis and terminates the script.
# Arguments: none
# Outputs:   the usage line, on stderr (it is a diagnostic, not program output)
# Exits:     always, with status 1
# Note: besides "parser" and "lm", the mode check further down in this
# script also accepts "Ch" and "En".
function usage () {
    # "$0" is quoted so a script path containing spaces still yields its basename
    echo "Usage: $(basename -- "$0") <parser/lm> DATA_dir train.txt tune.txt" >&2
    exit 1
}
# Exactly four arguments are required: mode, DATA directory, training file,
# tuning file.
if [ $# -ne 4 ]; then usage; fi
typeset -r MODE=$1

# Resolve the paths to absolute form before the script changes directory.
# Each assignment is made separately from typeset so that a realpath failure
# is detected; otherwise an empty DATA would make the later "rm -f $DATA/..."
# commands operate on the filesystem root.
DATA=$(realpath -- "$2") || { echo "Cannot resolve DATA directory: $2" >&2; exit 1; }
typeset -r DATA

# TRAIN and TUNE are space-separated lists of two files: the file given by the
# caller followed by bugFix.txt from the DATA directory. They are later
# expanded unquoted as the arguments of cat.
TRAIN=$(realpath -- "$3") || { echo "Cannot resolve training file: $3" >&2; exit 1; }
typeset -r TRAIN="${TRAIN} ${DATA}/bugFix.txt"
TUNE=$(realpath -- "$4") || { echo "Cannot resolve tuning file: $4" >&2; exit 1; }
typeset -r TUNE="${TUNE} ${DATA}/bugFix.txt"

# Are we training the parser or the language model?
# Maps the training mode to the option string (SWITCH) that is passed to the
# training programs below; parser mode passes no extra option. "Ch" selects
# the Chinese variant; "En" is presumably the English one -- TODO confirm.
# A case on the quoted value is used instead of unquoted [ ... ] tests, which
# would accept malformed values such as "x -o parser" and print test syntax
# errors for empty or multi-word values.
case "$MODE" in
    parser) typeset -r SWITCH="" ;;
    lm)     typeset -r SWITCH="-M" ;;
    Ch)     typeset -r SWITCH="-LCh" ;;
    En)     typeset -r SWITCH="-LEn" ;;
    *)
        echo "Unknown training mode argument: $MODE" >&2
        exit 1
        ;;
esac

# Build training programs as needed
# This cd is only needed for making. Abort if it fails: otherwise make would
# run in whatever directory the caller happened to be in.
cd "$HERE" || { echo "Cannot change directory to $HERE" >&2; exit 1; }
make all

# Executes the given command line and prints its exit code.
# Arguments: $1 - a complete shell command line (pipelines allowed); it is
#                 evaluated with eval in the current shell
# Outputs:   whatever the command prints, followed by "Exit code: N" on
#            stdout. For a pipeline, N is the status bash reports for the
#            pipeline (its last stage unless pipefail is enabled).
function run () {
    # Quoted so eval receives the command line verbatim. Unquoted, the string
    # is word-split and glob-expanded first, which collapses whitespace
    # inside quoted arguments and can expand wildcards prematurely.
    eval "$1"
    echo "Exit code: $?"
}

# Train
# Delete these three files from the DATA directory before training
# (presumably outputs left over from a previous run -- TODO confirm).
# The path is quoted, and ${DATA:?} aborts the script if DATA is empty so
# that rm can never be pointed at the filesystem root.
for f in pSgT.txt pUgT.txt nttCounts.txt; do
    rm -f -- "${DATA:?}/$f"
done

# NOTE: run evaluates these strings as shell command lines, and TRAIN is a
# space-separated list of files, so paths containing whitespace are not
# supported here.
run "cat $TRAIN | $HERE/pSgT $SWITCH $DATA/"
run "cat $TRAIN | $HERE/pUgT $SWITCH $DATA/"

# The Chinese head information file is in a different format
if [[ "$MODE" == Ch ]]; then
    run "cat $TRAIN | $HERE/pSfgT $SWITCH $DATA/"
else
    run "cat $TRAIN | $HERE/pTgNt $SWITCH $DATA/"
fi


# For each model name x: collect counts from the training data, run selFeats
# and iScale, then run trainRs on the tuning data. The $x.g, $x.f and $x.ff
# files are deleted from the DATA directory along the way.
for x in r m l u h lm ru rm tt; do

    # Second argument handed to selFeats: 50 for every model except
    # ru (98) and tt (100).
    case "$x" in
        ru) cutoff=98 ;;
        tt) cutoff=100 ;;
        *)  cutoff=50 ;;
    esac

    run "cat $TRAIN | $HERE/rCounts $SWITCH $x $DATA/"
    run "$HERE/selFeats $x $cutoff $DATA/"
    # Quoted, and guarded with ${DATA:?} so an empty DATA aborts the script
    # instead of deleting from the filesystem root.
    rm -f -- "${DATA:?}/$x.g"
    run "$HERE/iScale $x $DATA/"
    run "cat $TUNE | $HERE/trainRs $SWITCH $x $DATA/"
    rm -f -- "${DATA:?}/$x.f" "${DATA:?}/$x.ff"

done

# TO DO: delete whatever files kn3Counts creates
# In lm mode only, additionally pipe the training data through kn3Counts.
# Unlike the other training programs, it is not passed $SWITCH.
# MODE is quoted so an empty or multi-word value cannot break the test.
if [[ "$MODE" == lm ]]; then
    run "cat $TRAIN | $HERE/kn3Counts ww $DATA/"
fi