#!/usr/local/bin/cbsd
#v12.1.10
# Module descriptor consumed by the CBSD init code: accepted arguments,
# one-line description and the extended --help text.
MYARG=""
MYOPTARG="cast_args cmd cmdfile connect_timeout netgroup node nodelist quiet tryoffline tty"
MYDESC="Execute remote command using SSH on the CBSD node"
CBSDMODULE="sys"
ADDHELP="
${H3_COLOR}Description${N0_COLOR}:

 An argument generator and a call to the /usr/bin/ssh utility to execute
 remote commands on the added node. By default the per-node private key is
 used, which is located in the ~cbsd/.ssh directory (added by 'cbsd node mode=add').

${H3_COLOR}Options${N0_COLOR}:

 ${N2_COLOR}cmd=${N0_COLOR}             - command to execute (instead of trailing arguments).
 ${N2_COLOR}cmdfile=${N0_COLOR}         - path to file whose content is appended to the command.
 ${N2_COLOR}connect_timeout=${N0_COLOR} - change ssh connect timeout, 5 by default.
 ${N2_COLOR}netgroup=${N0_COLOR}        - netgroup name, its hosts are added to the host list.
 ${N2_COLOR}node=${N0_COLOR}            - comma-separated host list,
                    e.g.: node=s1,s2 or mask, e.g.: '*'.
 ${N2_COLOR}nodelist=${N0_COLOR}        - path to ascii file with node list.
 ${N2_COLOR}quiet=${N0_COLOR}           - when set to 1 execute ssh command with '-q'.
 ${N2_COLOR}tty=${N0_COLOR}             - when set to 1 force tty alloc (1 by default).
 ${N2_COLOR}tryoffline=${N0_COLOR}      - when set to 1 - try to execute even when the
                    node is marked offline (0 by default).

${H3_COLOR}Examples${N0_COLOR}:

 # cbsd rexe node=s1.my.domain ls -la /tmp
 # cbsd rexe nodelist=/tmp/linuxhosts cmdfile=/root/bin/sh.sh
 # cbsd rexe node=s1,s2,s3 date

${H3_COLOR}See also${N0_COLOR}:

 cbsd node --help

"

. ${subrdir}/nc.subr
cast_args=1
. ${cbsdinit}
. ${nodes}

#set -o errexit

# Runtime defaults; several of them are overridden later from the
# corresponding lower-case arguments (quiet=, tty=, ...).
TEMP_BASE=${ftmpdir}
REMOTE_SHELL='sh'
SSH_VERBOSE=
UNIQUE_HOSTS=1
CONCURRENT=0
MULTIHOST=0
DEBUG=0
QUIET=0
TTY=1

# ssh connect timeout: keep a caller-supplied value, otherwise use 5
: "${connect_timeout:=5}"

# remember the original cmd= value (empty when not given), it is
# restored after the argument filtering
ocmd="${cmd}"

# Build the remote command string from the positional arguments.
# With cast_args=1 the arguments known to this module (node=, tty=, ...)
# are dropped, cmd=VALUE contributes VALUE, and any other param=value pair
# is re-emitted as param='value'. Arguments without '=' are passed as-is.
if [ "${cast_args}" -eq 1 ]; then
	# NOTE: the '~'-joined value assigned here is replaced right away by
	# the filtered string produced by the loop below.
	INIT_IFS="${IFS}"
	IFS="~"
	cmd="$@"
	IFS="${INIT_IFS}"
	cmd=$( while [ -n "${1}" ]; do
		IFS="~"
		# strpos reports the position of '=' through its exit status,
		# 0 means "not found"
		strpos --str="${1}" --search="="
		_pos=$?
		if [ ${_pos} -eq 0 ]; then
			# not params=value form
			printf '%s ' "${1}"
			shift
			continue
		fi

		_arg_len=$( strlen "${1}" )
		_pref=$(( _arg_len - _pos ))
		ARG=$( substr --pos=0 --len=${_pos} --str="${1}" )
		VAL=$( substr --pos=$(( ${_pos} +2 )) --len=${_pref} --str="${1}" )

		case "${ARG}" in
			node|nodelist|cmdfile|netgroup|quiet|tty|tryoffline|connect_timeout|cast_args)
				# module's own parameters never reach the remote side
				;;
			cmd)
				# user data must be an argument, never the printf
				# format: '%' or '\' in it would corrupt the command
				printf '%s ' "${VAL}"
				;;
			*)
				printf "%s='%s' " "${ARG}" "${VAL}"
				;;
		esac
		shift
	done )
else
	cmd="$@"
fi

# an explicit cmd= value wins over the string rebuilt from the arguments
[ -n "${ocmd}" ] && cmd="${ocmd}"

# Nothing to execute: neither a command nor a cmdfile= was given.
# A cmdfile alone is sufficient, its content becomes the script later on.
[ -z "${cmd}" ] && [ -z "${cmdfile}" ] && err 1 "${N1_COLOR}empty exec args${N0_COLOR}"

node_is_multiple	# import node_list if node is mask

# When node= expanded to several nodes, spawn one 'cbsd rexe' task per
# node and let multiple_processing_spawn report on them, then exit.
if [ -n "${node_list}" ]; then
	. ${subrdir}/multiple.subr

	${ECHO} "${N1_COLOR}Hint: Press ${N2_COLOR}'Ctrl+t'${N1_COLOR} to see last logfile line for active task${N0_COLOR}" 1>&2
	task_owner="rexec_multiple"

	task_id=
	task_id_cur=

	# spawn command for all node
	for node in ${node_list}; do
		# logfile and client_id are keyed by the node being processed;
		# the loop variable here is 'node', not 'jname'
		task_id_cur=$( task mode=new jname=${node} logfile=${tmpdir}/${task_owner}.${node}.log.$$ client_id=${node} autoflush=0 owner=${task_owner} /usr/bin/env NOCOLOR=1 /usr/local/bin/cbsd rexe node=${node} "${cmd}" 2>/dev/null )
		task_id="${task_id} ${task_id_cur}"
	done

	# comma-separated list of all spawned task ids
	multiple_task_id_all=$( echo ${task_id} | ${TR_CMD} " " "," )
	sleep 1
	multiple_processing_spawn -o ${task_owner} -n "rexe"
	echo
	exit 0
fi

# Run ${THESCRIPT} on the remote side through ${SSH_CMD} ${SSH_ARGS} and
# relay its non-empty output lines to stdout (unless QUIET >= 1 or
# MULTIHOST is not "0").
# Globals:  TTY, SSH_CMD, SSH_ARGS, sshdir, nodename, THESCRIPT, QUIET,
#           MULTIHOST (read); TTYOP (written)
# Returns:  exit status of the ssh command itself, not of the filter loop
ssh_connect()
{
	local ret SSH_LINE

	if [ "${TTY}" = 0 ]; then
		TTYOP="-T"
	else
		TTYOP="-tt"
	fi

	# POSIX sh has no PIPESTATUS: the ssh status is written to fd 3, which
	# is the only thing the command substitution captures, while the
	# filtered output goes to the caller's stdout via fd 4.
	{
		ret=$(
			{
				{
					# fds 3/4 are closed for ssh so a lingering ssh
					# process cannot keep the capture pipe open
					${SSH_CMD} -oControlPath=${sshdir}/sockets/%r@%h:%p $SSH_ARGS ${TTYOP} "/usr/local/bin/cbsd -c \"update_idle ${nodename}\"; ${THESCRIPT}" 3>&- 4>&-
					echo $? >&3
				} | while read -r SSH_LINE || [ -n "${SSH_LINE}" ]; do
					if [ "${QUIET}" -lt 1 ] && [ -n "${SSH_LINE}" ] && [ "${MULTIHOST}" = "0" ]; then
						printf '%s\n' "${SSH_LINE}" >&4
					fi
				done
			} 3>&1
		)
	} 4>&1

	return ${ret:-1}
}

##############################
# FUNCTIONS FOR FORKED PROCS #
##############################
# (Re)create $TEMP_DIR/hostlist with one entry of $HOSTLIST per line.
# Globals:  TEMP_DIR, HOSTLIST, DEBUG, RM_CMD (read); HOST (written)
# Exits with status 1 when the old list cannot be removed or a line
# cannot be written.
set_hostlist()
{
	local _file="${TEMP_DIR}/hostlist"

	# Create a hostlist file.
	[ "${DEBUG}" -ge 2 ] && echo "DEBUG: BUILDING HOST LIST FILE ${_file}" 1>&2

	# the path is quoted so that a stale list is really removed even when
	# TEMP_DIR contains whitespace
	${RM_CMD} -f "${_file}" || exit 1

	for HOST in ${HOSTLIST}; do
		printf '%s\n' "${HOST}" >> "${_file}" || exit 1
	done
}

# Take the first host off $TEMP_DIR/hostlist and print it on stdout.
# Prints nothing when the list is empty or cannot be counted.
# Access to the list is serialized through $TEMP_DIR/hostlist.lock.
# Globals:  CHILDNUM, TEMP_DIR, DEBUG and the *_CMD tool paths (read);
#           TOP_PID, NEXT_HOST, HOSTFILE_LEN (written)
get_next_host()
{
	# lock file: every contender appends its CHILDNUM; the one whose id
	# is on the first line of the lock file owns the lock
	while [ 1 ] ; do
		echo $CHILDNUM >> "$TEMP_DIR/hostlist.lock"
		# NOTE(review): plain 'head' is used here while the rest of the
		# function goes through ${HEAD_CMD}
		TOP_PID=$(head -1 "$TEMP_DIR/hostlist.lock" 2>/dev/null)
		if [ "$TOP_PID" = "$CHILDNUM" ] ; then
			break
		fi
		[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: hostlist file already locked.  Sleep..." 1>&2
		#usleep 1000
		sleep 1
	done
	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Locked hostfile." 1>&2

	# get next host: first line of the list plus the current line count
	NEXT_HOST=$( ${HEAD_CMD} -1 $TEMP_DIR/hostlist)
	HOSTFILE_LEN=$( ${WC_CMD} -l $TEMP_DIR/hostlist | ${AWK_CMD} '{print $1}')

	# nothing left (or the count could not be obtained): release the lock
	# and return without printing a host
	if [ -z "$HOSTFILE_LEN" -o "$HOSTFILE_LEN" = 0 ] ; then
		${RM_CMD} -f "$TEMP_DIR/hostlist.lock"
		return
	fi

	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Next host: $NEXT_HOST" 1>&2

	# re-write file removing new host: keep the last (length - 1) lines
	${RM_CMD} -f "$TEMP_DIR/hostlist.new"
	# NOTE(review): exiting here leaves hostlist.lock in place
	${TAIL_CMD} -$(( $HOSTFILE_LEN - 1 )) $TEMP_DIR/hostlist > $TEMP_DIR/hostlist.new || exit 1
	${RM_CMD} -f "$TEMP_DIR/hostlist"
	${MV_CMD} "$TEMP_DIR/hostlist.new" "$TEMP_DIR/hostlist"

	# unlock file
	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Removing hostfile lock." 1>&2
	${RM_CMD} -f "$TEMP_DIR/hostlist.lock"

	# return hostname
	echo $NEXT_HOST
}

# Worker loop: repeatedly take the next host from the shared host list,
# look up its connection data and run ssh_connect against it.
# Arguments: $1 - worker number, used for the per-worker files in TEMP_DIR
# Globals:   TEMP_DIR, DEBUG, CONCURRENT, TTY, tryoffline, connect_timeout,
#            ftmpdir (read); CHILDNUM, HOST, NODEDATA, SSH_ARGS, ret (written)
run_child()
{
	trap "exit 0" SIGHUP
	CHILDNUM=$1
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: FORKING CHILD #$CHILDNUM of $CONCURRENT (pid $!/$$)" 1>&2

	while [ 1 ] ; do
		# issue:  Cannot call get_next_host inside $() or `` because our trap won't be able to kill that.
		#     solution: avoid subshell here by directing to a file.
		${RM_CMD} -f "$TEMP_DIR/$CHILDNUM.next_host"
		get_next_host > "$TEMP_DIR/$CHILDNUM.next_host"
		HOST=$( ${CAT_CMD} "$TEMP_DIR/$CHILDNUM.next_host" )

		# empty answer: the host list is exhausted
		if [ -z "$HOST" ] ; then
			${RM_CMD} -f "$TEMP_DIR/$CHILDNUM.pid"
			break
		fi

		[ "$DEBUG" -ge 1 ] && echo "DEBUG[#$CHILDNUM]: CONNECT $HOST" 1>&2
		# publish the host this worker is busy with (read by spew_hostlist)
		${RM_CMD} -f "$TEMP_DIR/$CHILDNUM.active"
		echo "$HOST" > "$TEMP_DIR/$CHILDNUM.active"
		NODEDATA=$( cbsdsqlro nodes select ip,port,keyfile from nodelist where nodename=\"${HOST}\" )

		if [ -z "${NODEDATA}" ]; then
			echo "${HOST}: No such node in base"
			continue
		fi

		sqllist "$NODEDATA" myip myport mykey
		SSH_ARGS="-oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=${connect_timeout} -q -oPort=${myport} -i ${mykey} ${myip}"

		# skip nodes considered offline unless tryoffline=1
		if [ "${tryoffline}" -ne 1 ]; then
			if ! check_locktime ${ftmpdir}/shmux_${myip}.lock >/dev/null 2>&1; then
				echo "Node is offline: ${HOST}"
				continue
			fi
		fi

		ssh_connect
		ret=$?
		# refresh the idle mark of the node just contacted only, not of
		# every entry in the host list
		[ $ret -eq 0 ] && update_idle ${HOST}
		# NOTE(review): with TTY=0 the worker exits after its first host
		[ "$TTY" -eq 0 ] && exit
	done
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: CHILD #$CHILDNUM done" 1>&2
	${RM_CMD} -f "$TEMP_DIR/$CHILDNUM.pid" "$TEMP_DIR/$CHILDNUM.active"
}

# Create a private scratch directory below ${TEMP_BASE}, named after the
# running script, and store its path in TEMP_DIR.
# Calls 'err 1' when the directory cannot be created.
create_temp()
{
	local _prefix

	_prefix=$(basename $0)
	if ! TEMP_DIR=$( ${MKTEMP_CMD} -d ${TEMP_BASE}/${_prefix}.XXXXXX ); then
		err 1 "Error mktemp"
	fi
}

# Remove the scratch directory referenced by TEMP_DIR, if it exists.
# Returns non-zero when TEMP_DIR is not an existing directory.
destroy_temp()
{
	if [ ! -d "$TEMP_DIR" ]; then
		return 1
	fi

	${RM_CMD} -rf "$TEMP_DIR" 2>/dev/null
}

########################################
# REMEMBER TO CLEAN UP BEFORE WE PANIC #
########################################
# Signal handler: send SIGHUP to every worker whose pid is recorded in
# $TEMP_DIR/*.pid, drop the scratch directory and terminate with status 1.
shutdown()
{
	if [ "$DEBUG" -ge 1 ]; then
		echo "DEBUG: shutting down children." 1>&2
	fi

	# pid files that do not exist are silently ignored
	CPIDS=$( ${CAT_CMD} $TEMP_DIR/*.pid 2>/dev/null)

	for CPID in $CPIDS ; do
		if [ "$DEBUG" -ge 2 ]; then
			echo "DEBUG: Killing pid: $CPID" 1>&2
		fi
		kill -HUP $CPID
	done

	if [ "$DEBUG" -ge 2 ]; then
		echo "DEBUG: shutting down ssh-agent" 1>&2
		echo "DEBUG: removing temp dir" 1>&2
	fi

	destroy_temp

	if [ "$DEBUG" -ge 2 ]; then
		echo "DEBUG: done shutting down." 1>&2
	fi
	exit 1
}

# Signal handler: report on stderr which hosts are being processed
# ($TEMP_DIR/*.active) and which are still queued ($TEMP_DIR/hostlist).
spew_hostlist()
{
	{
		echo "HOSTS RUNNING:"
		${CAT_CMD} $TEMP_DIR/*.active 2>/dev/null | ${SED_CMD} 's/^/    /'
		echo "HOSTS REMAINING:"
		${CAT_CMD} $TEMP_DIR/hostlist 2>/dev/null | ${SED_CMD} 's/^/    /'
	} 1>&2
}

# Execute ${THESCRIPT} on one node.
# Arguments: $1 - node name
# Globals:   nodename, THESCRIPT, TTY, tryoffline, connect_timeout, cbsduser,
#            sshdir, ftmpdir (read); SSH_ARGS, SSHMUX and the myip/myport/mykey
#            values filled in by sqllist (written)
# Returns:   status of the executed command; 1 when the node name is empty,
#            unknown to the nodes DB, or offline (unless tryoffline=1)
rexec_node()
{
	local _host NODEDATA _ret _ttyop

	_host="${1}"

	[ -z "${_host}" ] && return 1

	# its me ? run locally as the cbsd user, no ssh involved
	if [ "${_host}" = "${nodename}" ]; then
		${SUDO_CMD} -u cbsd /bin/sh <<EOF
${THESCRIPT}
EOF
		_ret=$?
		return ${_ret}
	fi

	NODEDATA=$( cbsdsqlro nodes select ip,port,keyfile from nodelist where nodename=\"${_host}\" )

	if [ -z "${NODEDATA}" ]; then
		${ECHO} "${N1_COLOR}no such node in DB: ${N2_COLOR}${_host}${N0_COLOR}" 1>&2
		return 1
	fi

	sqllist "${NODEDATA}" myip myport mykey

	if [ "${tryoffline}" -ne 1 ]; then
		if ! check_locktime ${ftmpdir}/shmux_${myip}.lock >/dev/null 2>&1; then
			${ECHO} "${N1_COLOR}node is offline: ${N2_COLOR}${_host}${N0_COLOR}" 1>&2
			return 1
		fi
	fi

	# Honour tty=0 by disabling tty allocation. '-t' stays the default so
	# that behaviour without tty=0 is exactly what it was before.
	if [ "${TTY}" = 0 ]; then
		_ttyop="-T"
	else
		_ttyop="-t"
	fi

	SSH_ARGS="-l ${cbsduser} -oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=${connect_timeout} -q -oPort=${myport} -i ${mykey} ${myip}"
	SSHMUX="${sshdir}/sockets/${cbsduser}@${myip}:${myport}"
	${SSH_CMD} -oControlPath=${sshdir}/sockets/%r@%h:%p ${_ttyop} ${SSH_ARGS} -C "/usr/local/bin/cbsd -c \"update_idle ${nodename}\"; ${THESCRIPT}"
	_ret=$?
	[ ${_ret} -eq 0 ] && update_idle ${_host}
	return ${_ret}
}

## Main()
# interrupt/terminate clean up through shutdown, SIGQUIT prints a progress
# report, SIGHUP ends the script quietly
trap shutdown SIGINT SIGTERM
trap spew_hostlist SIGQUIT
trap "exit 0" SIGHUP

# Map the lower-case arguments onto the upper-case runtime settings.
# Every recognised argument also shifts one positional parameter.
if [ -n "${blocking}" ]; then
	BLOCKING=1 && shift
fi

[ -n "${quiet}" ] && {
	QUIET=1
	DEBUG=0
	SSH_VERBOSE="-q"
	shift
}

if [ -n "${debug}" ]; then
	DEBUG="${debug}" && shift
fi

if [ -n "${node}" ]; then
	# node= is comma-separated, HOSTLIST is space-separated
	HOSTLIST=$( echo ${node} | ${TR_CMD} ',' ' ' ) && shift
fi

if [ -n "${cmd}" ]; then
	THESCRIPT="${cmd}" && shift
fi

if [ -n "${tty}" ]; then
	TTY=${tty} && shift
fi

# tryoffline defaults to 0 when not given
if [ -n "${tryoffline}" ]; then
	checkforonline="tryoffline=${tryoffline}"
	shift
else
	tryoffline=0
	checkforonline="tryoffline=0"
fi

# nodelist=: read the host list from a file, ignoring '#' comments and
# blank lines. The path is quoted everywhere so that a name containing
# whitespace passes both the existence check and the read.
if [ -n "${nodelist}" ]; then
	[ ! -f "${nodelist}" ] && err 1 "${nodelist} file doesn't exist"
	HOSTLIST="$( ${CAT_CMD} "${nodelist}" | ${SED_CMD} -e 's/#.*//' | ${GREP_CMD} -E -v "^ *$" )"
	shift
fi

# netgroup=: resolve the members of the named netgroup through getent and
# put them in front of the host list.
if [ -n "${netgroup}" ]; then
	# the name comes from the netgroup= argument, not from whatever
	# happens to be the second positional parameter
	NETGROUP="${netgroup}"
	NETGROUP_LIST="$( ${GETENT_CMD} netgroup "${NETGROUP}" | ${XARGS_CMD} -n 1 echo | ${SED_CMD} -n '/^(.*,$/s/[,(]//gp')"
	[ -z "${NETGROUP_LIST}" ] && err 1 "Failed to get netgroup: $NETGROUP"
	HOSTLIST="$NETGROUP_LIST $HOSTLIST"
	shift
fi

# cmdfile=: append the content of the given file to the script to execute
if [ -n "${cmdfile}" ]; then
	# report the file that was actually asked for
	[ ! -e "${cmdfile}" ] && err 1 "myssh: Script File '${cmdfile}' does not exist!"
	THESCRIPT="$THESCRIPT $( ${CAT_CMD} "${cmdfile}" )"
	shift
fi

# whatever the source was, the host list is space-separated from here on
HOSTLIST=$( echo "${HOSTLIST}" | ${TR_CMD} "," " " )

# no hosts at all: show the known nodes and give up
[ -n "${HOSTLIST}" ] || {
	${ECHO} "${N1_COLOR}host list is empty${N0_COLOR}"
	${ECHO} "${N1_COLOR}Available nodes:${N0_COLOR}"
	node mode=list display=nodename header=0
	exit 1
}

COUNT_HOSTS=$( echo "${HOSTLIST}" | ${WC_CMD} -w | ${AWK_CMD} '{printf $1}' )
CONCURRENT=${COUNT_HOSTS}
if [ "$COUNT_HOSTS" = "1" ]; then
	MULTIHOST=0
fi

# neither cmd= nor cmdfile=: the remaining positional args are the script
if [ -z "${cmd}${cmdfile}" ]; then
	THESCRIPT="$@"
fi

# ssh tty option derived from TTY: 0 disables allocation, anything else
# forces it
case "${TTY}" in
	0)
		TTYOP="-T"
		;;
	*)
		TTYOP="-tt"
		;;
esac

# Run the script on every host in turn.
# _ret     - status of the host processed last
# _all_ret - most recent non-zero status seen, 0 when every host succeeded
_ret=0
_all_ret=0

for i in ${HOSTLIST}; do
	rexec_node "${i}"
	_ret=$?
	[ ${_ret} -ne 0 ] && _all_ret=${_ret}
done

# A failure on any host must be visible to the caller, even when the last
# host succeeded. For a single host this is the plain command status.
exit ${_all_ret}
