#!/usr/local/bin/cbsd
#v12.0.9
# bmigrate: live-migrate a running bhyve domain (jname) to another CBSD node (node).
#
# The MY*/CBSDMODULE/ADDHELP variables below are assigned before ${cbsdinit}
# is sourced; presumably cbsdinit consumes them for argument parsing and help
# output (confirm against cbsdinit).
MYARG="jname node"
MYOPTARG=""
MYDESC="bhyve live migration"
CBSDMODULE="bhyve"
ADDHELP=""

# Helper libraries; the paths come from variables set outside this file.
. ${subrdir}/nc.subr
. ${tools}
. ${strings}
. ${nodes}

. ${cbsdinit}

# Load the per-domain settings for ${jname}. The check right after the source
# treats a return code of 1 as "no such domain".
# NOTE(review): ${emulator} and ${jid} are presumably populated by rcconf.subr - confirm.
. ${subrdir}/rcconf.subr
[ $? -eq 1 ] && err 1 "${N1_COLOR}No such domains: ${N2_COLOR}${jname}${N0_COLOR}"
# Only bhyve domains can be migrated by this script.
[ "${emulator}" != "bhyve" ] && log_err 1 "${N1_COLOR}Not in bhyve mode${N0_COLOR}"
# A jid of 0 is treated as "domain is not running": nothing to migrate live.
[ ${jid} -eq 0 ] && err 1 "Not running"

# Resolve the destination node: its IP must be present in the local nodelist table.
remote_node_rip=$( cbsdsqlro nodes SELECT ip FROM nodelist WHERE nodename=\"${node}\" )
[ -z "${remote_node_rip}" ] && log_err 1 "${N1_COLOR}bmigrate: no such node ${node}. Please add node via: ${N2_COLOR}cbsd add node=${node}${N0_COLOR}"

# Despite its name, this value is printed in the "Source" column of the
# migration summary further down in this script.
remote_node_ip=$( getinfo mode=quiet nodeip )

${ECHO} "  ${N2_COLOR}Data gathering for live migration${N0_COLOR}"
printf "   ${H5_COLOR}* ${N1_COLOR}check for remote CBSD version: ${N0_COLOR}"
d_cbsd_ver=$( rexe node=${node} /usr/local/bin/cbsd -c version 2>/dev/null | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
# The exit status of the assignment above is the status of the LAST pipeline
# stage (awk), not of rexe, so testing $? cannot detect a failed remote call.
# A failed or silent rexe leaves the result empty; test that instead.
[ -z "${d_cbsd_ver}" ] && err 1 "${N1_COLOR}bmigrate: failed: rexe node=${node}${N0_COLOR}"
printf "${H3_COLOR}ok${N0_COLOR}"
echo
s_cbsd_ver=$( version | ${AWK_CMD} '/./{print $1}' )

# Check for compatibility
# - todo: CPU features/models check
# - todo: Memory/free amount check
# Refuse to migrate when the destination already reports something for this jname
# (the error message below treats the returned value as a pid).
remote_process_status=$( rexe tryoffline=1 node=${node} /usr/local/bin/cbsd bhyve-exist jname=${jname} | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
[ -n "${remote_process_status}" ] && err 1 "${N1_COLOR}bmigrate error: node ${N2_COLOR}${node}${N1_COLOR} already has the running bhyve process with jname: ${N2_COLOR}${jname}, pid: ${remote_process_status}${N0_COLOR}"

# Directories that are verified, one by one, against the destination node.
shared_dir="jails-data jails-rcconf jails-system"

${ECHO} "   ${H5_COLOR}* ${N1_COLOR}check for shared storage: ${N0_COLOR}"

for i in ${shared_dir}; do
	printf "      ${H5_COLOR}* ${N2_COLOR}${i}: ${N0_COLOR}"
	# Abort as soon as one directory fails the check.
	if ! check_for_shared_dir -d ${i} -n ${node}; then
		err 1 "${N1_COLOR}directory id do not match, dir not shared: ${N2_COLOR}${i}${N1_COLOR} ?${N0_COLOR}"
	fi
	printf "${H3_COLOR}ok${N0_COLOR}\n"
done

# Values shown in the migration summary table.

# The domain keeps its name on the destination.
source_vm_name="${jname}"
dest_vm_name="${jname}"

# Node names: the local one is read from the cbsd user's nodename file.
remote_node_name="${node}"
my_nodename=$( ${CAT_CMD} ~cbsd/nodename | ${AWK_CMD} '{printf $1}' )

# Static descriptors (hard-coded in this script).
DC_NAME="local"
LOCAL_FS="local,shared"
REMOTE_FS="local,shared"
s_migrator_ver="0.1"
d_migrator_ver="0.1"

# Instance state: "active" when jid is non-zero, "offline" otherwise.
jail_status="offline"
[ ${jid} -ne 0 ] && jail_status="active"

# Print the migration summary: instance details first, then a two-column
# source/destination comparison table.
echo
${ECHO} "  ${N2_COLOR}Data gathering complete!${N0_COLOR}"
srcpad=" "
destpad=" "

# NOTE(review): SZMDS, rise and dsCNT are not assigned anywhere in this script;
# unless a sourced file provides them, the "total dataset size" line is blank.
${CAT_CMD} <<XxX1387784305xXx

We will be migrating:
     INSTANCE:
               jname:  ${jname}
                fqdn:  ${host_hostname}
          IP Addr(s):  ${ip4_addr}
          datacenter:  ${DC_NAME}
      instance state:  ${jail_status}
                type:  ${emulator}
               owner:  root
           create at:  -
          base image:  -
  total dataset size:  ${SZMDS} ${rise} across ${dsCNT} datasets
        migration id:  $$
XxX1387784305xXx

${CAT_CMD} <<XxX1394397713xXx

                    Source                                        Destination
----------------------------------------------  ----------------------------------------------
XxX1394397713xXx
# Every argument is quoted: an unquoted empty value would vanish and shift the
# destination value into the source column, and a value containing whitespace
# would be split into extra arguments, making printf repeat the format line.
printf "Host:     %-36s  Host:     %-36s\n" "${my_nodename}" "${remote_node_name}"
printf "JNAME:    %-36s  JNAME:    %-36s\n" "${source_vm_name}" "${dest_vm_name}"
printf "SDC Ver:  %-36s  SDC Ver:  %-36s\n" "${s_cbsd_ver}" "${d_cbsd_ver}"
printf "IP Addr:  %-36s  IP Addr:  %-36s\n" "${remote_node_ip}" "${remote_node_rip}"
printf "RemoteFS: %-36s  RemoteFS: %-36s\n" "${LOCAL_FS}" "${REMOTE_FS}"
printf "API ver:  %-36s  API ver:  %-36s\n" "${s_migrator_ver}" "${d_migrator_ver}"

echo

# Ask for confirmation; anything but "yes" ends the script with status 1.
if ! getyesno "Are you ready to proceed? "; then
	${ECHO} "${N1_COLOR}Exiting.${N0_COLOR}"
	exit 1
fi
echo

# Start the wall-clock timer used for the final "done in ..." line.
. ${subrdir}/time.subr
st_time=$( ${DATE_CMD} +%s )

${ECHO} ${N1_COLOR} "  ${N2_COLOR}Preparing destination node...${N0_COLOR}"

printf "   ${H5_COLOR}* ${N1_COLOR}Phase1: launch ${jname} domain on remote node ... ${N0_COLOR}"

# Drop a stale live migration config so that a later readability test
# only succeeds on a freshly written file.
/bin/rm -f ${jailsysdir}/${jname}/live_migration.conf

# Queue "bstart jname=... lm=1" as a task on the destination node and keep
# the first word of its output as the task id.
r_task_id=$( rexe tryoffline=1 node=${node} /usr/local/bin/cbsd task mode=new owner=migration.$$ /usr/local/bin/cbsd bstart jname=${jname} lm=1 debug_engine="${debug_engine}" | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
if [ -z "${r_task_id}" ]; then
	err 1 "${N1_COLOR}cbsd task on ${node} failed: no task id${N0_COLOR}"
fi

sleep 1
printf "${H3_COLOR}ok${N0_COLOR}"

### test 1: waiting for remote bhyve is running
# Poll until live_migration.conf is readable locally AND the remote task
# reports status 2.
# NOTE(review): status 2 is presumably the "finished" state of cbsd taskd - confirm.
echo
printf "   ${H5_COLOR}* ${N1_COLOR}Phase2 (1/3): waiting for bhyve instance ready by task id ${N2_COLOR}${r_task_id}${N1_COLOR}...${N0_COLOR}"
max_attempt=10		# ( sleep 2 x 10 = 20 seconds for run: this should be enough )
cur_attempt=0
# Success is tracked by an explicit flag. cur_attempt is incremented exactly
# once per iteration, so it never exceeds max_attempt and it equals max_attempt
# both on a timeout and on a success in the very last attempt; comparing the
# two counters cannot tell these cases apart.
r_task_ready=0

for i in $( /usr/bin/seq 1 ${max_attempt} ); do
	cur_attempt=$(( cur_attempt + 1 ))
	# The config was removed before the remote start was queued; wait for a fresh one.
	if [ ! -r ${jailsysdir}/${jname}/live_migration.conf ]; then
		sleep 2
		continue
	fi
	r_job_status=$( rexe tryoffline=1 node=${node} /usr/local/cbsd/misc/sqlcli var/db/cbsdtaskd.sqlite "SELECT status FROM taskd WHERE id=${r_task_id}" 2>/dev/null | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
	[ -z "${r_job_status}" ] && err 1 "${N1_COLOR}unable to determine job status for ${N2_COLOR}${r_task_id}${N1_COLOR} on remote host, empty val${N0_COLOR}"
	if [ "${r_job_status}" = "2" ]; then
		printf "${H3_COLOR} ok${N0_COLOR}"
		r_task_ready=1
		break
	fi
	printf "${N1_COLOR}.[${r_job_status}].${N0_COLOR}"
	sleep 2
done

echo
[ ${r_task_ready} -ne 1 ] && err 1 "${N1_COLOR}Taskd timeout, max attempt exceeded: ${N2_COLOR}${cur_attempt}${N1_COLOR}. Please check on node ${N2_COLOR}${node}${N1_COLOR}: ${N2_COLOR}cbsd taskls${N0_COLOR}"

### test 2: check config
# Load live_migration.conf and make sure every expected setting is present.
echo
printf "   ${H5_COLOR}* ${N1_COLOR}Phase2 (2/3): check migration configuration, task id ${N2_COLOR}${r_task_id}${N1_COLOR}...${N0_COLOR}"

# Reset first so that values inherited from the environment cannot mask a
# setting that is missing from the file.
live_migration_src_nodename=
live_migration_dst_nodename=
live_migration_dst_port=
live_migration_src_nodename_ip=
live_migration_dst_nodename_ip=

. ${jailsysdir}/${jname}/live_migration.conf

# trap for cleanup
# The trap text is double-quoted, so ${node}, $$ and ${jname} are expanded now.
# EXIT is included: leaving the script from here on (assuming err exits, this
# covers the err calls below) queues a "bstop lm=1" task on the destination.
trap "cbsd rexe tryoffline=1 node=${node} /usr/local/bin/cbsd task mode=new owner=migration.$$ /usr/local/bin/cbsd bstop jname=${jname} lm=1" HUP INT ABRT BUS TERM EXIT

# Report the NAME of the missing setting; expanding the (empty) variable
# itself would print just "Empty ".
[ -z "${live_migration_src_nodename}" ] && err 1 "${N1_COLOR}Empty live_migration_src_nodename${N0_COLOR}"
[ -z "${live_migration_dst_nodename}" ] && err 1 "${N1_COLOR}Empty live_migration_dst_nodename${N0_COLOR}"
[ -z "${live_migration_dst_port}" ] && err 1 "${N1_COLOR}Empty live_migration_dst_port${N0_COLOR}"
[ -z "${live_migration_src_nodename_ip}" ] && err 1 "${N1_COLOR}Empty live_migration_src_nodename_ip${N0_COLOR}"
[ -z "${live_migration_dst_nodename_ip}" ] && err 1 "${N1_COLOR}Empty live_migration_dst_nodename_ip${N0_COLOR}"

echo
${ECHO} "    ${H5_COLOR}- (cfg) ${N1_COLOR}live_migration_src_nodename: ${N2_COLOR}${live_migration_src_nodename}${N0_COLOR}"
${ECHO} "    ${H5_COLOR}- (cfg) ${N1_COLOR}live_migration_dst_nodename: ${N2_COLOR}${live_migration_dst_nodename}${N0_COLOR}"
${ECHO} "    ${H5_COLOR}- (cfg) ${N1_COLOR}live_migration_dst_port: ${N2_COLOR}${live_migration_dst_port}${N0_COLOR}"
${ECHO} "    ${H5_COLOR}- (cfg) ${N1_COLOR}live_migration_src_nodename_ip: ${N2_COLOR}${live_migration_src_nodename_ip}${N0_COLOR}"
${ECHO} "    ${H5_COLOR}- (cfg) ${N1_COLOR}live_migration_dst_nodename_ip: ${N2_COLOR}${live_migration_dst_nodename_ip}${N0_COLOR}"

### test 3: waiting for bind/socket's open
# Probe the migration port up to max_attempt times, 2 seconds apart, and stop
# at the first probe that returns "0".
echo
printf "   ${H5_COLOR}* ${N1_COLOR}Phase2 (3/3): check for remote host bind/socket's open, task id ${N2_COLOR}${r_task_id}${N1_COLOR}...${N0_COLOR}"
# waiting for remote bhyve is running
max_attempt=3		# sleep 2 x 3 = 6 seconds for run: this should be enough
cur_attempt=0

for i in $( /usr/bin/seq 1 ${max_attempt} ); do
	cur_attempt=$(( cur_attempt + 1 ))

	# scan by next_tcp_port
	# start_port == end_port, so exactly one port is examined.
	# NOTE(review): presumably get-next-tcp-port prints 0 when no port in the
	# range is free, i.e. the port is already bound - confirm.
	# NOTE(review): the probe uses live_migration_src_nodename_ip, while the
	# bhyvectl --migrate call later in this script targets
	# live_migration_dst_nodename_ip - confirm this is the intended address.
	_tmp=$( get-next-tcp-port address=${live_migration_src_nodename_ip} end_port=${live_migration_dst_port} start_port=${live_migration_dst_port} )
	if [ "${_tmp}" != "0" ]; then
		printf "${N1_COLOR}.${N0_COLOR}"
		sleep 2
	else
		printf "${H3_COLOR} ok${N0_COLOR}"
		break
	fi
done

echo
# NOTE(review): cur_attempt is incremented once per iteration and the loop runs
# at most max_attempt times, so this condition can never be true: a probe that
# never succeeded is not detected and the script continues with the migration.
[ ${cur_attempt} -gt ${max_attempt} ] && err 1 "${N1_COLOR}Taskd timeout, max attempt exceeded: ${N2_COLOR}${cur_attempt}${N1_COLOR}. Please check on node ${N2_COLOR}${node}${N1_COLOR}: ${N2_COLOR}cbsd taskls${N0_COLOR}"

# Remember the log file path of the remote task before starting the migration.
r_job_logfile=$( cbsd rexe tryoffline=1 node=${node} /usr/local/cbsd/misc/sqlcli var/db/cbsdtaskd.sqlite "SELECT logfile FROM taskd WHERE id=${r_task_id}" 2>/dev/null | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
sleep 1
${ECHO} "   ${H5_COLOR}* ${N1_COLOR}Phase3: migration, please wait...${N0_COLOR}"
# The message says "remote cmd", but the bhyvectl call below is executed on this host.
${ECHO} "${N4_COLOR}     [debug]remote cmd: bhyvectl --migrate=${live_migration_dst_nodename_ip},${live_migration_dst_port} --vm=${jname}${N0_COLOR}"

# clean trap
# Replaces the handler for the listed signals and EXIT with a harmless command
# instead of resetting it.
# NOTE(review): because a handler stays installed, HUP/INT/TERM run "date"
# rather than the default action - presumably so the script is not interrupted
# mid-migration; confirm.
trap "date > /dev/null" HUP INT ABRT BUS TERM EXIT

/usr/sbin/bhyvectl --migrate=${live_migration_dst_nodename_ip},${live_migration_dst_port} --vm=${jname}

# Must directly follow the bhyvectl call so that $? still refers to it.
# NOTE(review): bhyve_migrate_ret is not read anywhere else in this script.
bhyve_migrate_ret=$?
# 1 once the remote task has been seen with status 2, 0 otherwise (timeout).
# The final status handler of this script distinguishes both values.
bhyve_remote_task_complete=0

# Well, at this point, we no longer know in what state everything is
# and we need to monitor both systems to draw some conclusions.
# First: let's wait for the completion of the remote task
# Second: find bhyve process with ${jname} on both host

# waiting for task finished
max_attempt=5
cur_attempt=0

printf "   ${H5_COLOR}* ${N1_COLOR}Phase4: Waiting for task finished: ${N2_COLOR}${r_task_id}...${N0_COLOR}"

for i in $( /usr/bin/seq 1 ${max_attempt} ); do
	cur_attempt=$(( cur_attempt + 1 ))
	r_job_status=$( cbsd rexe tryoffline=1 node=${node} /usr/local/cbsd/misc/sqlcli var/db/cbsdtaskd.sqlite "SELECT status FROM taskd WHERE id=${r_task_id}" 2>/dev/null | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
	if [ "${r_job_status}" = "2" ]; then
		printf "${H3_COLOR} ok${N0_COLOR}"
		# Record completion here: cur_attempt never exceeds max_attempt, so a
		# counter comparison after the loop cannot detect a timeout.
		bhyve_remote_task_complete=1
		break
	fi
	printf "${N1_COLOR}.${N0_COLOR}"
	sleep 1
done

echo
# Timeout is reported but is not fatal: the later checks decide the outcome.
if [ "${bhyve_remote_task_complete}" != "1" ]; then
	${ECHO} "${W1_COLOR}bmigrate error: ${N1_COLOR}Taskd timeout, max attempt exceeded: ${N2_COLOR}${cur_attempt}${N1_COLOR}. Please check on node ${N2_COLOR}${node}${N1_COLOR}: ${N2_COLOR}cbsd taskls${N0_COLOR}"
fi

# Show the remote task log, framed by two identical delimiter lines.
_log_rule="${H5_COLOR}----------------------${H1_COLOR}${BOLD}[log]${H5_COLOR}----------------------${N0_COLOR}"
${ECHO} "      ${N1_COLOR}Remote log for taskid ${H3_COLOR}${r_task_id}${N1_COLOR}: ${N2_COLOR}${r_job_logfile}${N1_COLOR}:${N0_COLOR}"
${ECHO} "${_log_rule}"
rexe tryoffline=1 node=${node} ${CAT_CMD} ${r_job_logfile}
${ECHO} "${_log_rule}"
echo

echo
printf "   ${H5_COLOR}* ${N1_COLOR}Phase5: waiting for the bhyve process to go away${N0_COLOR}"

# Wait (up to 20 x 3 seconds) for the local /dev/vmm node of the domain to vanish.
max_attempt=20
cur_attempt=0

for i in $( /usr/bin/seq 1 ${max_attempt} ); do
	cur_attempt=$(( cur_attempt + 1 ))
	if [ ! -e "/dev/vmm/${jname}" ]; then
		printf "${H3_COLOR} ok${N0_COLOR}"
		break
	fi
	printf "${N1_COLOR}.${N0_COLOR}"
	sleep 3
done

echo
# Probe both nodes for the domain; a non-empty value means bhyve-exist printed something.
remote_process_status=$( rexe tryoffline=1 node=${node} /usr/local/bin/cbsd bhyve-exist jname=${jname} | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )
local_process_status=$( bhyve-exist jname=${jname} | ${TR_CMD} -d \\r | ${AWK_CMD} '/./{print $1}' )

if [ -z "${remote_process_status}" ]; then
	printf "     ${H5_COLOR}* ${N1_COLOR}Phase5: remote bhyve process: ${W1_COLOR}not registered${N0_COLOR}"
else
	printf "     ${H5_COLOR}* ${N1_COLOR}Phase5: remote bhyve process: ${H3_COLOR}registered${N0_COLOR}"
fi

# Final status handler
# Evaluates the task flag together with the remote/local process probes and
# stops the script on every failure combination.

####### Remote side failed #####
# Scenario 1: the remote task completed, yet no bhyve process on the remote node.
echo
${ECHO} "${N4_COLOR}     [debug]error handler: test 1${N0_COLOR}"
if [ "${bhyve_remote_task_complete}" = "1" ] && [ -z "${remote_process_status}" ]; then
	err 1 "${N1_COLOR}bmigrate: migration failed: task complete (id:${N2_COLOR}${r_task_id}${N1_COLOR}) but bhyve process is absent, check log${N0_COLOR}"
fi

# Scenario 2: the remote task is not complete and no bhyve process on the remote node.
${ECHO} "${N4_COLOR}     [debug]error handler: test 2${N0_COLOR}"
if [ "${bhyve_remote_task_complete}" = "0" ] && [ -z "${remote_process_status}" ]; then
	${ECHO} "${N1_COLOR}bmigrate: migration failed: task still in progress (id:${N2_COLOR}${r_task_id}${N1_COLOR}) but bhyve process is absent. Force to kill. Please check log${N0_COLOR}"
	# Extra warning when the domain is gone from this host as well.
	if [ -z "${local_process_status}" ]; then
		err 1 "${W1_COLOR}Warning: ${N1_COLOR}No such bhyve process on current host, bhyve was crashed???${N0_COLOR}"
	fi
	exit 1
fi

# Scenario 3: both nodes report a bhyve process for the domain (split brain).
# Only a message is printed here; nothing is stopped by this branch.
${ECHO} "${N4_COLOR}     [debug]error handler: test 3${N0_COLOR}"
if [ -n "${remote_process_status}" ] && [ -n "${local_process_status}" ]; then
	${ECHO} "${N1_COLOR}bmigrate: migration failed: Split brain: two bhyve process with ${N2_COLOR}${jname}${N1_COLOR} on both nodes. Force to kill on remote node. Please check log${N0_COLOR}"
	exit 1
fi

# Scenario 4: the local node still has the process and the remote node has none.
# NOTE(review): as long as bhyve_remote_task_complete is 0 or 1, an empty
# remote_process_status is already caught by scenario 1 or 2.
${ECHO} "${N4_COLOR}     [debug]error handler: test 4${N0_COLOR}"
if [ -z "${remote_process_status}" ] && [ -n "${local_process_status}" ]; then
	${ECHO} "${N1_COLOR}bmigrate: migration failed: no bhyve process on remote node with ${N2_COLOR}${jname}${N1_COLOR} while local still has it. Please check log${N0_COLOR}"
	exit 1
fi

# Now the last scenario remains - when everything went well
# Reload the domain settings before the domain is unregistered locally.
. ${subrdir}/rcconf.subr > /dev/null 2>&1

# TODO: SQLite backup ?
${ECHO} "   ${H5_COLOR}* ${N1_COLOR}Phase6: unregister ${jname} on ${my_nodename}${N0_COLOR}"
# local_unregister records the outcome: 1 = unregistered, 0 = bunregister failed.
if bunregister jname=${jname}; then
	local_unregister=1
else
	${ECHO} "${W1_COLOR}Warning: ${N1_COLOR}Unregister error: ${N2_COLOR}${jname}${N0_COLOR}"
	local_unregister=0
fi

JAILRCCONF="${jailsysdir}/${jname}/rc.conf_${jname}"

# lets wait for rc-conf file saved
printf "   ${H5_COLOR}* ${N1_COLOR}Phase7: waiting for rc.conf: ${N0_COLOR}"
max_attempt=10
cur_attempt=0
# Explicit flag: cur_attempt is incremented once per iteration and never
# exceeds max_attempt, so comparing the counters cannot reveal a timeout.
rcconf_saved=0

for i in $( /usr/bin/seq 1 ${max_attempt} ); do
	cur_attempt=$(( cur_attempt + 1 ))
	if [ -r "${JAILRCCONF}" ]; then
		printf "${H3_COLOR} ok${N0_COLOR}"
		rcconf_saved=1
		break
	fi
	printf "${N1_COLOR}.${N0_COLOR}"
	sleep 3
done

echo
# Informational only: the script still goes on to the remote registration.
if [ ${rcconf_saved} -ne 1 ]; then
	${ECHO} "${N1_COLOR}No rcconf saved. bregister on remode node is failed${N0_COLOR}"
	# Restore backup from SQLite3? No RC Conf here?
fi

# Register the domain on the destination node.
${ECHO} "   ${H5_COLOR}* ${N1_COLOR}Phase7: register ${jname} on ${remote_node_name}${N0_COLOR}"
rexe tryoffline=1 node=${node} /usr/local/bin/cbsd bregister jname=${jname}

# Refresh the inventory in both directions; output is discarded.
${ECHO} "   ${H5_COLOR}* ${N1_COLOR}Phase8: update inventory${N0_COLOR}"
retrinv node=${remote_node_name} >/dev/null 2>&1
rexe tryoffline=1 node=${node} /usr/local/bin/cbsd retrinv node=${my_nodename} >/dev/null 2>&1

# Report the elapsed time since the user confirmed the migration.
end_time=$( ${DATE_CMD} +%s )
diff_time=$( displaytime $(( end_time - st_time )) )
${ECHO} "${N1_COLOR}${CBSD_APP} done ${N2_COLOR}in ${diff_time}${N0_COLOR}"
