#! /bin/bash
# FSQA Test No. btrfs/011
#
# Test of the btrfs replace operation.
#
# The amount of tests done depends on the number of devices in the
# SCRATCH_DEV_POOL. For full test coverage, at least 5 devices should
# be available (e.g. 5 partitions).
#
# The source and target devices for the replace operation are
# arbitrarily chosen out of SCRATCH_DEV_POOL. Since the target device
# mustn't be smaller than the source device, the requirement for this
# test is that all devices have _exactly_ the same size. If this is
# not the case, this test is not run.
#
# To check the filesystems after replacing a device, a scrub run is
# performed, a btrfsck run, and finally the filesystem is remounted.
#
#-----------------------------------------------------------------------
# Copyright (C) 2013 STRATO.  All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
#-----------------------------------------------------------------------
#

# Test identity: the sequence name is the file name of this script and
# the result files are named after it below $RESULT_DIR.
seq=$(basename $0)
seqres="${RESULT_DIR}/${seq}"
echo "QA output created by ${seq}"

# Bookkeeping used by _cleanup and by the exit trap.
here=$(pwd)
tmp="/tmp/$$"
status=1	# stays at "failure" until the end of the test is reached
noise_pid=0	# PID of the background noise writer, 0 if none is running

_cleanup()
{
	# Terminate the background noise writer, but only if one was
	# started (noise_pid != 0) and ps still lists its PID.
	if [ $noise_pid -ne 0 ]; then
		ps -p $noise_pid | grep -q $noise_pid && \
			kill -TERM $noise_pid
	fi

	# Reap all background children, then drop the scratch file.
	wait
	rm -f $tmp.tmp
}
# Run _cleanup and leave with $status on normal exit as well as on
# SIGHUP, SIGINT, SIGQUIT and SIGTERM.
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM

# Pull in the standard environment, filters and checks.
source ./common/rc
source ./common/filter

# Requirements of this test; the real QA test starts here.
_supported_fs btrfs
_require_scratch_nocheck
_require_scratch_dev_pool 4
_require_btrfs_dump_super

# Start without leftovers of an earlier run.
rm -f $seqres.full $tmp.tmp

echo "*** test btrfs replace"

# Build a filesystem out of the first devices of SCRATCH_DEV_POOL, fill
# it with data and run btrfs_replace_test on it. For the raid1, raid5,
# raid6 and raid10 profiles a second round with the '-r' option follows,
# unless the cancel variant was requested.
#
# Arguments:
#   $1 - options passed to mkfs (e.g. "-m raid1 -d raid1")
#   $2 - number of devices the filesystem is built from; the pool has to
#        hold at least one device more (the replace target), otherwise
#        the workout is skipped
#   $3 - "cancel" to cancel the replace operation after starting it
#   $4 - "thorough" for a larger data set, anything else for a quick run
# Returns:
#   0 if the workout is skipped or ends after the first round, 1 if the
#   mount for the '-r' round fails, otherwise the status of the final
#   unmount.
workout()
{
	local mkfs_options="$1"
	local num_devs4raid="$2"
	local with_cancel="$3"
	local quick="$4"
	local source_dev="$(echo ${SCRATCH_DEV_POOL} | awk '{print $1}')"
	local target_dev="$(echo ${SCRATCH_DEV_POOL} | awk '{print $NF}')"
	local pool_size="$(echo $SCRATCH_DEV_POOL | wc -w)"
	local min_pool_size=$((num_devs4raid + 1))
	local fssize
	local i

	echo >> $seqres.full
	echo "---------workout \"$1\" $2 $3 $4-----------" >> $seqres.full

	# besides the devices of the filesystem, one more device is needed
	# as the target of the replace operation
	if [ "$pool_size" -lt "$min_pool_size" ]; then
		echo "Skip workout $1 $2 $3 $4" >> $seqres.full
		echo "Too few devices in SCRATCH_DEV_POOL $SCRATCH_DEV_POOL, required: " $min_pool_size >> $seqres.full
		return 0
	fi

	# use min number of disks in order to fill up the disk to replace
	# as much as possible
	local used_devs_without_1st="$(echo $SCRATCH_DEV_POOL | \
		awk -v n="$num_devs4raid" '{ORS=" "; for (i = 2; i <= (NF - 1 < n ? NF - 1 : n); i++) print $i}')"

	# _scratch_mkfs adds the 1st device again (which is $SCRATCH_DEV)
	_scratch_mkfs $mkfs_options $used_devs_without_1st >> $seqres.full 2>&1 || _fail "mkfs failed"

	# create a filesystem on the target device just for the sake of
	# being able to query its size with btrfs-show-super
	$MKFS_BTRFS_PROG $MKFS_OPTIONS $target_dev >> $seqres.full 2>&1 || _fail "mkfs target_dev failed"

	# The source and target devices for the replace operation are
	# arbitrarily chosen out of the pool. Since the target device mustn't
	# be smaller than the source device, the requirement for this test is
	# that all devices have _exactly_ the same size. If this is not the
	# case, this test is not run.
	local num_lines=$($BTRFS_SHOW_SUPER_PROG $SCRATCH_DEV $used_devs_without_1st $target_dev | grep dev_item.total_bytes | uniq | wc -l)
	if [ $num_lines -gt 1 ]; then
		_notrun "Different device sizes detected"
	fi

	if [ $($BTRFS_SHOW_SUPER_PROG $SCRATCH_DEV | grep dev_item.total_bytes | awk '{print $2}') -lt 2500000000 ]; then
		_notrun "device size too small"
	fi

	_scratch_mount

	echo "$BTRFS_UTIL_PROG filesystem show" >> $seqres.full
	$BTRFS_UTIL_PROG filesystem show >> $seqres.full

	# Generate metadata and some minimal user data, generate 500 times
	# 20K extents in the data chunk and fill up metadata with inline
	# extents.
	for i in $(seq 1 500); do
		_ddt of=$SCRATCH_MNT/l$i bs=16385 count=1
		_ddt of=$SCRATCH_MNT/s$i bs=3800 count=1
	done > /dev/null 2>&1

	# Generate a template once and quickly copy it multiple times.
	_ddt of=$SCRATCH_MNT/t0 bs=1M count=1 > /dev/null 2>&1

	if [ "${quick}Q" = "thoroughQ" ]; then
		# The intention of this "thorough" test is to increase
		# the probability of random errors, in particular in
		# conjunction with the background noise generator and
		# a sync call while the replace operation is ongoing.
		fssize=2048
	elif [ "${with_cancel}Q" = "cancelQ" ]; then
		# The goal is to produce enough data to prevent that the
		# replace operation finishes before the cancel request
		# is started.
		fssize=1024
	else
		fssize=64
	fi

	# since the available size was tested before, do not tolerate
	# any failures
	for i in $(seq $fssize); do
		cp $SCRATCH_MNT/t0 $SCRATCH_MNT/t$i || _fail "cp failed"
	done > /dev/null 2>> $seqres.full
	sync; sync

	btrfs_replace_test $source_dev $target_dev "" $with_cancel $quick
	_scratch_unmount > /dev/null 2>&1

	# 'grep -E' rather than 'egrep': GNU grep 3.8 and later print an
	# obsolescence warning on stderr for every egrep call, which would
	# end up in the output of this test.
	if echo $mkfs_options | grep -Eqv "raid1|raid5|raid6|raid10" || \
	   [ "${with_cancel}Q" = "cancelQ" ]; then
		# the -r option has no effect without mirrors, skip -r test
		# in this case, and if only the canceling should be tested
		# as well
		return 0
	fi

	# One more time with the '-r' option this time. Instead of wasting
	# time to populate the filesystem with data again, use the
	# existing filesystem in the state as it is after the previous
	# replace operation.
	# If possible, use a strategy to select the source and target
	# device so that we really change bits on the target disk, see
	# below.

	# The default: For the 2nd run, the new target drive is the old
	# source drive, and the new source drive is the old target drive.
	# Since except for the noise data, the copied data is already on
	# the new target disk (which is the old source disk), this is not
	# optimal to check whether data is copied correctly.
	local tmp_dev="$source_dev"
	source_dev="$target_dev"
	target_dev="$tmp_dev"

	# If we have at least one more device in the SCRATCH_DEV_POOL than
	# used so far, use one of those for the new target device.
	if [ "$pool_size" -gt "$min_pool_size" ]; then
		target_dev="$(echo ${SCRATCH_DEV_POOL} | awk '{print $(NF-1)}')"
	fi

	# If the filesystem is built out of more than one device, use a
	# different source device for this round.
	if [ $num_devs4raid -gt 1 ]; then
		source_dev="$(echo ${SCRATCH_DEV_POOL} | awk '{print $2}')"
	fi

	# Mount similar to _scratch_mount, but since the SCRATCH_DEV (the
	# 1st device in SCRATCH_DEV_POOL) was replaced by the previous
	# btrfs replace operation, substitute SCRATCH_DEV with a device
	# that is known to be part of the SCRATCH_MNT filesystem.
	_mount -t $FSTYP $(_scratch_mount_options | sed "s&${SCRATCH_DEV}&${source_dev}&")
	if [ $? -ne 0 ]; then
		echo "mount failed"
		return 1
	fi

	btrfs_replace_test $source_dev $target_dev "-r" $with_cancel $quick
	_scratch_unmount > /dev/null 2>&1
}

# Replace source_dev by target_dev on the filesystem mounted at
# SCRATCH_MNT while background writes are going on, then verify the
# filesystem with a scrub, a filesystem check and a mount.
#
#   $1  device to be replaced
#   $2  device that takes over
#   $3  additional options for 'replace start' (may be empty)
#   $4  "cancel": cancel the replace operation after it was started
#   $5  "thorough": force a sync while the replace operation runs
btrfs_replace_test()
{
	local source_dev="$1"
	local target_dev="$2"
	local replace_options="$3"
	local with_cancel="$4"
	local quick="$5"
	local start_cmd

	# Some (slow) background traffic that runs in parallel to the
	# replace operation. The writer failing early with ENOSPC is
	# not a problem.
	cat /dev/urandom | od > $SCRATCH_MNT/noise 2>> $seqres.full &
	noise_pid=$!

	case "$with_cancel" in
	cancel)
		# no '-B' option: the replace operation is backgrounded
		start_cmd="$BTRFS_UTIL_PROG replace start -f $replace_options $source_dev $target_dev $SCRATCH_MNT"
		echo "$start_cmd" >> $seqres.full
		$start_cmd >> $seqres.full 2>&1 || _fail "btrfs replace start failed"
		sleep 1
		echo "$BTRFS_UTIL_PROG replace cancel $SCRATCH_MNT" >> $seqres.full
		$BTRFS_UTIL_PROG replace cancel $SCRATCH_MNT >> $seqres.full 2>&1 || _fail "btrfs replace cancel failed"

		# 'replace status' prints the status only after the
		# replace operation has finished
		$BTRFS_UTIL_PROG replace status $SCRATCH_MNT > $tmp.tmp 2>&1
		cat $tmp.tmp >> $seqres.full
		grep -q canceled $tmp.tmp || _fail "btrfs replace status (canceled) failed"
		;;
	*)
		if [ "$quick" = "thorough" ]; then
			# The thorough test runs for more than a second
			# on current hardware, which is a chance to force
			# a sync in the middle of the replace operation.
			(sleep 1; sync) > /dev/null 2>&1 &
		fi
		start_cmd="$BTRFS_UTIL_PROG replace start -Bf $replace_options $source_dev $target_dev $SCRATCH_MNT"
		echo "$start_cmd" >> $seqres.full
		$start_cmd >> $seqres.full 2>&1 || _fail "btrfs replace start failed"

		$BTRFS_UTIL_PROG replace status $SCRATCH_MNT > $tmp.tmp 2>&1
		cat $tmp.tmp >> $seqres.full
		grep -q finished $tmp.tmp || _fail "btrfs replace status (finished) failed"
		;;
	esac

	# The noise writer is not needed anymore; stop it if ps still
	# lists it and wait for all background children.
	ps -p $noise_pid | grep -q $noise_pid && \
		kill -TERM $noise_pid 2> /dev/null
	noise_pid=0
	wait

	# Scrub checks the on-disk data, hence the sync. With '-B' (don't
	# background) every kind of error results in an exit value != 0,
	# detected correctable and uncorrectable device errors included.
	sync; sync
	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >> $seqres.full 2>&1 || _fail "btrfs scrub failed"

	# Two more checks: btrfsck the filesystem, then mount it. The
	# mount is done by hand here because _check_btrfs_filesystem gets
	# confused when SCRATCH_MNT is mounted from a device other than
	# SCRATCH_DEV, which is what /proc/mounts shows after the replace
	# operation.
	_scratch_unmount > /dev/null 2>&1
	if [ "$with_cancel" = "cancel" ]; then
		_check_btrfs_filesystem $source_dev
		_scratch_mount
	else
		# the replace went through, so the target_dev is used for
		# everything from now on
		echo "$BTRFS_UTIL_PROG filesystem show -d" >> $seqres.full
		$BTRFS_UTIL_PROG filesystem show -d >> $seqres.full
		echo "$BTRFS_UTIL_PROG filesystem show" >> $seqres.full
		$BTRFS_UTIL_PROG filesystem show >> $seqres.full
		echo "_check_btrfs_filesystem $target_dev" >> $seqres.full
		_check_btrfs_filesystem $target_dev
		_mount -t $FSTYP $(_scratch_mount_options | sed "s&${SCRATCH_DEV}&${target_dev}&")
	fi
}

# Test matrix, one workout per entry. Fields: number of devices the
# filesystem is built from, cancel mode, run mode, and the mkfs options
# (the remainder of the entry).
for workout_spec in \
	"1 no quick -m single -d single" \
	"1 no quick -m single -d single -M" \
	"1 no quick -m dup -d single" \
	"1 cancel quick -m dup -d single" \
	"1 no quick -m dup -d dup -M" \
	"2 no quick -m raid0 -d raid0" \
	"2 no thorough -m raid1 -d raid1" \
	"2 no quick -m raid5 -d raid5" \
	"3 no quick -m raid6 -d raid6" \
	"4 no quick -m raid10 -d raid10"
do
	read -r spec_devs spec_cancel spec_mode spec_mkfs <<< "$workout_spec"
	workout "$spec_mkfs" $spec_devs $spec_cancel $spec_mode
done

# Reaching this point means success; the exit trap leaves with $status.
echo "*** done"
status=0
exit
