#!/bin/sh -
#
# $Header: /afs/linux.ibm.com/src/afs/@cell/scripts/afs_vrfy_csdb/RCS/afs_vrfy_csdb,v 1.26 2005/09/15 11:51:58 mpb Exp $
# $Locker: $
#
# ---------------------------------------------------------------------------
# NAME      afs_vrfy_csdb
# AUTHOR    Paul Blackburn http://acm.org/~mpb
# DATE      Tue Sep 13 11:58:20 BST 2005
# PURPOSE   Check an AFS CellServDB file for active cells.
#           Generate a CellServDB with just valid cells
#           and another file of the "failed" cells.
# USAGE     afs_vrfy_csdb [${CellServDB}] [-ok ok_cells] [-nogo nogo_cells] [-verbose]
# ACKNOWLEDGEMENTS
#           Thanks to Ted Anderson for reviewing and suggesting improvements.
# HISTORY
#   2005_09_13 mpb first draft
#   2005_09_14 mpb added usage (), added "-w 5" to Linux ping, added wrap_ping ()
#   2005_09_14 mpb added "-autofix" option to replace old CellServDB with OK list
#   2005_09_15 mpb added check for no errors found so modify displayed messages
#   2005_09_15 mpb removed the "-cell $cell" from the "bos status". It's not needed.
# ---------------------------------------------------------------------------

# let's get defensive

# Fixed search path: AFS server and workstation binaries first, then the
# standard system directories.
PATH=/usr/afs/bin:/usr/afsws/bin:/bin:/usr/bin:/sbin:/usr/sbin

# Reset IFS to the shell default (space, tab, newline). The trailing "x" keeps
# command substitution from stripping the newline; it is removed again below.
IFS=$(printf ' \t\nx')
IFS=${IFS%x}

unset ENV

# Undo any aliases for commands used.
# This is to avoid problems such as "ls" having unexpected format output.
# unalias complains when no such alias exists, hence the discarded stderr.
for aliased_cmd in \
  awk cat chmod cp cut date df grep head ls mkdir mv ping rm rmmod rsync \
  sed sort strings tail uname whois
do
  unalias "${aliased_cmd}" 2>/dev/null
done
unset aliased_cmd

# enough defensiveness already!
cmd=$(basename "${0}")
cmdline="${cmd} $*"
logdir=/var/log/install
log=${logdir}/${cmd}
default_notify=mpb@acm.org    # where to send bug reports

# Locate an installed CellServDB to use when none is named on the command line.
# Every candidate that exists and is non-empty overrides the previous one, so
# the Transarc location wins when several are present.
OpenAFS="false"
TransarcAFS="false"
for f in /etc/openafs/CellServDB /usr/local/etc/openafs/CellServDB; do
  if [ -s "${f}" ]; then
    OpenAFS="true"
    default_csdb=${f}
  fi
done
f=/usr/vice/etc/CellServDB
if [ -s "${f}" ]; then
  TransarcAFS="true"
  default_csdb=${f}
fi
default_autofix="false"

# functions -----------------------------------------------------------------

# usage: print a short help text on stdout.
# NOTE(review): the original here-document text was lost from this copy of
# the file; the wording below is reconstructed from the USAGE header line and
# from the options the argument parser accepts. Confirm against the original.
usage () {
  cat <<EOF
Usage: ${cmd} [CellServDB] [-ok ok_cells] [-nogo nogo_cells] [-verbose] [-autofix]

Check an AFS CellServDB file for active cells.

  CellServDB         file to verify (default: ${default_csdb:-none found})
  -ok ok_cells       file to receive the entries of cells that passed
  -nogo nogo_cells   file to receive the entries of cells that failed
  -verbose           show each ping and bos command as it is run
  -autofix           replace the CellServDB with the OK list (root only)

Report bugs to ${default_notify}
EOF
} # end of function usage

# ---------------------------------------------------------------------------

# process_csdb: flatten a CellServDB into one line per cell on stdout, each of
# the form "cellname server1 server2 ... ". A line whose first field starts
# with ">" begins a new cell; the first field of every other line is appended
# to the current cell's line.
# Arguments: $1 - CellServDB path (optional; defaults to ${csdb})
process_csdb () {
  awk '{
    if ( substr($1,1,1) == ">" ) {
      printf("\n%s ", substr($1,2,length($1)))
    } else {
      printf("%s ", $1)
    }
  }
  END { printf("\n") }' < "${1:-${csdb}}"
} # end of function process_csdb()

# ---------------------------------------------------------------------------

# extract_cell: copy the complete CellServDB entry of one cell (its ">" header
# line and every line up to the next header) from ${csdb} to stdout.
# Arguments: $1 - cell name
extract_cell () {
  awk -v cell="${1}" '
    substr($1,1,1) == ">" { printme = ( substr($1,2,length($1)) == cell ) }
    printme == 1          { print $0 }
  ' < "${csdb}"
} #end of function extract_cell

# ---------------------------------------------------------------------------

# fatal: report an unrecoverable problem on stderr and exit with status 1.
fatal () {
  echo "${cmd} fatal: ${1}" >&2
  exit 1
} #end of function fatal

# ---------------------------------------------------------------------------

# error: report a problem on stderr and carry on.
error () {
  echo "${cmd} error: ${1}" >&2
} #end of function error

# ---------------------------------------------------------------------------

# warning: report a non-fatal condition on stderr.
warning () {
  echo "${cmd} warning: ${1}" >&2
} #end of function warning

# ---------------------------------------------------------------------------

# tstamp: print a message on stdout prefixed with the time (HHMM:SS) and the
# command name.
tstamp () {
  # The date format is deliberately split into separate quoted pieces,
  # presumably to stop source-control keyword expansion of "%H%" -- keep it.
  echo "$(date '+''%H''%M'':%S') ${cmd}: ${1}"
} #end of function tstamp

# ---------------------------------------------------------------------------

# doit: log a command line, run it, and report a non-zero exit status.
# Arguments: $1 - command line to execute
# Returns:   the exit status of the command
# NOTE: the argument is passed through eval; only call this with strings the
# script built itself.
doit () {
  tstamp "${1}"
  eval "${1}"
  retcode=$?
  if [ "${retcode}" != 0 ]; then
    error "\$?=${retcode}"
  fi
  return "${retcode}"
} #end of function doit

# ---------------------------------------------------------------------------

# now_seconds: print a second counter for measuring elapsed time.
# On Linux this is the epoch from "date +%s". Elsewhere it is the number of
# seconds since local midnight, taken from a single date call; awk does the
# arithmetic so that zero-padded fields such as "08" are not read as octal.
# Needs the ${os} shell variable.
now_seconds () {
  case "${os}" in
    Linux )
      date +%s
      ;;
    * )
      date '+%H %M %S' | awk '{ print ($1 * 3600) + ($2 * 60) + $3 }'
      ;;
  esac
} # end of function now_seconds

# ---------------------------------------------------------------------------

# elapsed_time: print the difference between two second counts in words,
# e.g. "1 hour 2 minutes 5 seconds", or "zero seconds" when they are equal.
# Arguments: $1 - start (seconds), $2 - end (seconds)
elapsed_time () {
  et_start=${1}
  et_end=${2}

  if [ "${et_start}" = "${et_end}" ]; then
    echo "zero seconds"
    return
  fi

  et_total=$(( ${et_end} - ${et_start} ))
  et_days=$(( ${et_total} / 86400 ))
  et_hours=$(( ${et_total} % 86400 / 3600 ))
  et_mins=$(( ${et_total} % 3600 / 60 ))
  et_secs=$(( ${et_total} % 60 ))

  # Assemble the message, skipping zero components and pluralising the rest.
  et_msg=""
  for et_pair in "${et_days} day" "${et_hours} hour" \
                 "${et_mins} minute" "${et_secs} second"; do
    et_n=${et_pair%% *}
    et_unit=${et_pair#* }
    case "${et_n}" in
      0 ) ;;
      1 ) et_msg="${et_msg}1 ${et_unit} " ;;
      * ) et_msg="${et_msg}${et_n} ${et_unit}s " ;;
    esac
  done
  echo "${et_msg% }"
} # end of function elapsed_time

# ---------------------------------------------------------------------------

# ping_host: check network connectivity to one server.
# Emits a warning and returns non-zero when the host does not answer or when
# the ping syntax for ${os} is not known.
# Arguments: $1 - host name or address
ping_host () {
  ping_count=2
  ping_packet_size=56
  case "${os}" in
    Linux )
      if [ "${verbose}" = "true" ]; then
        tstamp "ping -w 5 -c ${ping_count} -s ${ping_packet_size} ${1}"
      fi
      # stdin comes from /dev/null so ping cannot consume the caller's input
      if ! ping -w 5 -c "${ping_count}" -s "${ping_packet_size}" "${1}" \
           < /dev/null > /dev/null 2>&1; then
        warning "ping ${1} failed"
        return 1
      fi
      ;;
    AIX )
      if [ "${verbose}" = "true" ]; then
        tstamp "ping ${1} ${ping_packet_size} ${ping_count}"
      fi
      if ! ping "${1}" "${ping_packet_size}" "${ping_count}" \
           < /dev/null > /dev/null 2>&1; then
        warning "ping ${1} failed"
        return 1
      fi
      ;;
    * )
      echo "Unknown os=${os}. How to ping here?" >&2
      return 1
      ;;
  esac
  return 0
} # end of function ping_host

# ---------------------------------------------------------------------------

# verify_cell: run the ping and "bos status" checks against every server of a
# cell, then append the cell's CellServDB entry to ${ok_cells_file} or
# ${nogo_cells_file} accordingly.
# Arguments: $1 - cell name, $2 - space-separated list of servers
verify_cell () {
  cell=${1}
  ip_list=${2}

  echo
  tstamp "Checking cell $cell"

  #
  # first, check we can ping the list of servers
  # This test just verifies we have network connectivity to ${ip}
  #
  failed_ping="false"
  for ip in ${ip_list}; do
    ping_host "${ip}" || failed_ping="true"
  done
  if [ "${failed_ping}" = "true" ]; then
    warning "${cell} ping failed"
    extract_cell "${cell}" >> "${nogo_cells_file}"
    return
  fi
  tstamp "${cell} ping OK"

  #
  # second, check for bos processes
  # this test verifies that there are AFS server processes on ${ip}
  #
  failed_bos="false"
  for ip in ${ip_list}; do
    if [ "${verbose}" = "true" ]; then
      tstamp "bos status ${ip} -noa"
    fi
    if ! bos status "${ip}" -noa < /dev/null > /dev/null 2>&1; then
      failed_bos="true"
      warning "bos status ${ip} failed"
    fi
  done
  if [ "${failed_bos}" = "true" ]; then
    warning "${cell} bos status failed"
    extract_cell "${cell}" >> "${nogo_cells_file}"
  else
    tstamp "${cell} bos status OK. This one is good!!"
    extract_cell "${cell}" >> "${ok_cells_file}"
  fi
} # end of function verify_cell

# main ----------------------------------------------------------------------

# NOTE(review): the default output names below are predictable paths in /tmp;
# pass -ok/-nogo with names in a private directory when running as root.
default_ok_cells_file=/tmp/${cmd}_ok_cells_$$
default_nogo_cells_file=/tmp/${cmd}_nogo_cells_$$
verbose="false"

while [ $# -gt 0 ]; do
  # crack command line arguments
  case "${1}" in
    -verbose | -VERBOSE )
      verbose="true"
      ;;
    -autofix | -AUTOFIX )
      autofix="true"
      ;;
    -nogo | -NOGO )
      shift
      if [ -z "${1}" ]; then
        fatal "missing nogo cells file name"
      fi
      nogo_cells_file=${1}
      ;;
    -nogo* | -NOGO* )
      # file name glued onto the option: drop the five-character "-nogo"
      nogo_cells_file=${1#?????}
      ;;
    -ok | -OK )
      shift
      if [ -z "${1}" ]; then
        fatal "missing OK cells file name"
      fi
      ok_cells_file=${1}
      ;;
    -ok* | -OK* )
      # file name glued onto the option: drop the three-character "-ok"
      ok_cells_file=${1#???}
      ;;
    -? | -help | -usage | --? | --help | --usage )
      # "?" is a glob here, so any other single-character option lands here too
      usage
      exit
      ;;
    * )
      csdb="${1}"
      ;;
  esac
  shift
done

autofix=${autofix:-${default_autofix}}
nogo_cells_file=${nogo_cells_file:-${default_nogo_cells_file}}
ok_cells_file=${ok_cells_file:-${default_ok_cells_file}}
csdb=${csdb:-${default_csdb}}

# sanity checking starts here -----------------------------------------------

if [ "${autofix}" = "true" ] && [ "$(id -u)" != "0" ]; then
  fatal "You must be logged in as root to use the \"-autofix\" option with ${cmd}"
fi

#
# Check what Operating System we are running on.
# The syntax of "ping" is different on Linux and AIX
#
os=$(uname)
case "${os}" in
  Linux | AIX )
    ;;
  * )
    warning "We have unsupported operating system: ${os}."
    warning "This may not work."
    ;;
esac
start_epoch=$(now_seconds)

if [ -z "${csdb}" ]; then
  echo "Missing CellservDB file" >&2
  exit 1
fi
if [ ! -s "${csdb}" ]; then
  echo "Empty CellServDB file" >&2
  exit 1
fi

# Only create the result files once we know there is something to verify.
: > "${ok_cells_file}" || fatal "cannot create ${ok_cells_file}"
chmod 644 "${ok_cells_file}"
: > "${nogo_cells_file}" || fatal "cannot create ${nogo_cells_file}"
chmod 644 "${nogo_cells_file}"

tstamp "\$Revision: 1.26 $ commenced on $(date '+%a %d %h %y')"
tstamp "Verifying contents of ${csdb}"
tstamp "Please be patient as we check each AFS cell and server"

# One "cell server server ..." line per cell, sorted by cell name. The loop
# runs in a pipeline subshell; it reports only through the result files.
process_csdb "${csdb}" | sed -e "/^$/d" | sort |
while read -r cell_name server_list; do
  # A cell without server lines is probed under its own name, as before.
  if [ -z "${server_list}" ]; then
    server_list=${cell_name}
  fi
  verify_cell "${cell_name}" "${server_list}"
done

cell_count=$(grep -c "^>" "${csdb}")
ok_cell_count=$(grep -c "^>" "${ok_cells_file}")
nogo_cell_count=$(grep -c "^>" "${nogo_cells_file}")

echo
tstamp "summary: ${cell_count} cells checked, ${ok_cell_count} OK and ${nogo_cell_count} failed"
tstamp "OK cells are in ${ok_cells_file}"

#
# Check for case when all cells are OK
#
if [ "${nogo_cell_count}" = "0" ]; then
  rm "${nogo_cells_file}"
  tstamp "Good news: no errors found in ${csdb}"
else
  tstamp "No-go cells are in ${nogo_cells_file}"
fi
echo

if [ "${autofix}" = "true" ]; then
  if [ "${ok_cell_count}" = "0" ]; then
    # e.g. the network is down: never install an empty CellServDB
    warning "no cell passed verification; \"-autofix\" leaves ${csdb} unchanged"
  else
    suffix=$(date +%Y_%m_%d:%H%M:%S)
    # only install the new file once the old one has been moved aside
    if doit "mv ${csdb} ${csdb}.${suffix}" &&
       doit "cp ${ok_cells_file} ${csdb}"; then
      tstamp "Attention: Option \"-autofix\" has caused the old CellServDB to be updated"
      tstamp "previous CellServDB is ${csdb}.${suffix}"
    else
      error "\"-autofix\" could not replace ${csdb}"
    fi
  fi
  echo
fi

#
# Compute how long it took us.
#
end_epoch=$(now_seconds)
if [ "${os}" != "Linux" ] && [ "${end_epoch}" -lt "${start_epoch}" ]; then
  #
  # test for case of midnight flip over...if so, add 1 day in seconds (86400)
  # we assume this script always takes less than 24 hours to complete
  # If only AIX had "date +%s" ...sigh...
  #
  end_epoch=$(( ${end_epoch} + 86400 ))
fi

tstamp "duration $(elapsed_time "${start_epoch}" "${end_epoch}")"
tstamp "completed"
exit 0