#!/bin/bash
# condor_remote_host: Used to install ssh keys and blahp on remote hosts.
#
# BOSCO can manage at most 254 clusters: the number of clusters is sometimes
# returned as a function exit code (via return), which is taken modulo 256.
# See cluster_list_iterator and list.


# ---- Settings used on the submit host ------------------------------------

# Path of the ssh key BOSCO generates and uses (its ".pub" sibling is the
# public half that gets copied to remote hosts)
bosco_key="${HOME}/.ssh/bosco_key.rsa"

# "yes" to copy the bosco ssh key to the remote host being added
copy_ssh_key="yes"

# File holding the passphrase of the bosco key
PASSPHRASE_LOCATION="${HOME}/.bosco/.pass"

# File holding the list of configured clusters, one entry per line
CLUSTERLIST_LOCATION="${HOME}/.bosco/.clusterlist"

# ---- Settings used for the remote host -----------------------------------

# Install directory on the remote machine, relative to the remote $HOME
remote_base_dir_host="bosco"

# Extra arguments for every ssh command, and the ssh target;
# both are filled in by init_ssh_args
ssh_args=""
ssh_host=""

# ControlPath pattern of the shared (master) ssh connection
ssh_control_path="/tmp/bosco_ssh_control.%r@%h:%p"

# ---- External executables ------------------------------------------------
TR="tr"

usage()
{
# Print the command-line help text to stdout.
# $0 is expanded inside the here-document, so the text shows the name the
# script was invoked with.
cat << EOM
usage: $0 command

commands:
 -l|--list                  List the installed clusters
 -a|--add host sched        Install and add a cluster, with scheduler sched
 -r|--remove [host]         Remove the installed cluster (first in list)
 -s|--status [host]         Get status of installed cluster
 -t|--test [host]           Test the installed cluster (all clusters)
 -o|--override directory    Override the remote installation with a directory structure
 -b|--base-dir directory    Set the name of the remote base install directory (default: bosco)
 -u|--url URL               Set the URL to fetch HTCondor binaries from
 -p|--platform platform     Use this platform name instead of detecting it on the remote host
 --copy-ssh-key (yes|no)    Should the ssh key be copied to the cluster host (default: yes)
 -d|--debug                 Display debugging output
 -h|--help                  Show this help message

Where host is [user@]fqdn.example.com[:22]
$0 can manage max 254 clusters

EOM
}

init_ssh_args () {
    # Split a host specification into the ssh target and extra ssh options.
    # $1 = host specification: [user@]host[:port]
    # Sets the globals:
    #   ssh_host - the specification without the ":port" suffix
    #   ssh_args - "-p <port>" when a port was given, empty otherwise
    #
    # ssh_args is always reset first: this function is called once per
    # cluster, and a port taken from one host must not be reused for the next.
    ssh_args=""
    if [[ $1 =~ ^([^:]+):([0-9]+)$ ]]; then
        ssh_host="${BASH_REMATCH[1]}"
        ssh_args="-p ${BASH_REMATCH[2]}"
    else
        ssh_host=$1
    fi
}

cluster_list_iterator () {
    # Call a function once for every entry of the cluster list.
    # $1 = function to call; it receives 3 parameters:
    #        <user@host> <cluster_type> <max_queued>
    #      If it returns 1, the loop is interrupted.
    # File: $CLUSTERLIST_LOCATION, whose lines look like:
    #   entry=user@host.edu max_queued=2000 cluster_type=pbs
    # Returns the number of entries visited (as an exit code, so modulo 256),
    # or 255 when the file is missing or empty.
    #
    # The callback runs with the cluster list as its standard input, so a
    # callback that reads stdin consumes the entries that were not visited yet.
    #
    # The loop state uses function-specific local names: callbacks in this
    # file assign to globals such as "counter", and with bash's dynamic
    # scoping a plain "counter" here would be overwritten by them.
    local _cli_callback=$1
    local _cli_visited=0
    local _cli_line _cli_name _cli_max_q _cli_type_q

    if [ ! -s "$CLUSTERLIST_LOCATION" ]; then
        # file not existing or zero length
        echo "No clusters configured"
        return 255
    fi

    while read -r _cli_line; do
        _cli_name=$(expr "$_cli_line" : "entry=\(.*\) max")
        _cli_max_q=$(expr "$_cli_line" : ".*max_queued=\(.*\) cluster")
        _cli_type_q=$(expr "$_cli_line" : ".*cluster_type=\(.*\)")
        # The fields are quoted so that an empty field cannot shift the
        # following ones into the wrong parameter position.
        $_cli_callback "$_cli_name" "$_cli_type_q" "$_cli_max_q"
        [ $? -eq 1 ] && break
        _cli_visited=$(( _cli_visited + 1 ))
    done < "$CLUSTERLIST_LOCATION"

    return "$_cli_visited"
}

list()
{
    # Print the configured clusters.
    # File: $CLUSTERLIST_LOCATION, whose lines look like:
    #   entry=user@host.edu max_queued=2000 cluster_type=pbs
    # $1 controls the output format (default: user@host.edu/cluster_type)
    #  host          - user@host.edu
    #  remote_gahp   - user@host.edu of the first entry only, then return 1
    #  grid_resource - "batch cluster_type user@host.edu" of the first entry
    #                  only, then return 1
    #  all           - user@host.edu cluster_type max_queued
    # Returns the number of entries printed (as an exit code, so modulo 256);
    # prints "No clusters configured" and returns 0 when the file is missing.
    local format=$1
    local printed=0
    local line name max_q type_q

    if [ ! -e "$CLUSTERLIST_LOCATION" ]; then
        echo "No clusters configured"
        return 0
    fi

    while read -r line; do
        name=$(expr "$line" : "entry=\(.*\) max")
        max_q=$(expr "$line" : ".*max_queued=\(.*\) cluster")
        type_q=$(expr "$line" : ".*cluster_type=\(.*\)")
        case "$format" in
            host)
                echo "$name"
                ;;
            remote_gahp)
                echo "$name"
                return 1
                ;;
            grid_resource)
                echo "batch $type_q $name"
                return 1
                ;;
            all)
                echo "$name $type_q $max_q"
                ;;
            *)
                echo "$name/$type_q"
                ;;
        esac
        printed=$(( printed + 1 ))
    done < "$CLUSTERLIST_LOCATION"

    return "$printed"
}

start_ssh () {
    # Start an ssh-agent and open a master ssh connection to $ssh_host.
    # Reads the globals ssh_args, ssh_host and ssh_control_path.
    # Sets PASSPHRASE (content of $PASSPHRASE_LOCATION, or empty) and, on
    # success, ssh_master_connection (PID of the background master ssh).
    # Returns 0 when condor_ssh_start succeeded, 1 otherwise.
    # NOTE(review): PASSPHRASE is not exported here; presumably
    # condor_ssh_start obtains the passphrase itself - confirm.
    PASSPHRASE=""
    [ -f $PASSPHRASE_LOCATION ] && PASSPHRASE=$(cat $PASSPHRASE_LOCATION)

    # Make sure stop_ssh runs when the shell exits
    trap stop_ssh EXIT

    # Start the ssh-agent and import its environment
    eval `ssh-agent -s` > /dev/null

    # External program that does the ssh-add; give up if it fails
    if ! condor_ssh_start; then
        return 1
    fi

    # Open the master connection in the background and remember its PID
    ssh -o ControlMaster=auto -o "ControlPath=$ssh_control_path" -MNn $ssh_args $ssh_host &
    ssh_master_connection=$!
    return 0
}

display_dots () {
    # Print one "." per second, forever.  Meant to be run in the background
    # and killed by the caller (see show_progress).
    # The loop ends on its own once the parent of process $$ is PID 1.
    while true; do
        # Parent PID of process $$
        ppid=$(ps -p $$ -o ppid=)
        [ "$ppid" -eq 1 ] && exit
        echo -n "."
        sleep 1
    done
}

show_progress () {
    # Run a command while showing a message followed by progress dots.
    # $1    = message to display
    # $2... = command to run, with its arguments
    # Prints "Failure" after the dots when the command fails, and just ends
    # the line otherwise.
    # Returns the exit code of the command.
    local message=$1
    shift
    local dots_pid status

    # Display the message & dots.  The message is quoted so that it is
    # printed verbatim (no word splitting, no glob expansion).
    echo -n "$message"
    display_dots 2>/dev/null &
    dots_pid=$!

    # Run the command
    "$@"
    status=$?

    # disown first so that the shell does not report the killed job
    disown $dots_pid
    kill -9 $dots_pid
    if [ $status -ne 0 ] ; then
        echo "Failure"
    else
        echo ""
    fi

    return $status
}



ssh_find_remote () {
    # Find the platform of the remote host.
    # $1 = remote host (passed on to ssh_detect_linux_distro; the ssh
    #      connection itself uses the globals ssh_args and ssh_host)
    # Prints "<arch>_<distro>" for Linux or "x86_64_macOS" for Darwin.
    # Returns 0 on success; 1 when ssh fails, the Linux distribution cannot
    # be determined, or the operating system is neither Linux nor Darwin.
    local remote_host=$1
    local uname_out detect detect_os detect_arch distro

    # "uname -s -m" prints the kernel name ('Darwin' for Mac OS X or 'Linux')
    # followed by the machine architecture
    uname_out=$(ssh $ssh_args $ssh_host "uname -s -m") || return 1
    detect=($uname_out)

    detect_os=${detect[0]}
    detect_arch=${detect[1]}

    case "$detect_os" in
        Linux)
            # Check the detection result before printing anything, so that
            # a failure is reported instead of an incomplete "<arch>_" name.
            distro=$(ssh_detect_linux_distro "$remote_host") || return 1
            echo "${detect_arch}_${distro}"
            return 0
            ;;
        Darwin)
            # NOTE(review): the architecture reported by uname is ignored
            # here - confirm whether non-x86_64 Macs need their own name.
            echo "x86_64_macOS"
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}


ssh_detect_linux_distro () {
    # Find the Linux distribution of the remote host from /etc/os-release.
    # $1 = remote host (informational only; the ssh connection uses the
    #      globals ssh_args and ssh_host)
    # Prints one of CentOS<N>, AlmaLinux<N>, openSUSE<N>, Debian<N>,
    # Ubuntu<N>, where <N> is the major version taken from VERSION_ID.
    # Returns 0 on success; 1 when os-release cannot be read, has no numeric
    # VERSION_ID, or describes a distribution that is not handled here.
    local os_release dist_id dist_id_like ver major_ver

    os_release=$(ssh $ssh_args $ssh_host "cat /etc/os-release" 2> /dev/null) || return 1

    # os-release values may or may not be quoted; strip the quotes so that
    # ID="debian" and ID=debian are treated alike.
    dist_id=$(echo "$os_release" | awk -F '=' '/^ID=/ {print $2}' | tr -d "\"'")
    dist_id_like=$(echo "$os_release" | awk -F '=' '/^ID_LIKE=/ {print $2}' | tr -d "\"'")
    ver=$(echo "$os_release" | awk -F '=' '/^VERSION_ID=/ {print $2}' | tr -d "\"'")
    major_ver="${ver%%.*}"

    # Every name printed below embeds the major version, so a missing or
    # non-numeric version cannot produce a usable result.
    case "$major_ver" in
        ''|*[!0-9]*) return 1 ;;
    esac

    if [[ $dist_id_like =~ (rhel|centos|fedora) ]]; then
        if [ "$major_ver" -eq 7 ]; then
            echo "CentOS${major_ver}"
        else
            echo "AlmaLinux${major_ver}"
        fi
    elif [[ $dist_id_like =~ suse ]]; then
        echo "openSUSE${major_ver}"
    else
        case "$dist_id" in
            debian)
                echo "Debian${major_ver}" ;;
            ubuntu)
                echo "Ubuntu${major_ver}" ;;
            *)
                return 1 ;;
        esac
    fi
    return 0
}


check_cluster_list2 () {
    # Check whether a cluster is in the cluster list.
    # $1 = cluster to check for; it is matched as a literal substring of the
    #      lines of $CLUSTERLIST_LOCATION
    # $2 = action when the cluster is not found:
    #        v  - verbose: print a message and the available clusters
    #        e  - exit the script with status 1
    #        ve - (or ev) both
    #        anything else - just return 1
    # Sets the global remote_host to $1.
    # Returns 0 when found, 1 otherwise (unless the action exits).
    remote_host=$1
    local action
    case "x$2" in
     xve|xev) action=2;;
     xv) action=1;;
     xe) action=3;;
     *) action=0;;
    esac

    # -F: the host is a fixed string, not a regular expression (a "." in a
    # host name must not match any character).  An empty host is never
    # "found"; as a pattern it would match every line.
    if [ -n "$remote_host" ] &&
       grep -qF -- "$remote_host" "$CLUSTERLIST_LOCATION" 2>/dev/null; then
        return 0
    fi

    if [ $action -eq 0 ]; then
        return 1
    fi
    if [ $action -le 2 ]; then
        echo "Unable to find $remote_host in list of installed clusters."
        echo "Available clusters:"
        list
    fi
    if [ $action -ge 2 ]; then
        exit 1
    fi
    return 1
}

add_cluster_to_list () {
    # Add a cluster to the cluster list, replacing an existing entry for the
    # same host.
    # $1 = cluster to add
    # $2 = max queued setting of cluster
    # $3 = cluster batch system (pbs, lsf, sge)
    # Sets the globals remote_host, max_queued and cluster_type.
    remote_host=$1
    max_queued=$2
    cluster_type=$3
    local entry_prefix="entry=$remote_host max_queued="
    local tmpfile

    mkdir -p "$(dirname "$CLUSTERLIST_LOCATION")"

    # Remove the host (if already there).  The comparison is a literal
    # prefix match done by awk, so characters of the host name are never
    # interpreted as a regular expression.
    if [ -f "$CLUSTERLIST_LOCATION" ] &&
       awk -v p="$entry_prefix" 'index($0, p) == 1 { found = 1 } END { exit !found }' "$CLUSTERLIST_LOCATION"; then
        tmpfile=$(mktemp -t tmpclusterlist.XXXXXX) || return 1
        awk -v p="$entry_prefix" 'index($0, p) != 1' "$CLUSTERLIST_LOCATION" > "$tmpfile"
        mv "$tmpfile" "$CLUSTERLIST_LOCATION"
    fi

    # (Re)add the entry
    echo "entry=$remote_host max_queued=$max_queued cluster_type=$cluster_type" >> "$CLUSTERLIST_LOCATION"
}

get_min_queue () {
    # Print the smallest max_queued value found in the cluster list.
    # Entries with max_queued=-1 and values of 1000 or more are not taken
    # into account; when no entry qualifies (or the list cannot be read),
    # 10 is printed.
    # The value is also used as return status, which the shell truncates
    # modulo 256 - callers should read the printed value.
    local min_queued=1000
    local detected=0
    local line max_queued

    if [ -r "$CLUSTERLIST_LOCATION" ]; then
        while read -r line; do
            # Skip lines without a numeric max_queued field
            [[ $line =~ max_queued=(-?[0-9]+) ]] || continue
            max_queued=${BASH_REMATCH[1]}
            if [ "$max_queued" -lt "$min_queued" ] && [ "$max_queued" -ne -1 ]; then
                detected=1
                min_queued=$max_queued
            fi
        done < "$CLUSTERLIST_LOCATION"
    fi

    if [ "$detected" -eq 0 ]; then
        min_queued=10
    fi
    echo "$min_queued"
    return "$min_queued"
}


stop_ssh () {
    # Undo start_ssh: stop the master ssh connection and the ssh-agent.
    # Reads and clears the global ssh_master_connection.

    # Remove on-exit call of stop_ssh
    trap - EXIT

    # Shut down the master ssh socket
    if [ "x$ssh_master_connection" != "x" ]; then
        kill $ssh_master_connection
        # Forget the PID: start_ssh does not set it again when it fails, and
        # a later stop_ssh must not signal a PID that is no longer ours.
        ssh_master_connection=""
    fi

    # Shut down ssh-agent
    eval `ssh-agent -sk` > /dev/null
}

remove_cluster () {
    # Remove a cluster.
    # $1 = cluster to remove (optional)
    # Without $1, do_remove_cluster is run through cluster_list_iterator and
    # the number of visited entries is reported.
    if [ -z "$1" ]; then
        # NOTE(review): per the original remark this removes only one
        # cluster, "because do_remove_cluster creates a new cluster file".
        cluster_list_iterator  do_remove_cluster
        retv=$?
        case "$retv" in
            255) echo "No clusters configured." ;;
            *)   echo "$retv cluster/s removed." ;;
        esac
        return
    fi

    remote_host=$1

    # Exits (with a message) when the cluster is not in the cluster list
    check_cluster_list2 $remote_host "ve"

    # The cluster is known: remove it and hand back the result
    do_remove_cluster $remote_host
    return $?
}

do_remove_cluster () {
    # Remove the installation from a remote cluster and drop the cluster
    # from the cluster list.
    # $1 = cluster to remove; may be user@hostname/queue, in which case the
    #      "/queue" part is ignored
    # $2 and $3 are ignored (they are passed by cluster_list_iterator)
    # Sets the global remote_host.
    remote_host=${1%%/*}
    local tmpfile

    # First, check if the cluster is in the cluster list, exit if not
    check_cluster_list2 "$remote_host" "ve"

    # If here cluster is in cluster list

    # Remove bosco from the remote cluster
    init_ssh_args "$remote_host"
    start_ssh
    ssh $ssh_args $ssh_host "rm -rf $remote_base_dir_host"
    stop_ssh

    # Remove the host from the cluster list.  grep -F treats the host as a
    # fixed string; as a regular expression its "." would match any
    # character and could delete the entries of other hosts.
    tmpfile=$(mktemp -t tmpclusterlist.XXXXXX) || return 1
    grep -vF -- "$remote_host" "$CLUSTERLIST_LOCATION" > "$tmpfile"
    mv "$tmpfile" "$CLUSTERLIST_LOCATION"
}


check_condor_q_classad () {
    # Poll condor_q with a constraint until it prints something or the
    # timeout is reached.
    # $1 = classad expression
    # $2 = timeout (seconds); condor_q is run once per second
    # Returns 0 if not found, 1 if found
    #
    # All variables are local: callers in this file keep their own loop
    # state in a global named "counter".
    local classad=$1
    local remaining=$2
    local cmd_out

    while [ "$remaining" -gt 0 ]
    do
        cmd_out=$(condor_q -const "$classad" -format '%s' ClusterId)
        if [ -n "$cmd_out" ]; then
            return 1
        fi
        remaining=$(( remaining - 1 ))
        sleep 1
    done
    return 0
}


test_cluster () {
    # Test one cluster, or every cluster of the cluster list.
    # $1 = cluster to test (optional)
    # With $1: exits when the cluster is unknown, otherwise returns the
    # status of do_test_cluster.
    # Without $1: runs do_test_cluster through cluster_list_iterator and
    # reports how many clusters were visited.
    local cluster_out tested

    if [ "x$1" != "x" ]; then
        remote_host=$1

        # First, check if the cluster is in the cluster list, exit if not
        check_cluster_list2 "$remote_host" "ve"

        # Get the cluster type from the first entry containing the host
        # (as a fixed string, not as a regular expression)
        cluster_out=$(grep -F -- "$remote_host" "$CLUSTERLIST_LOCATION" | head -n 1)
        cluster_type=$(expr "$cluster_out" : ".*cluster_type=\(.*\)")

        # Do the test
        do_test_cluster "$remote_host" "$cluster_type"
        return $?
    else
        # This is stopping at the first cluster failing
        cluster_list_iterator  do_test_cluster
        # Save the count right away: $? is overwritten by the test below
        tested=$?
        if [ "$tested" -eq 255 ]; then
            echo "No clusters configured."
        else
            echo "$tested cluster/s tested successfully."
        fi
    fi
}

_show_gridmanager_log_tail () {
    # Print the last 5 lines of the gridmanager log, or a message when the
    # log cannot be read.
    local gridmanager_log
    echo "Showing last 5 lines of logs:"
    gridmanager_log=$(condor_config_val GRIDMANAGER_LOG)
    # An empty value must not reach "tail": without a file it reads stdin.
    if [ -n "$gridmanager_log" ] && [ -r "$gridmanager_log" ]; then
        tail -5 "$gridmanager_log"
    else
        echo "Failed to open gridmanager log for reading"
    fi
}

_wait_for_log_event () {
    # Poll a job log until a line starting with an event number shows up.
    # $1 = log file, $2 = event number (e.g. 027),
    # $3 = number of attempts, $4 = seconds to sleep between attempts
    # Returns 0 when the event was found, 1 otherwise.
    local job_log=$1 event=$2 attempts=$3 interval=$4
    while [ "$attempts" -gt 0 ]
    do
        # The log may not exist yet; that just counts as "not found"
        if grep -q -e "^${event}" "$job_log" 2>/dev/null; then
            return 0
        fi
        sleep "$interval"
        attempts=$(( attempts - 1 ))
    done
    return 1
}

do_test_cluster () {
    # Test a cluster: ssh access, job submission, and completion of a small
    # "echo Hello" grid job.  No checks are done on the parameters.
    # $1 remote_host (user@fqdn)
    # $2 cluster_type (pbs, condor, ...)
    # other parameters are ignored
    # Exits the script with status 1 as soon as a step fails.  Returns 0
    # when all steps pass, and also when the job was accepted but did not
    # finish within the waiting time.
    if [ "x$1" != "x" ]; then
        remote_host=$1
    else
        echo "ERROR - No remote host given for the test.  Please list one of the cluster you have added"
        exit 1
    fi

    if [ "x$2" != "x" ]; then
        cluster_type=$2
    else
        echo "ERROR - No remote cluster type provided.  Please specify the correct one."
        exit 1
    fi

    local ssh_exit root_submit_dir submit_dir submit_file log_file
    local test_stdout test_stderr test_id submit_out condor_jobid output

    # Check if passwordless ssh works
    echo -n "Testing ssh to $remote_host..."
    init_ssh_args "$remote_host"
    start_ssh
    # -n: ssh must not read stdin.  When this function is called through
    # cluster_list_iterator, stdin is the cluster list, and ssh would
    # swallow the entries that have not been tested yet.
    ssh -n -o "PasswordAuthentication=no" $ssh_args $ssh_host "pwd" > /dev/null
    ssh_exit=$?
    stop_ssh
    if [ $ssh_exit -ne 0 ]; then
        echo "Failed to run simple ssh on remote cluster."
        echo "Passwordless ssh to $remote_host is not working."
        exit 1
    else
        echo "Passed!"
    fi

    # Test condor submission
    echo -n "Testing remote submission..."

    # Put test files under the user's home directory
    root_submit_dir=$HOME/bosco-test
    if ! mkdir -p "$root_submit_dir"; then
        echo "Unable to create directory $root_submit_dir"
        echo "Tests FAILED!"
        exit 1
    fi
    submit_dir=$(mktemp -d "$root_submit_dir/boscotest.XXXXX")
    if [ -z "$submit_dir" ]; then
        echo "Unable to create directory $root_submit_dir"
        echo "Tests FAILED!"
        exit 1
    fi
    submit_file=$submit_dir/condor.submit
    log_file=$submit_dir/logfile
    test_stdout=$submit_dir/test.stdout
    test_stderr=$submit_dir/test.stderr
    test_id=bosco-test.$RANDOM
    cat > "$submit_file" << End_of_Submit
universe = grid
grid_resource = batch $cluster_type $remote_host
output = $test_stdout
error = $test_stderr
transfer_executable=false
executable = /bin/echo
arguments = Hello
log = $log_file
notification = NEVER
+bosco_test_id = "$test_id"
queue
End_of_Submit

    submit_out=$(condor_submit "$submit_file" 2>&1)
    if [ $? -ne 0 ]; then
        echo "Failed"
        echo "$submit_out"
        exit 1
    else
        echo "Passed!"
    fi
    echo "Submission and log files for this job are in $submit_dir"

    # Get the condor job id using the bosco_test_id
    condor_jobid=$(condor_q -const "bosco_test_id =?= \"$test_id\"" -format '%i' ClusterId)

    # Check if the jobmanager picked up the job
    # (check_condor_q_classad returns 0 when nothing was found)
    echo -n "Waiting for jobmanager to accept job..."
    check_condor_q_classad "(ClusterId == $condor_jobid) && (Managed == \"External\")" 20
    if [ $? -eq 0 ]; then
        echo "Failed!"
        echo "Check your gridmanager log, located at: `condor_config_val GRIDMANAGER_LOG`"
        exit 1
    else
        echo "Passed"
    fi

    # Check if there is anything in the user log: wait for event 027
    echo -n "Checking for submission to remote $cluster_type cluster (could take ~30 seconds)..."
    if _wait_for_log_event "$log_file" 027 50 1; then
        echo "Passed!"
    else
        echo "Failed"
        _show_gridmanager_log_tail
        exit 1
    fi

    # The remote job state is not checked here: GridJobStatus is not
    # properly implemented for PBS.
    # The running state is not checked either: we will probably never see
    # the job in running status, as it will finish immediately and Condor
    # only polls its status periodically.

    echo -n "Waiting for job to exit... this could take a while (waiting 60 seconds)..."
    check_condor_q_classad "(ClusterId == $condor_jobid) && (GridJobStatus==\"COMPLETED\") " 60
    if [ $? -eq 0 ]; then
        cat << Error_message_end
Failed
The job did not end in 60 seconds.  This is not always a bad thing...
Maybe condor is waiting longer to poll for job completion?
Here is the current status of the job:
Error_message_end
        condor_q $condor_jobid
        # returning zero anyway, test considered successful; "return"
        # instead of "exit" lets an iteration over all clusters go on
        return 0
    else
        echo "Passed!"
    fi

    # Check if there is anything in the user log: wait for event 005
    echo -n "Checking for completion in Condor, could take a while..."
    if _wait_for_log_event "$log_file" 005 5 10; then
        echo "Passed!"
    else
        echo "Failed"
        _show_gridmanager_log_tail
        exit 1
    fi

    echo -n "Checking for job output..."
    output=$(cat "$test_stdout")
    if [ "$output" == "Hello" ] ; then
        echo "Passed!"
    else
        echo "Failed"
        echo "Job output should be 'Hello', but isn't"
        echo "Showing contents of job stdout:"
        cat "$test_stdout"
        echo "Showing contents of job stderr:"
        cat "$test_stderr"
        exit 1
    fi

}


get_status () {
    # Show the queue status of a cluster.
    # $1 = cluster (must be in the cluster list, otherwise the script exits)
    # Runs "qstat -q", "showq" and "condor_q" on the remote host and prints
    # the output of those that succeed.
    remote_host=$1
    local query test_out

    # First, check if the cluster is in the clusterlist
    check_cluster_list2 "$remote_host" "ve"

    # Print queue status
    init_ssh_args "$remote_host"
    start_ssh
    for query in "qstat -q" "showq" "condor_q"
    do
        # Show the result only if successful
        if test_out=$(ssh $ssh_args $ssh_host "$query" 2>&1); then
            echo "Showing output of $query on $remote_host"
            echo "$test_out"
        else
            echo "No $query on $remote_host"
        fi
    done
    stop_ssh
}


# The getopt command line.  Using -a for alternate (allow options with only 1 '-')
if [ "$(uname)" = "Darwin" ] ; then
    # Mac OS X doesn't have GNU getopt, so not fancy argument checking here
    TEMP="$@"
else
    TEMP=$(getopt -a -o a:ls:t:r:dhp:o:b:u: \
        --longoptions add:,platform:,list,status:,test:,remove:,debug,help,override:,base-dir:,url:,copy-ssh-key: \
        -n 'condor_remote_cluster' -- "$@")

    if [ $? != 0 ]; then usage; echo "Terminating..." >&2; exit 1; fi
fi

eval set -- "$TEMP"

platform_force=
override_dir=
release_url=
update_url=
rc_url=
daily_url=

# Process the options in order.  --list, --status, --test and --remove run
# their command and exit at once; --add stops the option processing and
# leaves the rest of the command line to the 'add' code that follows, so the
# options that influence --add have to come before it.
# NOTE(review): in the GNU getopt specification above, -s, -t and -r take a
# mandatory argument, so their "no host" forms are only reachable on the
# Darwin path - confirm whether that is intended.
while true; do
    case "$1" in
        -h|--help) usage; exit 1 ;;
        -d|--debug) debug=1; shift;;
        -p|--platform) platform_force=$2; shift 2;;
        -a|--add) remote_host=$2; shift 2; break;;
        -l|--list)
                   # The optional output format is a positional argument;
                   # GNU getopt moves those behind "--", so skip the "--".
                   shift
                   if [ "$1" = "--" ]; then
                       shift
                   fi
                   list "$1"
                   # list returns the number of entries: none means exit 2
                   if [ $? -eq 0 ]; then
                       exit 2
                   fi
                   exit 0 ;;
        -s|--status) get_status "$2"; exit 0 ;;
        -t|--test) test_cluster "$2"; exit 0 ;;
        -r|--remove) remove_cluster "$2"; exit 0;;
        -o|--override) override_dir=$2; shift 2;;
        -b|--base-dir) remote_base_dir_host=$2; shift 2;;
        -u|--url) release_url=$2; shift 2;;
        --copy-ssh-key) copy_ssh_key=$2; shift 2;;
        --) echo "No command found" >&2; usage; exit 1;;
        *) echo "Unknown option: $1" >&2; usage; exit 1;;
    esac
done


################################################################
# The rest of the file covers the 'add' cluster functionality.
################################################################

# getopt leaves a "--" in front of the remaining arguments; drop it
# (there is none on Darwin, where getopt is not used)
if [ "$(uname)" != "Darwin" ] ; then
    shift
fi

# $1 - The batch system, in any letter case; "htcondor" is accepted as a
# synonym of "condor"

if [ -z "$1" ]; then
    echo "Warning: No batch system specified, defaulting to PBS"
    echo "If this is incorrect, rerun the command with the batch system specified"
    echo 
    cluster_type="pbs"
else
    cluster_type=$(echo $1 | $TR '[:upper:]' '[:lower:]')
    case "$cluster_type" in
        htcondor) cluster_type="condor" ;;
    esac
    case "$cluster_type" in
        pbs|lsf|sge|condor|slurm)
            ;;
        *)
            echo "Unrecognized batch system: $1 (normalized: $cluster_type)"
            echo "Please specify one of the following: pbs, lsf, sge, condor, slurm"
            exit 1
            ;;
    esac
fi


# Check if the cluster is already in the list
if check_cluster_list2 "$remote_host"; then
    echo "Cluster $remote_host already installed"
    echo "Reinstalling on $remote_host"
    reinstall=1
else
    reinstall=0
fi

# If the key doesn't exist, create it
if [ ! -e "$bosco_key" ]; then
    # Generate a password
    # NOTE(review): $RANDOM is not a cryptographic random source - consider
    # a stronger generator.
    PASSPHRASE="$RANDOM$RANDOM$RANDOM$RANDOM"

    # Store the password.  The file is created under a restrictive umask so
    # that it is never readable by group/others, not even before the chmod;
    # the chmod covers a file that already existed with wider permissions.
    mkdir -p "$(dirname "$PASSPHRASE_LOCATION")"
    ( umask 077; echo "$PASSPHRASE" > "$PASSPHRASE_LOCATION" )
    chmod go-rwx "$PASSPHRASE_LOCATION"

    # Make sure the directory of the key exists
    ( umask 077; mkdir -p "$(dirname "$bosco_key")" )

    # Generate the key pair, protected by the password
    ssh-keygen -q -t rsa -f "$bosco_key" -P "$PASSPHRASE" > /dev/null

    if [ $? -ne 0 ]; then
        echo "Error running keygen" >&2
        exit 1
    fi
fi

init_ssh_args $remote_host

# Append the public key to the authorized_keys of the remote account,
# unless the user asked not to
if [ $copy_ssh_key = "yes" ] ; then
    echo "Enter the password to copy the ssh keys to $remote_host:"
    if ! cat ${bosco_key}.pub | ssh $ssh_args $ssh_host "umask 077; test -d ~/.ssh || mkdir ~/.ssh ; cat >> ~/.ssh/authorized_keys"; then
        echo "Error copying remote key.  Please make sure you password is correct."
        exit 1
    fi
fi

start_ssh

# Sanity check: a trivial remote command must work without a password
# (with verbose ssh output when debugging is enabled)
ssh_opts='-o PasswordAuthentication=no'
if [[ $debug = 1 ]]; then
    ssh_opts="$ssh_opts -vvv"
fi
if ! qmgr_out=$(ssh $ssh_opts $ssh_args "$ssh_host" "pwd" 2>&1); then
    echo "Password-less ssh to $remote_host did NOT work, even after adding the ssh-keys."
    echo "Does the remote resource allow password-less ssh?"
    echo "$qmgr_out"
    exit 1
fi

# PBS only: read the server configuration to learn how many jobs a user may
# queue in the default queue
if [ "$cluster_type" == "pbs" ]; then
    echo -n "Detecting PBS cluster configuration..."
    qmgr_out=$(ssh -o ControlMaster=auto -o ControlPath=$ssh_control_path $ssh_args $ssh_host /bin/bash -c -i "'qmgr -c \"print server\"'" 2>/dev/null )

    # Name of the default queue
    default_queue=$(expr "$qmgr_out" : '.*set server default_queue = \([a-zA-Z0-9_]*\)')

    # max_user_queuable of that queue
    max_queued=$(expr "$qmgr_out" : ".*set queue $default_queue max_user_queuable = \([0-9]*\).*")

    echo "Done!"
fi

# -1 stands for "no limit known"
if [ -z "$max_queued" ]; then
    max_queued=-1
fi


# Find the URL we should use to download the HTCondor binaries to be used
# on the remote host.  Nothing is computed when a URL was given with --url.
if [ -z "${release_url}" ]; then
    url_base=$(condor_config_val BOSCO_URL_BASE 2>/dev/null)
    if [ -z "${url_base}" ] ; then
        url_base="https://research.cs.wisc.edu/htcondor/tarball/"
    fi

    # The second word printed by condor_version is used as version number
    ver_str=($(condor_version))
    version="${ver_str[1]}"
    # The version must start with "<major>.<minor>": both numbers are used
    # below, the minor one in a numeric comparison.
    if [ -z "${ver_str[0]}" ] || ! [[ $version =~ ^([0-9]+)\.([0-9]+)(\.|$) ]]; then
        echo "Unable to determine HTCondor version."
        echo "Is it installed and set up correctly?"
        exit 1
    fi
    major_ver=${BASH_REMATCH[1]}
    minor_ver=${BASH_REMATCH[2]}

    # Tarballs are grouped by series: "<major>.0" for minor version 0,
    # "<major>.x" for every other minor version
    series="${major_ver}.${minor_ver}"
    if [ "$minor_ver" -ne 0 ]; then
        series="${major_ver}.x"
    fi

    if [ -n "${platform_force}" ] ; then
        platform="${platform_force}"
    else
        platform=$(ssh_find_remote "$remote_host")
        if [ -z "${platform}" ] ; then
            echo "Failed to detect remote host's platform."
            exit 1;
        fi
    fi

    release_url="${url_base}${series}/${version}/release/condor-${version}-${platform}-stripped.tar.gz"
    update_url="${url_base}${series}/${version}/update/condor-${version}-${platform}-stripped.tar.gz"
    # rc and daily builds are only considered when the version banner
    # contains one of the markers below
    if echo "${ver_str[*]}" | grep -q -e "PRE-RELEASE" -e " RC " -e "UW_development" ; then
        rc_url="${url_base}${series}/${version}/rc/condor-${version}-${platform}-stripped.tar.gz"
    fi
    if echo "${ver_str[*]}" | grep -q -e "PRE-RELEASE" -e "DAILY" -e "UW_development" ; then
        daily_url="${url_base}${series}/${version}/daily/condor-${version}-${platform}-stripped.tar.gz"
    fi
fi

# Create a tmp dir, download the files and rsync them to the remote system
# A download to the remote system would be faster but we cannot count on ports different from the SSH one
# OS X mktemp requires a template
tmp_dir=$(mktemp -d /tmp/tmp.XXXXXXXX)
if [ -z "$tmp_dir" ] || [ ! -d "$tmp_dir" ]; then
    echo "Unable to download and prepare files for remote installation."
    echo "Aborting installation to $remote_host."
    exit 1
fi

# Try the builds in order of preference and stop at the first download that
# succeeds: final release first (most general case), then update releases,
# then blessed RCs, then daily builds.  Builds without a URL are skipped.
rc=1
for build in release update rc daily; do
    if [ $rc -eq 0 ]; then
        break
    fi
    build_url_var="${build}_url"
    build_url=${!build_url_var}
    if [ -z "$build_url" ]; then
        continue
    fi
    show_progress "Downloading $build build for $remote_host" curl -f -s -S -o "$tmp_dir/bosco-download.tar.gz" "$build_url"
    rc=$?
    if [ $rc -ne 0 ] ; then
        echo "Failed to download $build build."
    fi
done

if [ $rc -eq 0 ] ; then
    show_progress "Unpacking" tar xzf "$tmp_dir/bosco-download.tar.gz" -C "$tmp_dir"
    rc=$?
    # The unpacked tree is the directory whose name starts with "condor"
    archive_dir=
    for candidate in "$tmp_dir"/condor*; do
        if [ -d "$candidate" ]; then
            archive_dir=$candidate
            break
        fi
    done
    if [ $rc -eq 0 ] && [ -z "$archive_dir" ]; then
        rc=1
    fi
fi
if [ $rc -ne 0 ]; then
    echo "Unable to download and prepare files for remote installation."
    echo "Download URL: $release_url"
    echo "Aborting installation to $remote_host."
    rm -rf -- "$tmp_dir"
    exit 1
fi


# Remote directories
#TODO: do we need a separate sandbox for each cluster?
remote_base_dir_root="bosco/cluster"
remote_base_dir_default="$remote_base_dir_root/default"

# The installation is first assembled locally, in
# $tmp_install_dir/$remote_base_dir_host, and copied to the remote host later
tmp_install_dir=$(mktemp -d /tmp/tmp.XXXXXXXX)
if [ -z "$tmp_install_dir" ] || [ ! -d "$tmp_install_dir" ]; then
    echo "Unable to download and prepare files for remote installation."
    echo "Aborting installation to $remote_host."
    rm -rf -- "$tmp_dir"
    exit 1
fi
local_install_dir="$tmp_install_dir/$remote_base_dir_host"
remote_sandbox_dir="$remote_base_dir_host/sandbox"

# TODO: uncomment to enable subdirs
#remote_base_dir_host="$remote_base_dir_root/$remote_host"
remote_glite_dir="$local_install_dir/glite"
local_sandbox_dir="$local_install_dir/sandbox"

if [ -d "$archive_dir/usr/lib64" ] ; then
    libdir=lib64
else
    libdir=lib
fi

# Make the necessary directories.  Every directory that is the target of a
# copy below is created up front, so that the copies, the symlinks and the
# configuration file always end up inside a directory.
mkdir -p "$local_sandbox_dir" \
    "$remote_glite_dir/bin" "$remote_glite_dir/sbin" \
    "$remote_glite_dir/lib" "$remote_glite_dir/$libdir" \
    "$remote_glite_dir/libexec" "$remote_glite_dir/etc" \
    "$remote_glite_dir/log"
# TODO: uncomment to enable subdirs
#ssh $ssh_args $ssh_host "ln -sfn $remote_base_dir_host $remote_base_dir_default"

# Copy over Manifest
rsync -aq "$archive_dir/Manifest.txt" "$remote_glite_dir/"
# Record the tarball name in the Manifest
basename "$archive_dir" >> "$remote_glite_dir/Manifest.txt"

# Copy over the blahp files
# (errors are ignored for these copies, as in the original script;
# presumably not every tarball ships every file - confirm)
rsync -aq "$archive_dir/bin/blahpd" "$remote_glite_dir/bin" 2>/dev/null
rsync -aq "$archive_dir"/sbin/bl* "$remote_glite_dir/sbin" 2>/dev/null
rsync -aq "$archive_dir"/usr/libexec/blahp/* "$remote_glite_dir/libexec" 2>/dev/null
rsync -aq "$archive_dir"/etc/bl* "$remote_glite_dir/etc" 2>/dev/null

# Copy over libraries preserving symlinks and directory name
rsync -aq "$archive_dir/$libdir"/lib*.so* "$remote_glite_dir/$libdir/" 2>/dev/null

# Copy the condor_ft-gahp
rsync -aq "$archive_dir/sbin/condor_ft-gahp" "$remote_glite_dir/bin" 2>/dev/null

# Copy rvgahp_server
rsync -aq "$archive_dir/sbin/rvgahp_server" "$remote_glite_dir/bin" 2>/dev/null

# link in local submit attributes with correct symlinks
# ("&&": never create the links in another directory if the cd fails)
(cd "$remote_glite_dir/libexec" && ln -sf ../etc/blahp/*_local_submit_attributes.sh .)

cat > "$remote_glite_dir/etc/condor_config.ft-gahp" <<EOF
BOSCO_SANDBOX_DIR=\$ENV(HOME)/$remote_sandbox_dir
LOG=\$ENV(HOME)/$remote_base_dir_host/glite/log
FT_GAHP_LOG=\$(LOG)/FTGahpLog
SEC_CLIENT_AUTHENTICATION_METHODS = FS, PASSWORD
SEC_PASSWORD_FILE = \$ENV(HOME)/$remote_base_dir_host/glite/etc/passwdfile
USE_SHARED_PORT = False
ENABLE_URL_TRANSFERS = False
EOF

# If patch is set, perform the patch
if [ -n "${override_dir}" ]; then
    echo "Patching bosco installation from directory: $override_dir"
    rsync -aq --backup --suffix .orig "$override_dir/" "$local_install_dir"
fi

# Copy the staged tree to the home directory of the remote account.
# Old remote files are deleted, except everything in the sandbox directory
# and .nfs* files, which are protected.
install_cmd=(
    rsync --delete
    "--filter=protect sandbox/*"
    "--filter=protect .nfs*"
    -aqK
    -e "ssh -o ControlMaster=auto -o ControlPath=$ssh_control_path $ssh_args"
    "$local_install_dir"
    $ssh_host:
)
show_progress "Installing on cluster $remote_host" "${install_cmd[@]}"
exit_code=$?

# The temporary directories are not needed any more, whatever the outcome
rm -rf $tmp_install_dir
rm -rf $tmp_dir 2>/dev/null

stop_ssh

# Give up when rsync reported a failure
if [ "$exit_code" -ne 0 ]; then
    echo "Unable to install on remote cluster..."
    echo "Exiting"
    exit 1
fi


# Register the cluster in the cluster list
add_cluster_to_list $remote_host $max_queued $cluster_type

echo "Installation complete"



# Tell the user how to submit jobs to the new cluster
echo "The cluster $remote_host has been added for remote submission"
echo "It is available to run jobs submitted with the following values:"
echo "> universe = grid"
echo "> grid_resource = batch $cluster_type $remote_host"

