#!/usr/bin/perl -w
####################################################################
# (C) Copyright 2009-2013 Hewlett-Packard Development Company, L.P.
# @(#) Product Name                :  HP Serviceguard
# @(#) Product Version             :  A.12.10.00
# @(#) Patch Name                  :  
####################################################################

use strict;

use Data::Dumper;
use Pod::Usage;
use Sys::Hostname;
# POSIX exports a ton of stuff. Suppress all but what we'll use.
use POSIX qw(pathconf _PC_PATH_MAX _PC_NAME_MAX);
use Errno qw(EAGAIN);

# Estimated time, in seconds, needed to probe one disk. It is multiplied by
# the largest per-node disk count to estimate how long the analysis takes.
use constant TIME_TO_PROBE => 0.39;
=head1 NAME

cmcheckdg - check that there is a uniform number of disks for a given
            dg on the specified nodes

=cut

# Refuse to go any further unless the real user id is 0 (root).
die "Only root is allowed to run $0\n" if ($< != 0);


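# Process arguments
# -v                            Verbose flag
# -d <disk group>               VxVM/CVM disk group path
# -n <node>                     Name of a node to include for the
#                               preceding -d disk group.
# -D <level>                    Debug level (non-negative integer); higher
#                               values display more logging
# -O <log file>                 File that debug log messages are appended to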


=head1 SYNOPSIS

cmcheckdg
S<[-v] -d disk_group -n node [-n node] ... [-d disk_group -n node ...] ...>

=cut

# GLOBALS

# $diskGroups{<disk group>}{<node>} holds the number of disks counted for
# that disk group on that node.
my %diskGroups;
# Every node named on the command line, without duplicates.
my @nodes;
# Per-node command output (array ref of lines) and the per-node file the
# output is captured in; both are filled in by cmexecAndCapture().
my (%cmexecOutput, %cmexecFiles);
my %options;

# Reporting controls.
my ($verbose, $debug) = (0, 0);
my $debugLevel = -1;            # stays -1 until a -D value has been parsed
my $logFile;                    # name of the -O log file, if one was given
my $errors = 0;                 # number of problems found so far

# Argument-parser state: each of these is 1 while the next argument is
# expected to be the value of -d, -n, -D or -O respectively.
my ($dg, $node, $debOpt, $logOpt) = (0, 0, 0, 0);
my $curDg;                      # disk group that -n nodes are attached to

# Loop variables and scratch values used by the main script.
my ($nodeName, $nodeName2, $hostname);

# Value of SGSBIN from the cluster configuration file; cmexec is run
# from this directory.
my $sgsbin = get_sgsbin_path();
chomp($sgsbin);

# Parse the command line by hand, one argument at a time.
#
# Recognized flags: -v, -d <disk group>, -n <node>, -D <level> and
# -O <log file>. A value-taking flag sets a state variable ($dg, $node,
# $debOpt or $logOpt) and the argument that follows is consumed as its
# value. Each -n node is recorded under the most recent -d disk group.
# Any usage error prints the synopsis and exits.
my $lastOption=undef;
foreach my $option (@ARGV) {

    #This segment is to make sure that for options
    #other than -v there is a value associated.

    if (defined($lastOption)) {
        if ($option =~ m/^-/) {
            if (($lastOption =~ m/^-/) and ($lastOption ne "-v")) {
                print "Incorrect usage: $lastOption, $option \n";
                pod2usage();
                exit(1);
            }
        }
    }
    $lastOption = $option;
    if ($option eq "cmcheckdg") {
        # An argument that is literally the program name is ignored.
    }
    elsif ($option eq "-v") {
        # -v may be given only once.
        if ($verbose == 0) {
            $verbose = 1;
        }
        else {
            pod2usage();
            exit(1);
        }
    }
    elsif ($option eq "-n") {
        $node = 1;
    }
    elsif ($option eq "-d") {
        $dg = 1;
    }
    elsif ($option eq "-D") {
        # -D may be given only once (the level is still at its -1 default).
        if ($debugLevel == -1) {
            $debOpt = 1;
        }
        else {
            pod2usage();
            exit(1);
        }
    }
    elsif ($option eq "-O") {
        # -O may be given only once.
        if (!defined($logFile)) {
            $logOpt = 1;
        }
        else {
            pod2usage();
            exit(1);
        }
    }
    else {
        # Not a flag, this must be either a node name or dg name
        # or debug value or log file name.

        if ($dg) { # dg name?
            # Make sure we had at least one node in the prior dg
            if (defined $curDg) {
                unless (keys %{$diskGroups{$curDg}}) {
                    # Last dg had no nodes
                    pod2usage();
                    exit(1);
                }
            }

            $curDg = $option;
            $dg = 0;
        }
        elsif ($node) { # node name?
            unless (defined $curDg) {
                # A node was named before any disk group.
                pod2usage();
                exit(1);
            }
            else {
                # Add node hash to dg hash and initialize the value to 0
                $diskGroups{$curDg}{$option} = 0;

                # Also, push the nodename onto our nodes array. Compare
                # with eq rather than a regex so that characters such as
                # "." in a host name are not treated as wildcards.
                unless (grep { $_ eq $option } @nodes) {
                    push(@nodes, $option);
                }

                $node = 0;
            }
        }
        # It must either be debug level or log file
        elsif ($debOpt) {
            if (!($option =~ m/^[\d]+$/)) {
                print "Log level must be an integer value\n";
                pod2usage();
                exit(1);
            }
            $debugLevel = $option;
            $debOpt = 0;
            # Any debug level of 1 or more also turns on verbose output.
            if ($debugLevel >= 1) {
                $debug = 1;
                $verbose = 1;
            }
        }
        elsif ($logOpt) {
            $logFile = $option;
            # Three-argument open so the file name is never parsed for a
            # mode; debugLog() writes to this FH handle.
            open(FH, ">>", $logFile) || die "Cannot open $logFile: $!";
            $logOpt = 0;
        }
        else {
            # This isn't a valid option, and hasn't been preceded by
            # a -d or -n or -D or -O option
            pod2usage();
            exit(1);
        }
    }
}

# The check at the top of the loop only notices a missing value when
# another flag follows, so also reject a value-taking flag that was left
# dangling as the very last argument.
if ($dg or $node or $debOpt or $logOpt) {
    print "Incorrect usage: $lastOption \n";
    pod2usage();
    exit(1);
}

# Without -D the debug level is still at its -1 placeholder; normalize it
# to 0 to keep logging in sync with Serviceguard commands.
$debugLevel = 0 if ($debugLevel == -1);

# Usage error unless at least one disk group was specified and the final
# disk group was given at least one node.
unless (defined($curDg) and keys(%{$diskGroups{$curDg}})) {
    pod2usage();
    exit(1);
}

# At debug levels above 3, show what the argument parser collected.
print Dumper(%diskGroups) if ($debugLevel > 3);

# Record the local host name, cut at the first dot in case hostname()
# returned a fully qualified name.
$hostname = hostname();
chomp($hostname);
$hostname =~ s/\..*//;

# In verbose mode, list the disk groups that are about to be validated.
if ($verbose) {
    my $banner = "Validating diskgroups:";
    $banner .= " $_" foreach (keys %diskGroups);
    print $banner;
    print "\n";
}

# Take a plain disk listing from every node first, so that the duration of
# the real analysis can be estimated from the largest per-node disk count.
my @totalDisks = ();

cmexecAndCapture("/usr/sbin/vxdisk list") || $errors++;

foreach $nodeName (@nodes) {
    # A node whose output could not be captured may have no entry at all;
    # treat it as an empty listing instead of dereferencing undef.
    my @output = @{ $cmexecOutput{$nodeName} || [] };
    # Drop the heading line
    shift(@output);
    push(@totalDisks, scalar(@output));
    debugLog(3, "Node $nodeName has ". scalar(@output) . " disks. \n");
}

# Largest disk count seen on any node (0 when nothing was listed).
my $maxDisks = 0;
foreach (@totalDisks) {
    $maxDisks = $_ if ($_ > $maxDisks);
}

if ($verbose) {
    print("Found a maximum of $maxDisks disks on a node \n");
}

#In our testing on a large lun cluster it approximately took 20 minutes
#to analyze 3076 disks. So it comes to .39 seconds for disk.

# Estimated analysis time in seconds.
my $eTime = $maxDisks * TIME_TO_PROBE ;

# If it is going to take more than 90 seconds (1.5 minutes) then
# print estimated time in minutes else in seconds. Both figures are
# rounded up to the next whole unit.

if ($verbose) {
    my $estimate;
    if ($eTime > 90) {
        $estimate = (int($eTime/60) + 1) . " minutes";
    }
    else {
        $estimate = (int($eTime) + 1) . " seconds";
    }
    print("Analysis of $maxDisks disks should take approximately " .
           "$estimate\n");
}

    # Run vxdisk on all nodes and capture number of disks per dg.
# List the disks of all disk groups on every node, then count, per node,
# how many disks belong to each disk group named on the command line.
cmexecAndCapture("/usr/sbin/vxdisk -o alldgs list") || $errors++;

foreach $nodeName (@nodes) {
    # A node whose output could not be captured may have no entry at all;
    # treat it as an empty listing instead of dereferencing undef.
    my @output = @{ $cmexecOutput{$nodeName} || [] };
    # Example vxdisk output:
    # bill:/>vxdisk -o alldgs list
    # DEVICE       TYPE            DISK         GROUP        STATUS
    # c0t0d0s2     auto:LVM        -            -            LVM
    # c0t2d0       auto:cdsdisk    -            -            online
    # c3t0d0       auto:cdsdisk    c3t0d0       rootdg       online
    # c5t0d1       auto:none       -            -            online invalid
    # c5t0d2       auto:LVM        -            -            LVM
    # c5t0d6       auto:LVM        -            -            LVM
    # c5t0d7       auto:cdsdisk    -            (dg_dd0)     online
    # c5t1d0       auto:cdsdisk    -            (dg_dd1)     online
    # c5t1d1       auto:cdsdisk    -            (cvm_dg0)    online
    # c5t1d2       auto:cdsdisk    -            (cvm_dg1)    online
    # c5t1d3       auto:cdsdisk    -            (cvm_dg2)    online
    # c5t1d4       auto:cdsdisk    -            (cvm_dg3)    online
    # bill:/>

    # Drop the heading line
    shift(@output);

    foreach my $line (@output) {

        # Capture the fields we're interested in: the device (column 1)
        # and the group (column 4, with any surrounding parentheses
        # stripped). Lines that do not have that shape are skipped.
        next unless ($line =~ /^(\S+)\s+\S+\s+\S+\s+\(?([^)\s]+)\)?/);
        my ($device, $group) = ($1, $2);

        # "-" means the disk is in no group; other groups that were not
        # asked for are ignored.
        next if ($group eq "-");
        next unless (exists $diskGroups{$group});

        # Separate test required due to perl's autovivification: only
        # count the disk if this node was named for this disk group.
        next unless (exists $diskGroups{$group}{$nodeName});

        debugLog(3, "Found disk $device on node $nodeName " .
                 "for disk group $group\n");
        $diskGroups{$group}{$nodeName}++;
    }
}

# Walk every requested disk group and check its per-node disk counts.
foreach my $group (keys %diskGroups) {
    my $countOn = $diskGroups{$group};

    foreach my $thisNode (keys %$countOn) {
        my $thisCount = $countOn->{$thisNode};

        # A node that has no disks at all for the group is an error.
        if ($thisCount == 0) {
            print "ERROR: No Disks found for Disk Group $group on node " .
                  "$thisNode.\n";
            $errors++;
        }

        # Compare against every node of the group; each node that has
        # more disks than this one is reported as an inconsistency.
        foreach my $otherNode (keys %$countOn) {
            my $otherCount = $countOn->{$otherNode};
            next unless ($thisCount < $otherCount);

            print "ERROR: Inconsistent Disk count discovered for Disk " .
                  "Group $group. $thisCount Disks " .
                  "found on node $thisNode, " .
                  "$otherCount Disks found on node " .
                  "$otherNode.\n";
            $errors++;
        }
    }
}

# If debug, dump our hash of hashes for debugging
if ($debugLevel > 3) {
   print Dumper(%diskGroups);
}

# FH is only opened when -O named a log file. Closing it unconditionally
# makes perl -w warn about an unopened filehandle on every run without -O.
if (defined($logFile)) {
    close(FH) or warn "Cannot close $logFile: $!\n";
}

# Exit status is 1 when any problem was counted and 0 otherwise. The
# summary line is only printed on the two platforms tested for below.
if ($errors) {
    if ($^O eq "hpux") {
        print "VxVM/CVM disk group check failed.\n";
    } elsif ($^O eq "linux") {
        print "VxVM disk group check failed.\n";
    }
    exit(1);
}
exit(0);

# Runs $cmd on all nodes in @nodes in parallel using cmexec, one child
# process per node, each writing its output to a per-node file in /var/tmp.
# Once every child has been reaped, the files are read into %cmexecOutput
# (node name => array ref of chomped output lines) and removed.
#
# Parameters:
#   $cmd - the command string handed to cmexec for every node.
# Returns 1 if every node's command exited with status 0 and its output
# file could be read, 0 otherwise.
# Dies if fork fails for a reason other than EAGAIN, if not all children
# could be waited for, or if closing an output file fails.
sub cmexecAndCapture {
    my ($cmd) = @_;
    my $ret = 1;
    my $pid;
    my %pidNodes;    # child pid => node name it was launched for

    foreach my $node (@nodes) {
        # NOTE(review): this file name is predictable and lives in a
        # shared directory while the script runs as root -- confirm that
        # cmexec cannot be tricked by a pre-planted file or symlink.
        $cmexecFiles{$node} = "/var/tmp/checkdg.$node.$$";
        $pid = undef;
        # EAGAIN from fork is retried after a short pause; anything else
        # is fatal.
        while (!defined($pid = fork)) {
            die "fork failed: $!" if ($! != EAGAIN);
            sleep 1;
        }
        if ($pid == 0) {
            # child: replaced by cmexec, which only returns on failure
            exec("$sgsbin/cmexec", $node, "-o", $cmexecFiles{$node}, $cmd) or
                die "exec \"$cmd\" on node $node failed: $!";
        }
        #parent
        $pidNodes{$pid} = $node;
        debugLog(1, "Launched command \"$cmd\" on node $node\n");
    }

    # Reap every child; wait() returns -1 once there are none left.
    my $runningPids = keys %pidNodes;
    while (-1 != ($pid = wait())) {
        my $stat = $?;
        my $doneNode = $pidNodes{$pid};
        if ($stat == 0) {
            debugLog(1, "Command \"$cmd\" on node " . $doneNode .
                     " completed\n");
        }
        else {
            $ret = 0;
            print "\"$cmd\" on node $doneNode failed\n";
            # Show whatever the failed command left in its output file.
            if (open(my $errFh, "<", $cmexecFiles{$doneNode})) {
                while (my $errLine = <$errFh>) {
                    print $errLine;
                }
                close($errFh);
            }
        }
        $runningPids--;
    }
    die "Failed to wait for all commands" if ($runningPids > 0);

    foreach my $node (values %pidNodes) {
        my $file = $cmexecFiles{$node};
        debugLog(3, "Reading $file\n");

        # Start from an empty result so that a node never keeps the
        # output of an earlier command when this one produced none.
        $cmexecOutput{$node} = [];

        my $outFh;
        unless (open($outFh, "<", $file)) {
            print "ERROR: No output for node $node\n";
            $ret = 0;
            next;
        }
        my @output = ();
        while (my $line = <$outFh>) {
            chomp $line;
            push(@output, $line);
            debugLog(5, "$line\n");
        }
        debugLog(3, "Read " . scalar(@output) . " lines from node $node\n");
        debugLog(5, "Output array:\n" . join("\n", @output) . "\n");
        $cmexecOutput{$node} = [ @output ];
        close($outFh) or die "close $file failed: $!";
        unlink $file;
    }
    debugLog(5, "cmexecOutput:\n");
    print Dumper(%cmexecOutput) if $debugLevel > 4;
    debugLog(3, "cmexecAndCapture returning $ret\n");

    return $ret;
}

# Emits $msg when $level does not exceed the current $debugLevel.
#
# Parameters:
#   $level - debug level at which this message becomes visible.
#   $msg   - text to emit, written exactly as given.
# The message goes to the FH log handle when a log file was named
# ($logFile is defined), otherwise to standard output.
sub debugLog {
    my ($level, $msg) = @_;
    return if ($level > $debugLevel);

    if (defined($logFile)) {
        # print, not printf: the message is data rather than a format
        # string, so a "%" in logged output must be written literally.
        print FH $msg;
    }
    else {
        print "$msg";
    }
}

################################################################################
#Function get_sgsbin_path
#This function finds the path of the SG binaries by looking up the SGSBIN
#setting in the cluster configuration file.
#
#Parameters:
#  $file - optional; the file to read. Defaults to /etc/cmcluster.conf.
#Returns the text that follows "SGSBIN=" on the first such line, with
#surrounding whitespace removed.
#Dies if the file cannot be opened or holds no SGSBIN line with a value.
################################################################################
sub get_sgsbin_path {
    my ($file) = @_;
    $file = "/etc/cmcluster.conf" unless (defined($file));

    open(my $conf, "<", $file) or
                die "Unable to open the file $file : $!";
    my $path;
    while (my $line = <$conf>) {
        # Match the SGSBIN key exactly (not a longer key that merely
        # starts with it) and keep everything after the first "=", so a
        # value that itself contains "=" is not cut short.
        if ($line =~ /^SGSBIN\s*=\s*(.*?)\s*$/) {
            $path = $1;
            last;
        }
    }
    close($conf);
    die "Unable to find SGSBIN in $file"
        if (!defined($path) or $path eq "");
    return ($path);
}
