#!/usr/bin/perl -w
# virt-df
# Copyright (C) 2009-2010 Red Hat Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

use warnings;
use strict;

use Sys::Guestfs;
use Sys::Guestfs::Lib qw(feature_available);

use Pod::Usage;
use Getopt::Long;
use File::Basename qw(basename);
use POSIX qw(ceil);

use Locale::TextDomain 'libguestfs';

=encoding utf8

=head1 NAME

virt-df - Display free space on virtual filesystems

=head1 SYNOPSIS

 virt-df [--options]

 virt-df [--options] domname

 virt-df [--options] disk.img [disk.img ...]

=head1 DESCRIPTION

C<virt-df> is a command line tool to display free space on virtual
machine filesystems.  Unlike other tools, it doesn't just display the
amount of space allocated to a virtual machine, but can look inside
the virtual machine to see how much space is really being used.

It is like the L<df(1)> command, but for virtual machines, except that
it also works for Windows virtual machines.

If used without any arguments, C<virt-df> checks with libvirt to get a
list of all active and inactive guests, and performs a C<df>-type
operation on each one in turn, printing out the results.

If used with any argument(s), C<virt-df> performs a C<df>-type
operation on either the single named libvirt domain, or on the disk
image(s) listed on the command line (which must all belong to a single
VM).  In this mode (with arguments), C<virt-df> will I<only work for a
single guest>.  If you want to run on multiple guests, then you have
to invoke C<virt-df> multiple times.

Use the C<--csv> option to get a format which can be easily parsed by
other programs.  Other options are mostly similar to standard C<df>
options.  See below for the complete list.

=head1 OPTIONS

=over 4

=cut

my $help;

=item B<--help>

Display brief help.

=cut

my $version;

=item B<--version>

Display version number and exit.

=cut

my $uri;

=item B<--connect URI> | B<-c URI>

If using libvirt, connect to the given I<URI>.  If omitted, then we
connect to the default libvirt hypervisor.

If you specify guest block devices directly, then libvirt is not used
at all.

=cut

my $csv;

=item B<--csv>

Write out the results in CSV format (comma-separated values).  This format
can be imported easily into databases and spreadsheets, but
read L</NOTE ABOUT CSV FORMAT> below.

=cut

my $human;

=item B<--human-readable> | B<-h>

Print sizes in human-readable format.

You are not allowed to use I<-h> and I<--csv> at the same time.

=cut

my $inodes;

=item B<--inodes> | B<-i>

Print inodes instead of blocks.

=cut

my $one_per_guest;

=item B<--one-per-guest>

Run one libguestfs appliance per guest.  Normally C<virt-df> will
add the disks from several guests to a single libguestfs appliance.

You might use this option in the following circumstances:

=over 4

=item *

If you think an untrusted guest might actively try to exploit the
libguestfs appliance kernel, then this prevents one guest from
interfering with the stats printed for another guest.

=item *

If the kernel has a bug which stops it from accessing a
filesystem in one guest (see for example RHBZ#635373) then
this allows libguestfs to continue and report stats for further
guests.

=back

=back

=cut

GetOptions ("help|?" => \$help,
            "version" => \$version,
            "connect|c=s" => \$uri,
            "csv" => \$csv,
            "human-readable|human|h" => \$human,
            "inodes|i" => \$inodes,
            "one-per-guest" => \$one_per_guest,
    ) or pod2usage (2);
pod2usage (1) if $help;
if ($version) {
    my $g = Sys::Guestfs->new ();
    my %h = $g->version ();
    print "$h{major}.$h{minor}.$h{release}$h{extra}\n";
    exit
}

# RHBZ#600977
die __"virt-df: cannot use -h and --csv options together\n" if $human && $csv;

# RHBZ#635373
#
# Limit the number of devices we will ever add to the appliance.  The
# overall limit in current libguestfs is 25: 26 = number of letters in
# the English alphabet since we are only confident that /dev/sd[a-z]
# will work because of various limits, minus 1 because that may be
# used by the ext2 initial filesystem.
my $max_disks = 25;

# Get the list of domains and block devices.
#
# We can't use Sys::Guestfs::Lib::open_guest here because we want to
# create the libguestfs handle/appliance as few times as possible.
#
# If virt-df is called with no parameters, then run the libvirt
# equivalent of "virsh list --all", get the XML for each domain, and
# get the disk devices.
#
# If virt-df is called with parameters, assume it must either be a
# single disk image filename, a list of disk image filenames, or a
# single libvirt guest name.  Construct disk devices accordingly.

my @domains = ();

if (@ARGV == 0) {               # No params, use libvirt.
    my $conn;

    if ($uri) {
        $conn = Sys::Virt->new (readonly => 1, address => $uri);
    } else {
        $conn = Sys::Virt->new (readonly => 1);
    }

    my @doms = $conn->list_defined_domains ();
    push @doms, $conn->list_domains ();

    # https://bugzilla.redhat.com/show_bug.cgi?id=538041
    @doms = grep { $_->get_id () != 0 } @doms;

    exit 0 unless @doms;

    foreach my $dom (@doms) {
        my @disks = get_disks_from_libvirt ($dom);
        push @domains, { dom => $dom,
                         name => $dom->get_name (),
                         disks => \@disks }
    }
} elsif (@ARGV == 1) {          # One param, could be disk image or domname.
    if (-e $ARGV[0]) {
        push @domains, { name => basename ($ARGV[0]),
                         disks => [ $ARGV[0] ] }
    } else {
        my $conn;

        if ($uri) {
            $conn = Sys::Virt->new (readonly => 1, address => $uri);
        } else {
            $conn = Sys::Virt->new (readonly => 1);
        }

        my $dom = $conn->get_domain_by_name ($ARGV[0])
            or die __x("{name} is not the name of a libvirt domain\n",
                       name => $ARGV[0]);
        my @disks = get_disks_from_libvirt ($dom);
        push @domains, { dom => $dom,
                         name => $dom->get_name (),
                         disks => \@disks }
    }
} else {                        # >= 2 params, all disk images.
    push @domains, { name => basename ($ARGV[0]),
                     disks => \@ARGV }
}

sub get_disks_from_libvirt
{
    my $dom = shift;
    my $xml = $dom->get_xml_description ();

    my $p = XML::XPath->new (xml => $xml);
    my @disks = $p->findnodes ('//devices/disk/source/@dev');
    push (@disks, $p->findnodes ('//devices/disk/source/@file'));

    # Code in Sys::Guestfs::Lib dies here if there are no disks at all.

    return map { $_->getData } @disks;
}

# Sort the domains by name for display.
@domains = sort { $a->{name} cmp $b->{name} } @domains;

# Since we got this far, we're somewhat sure we're going to
# get to print the result, so display the title.
print_title ();

# To minimize the number of times we have to launch the appliance,
# shuffle as many domains together as we can, but not exceeding
# MAX_DISKS per request.  If --one-per-guest was requested then only
# request disks from a single guest each time.
if ($one_per_guest) {
    foreach (@domains) {
        my @request = ( $_ );
        multi_df (@request);
    }
} else {
    while (@domains) {
        my $n = 0; # number of disks added so far
        my @request = ();
        while (@domains) {
            my $c = @{$domains[0]->{disks}};
            if ($c > $max_disks) {
                warn __x("virt-df: ignoring {name}, it has too many disks ({c} > {max})",
                         name => $domains[0]->{name},
                         c => $c, max => $max_disks);
                next;
            }
            last if $n + $c > $max_disks;
            $n += $c;
            push @request, shift (@domains);
        }
        multi_df (@request);
    }
}

sub multi_df
{
    local $_;
    eval {
        my $g = Sys::Guestfs->new ();

        my ($d, $disk);

        foreach $d (@_) {
            foreach $disk (@{$d->{disks}}) {
                $g->add_drive_ro ($disk);
            }
        }

        $g->launch ();
        my $has_lvm2 = feature_available ($g, "lvm2");

        my @devices = $g->list_devices ();
        my @partitions = $g->list_partitions ();

        my $n = 0;
        foreach $d (@_) {
            my $name = $d->{name};
            my $nr_disks = @{$d->{disks}};

            # Filter LVM to only the devices applying to the original domain.
            my @devs = @devices[$n .. $n+$nr_disks-1];
            $g->lvm_set_filter (\@devs) if $has_lvm2;

            # Find which whole devices (RHBZ#590167), partitions and LVs
            # contain mountable filesystems.  Stat those which are
            # mountable, and ignore the others.
            foreach (@devs) {
                try_df ($name, $g, $_, canonical_dev ($_, $n));
            }
            foreach (filter_partitions (\@devs, @partitions)) {
                try_df ($name, $g, $_, canonical_dev ($_, $n));
            }
            if ($has_lvm2) {
                foreach ($g->lvs ()) {
                    try_df ($name, $g, $_);
                }
            }

            $n += $nr_disks;
        }
    };
    warn if $@;
}

sub filter_partitions
{
    my $devs = shift;
    my @devs = @$devs;
    my @r;

    foreach my $p (@_) {
        foreach my $d (@devs) {
            if ($p =~ /^$d\d/) {
                push @r, $p;
                last;
            }
        }
    }

    return @r;
}

# Calculate the canonical name for a device.
# eg: /dev/vdb1 when offset = 1
#     => canonical name is /dev/sda1
sub canonical_dev
{
    local $_;
    my $dev = shift;
    my $offset = shift;

    return $dev unless $dev =~ m{^/dev/.d([a-z])(\d*)$};
    my $disk = $1;
    my $partnum = $2;

    $disk = chr (ord ($disk) - $offset);

    return "/dev/sd$disk$partnum"
}

sub try_df
{
    local $_;
    my $domname = shift;
    my $g = shift;
    my $dev = shift;
    my $display = shift || $dev;

    my %stat;
    eval {
        $g->mount_ro ($dev, "/");
        %stat = $g->statvfs ("/");
    };
    if (!$@) {
        print_stat ($domname, $display, \%stat);
    }
    $g->umount_all ();
}

sub print_stat
{
    my $domname = shift;
    my $dev = shift;
    my $stat = shift;

    my @cols = ($domname, $dev);

    if (!$inodes) {
        my $bsize = $stat->{bsize};	# block size
        my $blocks = $stat->{blocks};	# total number of blocks
        my $bfree = $stat->{bfree};	# blocks free (total)
        my $bavail = $stat->{bavail};	# blocks free (for non-root users)

        my $factor = $bsize / 1024;

        push @cols, $blocks*$factor;	# total 1K blocks
        push @cols, ($blocks-$bfree)*$factor; # total 1K blocks used
        push @cols, $bavail*$factor;	# total 1K blocks available

        push @cols, 100.0 - 100.0 * $bfree / $blocks;

        if ($human) {
            $cols[2] = human_size ($cols[2]);
            $cols[3] = human_size ($cols[3]);
            $cols[4] = human_size ($cols[4]);
        }
    } else {
        my $files = $stat->{files};	# total number of inodes
        my $ffree = $stat->{ffree};	# inodes free (total)
        my $favail = $stat->{favail};	# inodes free (for non-root users)

        push @cols, $files;
        push @cols, $files-$ffree;
        push @cols, $ffree;

        push @cols, 100.0 - 100.0 * $ffree / $files;
    }

    print_cols (@cols);
}

sub print_title
{
    my @cols = (__"Virtual Machine", __"Filesystem");
    if (!$inodes) {
        if (!$human) {
            push @cols, __"1K-blocks";
        } else {
            push @cols, __"Size";
        }
        push @cols, __"Used";
        push @cols, __"Available";
        push @cols, __"Use%";
    } else {
        push @cols, __"Inodes";
        push @cols, __"IUsed";
        push @cols, __"IFree";
        push @cols, __"IUse%";
    }

    if (!$csv) {
        # ignore $cols[0] in this mode
        printf "%-36s%10s %10s %10s %5s\n",
          $cols[1], $cols[2], $cols[3], $cols[4], $cols[5];
    } else {
        print (join (",", @cols), "\n");
    }
}

sub print_cols
{
    if (!$csv) {
        my $label = sprintf "%s:%s", $_[0], $_[1];

        printf ("%-36s", $label);
        print "\n"," "x36 if length ($label) > 36;

        # Use 'ceil' on the percentage in order to emulate
        # what df itself does.
        my $percent = sprintf "%3d%%", ceil($_[5]);

        printf ("%10s %10s %10s %5s\n", $_[2], $_[3], $_[4], $percent);
    } else {
        printf ("\"%s\",\"%s\",%d,%d,%d,%.1f%%\n", @_);
    }
}

# Convert a number of 1K blocks to a human-readable number.
sub human_size
{
    local $_ = shift;

    if ($_ < 1024) {
        sprintf "%dK", $_;
    } elsif ($_ < 1024 * 1024) {
        sprintf "%.1fM", ($_ / 1024);
    } else {
        sprintf "%.1fG", ($_ / 1024 / 1024);
    }
}

=head1 NOTE ABOUT CSV FORMAT

Comma-separated values (CSV) is a deceptive format.  It I<seems> like
it should be easy to parse, but it is definitely not easy to parse.

Myth: Just split fields at commas.  Reality: This does I<not> work
reliably.  This example has two columns:

 "foo,bar",baz

Myth: Read the file one line at a time.  Reality: This does I<not>
work reliably.  This example has one row:

 "foo
 bar",baz

For shell scripts, use C<csvtool> (L<http://merjis.com/developers/csv>
also packaged in major Linux distributions).

For other languages, use a CSV processing library (eg. C<Text::CSV>
for Perl or Python's built-in csv library).

Most spreadsheets and databases can import CSV directly.

=head1 SEE ALSO

L<guestfs(3)>,
L<guestfish(1)>,
L<Sys::Guestfs(3)>,
L<Sys::Guestfs::Lib(3)>,
L<Sys::Virt(3)>,
L<http://libguestfs.org/>.

=head1 AUTHOR

Richard W.M. Jones L<http://people.redhat.com/~rjones/>

=head1 COPYRIGHT

Copyright (C) 2009-2010 Red Hat Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
