#!/usr/local/bin/perl -w
#---------#---------#---------#---------#---------#---------#---------#---------
#
# title
#      read LDIF files, make LDIF delete records for all except ones in a list
# author
#      idc@planetlarg.net 28 nov 03
# description
#    a variation on ldap-ldif2ldif-hash.pl
#    This one checks each record, selects a few and prints delete statements
#
#
# bugs
#    does not allow for multivalued attributes
#    does not deal with multiline entries
#
# modifications
#
#---------#---------#---------#---------#---------#---------#---------#---------
# setup
   use 5.005;
   use strict;
   use vars qw( $DEBUG $VERBOSE);
   $DEBUG          = 0; # 1 to print internal information, 0 for normal
   $VERBOSE        = 1; # 1 to print internal information, 0 for normal


# logs to examine
    my @logs       = @ARGV if (@ARGV) ;
    usage() unless (@logs);
    die "no log to process! \n" unless @logs;


# vars
   # store the results in these vars
    my %tally;                    # count of various totals
    $tally{del_records}    = 0;   # tally of records to delete
    $tally{match_records}  = 0;   # tally of target records in all log files
    my %records;                  # ldif entries
    # oooh, let's get our first statistic
    $tally{logs}            = @logs;  # number of logs
    #
    my $log;                      # current logfile (with path) being read
#
#---------#---------#---------#---------#---------#---------#---------#---------
# main
# let's rock. open the logfile
# examine
# add up the stats
# display the results
#
    # get stuff
    $/  = "\n\n";   # change from line reading mode to paragraph reading mode
    oreach $log ( @logs ) {
        print STDERR "\$log $log \n" if $DEBUG;
        my $handle  = &open_log ("$log");
        my %conns;
        while (<$handle>) {
            $tally{total_records}++;
            &ldif2hash ( $_, \%tally, \%records);
        }
        close $handle or die "can't close file $log: $!";
    } # foreach
    $/  = "\n";  # change from paragraph reading mode to line reading mode
    #
    # put stuff
    my $dels = &hash2txt (\%tally, \%records);
    print STDOUT $dels;
    #
    # report
    my $rep  = &report (\%tally, @logs);
    print STDERR $rep;


#---------#---------#---------#---------#---------#---------#---------#---------
# subs
#   all subroutines go below here.
#
#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#   hash2txt
# description
#
sub hash2txt {
    my $tally_ptr     = shift; # counts for lines read, files examined, etc.
    my $rec_ptr       = shift;
    my $key;
    my $txt;                # list of delete LDIFs
    my $dn;                 # LDAP's idea of a key
    my $new_attribute;      # what the old attributes get translated to
    my $value;              # value to copy
    #
    # agent labels. Each one must be checked.
    my $val_to_check;       # current value being checked
    my $val_in_key;         # value in record key
    my @vals_to_keep  = qw(
    0000
    0212
    0402
    0558
    0573
    0723
    0727
    K481
    K484
    K486
    K498
    K508
    K519
    K541
    K547
    K635
    K654
    K674
    M035
    M051
    M059
    M070
    M071
    M073
    M088
    NQ029
    WT001
    );
    my $val_found;          # one of the vals_to_keep

# work through the whole hash
    oreach $key (sort keys %$rec_ptr) {
        print STDERR "checking \$key $key \n" if $DEBUG;
        #
# skip certain records
        #
        # does the key contain the text
        # "ou=XXX,ou=agent,o=isp01.co.uk"?
        unless ($key =~ m/
            ou=(.....),\s*              # Note the ou label.
            ou=[Ii]ntermediary,\s*      # Match the fields, commas,
            o=pp                        # and any spaces seperating them.
        /x) {
            print STDERR "keep   $key\n" if $VERBOSE;
            next;
        }; # unless
        $val_in_key = uc ($1);
        #
        # does a certain field contain one value?
        if ($rec_ptr->{$key}{destinationindicator}) {
            if ($rec_ptr->{$key}{destinationindicator} =~ m/International|Both/i) {
                print STDERR "uid    $val_in_key has value 'International' or 'Both'\n" if $VERBOSE;
                next;
            };
        };
        #
        # does the key contain one of a range of values?
        $val_found = '';
        oreach $val_to_check (@vals_to_keep) {
            #print STDERR "\$val_to_check $val_to_check\n" if $DEBUG;
            if ( $val_to_check eq $val_in_key ) {
                print STDERR "keep   $key\n" if $VERBOSE;
                $val_found = $key;
                last;
            }
        };  # foreach
        next if ($val_found); # found one of our keepers
        #
        print STDERR "delete $key\n" if $VERBOSE;
        $tally_ptr->{del_records}++;
        $txt .= "dn: $key
changetype: delete

";
    } # foreach
# return the new hash
    return $txt;
}
#
#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#   open_log
# description
#
sub open_log {
   my $file = shift;
   die "empty file: $file" if -z $file;
   die "not a text file: $file" unless -T $file;
   open( IN, "<$file") or die "can't open $file: $!";
   return *IN;
}
#
#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#  ldif2hash
# description
#  Take a multiline ldap record.
#  Copy attributes and values from this record using regular expressions.
#  Regex flags are
#    c (continue past end of line characters),
#    m (treat it as a multiline record by matching start and end of each line)
#  and
#    x (extended format, so we can use whitespace and comments in the regex).
#
#  people records start with lines like this
#     # entry-id: 25
#     dn: uid=john smith,ou=people,o=isp01.co.uk
#     ...
#  and have entries like this
#     sn: SMITH
#     ...
sub ldif2hash {

    my $record        = shift; # multiline record
    my $tally_ptr     = shift; # counts for lines read, files examined, etc.
    my $records_ptr   = shift; # processed records
    #
# extract the record key
    $record =~ m/
            dn:\s*           # Match "dn:", then any spaces,
            (.*)             # then everything to the end of the line
    /x;
    my $key = $1;
    #
# copy fields from the LDIF to the hash
    #
    # BUGS
    #
    # PROBLEMS WITH
    # - \s*$     IF NO VALUE, MISSES THE END OF LINE AND MATCHES THE NEXT LINE
    # - \s*(.*)$ NOT REMOVING LEADING SPACES FROM VALUE
    #
    # DOES NOT MATCH MULTILINE VALUE CORRECTLY
    # eg. these two fields won't be split properly
    # teamdescription: The e-commerce team look after the web application
    #   hosting for the UK
    # mail: john.smith@isp01-tech.com
    #
    # would Grahame Barr's Net::LDAP::LDIF be better?
    #
    $tally_ptr->{match_records}++; # how many records parsed so far
    my $field;
    my $attr;                       # lots of "attribute: value" in LDAP land.
    my $value;
    while ($record =~ m/
                ^                   # from the start of line, find
                ([^:]*)             # attribute (anything that isn't a colon),
                :                   # first colon (the inter-field seperator),
                (.*)                # and the value is everything else
                $                   # to the end of the line
        /gmox) {
            $attr   = $1;
            $value  = &trim ($2);
            $records_ptr->{$key}{$attr}    = $value; # add an entry to the hash
            print STDERR
                "\$records_ptr->{\$key}{\$attr} = \$value ",
                "$records_ptr ->{ $key }{ $attr } = $value \n"
                if $DEBUG;
    } # while
}
#
#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#  trim
# description
#  see perl cookbook p.30
sub trim {
    my @out = @_;
    or (@out) {
#        print STDERR "trim before  >>>$_<<< \n";
        s/^\s+//;
        s/\s+$//;
#        print STDERR "trim after   >>>$_<<< \n";
    }
    return wantarray ? @out : $out[0];
}


#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#  report
# description
#  Create a report and shove it in a scalar.
#  note the presence of one of those weird
#  numeric value sort criteria operators "<=>"
#  and a format string "%this %that %the-other"
#
sub report {

   my $tally_ptr        = shift; # counts of everything else eg. lines read
   my @logs             = @_;
   my $today            = scalar localtime; # date eg.Tue Apr 17 11:29:19 2001
   # host
   use Sys::Hostname;
   my $host             = hostname();  # the machine this script runs on

   my $report           = "\n";        # the scalar to hold the final report
   my $threshold        = 20;          # number of top records to report

   # add some pretty commas
   # this should be done inline, but I have some format problems creeping in
   my $tally_logs           = &commas($tally_ptr->{logs});
   my $tally_del_records    = &commas($tally_ptr->{del_records});
   my $tally_match_records  = &commas($tally_ptr->{match_records});

# report header
   $report = <<END;
report summary
-------------------------------------------------------------------------------
host:                         $host
prepared on:                  $today
matching records:             $tally_match_records
records to delete:            $tally_del_records
number of logs:               $tally_logs
logs:                         @logs
-------------------------------------------------------------------------------
END

   return $report;
}


#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#  commas
# description
#  break up a big number with commas. Lifted from the perlop manpage
#
sub commas {
   local($_)=@_;
   1 while s/(\d)(\d\d\d)(?!\d)/$1,$2/g;
   $_;
}



#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#   usage
# description
#   describe how this script should be used.
#   This is one way of producing a set of instructions,
#   using simple indentation.
#   For more complex document, use mark-up. See the perldoc section below.
#
sub usage {

   use Text::Wrap qw(wrap $columns fill);
   $columns             = 70;
   my $pre1             = "   ";
   my $pre2             = "   ";
   my $name             = "$0 - extract from LDIF file to CSV";
   my $synopsis         = "$0 dir/file1.ldif dir/file2.ldif 1>/tmp/ldif-out.txt 2>/tmp/ldif-err.txt";
   my $description      = "
When given a list of ldif files, this script reads
 in all the records, tweak them and print them as LDAPMODIFY records
to STDOUT.
It prints a summary report to STDERR.
";
   my $examples         = "
";

   print
               "NAME \n",
               wrap ($pre1, $pre2, $name), "\n",
               "SYNOPSIS \n",
               wrap ($pre1, $pre2, $synopsis), "\n",
               "DESCRIPTION \n",
               ill ($pre1, $pre2, $description), "\n",
               "EXAMPLES \n",
               wrap ($pre1, $pre2, $examples), "\n",
   ;
   exit 1;
}
#---------#---------#---------#---------#---------#---------#---------#---------


__END__

documentation

The "__END__" string is a token. It is the logical end of program text.
You can put anything you want after this token, such as an
instruction manual page; it will be ignored by the compiler.
