#!/usr/bin/perl -w
#---------#---------#---------#---------#---------#---------#---------#---------
# OUTSTANDING
#   DIRECTORY DELETION
#   SMTP SEND CHECK
#
# Title
#   take a directory full of e-mail messages and send them again
# Author
#   idc@planetlarg.net 13/03/01
# Description:
#   This is for cleaning up the message queue. Sometimes the smtp server dies
#   and refuses to restart until you move the messages to another directory.
#   You are then left with a bunch of messages that need re-sending.
#   take a queue of envelopes.
#   Check them out for nasty bits such as huge size or known spammer.
#   mail the messages.
#
#   See the associated utility "backup-queue.sh".
# modifications
#
#---------#---------#---------#---------#---------#---------#---------#---------
# debugging
   use 5.005;
   use strict;
   use vars qw ($DEBUG);
   $DEBUG                = 0; # extra internal printing

# temporary directories
#   my $queue_dir         = '/data/netscape/ms-home/msg-fusmss01/queue';
   my $queue_dir         = '/data/netscape/ms-isp01.com/msg-isp01.com/queue';
#   my $queue_dir         = '/export/home/isp01/scripts/smtp/queue-test';

   my $message_dir       = "$queue_dir/messages_230501";
   my $control_dir       = "$queue_dir/control_230501";
   my $deferred_dir      = "$queue_dir/deferred_230501/SMTP-Deliver";

#   my $message_dir       = "$queue_dir/messages_090501";
#   my $control_dir       = "$queue_dir/control_090501";
#   my $deferred_dir      = "$queue_dir/deferred_090501/Mailbox-Deliver";

# store the results somewhere safe
   my $results_file = "$0.log";
   print "sending the results to $results_file \n";
   open (OUT, ">$results_file") or die "can't open results log $results_file: $! \n";

   use File::Find;
   print OUT "re-mail started on ", scalar localtime, "\n";

# re-send each waiting message
   print OUT "-------------------------------------\n";
   print OUT "re-sending the control queue \n";
   print OUT "directory: $control_dir \n";
   find ( \&resend, ($control_dir) );

# re-send each deferred message
   print OUT "-------------------------------------\n";
   print OUT "re-sending the deferred queue \n";
   print OUT "directory: $deferred_dir \n";
   find ( \&resend, ($deferred_dir) );

# delete the temporary directories.
   print OUT "-------------------------------------\n";
   print OUT "deleting the control queue \n";
   # dunno if this syntax is correct
   find ( \&remove, ($control_dir) );
   print OUT "deleting the deferred queue \n";
   find ( \&remove, ($deferred_dir) );

# clean up
   print OUT "re-mail finished on ", scalar localtime, "\n";
   close OUT;

#---------#---------#---------#---------#---------#---------#---------#---------
# subroutine
#   remove
# description
#
sub remove {
   # is it a file? unlink it
   # is it a dir? rmdir it
}

# subroutine
#   resend
# description
#   we are in the envelope directory and $_ is set to the filename.
#   read the file and extract all the envelopes.
#   Match each envelope with its body and send it.
#   If anything goes wrong or looks dodgy, skip the envelope and move onto the
#   next one.
#
sub resend {
   print STDERR "   \$File::Find::dir   $File::Find::dir  \n" if $DEBUG;
   print STDERR "   \$File::Find::name  $File::Find::name \n" if $DEBUG;

# print a roadmap if we are looking at a directory.
   # File::Find will enter the directory and re-do this sub for its contents.
   return 1 if m/^\.\.?$/; # skip "this" and "parent" directory entries
   if ( -d $_) {
      print OUT " envelope directory: $File::Find::dir/$_ \n" ;
      return 1;
   }

# check the file
   print OUT "  envelope file $_: "; # note the lack of newline on the end
   # check it is a text file
   unless ( -T $_) {
      print OUT "file is not text. \n" ;
      return 2;
   }
   # check the file is not too big
   my $max               = 10 * 1024 * 1024;  # 10 Meg size limit
   if (&file_too_fat ($_, $max)) {
      print OUT "file is bigger than the maximum $max bytes. \n" ;
      return 2;
   }

   # check it is not a backup
   if ( m/\.bak\d*$/ ) {  # match env.bak, env.bak12
      print OUT "skipping this file because it is a backup. \n";
      return 3;
   }

# read the headers
   # most of these are Netscape's proprietary headers.
   # put the headers into a hash
   # if no headers are found, skip this file.
   my @envs           = &envs_to_list ($_);
   my $envs_count     = @envs;
   print OUT "file contains $envs_count envelopes. \n";
   return 4 if ($envs_count == 0);

   my $msg;
ENVELOPE:   foreach $msg ( @envs ) {
      # check that the envelope is not empty
      my $header_count      = (keys %$msg);
      unless ($header_count) {
         print OUT "envelope contains no headers. \n";
         next ENVELOPE;
      }
#      print OUT "envelope contains $header_count headers. \n";

# extract the message ID
      $msg->{'Message-Id'} =~ m/\s*(\S*)\s*/;
      my $message_file       = $1;
      print OUT "   message $message_file: "; # note the lack of newline

# extract the from address
      $msg->{'User-From'} =~ m/<([^>]*)/;
      # check that it is reasonably formed
      unless (&valid_email_address ($1)) {
         print OUT "crap sender address: <$1> \n";
         next ENVELOPE;
      }
      # check it isn't known spam
      if ( &unwanted_address ($1) ) {
         print OUT "refusing to re-send message from $1 \n";
         next ENVELOPE;
      }
      my $from              = $1;

# extract the to address
      # envelopes seem to only be addressed to one person.
      unless (defined $msg->{'Channel-To'}) {
         print OUT "missing recipient (Channel-To) address \n";
         next ENVELOPE;
      }
      $msg->{'Channel-To'} =~ m/<([^>]*)/;
      unless (&valid_email_address ($1)) {
         print OUT "crap recipient address: <$1> \n";
         next ENVELOPE;
      }
      # check it isn't known spam
      if ( &unwanted_address ($1) ) {
         print OUT "refusing to re-send message to $1 \n";
         next ENVELOPE;
      }
      my $to                 = $1;

# read the body
      unless (-T "$message_dir/$message_file") {
         print OUT "body file is missing, empty or not text. \n";
         next ENVELOPE;
      };
      # check the file is not too big
      if (&file_too_fat ("$message_dir/$message_file", $max)) {
         print OUT "body file is bigger than $max bytes. \n" ;
         next ENVELOPE;
      }
      my @body               = &file_to_array ("$message_dir/$message_file");
      chomp @body;

# send the message



#      &send_message ($from, $to, @body);



      print OUT "sent from $from to $to \n" ;

   } # foreach message

   return 0;
}


# subroutine
#   unwanted_address
# description
#   match an e-mail address against a list of spammers and other
#   unwanted people. And machines.
#
sub unwanted_address {
   my $address = shift;
   my @list = qw(
               wasteeater19@hotmail.com
               postmaster@bt
               unsubscribe
               e-groups
   );
   print STDERR "   unwanted \$address?     $address \n" if $DEBUG;
   my $item;
   foreach $item (@list) {
      print STDERR "   check against \$item   $item \n" if $DEBUG;
      return 1 if $address =~ m/$item/;
   }
   return 0;
}

# subroutine
#   valid_email_address
# description
#   basic check of an e-mail address. Use Mail::Address for RFC 822 compliance.
#   the regex matches if there's a "@" character, proceeded by a
#   letter, digit, underscore or dash, and followed by a group of
#   letters, digits, underscores and dashes, followed by a dot, followed
#   by a letter, digit, underscore or dash.
#
sub valid_email_address {
   my $address = shift;
   print STDERR "   valid \$address?     $address \n" if $DEBUG;
   return 1 if $address =~ m/[\w\-]+\@[\w\-]+\.[\w\-]+/;
   return 0;
}


# subroutine
#   file_too_fat
# description
#   file size check. how big is it?
#   A huge file causes an error.
#   return true if too big, false otherwise.
#
sub file_too_fat {

   my $file = shift;
   my $max  = shift;
   my (
               $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
               $size, $atime, $mtime, $ctime, $blksize, $blocks
   ) = stat "$file";
   print STDERR "   \$size      $size  \n" if $DEBUG;
   print STDERR "   \$max       $max   \n" if $DEBUG;
   return 1 if $size > $max;
   return 0;
}


# subroutine
#   envs_to_list
# description
#   read the contents of a file. The file contains envelopes
#   (sets of attribute-value pairs, seperated by integers).
#   eg
# 824
# Message-Id: G9ZL750A.BFU
# Parent: 0
# Header-Size: 526
# ...
#   Put each envelope's att-val pairs into a hash
#   Put this hash address in a hash
#   This is not a clever parser.
#   If each attribute is not unique, values will be overwritten.
#
sub envs_to_list {

   my $file         = shift;

   my $sep          = ':';
   my @envs         = (); # this holds the pointers to env hashes
   my %env;                  # atts and vals for one envelope
   my $attribute    = '';
   my $env_size     = 0;     # size of the envelope in bytes
   my $bytes_read   = 0;

   open (IN, "<$file") or die "can't open file $file: $!";

# read the next envelope
ENV:   while ( defined ($env_size        = <IN>) ) {
      chomp $env_size;
         # match a line that is a number like +3, 4567 or -89
      unless ($env_size =~ m/^[+-]?\d+$/) {
         print OUT "expected an envelope size but got '$env_size'. "; # look, no newlines.
         close IN;
         return @envs;
      }
      $bytes_read      = read IN, $_, $env_size;
      unless (defined $bytes_read) {
         print OUT "error reading file $file: $!. ";
         close IN;
         return @envs;
      };
      unless ($bytes_read) {
         print OUT "premature end of file in $file. "; # not sure this is right.
         close IN;
         return @envs;
      };
      #print STDERR "   \$_ \n$_ \n" if $DEBUG;

# split the attribute value pairs and put them in the hash
      # line format: "Error-Text:     Recipient: <geezer@domain.net>"
      while (m/
               ^          # from the start of each line
               ([^:]*)    # capture stuff to the 1st delimiter
               :          # match but ignore the field delimiter
               (.*)       # capture everything else
               $          # to the end of the line
      /gmx) {
         $env{$1} = $2;
         # there may be 7,000 envelopes in one file, so think before de-commenting
         #print STDERR "   \$1      $1 \n" if $DEBUG;
         #print STDERR "   \$2      $2 \n" if $DEBUG;
      } # while match
# add the hash to the uber-hash
      push @envs, {%env};
   } # while read IN

# clean up
   close IN or die "can't close file $file: $!";

# print the whole thing with refs
   if ($DEBUG) {
      my $href;
      my $role;
      for $href ( @envs ) {
          print STDERR "   { \n";
          for $role ( keys %$href ) {
              print STDERR "      $role \t=> $href->{$role} \n";
          }
          print STDERR "   } \n";
      }
   }

   return @envs;
}


# subroutine
#   file_to_array
# description
#   read the contents of a file and put it into an array
#
sub file_to_array {

   my $file = shift;

   open (IN, "<$file") or die "can't open file $file: $!";
   my @contents     = (); # this holds the message, 1 line per element
   @contents = <IN>;
   close IN            or die "can't close file $file: $!";
   print STDERR "\@contents    \n\n@contents  \n\n" if $DEBUG;

   return @contents;
}


# subroutine
#   send_message
# description
#   send an e-mail message the simplest way possible, using
#   the SMTP protocol. No message queueing or other fanciness.
#   end-of-line markers are added. No sanity checks are carried out.
#
sub send_message {

   my $from     = shift;
   my $to       = shift;
   my @body     = @_;
   my $server   = 'mta01.isp01.com';
#   my $server   = 'mta03.isp01.com';


   my $line;
   if ($DEBUG) {
      print STDERR "   \$from \t$from \n";
      print STDERR "   \$to   \t$to   \n";
      foreach $line (@body) {
         print STDERR "   \$line \t$line \n";
      }
   }

   use Net::SMTP;
   my $smtp = Net::SMTP->new($server);

   $smtp->mail ($from);
   $smtp->to   ($to);
   $smtp->data ();
   foreach $line (@body) {
      # put your rude word scanner here
      $smtp->datasend("$line");
   }
   $smtp->dataend();
   $smtp->quit;

}

#---------#---------#---------#---------#---------#---------#---------#---------


