#!/usr/bin/perl
#
# Tool for searching in PowerMTA's accounting data
#
# Copyright (c) 2002-2014, Port25 Solutions, Inc.  All Rights Reserved.
#
# @TODO: remove the globals for writing to the temporary file.
#        They make testing more difficult since one has to do some 
#        cleanup manually instead of letting the destructor do it

package Port25::Acctfind;


use strict;
use warnings;

my ($NAME,$VERSION);
($NAME,$VERSION) = ('$Id$' =~ / (\w+).pm (\d+)/);

use Getopt::Long qw//;      # for parsing options
if ($^O eq 'MSWin32') {     # for expanding file names
    require File::Glob;  import File::Glob;
}
use File::Temp qw/:mktemp/;
use POSIX;
use Port25::Acctfind::Storage;
use Storable qw(freeze thaw);
use Time::Local;


sub abexit;                           # cleanup handler


# this sub is called from the "alias" acctfind shell script
sub shell {
    @ARGV = @_;
    $0 = 'acctfind';
    my $acct = new Port25::Acctfind();
    
    eval { $acct->main(\@ARGV); };
    if ($@) {
        my $stderr = $acct->{stderr};
        print $stderr "$@\n";
        exit(1);
    }
    
    exit(0);
}


#
# The constructor
# We initialize all the variables and espcially the tables here
#
# @param   $class  the name of the class
# @return  an object of class $class
#
sub new {
    my $class = shift @_;
    my $self = bless {
          version => 0,
          stderr => *STDERR,
          matches => [],
          # Holding the current and the matched records
          record => {},
          # Used for building the keys for the hash containing a record
          keyPrefixStack => [],
          options => {},
          db => 0,
          usedTags => {},
          usedHeaders => {},
          usedArfs => {},
          }, ref($class) || $class;

    # Some default configurations we supply
    $self->{outputConfigs} = { 
       "html" => {
                 "document.header"      => <<HEAD,
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <title>Generated by Acctfind</title>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  </head>
<body>
HEAD
                 "document.footer"      => "</body></html>\n",
                 "table.prefix"         => "<table border=1 cellspacing=0>\n",
                 "table.postfix"        => "</table>\n",
                 "table.header.prefix"  => "<tr>\n<th>",
                 "table.header.infix"   => "</th><th>",
                 "table.header.postfix" => "</th>\n</tr>\n",
                 "table.row.prefix"     => "<tr>\n",
                 "table.row.infix"      => "",
                 "table.row.postfix"    => "\n</tr>\n",
                 "table.cell.prefix"    => "<td>",
                 "table.cell.default"   => "",
                 "table.cell.postfix"   => "</td>",
                 "separator.multipleDataPerField" => "<br>",
                 "replacements" => [
                                   '&' => '&amp;',
                                   '<' => '&lt;',
                                   '>' => '&gt;',
                                   ]
                 },
       "csv"  => {
                 "document.header"      => "",
                 "document.footer"      => "",
                 "table.prefix"         => "",
                 "table.postfix"        => "",
                 "table.header.prefix"  => "\"",
                 "table.header.infix"   => "\",\"",
                 "table.header.postfix" => "\"\n",
                 "table.row.prefix"     => "\"",
                 "table.row.infix"      => "\",\"",
                 "table.row.postfix"    => "\"\n",
                 "table.cell.prefix"    => "",
                 "table.cell.default"   => "",
                 "table.cell.postfix"   => "",
                 "separator.multipleDataPerField" => "; ",
                 "replacements" => [
                                   '"' => '""'
                                   ]
                 },
       "tsv"  => {
                 "document.header"      => "",
                 "document.footer"      => "",
                 "table.prefix"         => "",
                 "table.postfix"        => "",
                 "table.header.prefix"  => "",
                 "table.header.infix"   => "\t",
                 "table.header.postfix" => "\n",
                 "table.row.prefix"     => "",
                 "table.row.infix"      => "\t",
                 "table.row.postfix"    => "\n",
                 "table.cell.prefix"    => "",
                 "table.cell.default"   => "",
                 "table.cell.postfix"   => "",
                 "separator.multipleDataPerField" => "; ",
                 "replacements" => [
                                   '\t' => ' '
                                   ]
                 },
        "custom" => {
                 "document.header"      => "",
                 "document.footer"      => "",
                 "table.prefix"         => "",
                 "table.postfix"        => "",
                 "table.header.prefix"  => "",
                 "table.header.infix"   => "",
                 "table.header.postfix" => "",
                 "table.row.prefix"     => "",
                 "table.row.infix"      => "",
                 "table.row.postfix"    => "",
                 "table.cell.prefix"    => "",
                 "table.cell.default"   => "",
                 "table.cell.postfix"   => "",
                 "separator.multipleDataPerField" => "",
                 "replacements" => [
                                   ]
                 },
        "xml" => {
                 "document.header"      => "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?><document>",
                 "document.footer"      => "</document>",
                 "replacements" => [
                                   '&' => '&amp;',
                                   '<' => '&lt;',
                                   '>' => '&gt;',
                                   "'" => '&apos;',
                                   '"' => '&quot;'
                                   ]
                 },
        "prettyxml" => { # Only the presence of this entry is important.
                         # Values are taken from the 'xml' entry.
                 },
        "orig-csv"  => {
                 "document.header"      => "",
                 "document.footer"      => "",
                 "table.prefix"         => "",
                 "table.postfix"        => "",
                 "table.header.prefix"  => "",
                 "table.header.infix"   => ",",
                 "table.header.postfix" => "\n",
                 "table.row.prefix"     => "\"",
                 "table.row.infix"      => "\",\"",
                 "table.row.postfix"    => "\"\n",
                 "table.cell.prefix"    => "",
                 "table.cell.default"   => "",
                 "table.cell.postfix"   => "",
                 "separator.multipleDataPerField" => "; ",
                 "replacements" => [
                                   '"' => '""'
                                   ]
                 }
       };

    # This hash is used to translate shortcut options (which are also usable 
    # as field specifiers with the -match option) to the tags used in the 
    # internally build record.
    # The keys of the tag hash are the version numbers of PowerMTA's 
    # accounting, so the matches have to be (re)build for every accounting file.
    # The other entries are for table output.
    $self->{fieldInfo} = {
                '_type' =>         {
                                   tag          => {csv => 'type'},
                                   columnHeader => "Type",
                                   priority     => 0   # first printed field
                                   },
                # shortcuts for tags from the delivered and bounced records
                # KEYS MUST BE ALL LOWERCASE!
                'bouncecat' =>     {
                                   tag          => {csv => 'bounceCat'},
                                   columnHeader => "Bounce Category",
                                   priority     => 10900
                                   },
                'dlvtlscipher' =>  {
                                   tag          => {csv => 'dlvTlsCipher'},
                                   columnHeader => "Delivery TLS Cipher",
                                   priority     => 23000
                                   },
                'dlvtlsprotocol' => {
                                   tag          => {csv => 'dlvTlsProtocol'},
                                   columnHeader => "Delivery TLS Protocol",
                                   priority     => 22000
                                   },
                'dsnaction' =>     {
                                   tag          => {csv => 'dsnAction'},
                                   columnHeader => "DSN Action",
                                   priority     => 1000
                                   },
                'dsndiag' =>       {
                                   tag          => {csv => 'dsnDiag'},
                                   columnHeader => "DSN Diagnostics",
                                   priority     => 11000
                                   },
                'dsnmta' =>        {
                                   tag          => {csv => 'dsnMta'},
                                   columnHeader => "DSN MTA",
                                   priority     => 12000
                                   },
                'dsnstatus' =>     {
                                   tag          => {csv => 'dsnStatus'},
                                   columnHeader => "DSN Status",
                                   priority     => 13000
                                   },
                'dstip' =>         {
                                   tag          => {csv => 'dlvDestinationIp'},
                                   columnHeader => "Destination IP",
                                   priority     => 10000
                                   },
                'dsttype' =>       {
                                   tag          => {csv => 'dlvType'},
                                   columnHeader => "Delivery Type",
                                   priority     => 20000
                                   },
                'envid' =>         {
                                   tag          => {csv => 'envId'},
                                   columnHeader => "Envelope Id",
                                   priority     => 5000
                                   },
                'esmtp' =>         {
                                   tag          => {csv => 'dlvEsmtpAvailable'},
                                   columnHeader => "SMTP Extensions",
                                   priority     => 18000
                                   },
                'from' =>          {
                                   tag          => {csv => 'orig'},
                                   columnHeader => "From",
                                   priority     => 4000
                                   },
                'header' =>        {
                                   tag          => {csv => 'header'},
                                   columnHeader => "Header",
                                   priority     => 99999
                                   },
                'jobid' =>         {
                                   tag          => {csv => 'jobId'},
                                   columnHeader => "Job ID",
                                   priority     => 21000
                                   },
                'origto' =>        {
                                   tag          => {csv => 'orcpt'},
                                   columnHeader => "Original To",
                                   priority     => 15000
                                   },
                'queue' =>         {
                                   tag          => {csv => 'queue'},
                                   columnHeader => "Queue",
                                   priority     => 19500
                                   },
                'size' =>          {
                                   tag          => {csv => 'dlvSize'},
                                   columnHeader => "Size",
                                   priority     => 6000
                                   },
                'smtpuser' =>      {
                                   tag          => {csv => 'rcvSmtpUser'},
                                   columnHeader => "SMTP User",
                                   priority     => 21500
                                   },
                'srcip' =>         {
                                   tag          => {csv => 'dlvSourceIp'},
                                   columnHeader => "Source IP",
                                   priority     => 3000
                                   },
                'srcmta' =>        {
                                   tag          => {csv => 'srcMta'},
                                   columnHeader => "Source MTA",
                                   priority     => 16000
                                   },
                'srctype' =>       {
                                   tag          => {csv => 'srcType'},
                                   columnHeader => "Source Type",
                                   priority     => 17000
                                   },
                'timelogged' => {
                                   tag          => {csv => 'timeLogged'},
                                   columnHeader => "Time Logged",
                                   priority     => 9000
                                   },
                'timemlsssigned' => {
                                   tag          => {csv => 'timeMlssSigned'},
                                   columnHeader => "Time Mlss Signed",
                                   priority     => 7600
                                   },
                'timequeued' =>    {
                                   tag          => {csv => 'timeQueued'},
                                   columnHeader => "Time Queued",
                                   priority     => 7000
                                   },
                'to' =>            {
                                   tag          => {csv => 'rcpt'},
                                   columnHeader => "To",
                                   priority     => 2000
                                   },
                'totalsecondsqueued' =>            {
                                   tag          => {csv =>'totalSecondsQueued'},
                                   columnHeader => "Total Seconds Queued",
                                   priority     => 9500
                                   },
                'vmta' =>          {
                                   tag          => {csv => 'vmta'},
                                   columnHeader => "Virtual MTA",
                                   priority     => 19000
                                   },
                'vmtapool' =>      {
                                   tag          => {csv => 'vmtaPool'},
                                   columnHeader => "Virtual MTA Pool",
                                   priority     => 19100
                                   },
                # shortcuts for tags from the receipt records
                'receivedfromip' => {
                                   tag          => {csv => 'rcvSourceIp'},
                                   columnHeader => "Received From IP",
                                   priority     => 50000
                                   },
                'receivedforip' => {
                                   tag          => {csv => 'rcvDestinationIp'},
                                   columnHeader => "Received For IP",
                                   priority     => 51000
                                   },
                # shortcuts for tags from the remote bounce/status records
                'dsnreportingmta' => {
                                   tag          => {csv => 'dsnReportingMta'},
                                   columnHeader => "DSN Reporting MTA",
                                   priority     => 60000
                                   },
                # shortcuts for tags from the feedback loop records
                'arf' =>           {
                                   tag          => {csv => 'arf'},
                                   columnHeader => "ARF Fields",
                                   priority     => 100000
                                   },
                'feedbacktype' =>  {
                                   tag          => {csv => 'feedbackType'},
                                   columnHeader => "Feedback Type",
                                   priority     => 73000
                                   },
                'feedbackformat' => {
                                   tag          => {csv => 'format'},
                                   columnHeader => "Feedback Format",
                                   priority     => 74000
                                   },
                'reporteddomain' => {
                                   tag          => {csv => 'reportedDomain'},
                                   columnHeader => "Reported Domain",
                                   priority     => 70000
                                   },
                'reportingmta' =>  {
                                   tag          => {csv => 'reportingMTA'},
                                   columnHeader => "Reporting MTA",
                                   priority     => 71000
                                   },
                'reportingsourceip' => {
                                   tag          => {csv => 'repSourceIp'},
                                   columnHeader => "Reporting Source IP",
                                   priority     => 72000
                                   },
                'useragent' =>     {
                                   tag          => {csv => 'userAgent'},
                                   columnHeader => "User Agent",
                                   priority     => 75000
                                   }
                };
                
    # Here we add the real field names (tags) from csv acct files
    # that are different from their shortcut option
    $self->{fieldInfo}{'orig'} =              $self->{fieldInfo}{'from'},
    $self->{fieldInfo}{'rcpt'} =              $self->{fieldInfo}{'to'},
    $self->{fieldInfo}{'orcpt'} =             $self->{fieldInfo}{'origto'},
    $self->{fieldInfo}{'dlvtype'} =           $self->{fieldInfo}{'dsttype'},
    $self->{fieldInfo}{'dlvsourceip'} =       $self->{fieldInfo}{'srcip'},
    $self->{fieldInfo}{'dlvdestinationip'} =  $self->{fieldInfo}{'dstip'},
    $self->{fieldInfo}{'dlvesmtpavailable'} = $self->{fieldInfo}{'esmtp'},
    $self->{fieldInfo}{'dlvsize'} =           $self->{fieldInfo}{'size'},
    $self->{fieldInfo}{'rcvsourceip'} =       $self->{fieldInfo}{'receivedfromip'},
    $self->{fieldInfo}{'rcvdestinationip'} =  $self->{fieldInfo}{'receivedforip'},
    $self->{fieldInfo}{'repsourceip'} =       $self->{fieldInfo}{'reportingsourceip'},
    $self->{fieldInfo}{'rcvsmtpuser'} =       $self->{fieldInfo}{'smtpuser'},
    
    # Hash which determines which fields are time fields
    $self->{isTimeField} = {
                           "record.timeLogged"     => 1,
                           "record.timeQueued"     => 1,
                           "record.timeMlssSigned" => 1,
                           };

    # Two tables for translating the records keys to integers and back.
    $self->{key2int} = {};
    $self->{int2key} = [];

    return $self;
}

#
# A small helper method to make the accesses more readable
#
# @param   $field  the field name (=shortcut option) from which to get the tag
# @return          the tag for the $field
#
sub getTag {
    my $self = shift @_;
    my $field = shift @_;
    
    $field =~ s/(arf|header):.+/$1/; # we use the key "header" for all headers
                                     # and "arf" for all arf fields
    
    return $self->{fieldInfo}{$field}{tag}{$self->{version}};
}


# a flag signaling that we were signaled to exit
my $interrupted = 0;


#
# This sub does all the work
#
# @param  $ARGV  a reference to the @ARGV array
#
sub main {
    my $self = shift @_;
    my $ARGV = shift @_;
    
    $self->{options} = $self->getOptions($ARGV);
    return unless $self->{options};
    
    my %typesList = ();
    if (exists $self->{options}{"delivered"}) {
        $typesList{"d"}      = $self->{options}{"delivered"};
    }
    if (exists $self->{options}{"localBounced"}) {
        $typesList{"b"}      = $self->{options}{"localBounced"};
    }
    if (exists $self->{options}{"remoteBounced"}) {
        $typesList{"rb"}     = $self->{options}{"remoteBounced"};
    }
    if (exists $self->{options}{"bounced"}) {
        $typesList{"b"}      = $self->{options}{"bounced"};
        $typesList{"rb"}     = $self->{options}{"bounced"};
    }
    if (exists $self->{options}{"tempError"}) {
        $typesList{"t"}      = $self->{options}{"tempError"};
    }
    if (exists $self->{options}{"tempQueueError"}) {
        $typesList{"tq"}     = $self->{options}{"tempQueueError"};
    }
    if (exists $self->{options}{"received"}) {
        $typesList{"r"}      = $self->{options}{"received"};
    }
    if (exists $self->{options}{"remoteStatus"}) {
        $typesList{"rs"}     = $self->{options}{"remoteStatus"};
    }
    if (exists $self->{options}{"feedbackLoop"}) {
        $typesList{"f"}      = $self->{options}{"feedbackLoop"};
    }
    # empty list is not acceptable, use a default
    unless (keys %typesList) {
        %typesList = ("d" => 1, "b" => 1, rb => 1);
    }
    
    my $stderr = $self->{"stderr"};
    my $hashCounter = $self->{options}{hashmarks}
        if exists $self->{options}{hashmarks};
    my $nSkip = $self->{options}{skip} if exists $self->{options}{skip};
    
    my $recordCounter = 0;
    
    $self->openDb();
    
    FILE:
    foreach my $file (@{$ARGV}) {
        
        eval { $self->openAcctFile($file) };
        if ($@) {
            print $stderr "$0: Error when opening accounting file '$file': $@";
            next;
        }
        
        if ($self->{options}{verbose}) {
            my $now = localtime;
            
            print $stderr "%% $now accounting file $file\n";
            print $stderr "%% $now temporary file ", $self->{db}->dbFile(),"\n";
            print $stderr "%% verbosity=$self->{options}{verbose}\n"
                if $self->{options}{verbose} > 1;
        }
        
        if ($nSkip) {
            print $stderr "%% skipping first $self->{options}{skip} records\n";
            
            eval {
                while ($nSkip > $recordCounter) {
                    $self->readAcctRecord();
                    $recordCounter++;
                    print $stderr "%" unless $recordCounter % 1000;
                    
                    if ($nSkip == $recordCounter) {
                        my $now = localtime;
                        print $stderr "\n";
                        print $stderr "%% $now skipped $nSkip records\n";
                        last;
                    }
                }
            };
            
            print $stderr "$0: Error when reading accounting data:\n$@\n" if $@;
        }
        
        $self->{record} = {};
        
        while (eval { $self->readAcctRecord() }) {
            last FILE if $interrupted;
            
            $recordCounter++;
            
            if ($self->{options}{hashmarks}) {
                unless (--$hashCounter) {
                    print $stderr "#";
#                    if ($self->{options}{verbose}) {
#                      # print details about matched records vs total records
#                    }
                    $hashCounter = $self->{options}{hashmarks};
                }
            }
            
            unless ($typesList{$self->{record}{"record.type"}}) {
                $self->{record} = {};
                next;
            }
            
            my $rec = $self->{record};
            my $matched = 1;
            foreach my $match (@{$self->{matches}}) {
                $matched = $self->recordMatchesMatch($rec, $match);
                last unless $matched;
            }
            
            unless ($matched) {
                $self->{record} = {};
                next;
            }
            
            # if we don't have a list of output fields, we have to
            # determine which fields are used
            if (@{$self->{options}{outputFields}} == 0) {
                foreach my $key (keys %$rec) {
                    $self->{usedTags}{$key}++;
                    if ($key eq 'record.header') {
                        foreach my $header (@{$rec->{'record.header'}}) {
                            $self->{usedHeaders}{$header->{name}}++;
                        }
                    }
                    elsif ($key eq 'record.arf') {
                        foreach my $arf (@{$rec->{'record.arf'}}) {
                            $self->{usedArfs}{$arf->{name}}++;
                        }
                    }
                }
            }
            
            my $doStop = $self->{db}->storeRecord($rec);
            
            $self->{record} = {};
            
            last FILE if $doStop;
            
            if (($self->{options}{maxMatch} > 0) &&
                ($self->{db}->{dbEntries} >= $self->{options}{maxMatch})) {
                my $nrecs = $recordCounter;
                my $max = $self->{options}{maxMatch};
                print $stderr "\n";
                print $stderr "Found $max matches after $nrecs records.\n";
                $self->closeAcctFile();
                last FILE;
            }
        }
        
        print $stderr "\n" if $self->{options}{hashmarks};
        print $stderr "$0: Error when reading accounting data:\n"
                    . "$@\n"
                    . "Stopping to read data!\n\n" if $@;
        
        $self->closeAcctFile();
    }
    
    if ($self->{options}{verbose}) {
        my $dbFile = $self->{db}->dbFile();
        my $now = localtime;
        my $size = (stat $dbFile)[7];
        
        print $stderr "%% $now finishing\n";
        print $stderr "%% $dbFile used $size bytes\n";
#      if ($self->{options}{verbose} > 1) {
#        my %r = map { $_ => 1 } keys %records;
#        my $n = keys %r;
#        print $stderr "%% $dbfile contains $n unique keys\n";
#      }
    }
    $self->printRecords unless $interrupted;
    $self->closeDb();
}


sub openAcctFile {
    my $self = shift @_;
    my $file = shift @_;
    my $stderr = $self->{stderr};
    
    $self->{acctFileFormat} = $self->accountingFormatOfFile($file);
    die 'not an accounting file' unless $self->{acctFileFormat};
    die "The binary accounting file format support is not available anymore.\n"
        if $self->{acctFileFormat} eq 'binary';
    
    my $fields = csv2Array($self->{headerLine});
    unless (defined $fields) {
        die "When parsing csv header:\n$@";
    }
    
    # add prefix "record."
    # This should be removed but has lots of places where it is used :-(
    for (my $i = 0; $i <= $#{$fields}; $i++) {
        $fields->[$i] = "record.$fields->[$i]";
    }
    $self->{version} = "csv";
    $self->{csvFields} = $fields;
    
    # Now, that we have the version number, we can setup the matches and the sorting
    $self->{matches} = $self->setupMatches(@{$self->{options}{match}},
                                           $self->shortcutOptionsToMatch($self->{options}));
    
    $self->setupSorting();
    
    return 1;
}

sub readAcctRecord {
    my $self = shift @_;
    
    my $FH = $self->{csvReadHandle};
    my $line = <$FH>;
    
    return 0 if ! defined $line;
    
    my $rec = $self->csv2Record($line, $self->{csvFields});
        
    $self->{record} = $rec;
    
    return 1;
}

sub closeAcctFile {
    my $self = shift @_;
    
    close $self->{csvReadHandle};
}


# openDb: called to open DB for storing matched records
sub openDb {
    my $self = shift;
    
    if ($self->{options}{sortBy}) {
        eval { require Port25::Acctfind::Storage::SQLite; 
               require DBD::SQLite; }; # workaround for a suspected Perl bug in
                                       # at least Perl 5.8.5, 5.10.0 is fine:
                                       # requiring ...::SQLite succeeds on the
                                       # 2nd attempt if DBD::SQLite is missing
        if (!$@) {
            Port25::Acctfind::Storage::SQLite->import();
            $self->{db} = new Port25::Acctfind::Storage::SQLite(
                                                      $self->{options}{tmpDir},
                                                      $self);
            return;
        }

        eval { require Port25::Acctfind::Storage::DBFile; };
        if (!$@) {
           Port25::Acctfind::Storage::DBFile->import();
           
           $self->{db} = new Port25::Acctfind::Storage::DBFile(
                                                      $self->{options}{tmpDir},
                                                      $self);
           return;
        }
        
        die "$0: Fatal error: No storage module that supports sorting available.\n";
    }
    else {
        require Port25::Acctfind::Storage::File;
        Port25::Acctfind::Storage::File->import();
        
        $self->{db} = new Port25::Acctfind::Storage::File(
                                                      $self->{options}{tmpDir},
                                                      $self);
    }
}


# closeDb: called to close and discard the DB
sub closeDb {
    my $self = shift;
    
    $self->{db} = undef;
}


# Freeze a record
# 
# @param   $rec  the record to serialize
# @return  a string containing the serialized version of the record
sub freezeRec {
    my $self = shift @_;
    my $rec = shift @_;
    my @rec;
    
    if (@{$self->{options}{outputFields}} == 0) {
        # We need all entries of the record
        foreach my $key (keys %$rec) {
            $rec[$self->key2int($key)] = $rec->{$key};
        }
    }
    else {
        # We need only selected entries of the record
        # record.type is an internal special tag which is required 
        # to "survive"
        $rec[$self->key2int('record.type')] = $rec->{'record.type'};
        
        # eliminate all not selected arf fields and headers the lists/arrays
        for my $listName ('arf', 'header') {
            if (my $lines = $rec->{'record.' . $self->getTag($listName)}) {
                my @lines = ();
                foreach my $line (@$lines) {
                    foreach my $field (@{$self->{options}{outputFields}}) {
                        if ($field =~ /$listName:$line->{name}/i) {
                            push @lines, $line;
                        }
                    }
                }
                if (@lines) {
                    $rec->{'record.' . $self->getTag($listName)} = \@lines;
                }
                else {
                    delete $rec->{'record.' . $self->getTag($listName)};
                }
            }
        }
        
        foreach my $field (@{$self->{options}{outputFields}}) {
            foreach my $tag ($self->fieldToTags($field)) {
                $rec[$self->key2int($tag)] = $rec->{$tag};
            }
        }
    }
    
    return freeze(\@rec);
}


# Thaw a record
#
# @param   $ser  a string containing a serialized record (created by
#                freezeRec)
# @return  a reference to the thawed record (hash)
sub thawRec {
    my $self = shift @_;
    my @rec = @{thaw(shift @_)};
    my %rec;

    for (my $i = 0; $i < @rec; $i++) {
        $rec{$self->int2key($i)} = $rec[$i] if defined $rec[$i];
    }
    
    return \%rec;
}


# Translates an output field to a list of corresponding tags (keys in the 
# record hash)
# 
# @param   $field   the field to inquire the tags for
# @return  an array with the tags
sub fieldToTags {
    my $self = shift @_;
    my $field = shift @_;
    my @tags;
    
    @tags = ("record." . $self->getTag($field));
    
    return @tags;
}


# Translates a record's key to an integer.
# If the key has not been seen before it assigns a new number to it
#
# @param   $key  a key of a records hash
# @return  an integer identifying the key
sub key2int {
    my $self = shift @_;
    my $key = shift @_;
    
    unless (exists $self->{key2int}{$key}) {
        # this is a "new" key, add it to the translation tables
        $self->{key2int}{$key} = @{$self->{int2key}};
        push(@{$self->{int2key}}, $key);
    }
    
    return $self->{key2int}{$key};
}


# Translates an integer to a record's key
#
# @param   $int  the integer to get the key for
# @return  a key for a record's hash. If the integer wasn't assigned to a 
#          key by sub key2int before, undef will be returned
sub int2key {
    my $self = shift @_;
    
    return $self->{int2key}[shift @_];
}



#
# Determines whether the record matches the match.
# The match can contain multiple field, pattern pairs.
# We return true if at least the content of one field matches it's 
# corresponding pattern.
#
# @param   $rec    a reference to the record hash with the data.
# @param   $match  a reference to the hash containing the field, pattern pairs.
# @return          1: if at least one match matches
#                  0: otherwise
#
sub recordMatchesMatch {
    my ($self, $rec, $match) = @_;
    
    foreach my $tag (keys %$match) {
        if ($tag eq 'all') {
            foreach my $recordField (keys %$rec) {
                next unless $rec->{$recordField};
                if (my ($field) = ( $recordField =~ /^record.(arf|header)/)) {
                    for my $line (@{$rec->{"record.$field"}}) {
                        return 1 if $line->{content} =~ $match->{$tag};
                    }
                }
                else {
                    return 1 if $rec->{$recordField} =~ $match->{$tag};
                }
            }
        }
        elsif (my ($list, $name) = ($tag =~ /(arf|header)\s*:\s*(.+)/)) {
            foreach my $line (@{$rec->{'record.' . $self->getTag($list)}}) {
                return 1 if    ($line->{name} =~ /^$name$/i)
                            && ($line->{content} =~ $match->{$tag});
            }
        }
        elsif (exists $rec->{"record.$tag"}) {
            return 1 if $rec->{"record.$tag"} =~ $match->{$tag};
        }
    }
    
    return 0;
}


#
# Builds a regular expression for matching.
#
# @param    @patterns    an array of either a fixed string (for simple string
#                        matching), or a perl regular expression in m/.../,
#                        possibly suffixed with an 'i' (for case-insensitive
#                        matching) and optionally prefixed by a field list 
#                        enclosed in square brackets
# @return                reference to an array of hashes, each containing pairs 
#                        of a field name and a regular expressions.
#                        At least one of the expressions of every hash has to 
#                        match on the according field later evaluate as true
#
sub setupMatches {
    my $self = shift @_;
    my @patterns = @_;
    my $regexp;
    my ($match, @matches);
    my ($tags, $pattern);
    
    foreach $_ (@patterns) {
        ($tags, $pattern) = /^\s*\[(.*?)\]\s*(.+)/;
        unless ($pattern) {
            $pattern = $_;
            $tags = "all";
        }
        
        if ($pattern =~ m|^m/(.*)/(i)?$|) {
            $regexp = $1;
            $regexp = '(?i)' . $regexp if (defined $2);
        }
        else {
            # string matching
            $regexp = "(?i)\Q$pattern\E";
        }
        
        $match = {};
        $tags =~ tr/,/ /;
        $tags =~ s/\s+/ /g;
        foreach my $field (split(/[\s,]/, lc $tags)) {
            if (defined ($self->{fieldInfo}{$field})) {
              # we need this check first to prevent the next check
              # from creating a key in the fieldInfo hash =:-(
                if (defined ($self->getTag($field))) {
                    $field = $self->getTag($field);
                }
            }
            $match->{$field} = $regexp;
        }
        
        push(@matches, $match);
    }

    return \@matches;
}


#
# Gets and checks options, also expanding file names in @ARGV
#
# @param    $ARGV   a reference to the ARGV array (containing the options and 
#                   files)
# @return           reference to hash of options
#
sub getOptions {
    my $self = shift @_;
    my $ARGV = shift @_;
    local @ARGV = @{$ARGV};
    my %options;
    my ($output, @outputFields);
    
    if ($^O eq 'MSWin32') {
        # convert from /foo[:=]bar Windows-style options
        @ARGV = map {s|^/(\w+)[:=](.*)|--$1=$2| or s|^/|--|;  $_} @ARGV;
    }
    
    Getopt::Long::GetOptions(\%options, "help",
                             "match=s@", "version", "iso-times",
                             "output=s", "sortBy=s", "format=s",
                             "maxMatch=i", "tmpDir=s", "skip:i",
                             "hashmarks:i", "debug+", "verbose+",
                             # record types
                             "delivered+", "bounced+",
                             "tempError+", "tempQueueError+", "received+",
                             "localBounced+", "remoteBounced+",
                             "remoteStatus+", "feedbackLoop+",
                             # record type aliases from the config file
                             "delivery+", "bounce+",
                             "transient+", "transient-queue+", "receipt+",
                             # Shortcut options:
                             map "$_=s@", keys %{$self->{fieldInfo}}
                            )
        or die "$0: bad options\n";
    
    if ($options{version}) {
        print "$NAME r$VERSION\n";
        return undef;
    }
    
    if ($options{help}) {
        PrintHelp();
        return undef;
    }
    
    # translate the aliases to acctfind's record type names
    $options{delivered}      = 1 if $options{delivery};
    $options{bounced}        = 1 if $options{bounce};
    $options{tempError}      = 1 if $options{transient};
    $options{tempQueueError} = 1 if $options{'transient-queue'};
    $options{received}       = 1 if $options{receipt};
    
    # check whether a --match or a shortcut was given and 
    # use a default for --match otherwise
    unless (defined $options{match}) {
        my $matchSpecified = 0;
        foreach my $field (keys %{$self->{fieldInfo}}) {
            $matchSpecified = defined $options{$field};
            last if $matchSpecified;
        }
        $options{match} = ["m/./"] unless $matchSpecified;
    }
    
    unless (@ARGV) {
        die "$0: please specify at least one accounting file\n";
    }
    
    if (exists $options{sortBy}) {
        $options{sortBy} = lc $options{sortBy};
        $options{sortBy} =~ s/^(header|arf)_/$1:/;
        die "$0: invalid parameter to option --sortBy\n"
            unless ((exists $self->{fieldInfo}{$options{sortBy}})
                or ($options{sortBy} eq "todomain")
                or ($options{sortBy} eq "fromdomain")
                or ($options{sortBy} =~ /^(?:arf|header):./));
    }
    
    if (exists $options{output}) {
        my $fields;
        
        ($output, $fields) = ($options{output} =~ /^\s*([-\w]+)\s*(\[.*\])?/i);
        
        if ($output) {
            $output = lc $output;
            
            die "$0: illegal output format specified.\n"
                unless defined $self->{outputConfigs}{$output};
            
            if ($output =~ /custom/i) {
            
                unless (defined $options{format}) {
                    die "$0: missing option --format.\n"
                      . "(This option is required if you specify"
                      . " --output custom[...]).\n";
                }
            }
            
            if (defined $options{format}) {
                die "$0: cannot parse format file.\n" unless 
                    $self->evaluateFormatFile($output, $options{format});
            }
            
            if ($fields) {
                $fields =~ s/\[\s*(.*)\s*\]/$1/;
                $fields =~ s/,/ /g;
                $fields =~ s/\s+/ /g;
                @outputFields = split(/[\s,]/, lc $fields);
                die "$0: empty output field list specified.\n"
                    unless @outputFields;
                
                foreach my $field (@outputFields) {
                    $field =~ s/^(header|arf)_/$1:/;
                    die "$0: header name missing for output field 'header'.\n"
                        if $field =~ /^header:?$/;
                        
                    die "$0: arf field name missing for output field 'arf'.\n"
                        if $field =~ /^arf:?$/;
                        
                    die "Illegal output field '$field' specified.\n"
                        unless ($field =~ /^(?:header|arf):/i)
                            || exists $self->{fieldInfo}{lc $field};
                }
            }
            else {
                @outputFields = ();
            }
        }
        else {
            die "$0: option 'output' of illegal format\n";
        }
    }
    else {
        $output = "raw";
        
        if (defined $options{format}) {
            my $stderr = $self->{stderr};
            print $stderr "$0: Option --format without --output detected.",
                          " Ignoring --format.\n";
        }
    }
    
    $options{output} = $output;
    $options{outputFields} = \@outputFields;
    
    $options{maxMatch} = 0 unless defined $options{maxMatch};
    
    if (exists $options{hashmarks}) {
        $options{hashmarks} = 1000 if ($options{hashmarks} <= 0);
    }
    
    if (exists $options{skip}) {
        if ($options{skip} < 0) {
            die "$0: option 'skip' must be a positive integer\n";
        }
    }
    
    # on Linux bsd_glob chokes on file names such as "acct.1" ("not numeric")
    if ($^O eq 'MSWin32') {
        # expand file names since the Win32 "shell" doesn't
        @ARGV = map {File::Glob::bsd_glob($_)} @ARGV;
    }
    
    unless (@ARGV) {
        die "$0: no files matched\n";
    }
    
    # set up interrupt handlers
    $SIG{HUP} = \&abexit unless $^O eq "MSWin32";
    $SIG{INT} = $SIG{KILL} = $SIG{TERM} = \&abexit;
    
    @{$ARGV} = @ARGV;
    return \%options;
}


# abexit: an exit handler to set a flag ($interrupted) which causes the 
#         program to exit after reading the next record
sub abexit {
    my $self = shift @_;
    my $what = shift;
    my $verbose = shift;
    
    print STDERR "\n";
    
    if (exists $SIG{$what}) {
        # got a signal
        warn "$0: received SIG$_[0], exiting\n";
    }
    else {
        # some other reason to stop
        warn "$0: $what\n";
    }
    
#    if (defined $dbfile) {
#        if ($verbose) {
#            my $now = localtime;
#            my $size = (stat $dbfile)[7];
#            
#            print STDERR "%% $now aborting\n";
#            print STDERR "%% $dbfile used $size bytes\n";
#            print STDERR "%% $recordCounter records read\n";
#        }
#    }
    
    $interrupted = 1;
}


#
# Reads and parses a format file. It modifies the hash found under $output
# in the %outputConfigs hash. Escape sequences for \n, \r, and \t in the
# value part are converted to the corresponding escape charaters.
#
# @param   $output  name of the output style to modify (custom is the empty one)
# @param   $file    name of the format file
# @return           0: errors while parsing
#                   1: no error while parsing
#
sub evaluateFormatFile {
    my $self = shift @_;
    my $output = shift @_;
    my $file = shift @_;
    my ($key, $value, $arraymode);
    my $arrayContent;
    
    open(FORMAT, "<$file") or die "$0: could not open format file $file.\n";
    my $line = 0;
    my $err = 0;
    $key = " dummy ";
    while (<FORMAT>) {
        $line++;
        chomp;
        next if /^\s*$/;
        next if /^\s*#/;
        
        if (/^\s*([\w.]+?)\s*=>\s*(\[\s*)?(.*?)\s*$/) {
            my $oldArraymode = $arraymode;
            my $oldKey = $key;
            
            $key = lc $1;
            $arraymode = $2;
            $value = $3;
            
            delete $self->{outputConfigs}{$output}{$key};
            $self->{outputConfigs}{$output}{$key} = [] if $arraymode;
            
            if ($oldArraymode) {
                my ($error, @items) = splitItems($arrayContent);
                $arrayContent = "";
                
                if ($error) {
                    my $stderr = $self->{stderr};
                    print $stderr "$0: $error before line '$line' of $file.\n";
                    print $stderr "Ignoring value for '$oldKey'\n";
                    $err++;
                }
                else {
                    push @{$self->{outputConfigs}{$output}{$oldKey}}, @items;
                }
            }
        }
        else {
            if ($key ne " dummy ") {
                $value = $_;
                $value =~ s/^\s*(.*?)\s*$/$1/;
            }
            else {
                my $stderr = $self->{stderr};
                print $stderr "$0: Couldn't parse line $line of $file.\n";
                $err++;
                next;
            }
        }
        
        $value =~ s/^(["']?)(.*)\1$/$2/;
            
        # Replacing \\n with \n and \\\\n with \\n
        $value =~ s/([^\\])\\n/$1\n/g;
        $value =~ s/^\\n/\n/g;
        $value =~ s/\\\\n/\\n/g;

        # Replacing \\r with \r and \\\\r with \\r
        $value =~ s/([^\\])\\r/$1\r/g;
        $value =~ s/^\\r/\r/g;
        $value =~ s/\\\\r/\\r/g;

        # Replacing \\t with \t and \\\\t with \\t
        $value =~ s/([^\\])\\t/$1\t/g;
        $value =~ s/^\\t/\t/g;
        $value =~ s/\\\\t/\\t/g;

        if ($arraymode) {
            $arrayContent .= $value;
        }
        else {
            $self->{outputConfigs}{$output}{$key} .= $value;
        }
    }
    
    if ($arraymode) {
        my ($error, @items) = splitItems($arrayContent);
        $arrayContent = "";
        
        if ($error) {
            my $stderr = $self->{stderr};
            print $stderr "$0: $error before line '$line' of $file.\n";
            print $stderr "Ignoring value for '$key'\n";
            $err++;
        }
        else {
            push @{$self->{outputConfigs}{$output}{$key}}, @items;
        }
    }
    
    delete $self->{outputConfigs}{$output}{" dummy "};
    
    return $err ? 0 : 1;
}


#
# Splits a comma separated list (string) into items.
# The list has to be terminated with an unquoted ']' at the end of a line
# (only following whitespace allowed).
# 
# @param   $string  the comma separated list to split
#
# @return  $error   an error message describing the error or undef if no
#                   error occured
# @return  @items   an array containing the items found
#
sub splitItems {
    my $string = shift @_;
    my $quote;
    my @items;
    my $error;
    
    return ($error, @items) if $string =~ /^\s*]\s*$/;
    
    my $lastItem;
    my $terminator;
    
    my @matches = ($string =~ /\G\s*(["']?)(.*?)\1\s*,/gc);
    ($quote, $lastItem, $terminator) =
                                ($string =~ /\G\s*(["']?)(.*?)\1\s*(\])\s*$/gc);
    my ($remainder) = ($string =~ /\G(.*)/);
    
    my $i = 1;
    @items = grep {$i ^= 1} @matches;
    
    if ($terminator) {
        push @items, $lastItem;
    }
    elsif ($remainder && ($remainder !~ /^\s+$/)) {
        $error = "Could not parse '$remainder'";
    }
    else {
        $error = "Could not find list terminator ']'";
    }
    
    return ($error, @items);
}


#
# Initializes the variables that control sorting.
# Must be called after opening the DB since sorting is DB dependent.
#
sub setupSorting {
    my $self = shift @_;
    my $options = $self->{options};
    my $sortByDomain = 0;
    my $sortingTag;
    
    if (exists $options->{sortBy}) {
        if (exists $self->{fieldInfo}{$options->{sortBy}}) {
            $sortingTag = $self->getTag($options->{sortBy});
        }
        elsif ($options->{sortBy} =~ /^(?:arf|header)\s*:\s*(.*)/) {
            $sortingTag = $options->{sortBy};
        }
        elsif ($options->{sortBy} eq 'todomain') {
            $sortByDomain = 1;
            $sortingTag = $self->getTag('to');
        }
        elsif ($options->{sortBy} eq 'fromdomain') {
            $sortByDomain = 1;
            $sortingTag = $self->getTag('from');
        }
        # This can't happen since $options{sortBy} has been verified
        # in getOptions()
        else {
            die "$0: invalid parameter to option --sortBy\n";
        }
        
        $sortingTag = 'record.' . $sortingTag;
        
        $self->{db}->setupSorting($sortingTag, $sortByDomain);
    }
}


#
# We have many options that are for special cases.
# These (ui) "shortcuts" are converted to -match options here,
# so we can later work with -match only.
#
# @param    $opt    a reference to the hash containing the options
# @return           an array containing all the expanded -match options
#
sub shortcutOptionsToMatch {
    my $self = shift @_;
    my $opt = shift @_;
    my @matches = ();

    # Expand shortcut options to match options
    foreach my $field (keys %{$self->{fieldInfo}}) {
        if ($field =~ /^(?:header|arf)$/) {  # header or arf field
            foreach my $header (@{$opt->{$field}}) {
                my ($name, $pattern) = $header =~ /^\[(.*?)\](.*)/;
                if ($name && $pattern) {
                    push @matches, "[$field:$name]$pattern";
                }
                else {
                    die "$0: invalid parameter to --$field option: $header\n";
                }
            }
        }
        elsif (exists $opt->{$field}) {
            if ($opt->{$field}) {
                push @matches, map "[$field]$_", @{$opt->{$field}};
            }
            else {
                die "$0: invalid parameter to --$field option:"
                  . " $opt->{$field}\n";
            }
        }
    }
    
    return @matches;
}


#
# prints all the records in a format specified by the output option
#
sub printRecords {
    my $self = shift @_;
    
    my $db = $self->{db};
    my $output = $self->{options}{output};
    my $conf;
    
    unless ($db->{dbEntries}) {
        print "No records matched.\n";
        return;
    }
    
    # if no output fields specified, determine and use the used ones
    if (@{$self->{options}{outputFields}} == 0) {
        my $tag;
        
        # delete the type key we inserted ourself 
        # (for internal use only, see sub beginStructure)
        delete $self->{usedTags}{'record.type'};
        
        foreach my $field (sort { $self->{fieldInfo}{$a}{priority} <=>
                                  $self->{fieldInfo}{$b}{priority};
                                } keys %{$self->{fieldInfo}}) {
                                
            # not all fields are defined in all versions
            if ($self->getTag($field)) {
                $tag = 'record.' . $self->getTag($field);
                
                if ($self->{usedTags}{$tag}) {
                    if ($tag eq 'record.header' && $output eq 'orig-csv') {
                        foreach my $header (keys %{$self->{usedHeaders}}) {
                            push @{$self->{options}{outputFields}},
                                 "header:$header";
                        }
                    }
                    elsif ($tag eq 'record.arf' && $output eq 'orig-csv') {
                        foreach my $arf (keys %{$self->{usedArfs}}) {
                            push @{$self->{options}{outputFields}}, "arf:$arf";
                        }
                    }
                    else {
                        push @{$self->{options}{outputFields}}, $field;
                    }
                    
                    delete $self->{usedTags}{$tag};
                }
            }
        }
    }
    
    unshift(@{$self->{options}{outputFields}}, '_type')
        if $output eq 'orig-csv';
    
    if ($output eq 'xml') {
        $self->printXML(0);
    }
    elsif ($output eq 'prettyxml') {
        $self->printXML(1);
    }
    elsif (exists $self->{outputConfigs}{$output}) {
        $self->printConfigurable();
    }
    else {
        # "raw" output
        my $status;
        my $key = '';
        my $rec;
        my $n = 0;
        while (($status, $key, $rec) = $self->{db}->loadRecord($key, $n++)) {
            last unless defined $rec;
            $self->printRawRecord($rec);
        }
        
        print $db->{dbEntries}, " record(s) matched\n";
    }
}


#
# Prints a record in a "raw" format
#
# @param   $rec    a reference to the record hash with the data.
#
sub printRawRecord {
    my $self = shift @_;
    my $rec = shift @_;
    
    print "type = $rec->{'record.type'}\n";
    
    foreach my $field (@{$self->{options}{outputFields}}) {
        my $tag = $self->getTag($field);
        my $key = 'record.' . $tag;
        
        if ($field =~ /^(header|arf)$/) {
            for my $line (@{$rec->{$key}}) {
                print "$1 = $line->{name}: $line->{content}\n";
            }
        }
        elsif ($field =~ /(header|arf):(.+)/) {
            for my $line (@{$rec->{$key}}) {
                if ($line->{name} eq $2) {
                    print "$1 = $line->{name}: $line->{content}\n";
                }
            }
        }
        else {
            print "$tag = $rec->{$key}\n" if exists $rec->{$key};
        }
    }
    
    print "\n";
}


#
# prints out all the records in the format controlled by the
# configuration in the $conf parameter hash
#
sub printConfigurable {
    my $self = shift @_;
    my $conf = $self->{outputConfigs}{$self->{options}{output}};
    
    print $conf->{'document.header'} if exists $conf->{'document.header'};
    print $conf->{'table.prefix'} if exists $conf->{'table.prefix'};
    
    $self->printHeader();
    
    my $status;
    my $key = '';
    my $rec;
    my $n = 0;
    while (($status, $key, $rec) = $self->{db}->loadRecord($key, $n++)) {
        last unless defined $rec;
        $self->printRow($rec);
    }
    
    print $conf->{'table.postfix'} if exists $conf->{'table.postfix'};
    print $conf->{'document.footer'} if exists $conf->{'document.footer'};
}

#
# prints the table header
#
sub printHeader {
    my $self = shift @_;
    my $conf = $self->{outputConfigs}{$self->{options}{output}};
    
    print $conf->{'table.header.prefix'}
        if exists $conf->{'table.header.prefix'};
    
    my $firstField = 1;
    foreach my $field (@{$self->{options}{outputFields}}) {
        unless ($firstField) {
            print $conf->{'table.header.infix'}
                if exists $conf->{'table.header.infix'};
        }
        else {
            $firstField = 0;
        }
        
        if ($self->{options}{output} eq 'orig-csv') {
            if ($field =~ /(header|arf):(.*)/) {
                print "$1_$2";
            }
            else {
                print $self->{fieldInfo}{$field}{tag}{csv}; 
            }
        }
        else {
            if ($field =~ /(header|arf):(.+)/) {
                print $2;
            }
            else {
                print $self->{fieldInfo}{$field}{columnHeader};
            }
        }
    }
    
    print $conf->{'table.header.postfix'}
        if exists $conf->{'table.header.postfix'};
}

#
# prints a table row, i.e. all requested data of one record
# @param   $rec       reference to a hash containing the data of the record
#
sub printRow {
    my $self = shift @_;
    my $rec = shift @_;
    my $conf = $self->{outputConfigs}{$self->{options}{output}};
    my $tag;
    
    print $conf->{'table.row.prefix'} if exists $conf->{'table.row.prefix'};
    
    my $firstField = 1;
    
    foreach my $field (@{$self->{options}{outputFields}}) {
        unless ($firstField) {
            print $conf->{'table.row.infix'}
                if exists $conf->{'table.row.infix'};
        }
        else {
            $firstField = 0;
        }
        
        print $conf->{'table.cell.prefix'}
            if exists $conf->{'table.cell.prefix'};
        
        $tag = 'record.' . $self->getTag($field);
        
        if ($field =~ /^(arf|header):(.+)/) {
            my $name = $2;
            my $lines = $rec->{'record.' . $self->getTag($1)};
            my $matched = 0;
            foreach my $line (@$lines) {
                if ($line->{name} =~ /^$name$/i) {
                    $matched = 1;
                    if ($line->{content}) {
                        print $self->replaceCriticalCharacters(
                                                              $line->{content});
                    }
                    else {
                        print $conf->{'table.cell.default'}
                            if exists $conf->{'table.cell.default'};
                    }
                    # print only the first header of that name
                    # TODO: change that
                    last;
                }
            }
            print $conf->{'table.cell.default'}
                if exists $conf->{'table.cell.default'} && !$matched;
        }
        elsif ($field =~ /^(arf|header)$/) { # print all headers/fields at once
            my $lines = $rec->{'record.' . $self->getTag($1)};
            print $self->replaceCriticalCharacters( 
                             join($conf->{'separator.multipleDataPerField'},
                                  map("$_->{name}: $_->{content}", @$lines)));
        }
        elsif (defined $rec->{$tag} && $rec->{$tag} ne '') { 
            print $self->replaceCriticalCharacters($rec->{$tag});
        }
        else {
            print $conf->{'table.cell.default'}
                if exists $conf->{'table.cell.default'};
        }
        
        print $conf->{'table.cell.postfix'}
            if exists $conf->{'table.cell.postfix'};
    }
    
    print $conf->{'table.row.postfix'} if exists $conf->{'table.row.postfix'};
}


sub printHeaderLineXml {
    my ($self, $tag, $line, $pretty) = @_;
    
    print '  ' if $pretty;
    print "<$tag>";
    print "\n    " if $pretty;
    print '<name>',
          $self->replaceCriticalCharacters($line->{name}),
          '</name>';
    print "\n    " if $pretty;
    print '<content>',
          $self->replaceCriticalCharacters($line->{content}),
          '</content>';
    print "\n  " if $pretty;
    print "</$tag>";
    print "\n" if $pretty;
}


#
# prints out all records in (pretty) XML format
#
# @param   $db       reference to a db containing all the records to print
# @param   $pretty   flag controlling whether the output should be pretty XML
#                    (one field per line, with indentation) or not
#                    (one record per line)
#
sub printXML {
    my $self = shift @_;
    my $pretty = shift @_;
    
    my $db = $self->{db};
    
    print "$self->{outputConfigs}{xml}{'document.header'}\n" if
        exists $self->{outputConfigs}{xml}{'document.header'};
    
    my $status;
    my $key = '';
    my $rec;
    my $n = 0;
    while (($status, $key, $rec) = $self->{db}->loadRecord($key, $n++)) {
        last unless defined $rec;
        
        my $type = $rec->{'record.type'};
        delete $rec->{'record.type'};
        
        print "<$type>";
        print "\n" if $pretty;
        
        foreach my $field (@{$self->{options}{outputFields}}) {
            my $tag = $self->getTag($field);
            my $key = 'record.' . $tag;
            
            next unless exists $rec->{$key};
            
            if ($field =~ /(?:arf|header)$/) {
                foreach my $line (@{$rec->{$key}}) {
                    $self->printHeaderLineXml($tag, $line, $pretty);
                }
            }
            elsif (my ($name) = $field =~ /(?:arf|header):(.*)/) {
                foreach my $line (@{$rec->{$key}}) {
                    if ($line->{name} eq $name) {
                        $self->printHeaderLineXml($tag, $line, $pretty);
                    }
                }
            }
            else {
                print '  ' if $pretty;
                print "<$tag>";
                print $self->replaceCriticalCharacters($rec->{$key});
                print "</$tag>";
                print "\n" if $pretty;
            }
        }
        
        print "</$type>\n";
        
        $rec->{'record.type'} = $type;
    }
    
    print $self->{outputConfigs}{xml}{'document.footer'} 
        if exists $self->{outputConfigs}{xml}{'document.footer'};
}


#
# replaces critical characters (or strings)  with the corresponding replacement
# strings
#
# @param   $content  a string in which these characters should be replaced
# @return            a string with the critical characters being replaced by
#                    the replacement strings (specified in the output config)
#
sub replaceCriticalCharacters {
    my $self = shift @_;
    my $string = shift @_;
    
    my $output = $self->{options}{output};
    $output = "xml" if $output =~ /prettyxml/i;
    my $conf = $self->{outputConfigs}{$output};
    
    if (exists $conf->{replacements}) {
        my $replacements = $conf->{replacements};
        for (my $i = 0; $i < @{$replacements}; $i +=2) {
            my $character = $replacements->[$i];
            my $replacement = $replacements->[$i+1];
            
            $string =~ s/$character/$replacement/g;
        }
    }
    
    return $string;
}


#
# Splits a comma separated values list (string) into items.
# 
# @param   $string  the comma separated values list to split
#
# @return  \@items  a ref to an array containing the items found or
#                   undef in case of an error
#
sub csv2Array {
    my $string = shift @_;
    my @items;
    my $lastItem;
    my $error;
    
    return [] if $string =~ /^\s*$/;
    
    @items =      ($string =~ /\G\s*("(?:[^"]|"")*"|[^",]*?)\s*,/gc);
    ($lastItem) = ($string =~ /\G\s*("(?:[^"]|"")*"|[^",]*?)\s*$/);
    push @items, $lastItem if defined $lastItem;
    
    return undef unless @items;
    
    # unquote the items
    for (my $i = 0; $i <= $#items; $i++) {
        if ((my $quote) = ($items[$i] =~ /^(["']).*\1$/)) {
            $items[$i] =~ s/^(["']?)(.*)\1$/$2/;
            $items[$i] =~ s/$quote$quote/$quote/g;
        }
    }
    
    return \@items;
}


#
# A sub to calculate the numeric time zone based on a time represented in
# local time and gmt
#
# @param   $locDay   day of the date in local time
# @param   $locHour  hours of the date in local time
# @param   $locMin   minutes of the date in local time
# @param   $gmDay    day of the date in gmt
# @param   $gmHour   hours of the date in gmt
# @param   $gmMin    minutes of the date in gmt
#
# @return  a time offset string like "+0100" representing the difference
#          between local time and gmt
#
sub calcTimeZone {
    my ($locDay, $locHour, $locMin, $gmDay, $gmHour, $gmMin) = @_;
    my ($diffMin, $diffHour, $diffDay);
    
    my $isEastern =     $gmDay <  $locDay
                    || ($gmDay == $locDay && (   $gmHour <  $locHour
                                             || ($gmHour == $locHour && $gmMin <= $locMin)));

    if ($isEastern) {
        $diffMin  = $locMin  - $gmMin;
        $diffHour = $locHour - $gmHour;
        $diffDay  = $locDay  - $gmDay;
    }
    else {    
        # western
        $diffMin  = $gmMin  - $locMin;
        $diffHour = $gmHour - $locHour;
        $diffDay  = $gmDay  - $locDay;
    }
    
    if ($diffMin < 0) {
        $diffMin  += 60;
        $diffHour -= 1;
    }
    if ($diffHour < 0) {
        $diffHour += 24;
        $diffDay  -= 1;
    }
    
    if ($diffDay != 0) {
        print STDERR "Problem calculating " .
                     ($isEastern ? 'eastern' : 'western') .
                     " time difference:\n" .
                     "gmt:   $gmDay $gmHour:$gmMin\n" .
                     "local: $locDay $locHour:$locMin\n" .
                     "diff:  $diffDay $diffHour:$diffMin\n";
    }
    
    return sprintf('%s%02d%02d', $isEastern ? '+' : '-',
                                 abs($diffHour), $diffMin);
}


#
# Converts the value of a time field to the desired time format (based on
# the options supplied to the Acctfind object)
#
# @param   the value from a time field in time_t or iso format
#
# @return  the date passed in in the desired format
#
sub convertTimeField {
    my $self = shift @_;
    my $date = shift @_;
    
    return $date unless $date ne '';

    if ($date =~ /^\d+$/) {
        # date has time_t format
        if ($self->{options}{"iso-times"}) {
            my ($sec,$min,$hour,$day,$month,$year) = localtime($date);
            my ($gmSec,$gmMin,$gmHour,$gmDay)      = gmtime($date);
            
            my $tz = calcTimeZone($day,$hour,$min,$gmDay,$gmHour,$gmMin);
            
            return sprintf("%4d-%02d-%02d %02d:%02d:%02d%s",
                           $year+1900, $month+1, $day, $hour, $min, $sec, $tz);
        }
        else {
            return $date;
        }
    }
    else {
        # date has iso format
        if ($self->{options}{"iso-times"}) {
            return $date;
        }
        else {
            # convert iso to time_t
            my ($year, $mon, $day, $hour, $min, $sec, $tzMode, $tzh, $tzm) =
               ($date =~ /^(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)([+-])(\d\d)(\d\d)$/);
            
            my $timeOffset = ($tzMode eq "+" ? 1 : -1) *
                             (($tzh * 60 + $tzm) * 60);
                             
            return timegm($sec,$min,$hour,$day,$mon-1,$year) - $timeOffset;
        }
    }
}


#
# Converts the items of a csv row to a record with the values supplied in an
# array as keys
#
# @param   $csvLine  the comma separated values list to split
# @param   $fields   an array ref containing the keys ("column headers")
#
# @return  \%rec     a ref to a hash containing the data
#          $@        a description of the error if one occured
#
sub csv2Record {
    my $self = shift @_;
    my ($csvLine, $fields) = @_;
    my %rec;
    
    my $array = csv2Array($csvLine);
    
    die "Record is not correctly csv formatted. The line is:\n$csvLine"
        unless defined $array;
    
    die "Too many columns in the record"
            . " (" . (int @$array) . " instead of " . (int @$fields)
            . " columns).\n"
            . "The line is:\n$csvLine"
        if @$array > @$fields;
        
    die "Not enough columns in the record"
            . " (" . (int @$array) . " instead of " . (int @$fields)
            . " columns).\n"
            . "The line is:\n$csvLine"
        if @$array < @$fields;
        
    foreach my $i (0..$#{$fields}) {
        
        # convert time fields to ISO format (if requested)
        if ($self->{isTimeField}{$fields->[$i]}) {
            $array->[$i] = $self->convertTimeField($array->[$i]);
        }
        
        if ($fields->[$i] =~ /^record\.(arf|header)_(.*)/) {
            my $lines;
            
            if (exists $rec{"record.$1"}) {
                $lines = $rec{"record.$1"};
            }
            else {
                $lines = [];
                $rec{"record.$1"} = $lines;
            }
            push @$lines, {name => $2, content => $array->[$i]};
        }
        else {
            $rec{$fields->[$i]} = $array->[$i];
        }
    }
    
    return \%rec;
}


#
# Determines whether a file is a binary accounting file or not
#
# @param   $file  the name of the file to check
#
# @return  "binary"  for binary accounting file
#          "csv"     for csv file
#          undef     for any other file format
#          
# @throws  an error message in case of an error
#
sub accountingFormatOfFile {
   my $self = shift @_;
   my $file = shift @_;
   
   if (open FH, "<$file") {
       my $header = <FH>;
       
       if ($header =~ /^FMacc001/) {
           close FH;
           return 'binary';
       }
       if (csv2Array($header)) {
           $self->{csvReadHandle} = *FH;
           $self->{headerLine} = $header;
           return 'csv'
       }
       
       close FH;
       return undef;
   }
   
   die "Error opening accounting file: $!\n";
}


#
# Displays the documentation
#
sub PrintHelp {
    if ($^O eq 'MSWin32') {
        # The DATA handle is not available within PerlApp, so we need to read
        # the POD in some other way;  at the same time, we don't want to
        # require having these modules on other platforms just to be able to
        # display the documentation.

        require IO::Scalar;  import IO::Scalar;
        require Pod::Text;   import Pod::Text;

        my $pod = GetPod();
        tie *POD, 'IO::Scalar', \$pod;
        my $parser = Pod::Text->new();
        $parser->parse_from_filehandle(\*POD);
    }
    else {
        if ($^O =~ /solaris/i) {
            $ENV{PERLLIB} = "/opt/PT25pmta/acctfind";
        }
        else {
            $ENV{PERLLIB} = "/opt/pmta/acctfind";
        }
        print `perldoc Port25::Acctfind`;
    }
}

#
# Returns Plain Old Documentation for this script.
# See PrintHelp for how/why this is used.
#
# @return                string with POD documentation
#
sub GetPod {
    return <<'EOP';

=head1 NAME

acctfind


=head1 SYNOPSIS

pmtaacctfind <options> <accounting files ...>


=head1 DESCRIPTION

acctfind reads the given accounting files and outputs defined
fields from matching records, in one of 8 different output formats.


=head1 OPTIONS

=head2 Record Types

The following options can be freely combined.
If none of them are specified, only the records for bounced and delivered mails
are searched.

=over 8

=item B<--bounced>

Allows records for bounced mails (local and remote bounces) to be searched.

=item B<--delivered>

Allows records for successful delivered mails (d records) to be searched.

=item B<--feedbackLoop>

Allows records for feedback loop mails (f records) to be searched

=item B<--localBounced>

Allows records for locally bounces mails (b records) to be searched.

=item B<--received>

Allows records for received mails (r records) to be searched

=item B<--remoteBounced>

Allows records for remotely bounced mails (rb records) to be searched.

=item B<--remoteStatus>

Allows records for remote status reports (rs records) to be searched.

=item B<--tempError>

Allows records for transient recipient failures (t records) to be searched

=item B<--tempQueueError>

Allows records for transient queue-wide failures (tq records) to be searched

=back

=head2 Matching Options

These options specify what to match. A record is output only if it matches
all of the specified B<--match> options.

=over 8

=item B<--match needle>

Looks for the B<needle> in all fields of the accounting records. The B<needle> 
can be either a perl regular expression m/.../ (or m/.../i for case-insensitive
matching), or some string, for plain, case-independent string matching.


=item B<--match [field ...]needle>

=item B<--match '[field ...] needle'>

B<Note:> the '[' and ']' aren't meta characters, the list of fields has to
be bracketed by them.

Looks for the B<needle> in the specified fields of the accounting records.
B<Field> can either be

=over 3 

=item *

shortcut options without the leading '--'

=item *

'arf:<arf field name>' with <arf field name> being the name of the arf field 
to match

=item *

'header:<header name>' with <header name> being the name of the header 
line to match

=item *

field specifiers found in PowerMTA's user guide

=item *

'all' which is the same as specifying no field.

=back

B<field>s must be separated by whitespace or commas.
The match is successful if B<needle> is found in at least one of the fields. 
The B<needle> can be either a perl regular expression m/.../ (or m/.../i for 
case-insensitive matching), or some string, for plain, case-independent 
string matching.
Be aware that the argument to --match has to be a single string!

If neither a B<--match> option nor a shortcut option (see below) is supplied,
it is assumed that you meant B<--match m/./> which matches every record.


=item B<--maxMatch n>

Limits the output to the first B<n> matched records. Specifiying 0 or not
specifying --maxMatch means unlimited.

=back

=head2 Shortcut Options

Shortcut options are for convenience only. There is no difference between
the shortcut and the corresponding --match option.

See the B<--match> option for a definition of what the B<needle> can be.

=over 8

=item B<--arf [name]needle>

Shortcut for B<--match [arf:name]needle>.
Looks for B<needle> in the arf field with name B<name>.


=item B<--bounceCat needle>

Shortcut for B<--match [bounceCat]needle>.
Looks for B<needle> in the bounceCat field.


=item B<--dlvTlsCipher needle>

Shortcut for B<--match [dlvTlsCipher]needle>.
Looks for B<needle> in the dlvTlsCipher field.


=item B<--dlvTlsProtocol needle>

Shortcut for B<--match [dlvTlsProtocol]needle>.
Looks for B<needle> in the dlvTlsProtocol field.


=item B<--dsnAction needle>

Shortcut for B<--match [dsnAction]needle>.
Looks for B<needle> in the dsnAction field.


=item B<--dsnDiag needle>

Shortcut for B<--match [dsnDiag]needle>.
Looks for B<needle> in the dsnDiag field.


=item B<--dsnMta needle>

Shortcut for B<--match [dsnMta]needle>.
Looks for B<needle> in the dsnMta field.


=item B<--dsnReportingMta needle>

Shortcut for B<--match [dsnReportingMta]needle>.
Looks for B<needle> in the dsnReportingMta field.


=item B<--dsnStatus needle>

Shortcut for B<--match [dsnStatus]needle>.
Looks for B<needle> in the dsnStatus field.


=item B<--dstIp needle>

Shortcut for B<--match [dstIp]needle>.
Looks for B<needle> in the dlvDestinationIp field.


=item B<--dstType needle>

Shortcut for B<--match [dstType]needle>.
Looks for B<needle> in the dlvType field.


=item B<--envId needle>

Shortcut for B<--match [envId]needle>.
Looks for B<needle> in the envId field.


=item B<--esmtp needle>

Shortcut for B<--match [esmtp]needle>.
Looks for B<needle> in the dlvEsmtpAvailable field.


=item B<--feedbackFormat needle>

Shortcut for B<--match [feedbackFormat]needle>.
Looks for B<needle> in the format field.


=item B<--from needle>

Shortcut for B<--match [from]needle>.
Looks for B<needle> in the orig field.


=item B<--header [name]needle>

Shortcut for B<--match [header:name]needle>.
Looks for B<needle> in the header line with name B<name>.


=item B<--jobId needle>

Shortcut for B<--match [jobId]needle>.
Looks for B<needle> in the jobId field.


=item B<--origTo needle>

Shortcut for B<--match [origTo]needle>.
Looks for B<needle> in the orcpt field.


=item B<--queue needle>

Shortcut for B<--match [queue]needle>.
Looks for B<needle> in the queue field (of the tq records).


=item B<--receivedForIp needle>

Shortcut for B<--match [receivedForIp]needle>.
Looks for B<needle> in the rcvDestinationIp field (of the r records).


=item B<--receivedFromIp needle>

Shortcut for B<--match [receivedFromIp]needle>.
Looks for B<needle> in the rcvSourceIp field (of the r records).


=item B<--reportedDomain needle>

Shortcut for B<--match [reportedDomain]needle>.
Looks for B<needle> in the reportedDomain field.


=item B<--reportingMTA needle>

Shortcut for B<--match [reportingMTA]needle>.
Looks for B<needle> in the reportingMTA field.


=item B<--reportingSourceIp needle>

Shortcut for B<--match [reportingSourceIp]needle>.
Looks for B<needle> in the reportingSourceIp field.


=item B<--size needle>

Shortcut for B<--match [size]needle>.
Looks for B<needle> in the dlvSize field.


=item B<--smtpuser needle>

Shortcut for B<--match [rcvSmtpUser]needle>.
Looks for B<needle> in the rvcSmtpUser field.


=item B<--srcIp needle>

Shortcut for B<--match [srcIp]needle>.
Looks for B<needle> in the dlvSourceIp field.


=item B<--srcMta needle>

Shortcut for B<--match [srcMta]needle>.
Looks for B<needle> in the srcMta field.


=item B<--srcType needle>

Shortcut for B<--match [srcType]needle>.
Looks for B<needle> in the srcType field.


=item B<--timeLogged needle>

Shortcut for B<--match [timeDelivered]needle>.
Looks for B<needle> in the timeLogged field.


=item B<--timeQueued needle>

Shortcut for B<--match [timeQueued]needle>.
Looks for B<needle> in the timeQueued field.


=item B<--to needle>

Shortcut for B<--match [to]needle>.
Looks for B<needle> in the rcpt field.


=item B<--totalSecondsQueued needle>

Shortcut for B<--match [totalSecondsQueued]needle>.
Looks for B<needle> in the totalSecondsQueued field.


=item B<--userAgent needle>

Shortcut for B<--match [userAgent]needle>.
Looks for B<needle> in the userAgent field.


=item B<--vmta needle>

Shortcut for B<--match [vmta]needle>.
Looks for B<needle> in the vmta field.


=item B<--vmtaPool needle>

Shortcut for B<--match [vmtaPool]needle>.
Looks for B<needle> in the vmtaPool field.

=back


=head2 Output Options

These options control how the output should look like.

=over 8

=item B<--iso-times>

Uses timestamps in ISO 8601 format (rather than numeric time_t)
for output and matching


=item B<--sortBy field>

Sorts the output by the content of the field B<field>, where B<field> can be
one of the shortcut options or one of the following special values:

=over 4

=item B<fromDomain>

Sorts by the content of the originators domain.

=item B<toDomain>

Sorts by the content of the recipients domain.

=back

=item B<--output specification>

This option specifies the style of and what to output.
Allowed fields are the shortcut options (without the leading '--').
Legal values for specification are:

=over 4

=item B<csv>

=item B<csv[field ...]>

Initializes the output format to CSV (Comma Separated Values) style:
All output fields will be enclosed in double quotes ("), seperated by commas.
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
The order of the output fields is the same as in the list.
If the list is omitted, all fields in the matched records are output.



=item B<orig-csv>

=item B<orig-csv[field ...]>

Initializes the output format to a PowerMTA compatible CSV style.
I.e. the output is the same as with the csv output style except that the 
first field is always the record type and the column headers are the field 
names used in the csv accounting files.



=item B<html>

=item B<html[field ...]>

Initializes the output format to generate html code for a page with a table
containing the requested data.
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
The order of the output fields is the same as in the list.
If the list is omitted, all fields in the matched records 
are output.


=item B<tsv>

=item B<tsv[field ...]>

Initializes the output to TSV (Tabulator Separated Values) style:
All output fields are separated by tabulators.
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
The order of the output fields is the same as in the list.
If the list is omitted, all fields in the matched records 
are output.


=item B<XML>

=item B<XML[field ...]>

Prints the records in compact XML format, one record per line.
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
If the list is omitted, all fields in the matched records 
are output.


=item B<prettyXML>

=item B<prettyXML[field ...]>

Prints the records in "beautified" XML format,
i.e. one field per line, indented two spaces per level.
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
If the list is omitted, all fields in the matched records 
are output.


=item B<custom>

=item B<custom[field ...]>

Initializes the output format specifications to have no defaults 
(i.e. empty strings).
As this renders the output unusable you also 
have to provide a B<--format> option which specifies the format of the
output.
Note that if you forget to specify an output specification setting,
an empty string will be used instead which may render your output 
unusable!
The whitespace or comma separated list of fields in the brackets determines
which fields to output.
The order of the output fields is the same as in the list.
If the list is omitted, all fields in the matched records 
are output.


=back

=item B<--format filename>

Specifies a file from which the specification for formatting the output
should be read. This may override the defaults selected with B<--output>.
If it is specified without B<--output>, it will be ignored.


B<format file's format>

The format of this file is rather simple.
It consists of key value pairs defined this way: 'key => value', where the
value can extend multiple lines.
The keys are case insensitive.

Currently used keys are 
'document.header', 
'document.footer', 
'table.prefix', 
'table.postfix', 
'table.header.prefix', 
'table.header.infix', 
'table.header.postfix', 
'table.row.prefix', 
'table.row.infix', 
'table.row.postfix', 
'table.cell.prefix', 
'table.cell.postfix',
'table.cell.default', and 
'replacements'
(xml and prettyXML output styles use only the 'document.*' and 'replacement' 
keys).

The 'replacements' key is special in that it takes an even sized, 
comma separated list of strings enclosed in square brackets ('[' and ']')
as value.
All occurences of the first, third, fifth,... string in the output data will 
be replaced by the second, fourth, sixth,... string.
The order is significant.

The values of the other keys will be inserted in the corresponding positions 
of the output.

If a value contains the character sequence "\n", "\r", or "\t" the sequence
will be converted to linefeed, carriage return, or tabulator.
To prevent this behavior, prefix the sequence with an additional backslash.
Whitespace at the begining and end of each value part (the part behind the
'=>', the whole line when using multiple lines, or one of the strings from the 
list of the replacements key) are stripped off.
If you need whitespace at the begining or end of a value part, you can enclose
this part with single or double quotes (''' or '"').

Empty lines and comment lines (lines starting with a '#') are ignored and can 
therefore not be part of a value!


=back

=head2 Standard Options

=over 8

=item B<--hashmarks>

=item B<--hashmarks n>

This enables a progress indicator.
It prints a hashmark ('#') to STDERR every B<n> records read. If no B<n> or
a value less than 1 is provided, a default of 1000 will be used.

=item B<--help>

Displays this documentation.


=item B<--tmpDir dir>

Sets the directory where the temporary db file will be created.
This is especially useful if your default temp file device might not be large
enough to hold the intermediate files, depending on the size of your accounting
files.

=item B<--version>

Displays this program's version number.

=back

=head2 Special Options

=over 8

=item B<--skip n>

Skip the first B<n> records before looking for matches.

=item B<--verbose>

Displays some statistic information for the run, like the size and location
of the temporary spool file used.

=back


=head1 EXAMPLES

B<acctfind --delivered --to @port25.com --output csv[to,dstIp] acctfile>

This generates a csv formatted output that lists all recipient addresses
from the domain port25.com that were successfully delivered to, including
the IP address of the machine that accepted the message, one per line.

B<acctfind --bounced --sortBy toDomain --output html[dsnStatus,to,dsnDiag] acctfile>

This generates a HTML table containing the recipient address, DSN Status code,
and DSN Diagnostic information of all bounce reports generated by PowerMTA,
sorted by the recipient's domain.


=head1 COPYRIGHT

Copyright (c) 2002-2010, Port25 Solutions, Inc.  All Rights Reserved.

=cut
EOP
}

1;

