Convert Openfire audit logs to csv

Here’s a script that lets you parse Openfire’s audit log and export data from it in comma separated values (csv) format.

The Openfire audit log contains a lot of useful stuff about client interactions with the server and each other; this includes presence information on what clients are connected to the server at any given time. Because the audit log is written in XML, an XML parser is needed to extract data from it.

The script is built around perl’s XML::Twig module and performs searches on the LDAP directory that users authenticate to for additional business information not included in the log data.

In the example below LDAP attribute names that begin with the word “example” represent custom attributes that have been added to the base schema.

The script uses a simple on-disk database created with the DB_File module to store the multiple unique resourceId values that are assigned to those who use many different client types, sometimes simultaneously.

# Read Openfire xml audit log and convert to csv
# 1. Work with multiple logs, in chrono order (approx 90M of xml data)
# 2. Do LDAP lookup to append sn, givenname, title, o, ou, exampletimezone,
# postaladdress.
# 3. XML attributes:
# presence (id, to, from), status, priority
# 4. Output field order:
# UserID,Last,First,ResourceID,Title,Company,Department,TimeZone,Address
# XML data format is as follows:
# <jive xmlns="">
#  <packet xmlns="" streamID="2b1f5f0c" status="auth" timestamp="Nov 05, 2014 07:08:02:331 PM">
#    <presence xmlns="" id="3Cz74-3924" to="" from=" 2.6.3">
#      <status>Online</status>
#      <priority>1</priority>
#    </presence>
#  </packet>
#  * * * 
#  <packet streamID="b126a9e0" status="auth" timestamp="Nov 05, 2014 07:08:02:336 PM">
#   <presence id="3Cz74-14" from=" 2.6.3" to="">
#     <status>Online</status>
#     <priority>1</priority>
#   </presence>
#  </packet>
# * * *
# Run this on a prod Openfire server as the gctech user.
# Created 11/11/2014 by P Lembo
# On RHEL 6 had to do an alternate install of perl 5.18.4 from source to get around a serious perl bug only resolved in 5.15 and later.

use strict;
use XML::Twig;
use Date::Calc qw(Today Add_Delta_Days);
use File::Sort qw(sort_file);
use Text::ParseWords;
use Net::LDAP;
use Net::LDAP::Entry;
use DB_File;
use Fcntl;

my $HOME = $ENV{'HOME'};

# Directory credentials ($ofdirHost, $ofdirUsr, $ofdirPass) are supplied
# by the site config file so they never appear in this script.
our($ofdirHost, $ofdirUsr, $ofdirPass);
require "$HOME/etc/admin.conf";

# Get yesterday's date; the audit logs we process are named for it.
my( $year, $month, $day ) = Today();
( $year, $month, $day ) = Add_Delta_Days ( $year, $month, $day, -1 );
my $yesterday = sprintf("%04d%02d%02d",$year,$month,$day);

# Working files: raw uid,resourceId pairs, a sorted/uniqued copy,
# the final report, a run log, and the DB_File hash table.
my $rawName = "ofrawfile" . "-" . $yesterday . ".csv";
my $rawFile = "$HOME/data/export/$rawName";
my $rawSorted = "$HOME/data/export/$rawName\.sorted";
my $reptName = "ofauditreport" . "-" . $yesterday . ".csv";
my $reptFile = "/data/www/html/reports/$reptName";
my $errLog = "$HOME/data/logs/ofauditreport.log";
my $dirHost = $ofdirHost;
my $dirUsr = $ofdirUsr;
my $dirPass = $ofdirPass;
my $dbmName = "ridb";
my $dbmFile = "$HOME/data/export/$dbmName";

open LOGZ, ">$errLog" or die $!;

# Main driver: gather the logs, build the resourceId database,
# write the report, then remove the intermediate files.
# NOTE(review): the original published listing lost the sub calls here;
# this ordering is reconstructed from the subs' data dependencies —
# confirm against a working copy.
get_logs();
make_riddb();
make_rept();
clean_up();

close LOGZ;

sub get_logs {

    # Read all files in /data/logs/im-server from yesterday
    # with pattern: jive.audit-[YYYYMMDD-NNN].log, and pass
    # each one to the parse_doc subroutine.
    # Sorting the directory listing keeps the -NNN sequence files
    # in chronological order, as the header comment requires.

    my $logDir = "/data/logs/im-server";
    my $time = localtime();

    print LOGZ "$time\tGetting audit logs for $yesterday\n";
    print "$time\tGetting audit logs for $yesterday\n";

    # Truncate the raw output file so reruns start clean;
    # parse_doc appends to it.
    open FH, ">$rawFile" or die $!;
    close FH;

    # Get list of file names from log directory.
    opendir(DIR, "$logDir") or die $!;
    my @files = sort readdir DIR;
    closedir DIR;

    foreach my $fileName (@files) {
        # Only yesterday's logs; no /g here — a global match would
        # carry pos() state between iterations.
        if ($fileName =~ /$yesterday/) {
            # Send each file to the XML parser
            my $logFile = "$logDir/$fileName";
            parse_doc($logFile);
        }
    }
}
sub parse_doc {

    # Parse the XML audit file at hand and append one
    # "userId,resourceId" line to the raw file for each packet
    # of interest.

    my $xmlFile = $_[0];
    print LOGZ "Processing ", $xmlFile, "\n";
    print "Processing ", $xmlFile, "\n";

    open FH, ">>$rawFile" or die $!;

    my $twig = XML::Twig->new();
    # Without this call the twig is empty and $root has no children.
    $twig->parsefile($xmlFile);
    my $root = $twig->root;

    foreach my $packets ($root->children()) {

        my $type = $packets->first_child()->att('type');

        # Include packets that have no type, i.e. that contain
        # messages between clients and not inquiries (iq).
        if (!defined $type || $type !~ /.+/) {

            my $timestamp = $packets->att('timestamp');
            for ($timestamp) { s/,/ /g; }    # commas would break the CSV
            my $from = $packets->first_child()->att('from');
            my $to = $packets->first_child()->att('to');
            my $id = $packets->first_child()->att('id');
            my $packet = $packets->first_child();
            my $status = $packet->first_child_text();
            # JID format is user@domain/resource.
            my($frjid, $resourceId) = split('/', $from);
            my($fruid, $frdomain) = split('@', $frjid);
            my($touid, $todomain) = split('@', $to);

            print FH $fruid;
            print FH ",";
            print FH $resourceId;
            print FH "\n";
        }

        # Include iq packets with a type of 'get'; these are
        # pings of the server by clients.
        elsif ($type =~ /get/) {

            my $from = $packets->first_child()->att('from');

            # Only record those packets that have the from attr
            # (these should be real people).
            if (defined $from && $from =~ /.+/) {

                my $id = $packets->first_child()->att('id');
                my($frjid, $resourceId) = split('/', $from);
                my($fruid, $frdomain) = split('@', $frjid);

                print FH $fruid;
                print FH ",";
                print FH $resourceId;
                print FH "\n";
            }
        }
    }
    # Release parsed tree memory — these logs run ~90M of XML.
    $twig->purge;
    close FH;
}

sub make_rept {

    # Create a report from the log data gathered: one CSV row per
    # user, combining the resourceIds from the DB_File hash table
    # with business attributes looked up in LDAP.
    my $time = localtime();

    print LOGZ "$time\tMaking session report\n";
    print "$time\tMaking session report\n";

    # Open the hash table (keyed by userId) for reading.
    my $db = tie my %riddb, "DB_File", $dbmFile, O_RDONLY or die $!;

    # Open the report file for writing.
    open FH, ">$reptFile" or die $!;

    # Print the heading for the report.
    print FH "UserID,Last,First,ResourceID,Title,Company,Department,TimeZone,Address\n";

    # Iterate through each key (userId) in the hash table.
    foreach my $fruid (keys %riddb) {

        # Get the resourceIds associated with the user.
        my $rids = $riddb{$fruid};
        # Search LDAP for user business info.
        my %return = get_ldap($fruid);
        my $result = $return{result};

        # Skip users LDAP could not find ("nomatch").
        if ($result eq 'success') {

            # Get user attribute values.
            my $sn = $return{sn};
            my $givenname = $return{givenname};
            my $title = $return{title};
            my $o = $return{o};
            my $ou = $return{ou};
            my $exampletimezone = $return{exampletimezone};
            my $streetaddress = $return{streetaddress};

            # Print the report row; free-text fields are quoted
            # because they may contain commas.
            print FH $fruid;
            print FH ",";
            print FH "\"$sn\"";
            print FH ",";
            print FH "\"$givenname\"";
            print FH ",";
            print FH "\"$rids\"";
            print FH ",";
            print FH "\"$title\"";
            print FH ",";
            print FH "\"$o\"";
            print FH ",";
            print FH "\"$ou\"";
            print FH ",";
            print FH $exampletimezone;
            print FH ",";
            print FH "\"$streetaddress\"";
            print FH "\n";
        }
    }
    close FH;
    undef $db;
    untie %riddb;
}
sub make_riddb {

    # Build the on-disk hash table of userId => "rid1|rid2|..." from
    # the raw CSV produced by parse_doc.

    # Sort the raw file unique before processing so each user's first
    # resourceId seeds the table exactly once.
    sort_file({u => 1, I => "$rawFile", o => "$rawSorted"});

    # Pass 1: create the initial hash table from the sorted results.
    open FH, "<$rawSorted" or die $!;

    my $db = tie my %riddb, "DB_File", $dbmFile, O_RDWR | O_CREAT, 0666, $DB_HASH or die $!;

    while (<FH>) {
        chomp;    # strip the newline or it ends up inside the value
        # parse_line must capture BOTH fields; the resourceId is the value.
        my ($fruid, $resourceId) = parse_line(',', 0, $_);
        next unless defined $fruid && defined $resourceId;
        # Write the key (userId) and value (resourceId) to the hash table.
        $riddb{$fruid} = $resourceId;
    }

    undef $db;
    untie %riddb;
    close FH;

    # Pass 2: append any additional resourceIds seen in the raw file.
    open FH, "<$rawFile" or die $!;

    my $db2 = tie my %riddb2, "DB_File", $dbmFile, O_RDWR or die $!;

    while (<FH>) {
        chomp;
        my ($fruid, $resourceId) = parse_line(',', 0, $_);
        next unless defined $fruid && defined $resourceId;

        # converse.js appends a random per-session suffix; collapse it
        # so every converse.js session counts as one client type.
        if ($resourceId =~ /^converse\.js.+/) {
            my ($idprefix, $idsuffix) = split(/\-/, $resourceId);
            $resourceId = $idprefix;
        }

        my $existingrids = $riddb2{$fruid};
        next unless defined $existingrids;

        # Exact membership test on the pipe-separated list; a regex
        # match here would misfire on metacharacters and substrings.
        my @known = split /\|/, $existingrids;
        unless (grep { $_ eq $resourceId } @known) {
            $riddb2{$fruid} = $existingrids . "|" . $resourceId;
        }
    }

    undef $db2;
    untie %riddb2;
    close FH;
}

sub get_ldap {

    # Look up one user's business attributes in LDAP.
    # Returns a hash with result => 'success' or 'nomatch', plus the
    # attribute values and a composed streetaddress on success.

    my $uid = $_[0];
    my %ldinfo;
    my @attrs = qw(uid sn givenname title o ou exampletimezone street l st c postalcode);
    my $basedn = "dc=example,dc=com";
    my $query = "(uid=$uid)";

    my $ldap = Net::LDAP->new($dirHost, port => '389', version => '3')
        or die "Failed to connect to $dirHost: $@\n";
    my $mesg = $ldap->bind($dirUsr, password => $dirPass) or die $!;

    $mesg = $ldap->search(
        base   => $basedn,
        scope  => 'sub',
        filter => $query,
        attrs  => \@attrs,
    );
    die "Failed to search with ", $mesg->error(), "\n" if $mesg->code();

    if ($mesg->count < 1) {
        print LOGZ "Warning: $uid not found\n";
        print "Warning: $uid not found\n";
        $ldinfo{result} = "nomatch";
    }
    else {
        while (my $entry = $mesg->shift_entry()) {
            $ldinfo{result} = "success";
            $ldinfo{dn} = $entry->dn;
            $ldinfo{sn} = $entry->get_value('sn');
            $ldinfo{givenname} = $entry->get_value('givenname');
            $ldinfo{title} = $entry->get_value('title');
            $ldinfo{o} = $entry->get_value('o');
            $ldinfo{ou} = $entry->get_value('ou');
            $ldinfo{exampletimezone} = $entry->get_value('exampletimezone');
            my $street = $entry->get_value('street');
            my $l = $entry->get_value('l');
            my $st = $entry->get_value('st');
            my $c = $entry->get_value('c');
            my $postalcode = $entry->get_value('postalcode');
            my $streetaddress = $street . "," . $l . "," . $st . " " . $c . " " . $postalcode;
            # Collapse runs of whitespace left by empty attributes.
            # (The original s/\s\s/\s/ inserted a literal "s": \s has
            # no special meaning on the replacement side of s///.)
            for ($streetaddress) { s/\s{2,}/ /g; }
            $ldinfo{streetaddress} = $streetaddress;
        }
    }
    $ldap->unbind;
    return (%ldinfo);
}
sub clean_up {

    # Remove the raw files and the DB_File hash table (.dir/.pag pair).
    # unlink avoids shelling out to /bin/rm with interpolated paths.
    unlink $rawFile, $rawSorted, "$dbmFile.dir", "$dbmFile.pag";
}
This entry was posted in System Administration, Systems Analysis on by .

About phil

My name is Phil Lembo. In my day job I’m an enterprise IT architect for a leading distribution and services company. The rest of my time I try to maintain a semi-normal family life in the suburbs of Raleigh, NC. E-mail me at philipATlembobrothersDOTcom. The opinions expressed here are entirely my own and not those of my employers, past, present or future (except where I quote others, who will need to accept responsibility for their own rants).