#!/usr/bin/perl -w
#
# ========================================================================
# sar2rrd.pl
# ------------------------------------------------------------------------
#
# This script can be used to parse sar command output.
# It will create RRDTool archives and graphs.
#
# You can use command line arguments to select the graphs
# and the columns you wish.
#
# ------------------------------------------------------------------------
# Author: Jerome Delamarche (jd@maje.biz - jd@trickytools.com)
# Version: 1.1
# Date: 25 Jan 2007
#
# Changes:
#   Ignore lines such as "HH:MM:SS LINUX RESTART"
#   Take care of time given as "HH:MM:SS PM" or "HH:MM:SS AM"
#   instead of "HH:MM:SS"
#   New command line argument to specify the date format on the
#   first output line (MDY or DMY)
#   Date field separator can be a '.' or a '/'
# ------------------------------------------------------------------------
# Version: 1.1
# Date: 30 Jan 2007
# ------------------------------------------------------------------------
# TODO: for memory graphing, should use the -b 1024
# ========================================================================
#
# Overall flow:
#   1. parse the command line and check directories / files,
#   2. first pass over the sar file: find the date, the first, second and
#      last timestamps of the first statistics section and count the
#      measures,
#   3. second pass: create one RRD per statistic (or per item for
#      "multiple" statistics), feed it, and render a PNG per RRD.
#
use strict;
use warnings;
use Getopt::Std;
use Time::Local;
use Date::Calc qw(Add_Delta_Days);

my $rrdtool = "/usr/local/rrdtool/bin/rrdtool";
my $rrddir = "./rrd";
my $imgdir = "./img";
my $imgwidth = 400;
my $imgheight = 200;
my $dateformat = "MDY";     # format of the date on the 1st output line
my $sarfile = "";
my $startdate = "";
my $enddate = "";
my $graph_name_spec = "";
my $graph_col_sign = "";
my @graph_col_spec = ();
my $graph_startsec = 0;
my $graph_endsec = 0;
my $step = 0;
my $line_width = 1;
my $hostname = "localhost";
my $starttime = "";
my $secondtime = "";
my $endtime = "";
my $measurecount = 0;
my $logarithmic = "";
my $verbose = 0;
my @colors = (
    "FF0000", "0000FF", "00FFFF", "FF00FF", "FFFF00",
    "00FF00", "000000", "C0C0C0", "FF8C00", "8FBC8F"
);

# "multiple" graphs are stats like Block Device or CPU Usage that indicate
# one line per item
#14:46:56 DEV tps rd_sec/s wr_sec/s
#14:47:01 dev1-0 0,00 0,00 0,00
#14:47:01 dev1-1 0,00 0,00 0,00
#
# Each entry is keyed by the sanitized name (see MakeDSName) of the first
# header column. "keysize" > 1 means the second header column is also part
# of the key, in which case "title" is a hash indexed by that second column.
my %sarstats = (
    "proc_s" => {
        "ignore_col" => undef,
        "title" => "Process per Second",
        "unit" => "count/s",
        "multiple" => 0,
        "keysize" => 1,
    },
    "runq_sz" => {
        "ignore_col" => undef,
        "title" => "Queue Size and Load Average",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "tps" => {
        "ignore_col" => undef,
        "title" => "I/O Transfer Rate",
        "unit" => "count/s",
        "multiple" => 0,
        "keysize" => 1,
    },
    "pgpgin_s" => {
        "ignore_col" => undef,
        "title" => "Paging Statistics",
        "unit" => "count/s",
        "multiple" => 0,
        "keysize" => 1,
    },
    "DEV" => {
        "ignore_col" => undef,
        "title" => "Block Device Activity",
        "unit" => "count/s",
        "multiple" => 1,
        "keysize" => 1,
    },
    "INTR" => {
        "ignore_col" => undef,
        "title" => "Interrupt Count",
        "unit" => "count/s",
        "multiple" => 1,
        "keysize" => 1,
    },
    "IFACE" => {
        "ignore_col" => undef,
        "title" => {
            "rxpck_s" => "Network Statistics",
            "rxerr_s" => "Network Failure Statistics",
        },
        "unit" => "count/s",
        "multiple" => 1,
        "keysize" => 2,
    },
    "totsck" => {
        "ignore_col" => undef,
        "title" => "Socket Statistics",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "kbmemfree" => {
        "ignore_col" => undef,
        "title" => "Memory and Swap Utilization",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "frmpg_s" => {
        "ignore_col" => undef,
        "title" => "Memory Statistics",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "CPU" => {
        "ignore_col" => undef,
        "title" => {
            "prct_user" => "CPU Utilization",
            "i000_s" => "Interruption Statistics",
        },
        "unit" => "count",
        "multiple" => 1,
        "keysize" => 2,
    },
    "dentunusd" => {
        "ignore_col" => undef,
        "title" => "Inode and Files Statistics",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "cswch_s" => {
        "ignore_col" => undef,
        "title" => "Context Switches per Second",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
    "pswpin_s" => {
        "ignore_col" => undef,
        "title" => "Swapping Statistics",
        "unit" => "count",
        "multiple" => 0,
        "keysize" => 1,
    },
);

# ------------------------------------------------------------------------
# Command line parsing
# ------------------------------------------------------------------------
my %opts = ();
getopts("?d:i:t:f:oH:W:vS:e:s:g:",\%opts);
if (exists($opts{'?'})) { Usage(); }
if (exists($opts{'d'})) { $rrddir = $opts{'d'}; }
if (exists($opts{'i'})) { $imgdir = $opts{'i'}; }
if (exists($opts{'f'})) { $sarfile = $opts{'f'}; }
if (exists($opts{'t'})) { $dateformat = $opts{'t'}; }
if (exists($opts{'H'})) { $imgheight = $opts{'H'}; }
if (exists($opts{'W'})) { $imgwidth = $opts{'W'}; }
if (exists($opts{'o'})) { $logarithmic = "-o"; }
if (exists($opts{'s'})) { $startdate = $opts{'s'}; }
if (exists($opts{'e'})) { $enddate = $opts{'e'}; }
if (exists($opts{'S'})) { $step = $opts{'S'}; }
if (exists($opts{'v'})) { $verbose = 1; }

# Parameters check:
# should specify multiple files ? a hostname ? a range ? data to process ?
die("'$rrdtool' is not an executable") if ! -x $rrdtool;
die("RRD Directory '$rrddir' is not a writeable directory") if ! -d $rrddir || ! -w $rrddir;
die("Image Directory '$imgdir' is not a writeable directory") if ! -d $imgdir || ! -w $imgdir;
if ($sarfile eq "") {
    print "sar result file not set. Please use -f option\n";
    Usage();
}
die("sar File '$sarfile' is not a readable") if ! -r $sarfile;

if (exists($opts{'g'})) {
    my @graph_spec_parts = split(/:/,$opts{'g'});
    if (scalar(@graph_spec_parts) < 2) {
        print "Incorrect syntax for '-g' option: should be '-g graphname:(+|-)column,...\n";
        Usage();
    }
    if (!exists($sarstats{$graph_spec_parts[0]})) {
        die("Value of graph specification does not start by a valid statistics name (".$graph_spec_parts[0].")\n");
    }
    $graph_col_sign = substr($graph_spec_parts[1],0,1);
    if ($graph_col_sign ne "-" && $graph_col_sign ne "+") {
        print "Incorrect syntax for '-g' option: should be '-g graphname:(+|-)column,...\n";
        Usage();
    }
    $graph_name_spec = $graph_spec_parts[0];
    @graph_col_spec = split(/,/,substr($graph_spec_parts[1],1));
    # Note: column names cannot be checked (or we must store predefined values somewhere ?)
}

# ------------------------------------------------------------------------
# First pass:
# we determine the time range:
# ------------------------------------------------------------------------
print "First Pass: determine the time range...\n";
my $firstline = 1;
my $headerline = 0;     # set to 1 when the next line is a header line
my $curdate = "";
my ($startday,$startmonth,$startyear);
my ($endday,$endmonth,$endyear);
my $first_over = 1;
my $curitem = "";
my @parts;

# 3-arg open with a lexical handle: the file name is never interpreted
# as part of the open mode.
open(my $fd, '<', $sarfile) or die("Could not open file '$sarfile' in read mode\n");
print "Processing file '$sarfile':\n";
while (<$fd>) {
    chomp;
    # A blank line announces a header line:
    if ($_ eq "") {
        $headerline = 1;
        next;
    }

    # Added with v1.1: eliminate lines containing LINUX events
    if ($_ =~ /LINUX/) {
        next;
    }

    if ($firstline) {
        $firstline = 0;
        # should indicate the day:
        # format is: system version (hostname) DD.MM.YYYY
        # or:        system version (hostname) DD/MM/YYYY
        if ($_ =~ /^.*\(.*\).*([[:digit:]]{2}[\.\/][[:digit:]]{2}[\.\/][[:digit:]]{4}).*$/) {
            $curdate = $1;
        }
        else {
            # Fail early with a clear message instead of feeding garbage
            # to timelocal() later on.
            die("Could not find the date on the 1st line of '$sarfile'\n");
        }

        # Added with v1.1:
        if ($dateformat eq "MDY") {
            ($startmonth,$startday,$startyear) = split(/[\.\/]/,$curdate);
        }
        elsif ($dateformat eq "DMY") {
            ($startday,$startmonth,$startyear) = split(/[\.\/]/,$curdate);
        }
        else {
            die("Unknown Date Format: '$dateformat' (supported format are: MDY and DMY)\n");
        }
        # The end date keeps a 1-based month for Add_Delta_Days();
        # the start month becomes 0-based for timelocal().
        ($endday,$endmonth,$endyear) = ($startday,$startmonth,$startyear);
        $startmonth--;
        next;
    }

    # Multiple spaces are considered as a simple char:
    $_ =~ s/([[:space:]]+)/ /g;
    @parts = split(/[[:space:]]/);

    # First field must be: HH:MM:SS
    if (!defined($parts[0]) || $parts[0] !~ /[[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]/) {
        next;
    }

    # Added with v1.1: eliminate the 2nd column if it is a AM or a PM:
    if (defined($parts[1]) && ($parts[1] eq "AM" || $parts[1] eq "PM")) {
        Time24(\@parts);
        # Eliminate the column:
        splice(@parts,1,1);
    }

    if ($headerline) {
        $headerline = 0;
        # Examine next fields:
        shift(@parts);
        my $line = join(' ',@parts);
        if ($curitem ne "" && $curitem ne $line) {
            # we can stop, we got the start/end time by now:
            last;
        }
        # $curitem contains the whole columns except the 1st, because
        # sometimes, the 1st column can be ambiguous (see IFACE)
        $curitem = $line;
        next;   # the starttime starts AFTER the header line
    }

    # Is it the first row of value and the first date ?
    if ($starttime eq "") {
        $starttime = $parts[0];
    }
    elsif ($secondtime eq "" && $starttime ne $parts[0]) {
        $secondtime = $parts[0];
    }

    # Ignore lines which have identical timestamp (think of multiple CPU !)
    if ($endtime eq "" || $endtime ne $parts[0]) {
        $measurecount++;

        # cross over midnight ?
        if ($starttime gt $parts[0]) {
            if ($first_over) {
                eval {
                    ($endyear,$endmonth,$endday) = Add_Delta_Days($endyear,$endmonth,$endday,1);
                };
                if ($@) {
                    die("Incorrect Date Format on the 1st line\nUse the -t option\n");
                }
                $first_over = 0;
            }
        }
        else {
            $first_over = 1;
        }
    }

    $endtime = $parts[0];
}
close($fd);

# Sanity check:
# we must have a start & endtime and curitem should not be empty:
if ($starttime eq "") {
    die("Could not determine the Start Time from the output file\n");
}
if ($endtime eq "") {
    die("Could not determine the End Time from the output file\n");
}
if ($curitem eq "") {
    die("No Item to graph found in the file\n");
}
# Without a second distinct timestamp the sampling interval is unknown
# (and timelocal() below would be called with too few arguments):
if ($secondtime eq "") {
    die("Could not determine the interval between two measures from the output file\n");
}
print "$measurecount measures detected\n";

my $startsec = timelocal(reverse(split(/:/,$starttime)),$startday,$startmonth,$startyear);
my $secondsec = timelocal(reverse(split(/:/,$secondtime)),$startday,$startmonth,$startyear);
$endmonth--;
my $endsec = timelocal(reverse(split(/:/,$endtime)),$endday,$endmonth,$endyear);

my $user_startsec = 0;
my $user_endsec= 0;
if ($startdate ne "") {
    $user_startsec = CheckDate($startdate,"start");
}
if ($enddate ne "") {
    $user_endsec = CheckDate($enddate,"end");
}
if ($user_startsec && $user_endsec && $user_endsec < $user_startsec) {
    die("The specified end date: $enddate, is anterior to the specified start date: $startdate\n");
}
$graph_startsec = ($user_startsec) ? $user_startsec : $startsec;
$graph_endsec = ($user_endsec) ? $user_endsec : $endsec;

# ------------------------------------------------------------------------
# Second pass:
# create the graphs
# ------------------------------------------------------------------------
print "Second Pass: create the graphs...\n";
print "Range starts from $starttime to $endtime\n";

# Compute the interval (in seconds) between to measures:
my $s = $secondsec - $startsec;
if ($s < 0) {
    # Both timestamps were placed on the start day: a negative interval
    # means the second measure was taken after midnight.
    $s += 24 * 3600;
}
if ($step == 0) {
    $step = $s;
}
else {
    if ($step < $s) {
        die("The specified step ('$step') is less than the interval between two values ('$s')\n");
    }
}
print "Use Interval of $step seconds\n";

# Variables for graphs creation:
my $nextblock = 0;      # 1 while the items of a "multiple" statistic are being discovered
my %graphs = ();        # items (devices, CPUs, ...) of the current "multiple" statistic
my $dsheartbeat = 0;
my $dsstring = "";
my $rras = "";
$headerline = 0;        # set to 1 when the next line is a header line
$first_over = 1;
my $sarstat = "";       # header (minus the time column) of the current statistic
my $graphname = "";
my $dsname = "";
my $keyname = "";
my $rrdfile = "";
my $cmd = "";
my $firstvalue = 1;     # skip the 1st value which is never significant with sar
my $imgfile = "";
my $title = "";
my $vlabel = "";
my $dsnames = "";
my $ismultiple = 0;
my $skip = ($graph_name_spec ne "") ? 1 : 0;

# Date of the first measure (month already 0-based). Every statistics
# section restarts from this date, whatever midnight crossings happened
# in the previous section.
my @first_date = ($startday,$startmonth,$startyear);

open($fd, '<', $sarfile) or die("Could not open file '$sarfile' in read mode\n");
print "Processing file '$sarfile':\n";
while (<$fd>) {
    chomp;
    if ($_ eq "") {
        $headerline = 1;
        next;
    }

    # Added with v1.1: eliminate lines containing LINUX events
    if ($_ =~ /LINUX/) {
        next;
    }

    # Multiple spaces are considered as a simple char:
    $_ =~ s/([[:space:]]+)/ /g;
    @parts = split(/[[:space:]]/);

    # First field must be: HH:MM:SS
    if (!defined($parts[0]) || $parts[0] !~ /[[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]/) {
        next;
    }

    # Added with v1.1: eliminate the 2nd column if it is a AM or a PM:
    if (defined($parts[1]) && ($parts[1] eq "AM" || $parts[1] eq "PM")) {
        Time24(\@parts);
        # Eliminate the column:
        splice(@parts,1,1);
    }

    # Check if current line is a new item of a multiple graph.
    # A header line is never an item, so it is excluded here.
    if ($nextblock && !$headerline) {
        # if the item name is not in the %graphs hash,
        # add it and create a new RRD, otherwise, all the RRDs
        # have been created...
        my $item = $parts[1];
        if (exists($graphs{$item})) {
            $nextblock = 0;
        }
        else {
            $graphs{$item} = 1;
            $rrdfile = "$rrddir/$dsname$keyname-$item.rrd";
            $cmd = "$rrdtool create $rrdfile -b $startsec -s $step $dsstring $rras";
            MySystem($cmd);
            $firstvalue = 1;
        }
    }

    # Handle header line and determine if we analyze a new statistics:
    if ($headerline) {
        $headerline = 0;

        # Do we need to create a new graph ?
        my $line = join(' ',@parts[1..$#parts]);
        if ($sarstat ne $line) {
            # Dump the former graph (uses the state of the former statistic,
            # so it must run before anything below is updated):
            if ($sarstat ne "") {
                CreateImage();
            }

            # A new section starts again at the first date of the file and
            # never inherits the item discovery of the previous one:
            ($startday,$startmonth,$startyear) = @first_date;
            $first_over = 1;
            $nextblock = 0;

            # Create a new graph:
            $dsname = MakeDSName($parts[1]);
            if ($graph_name_spec ne "") {
                if ($dsname eq $graph_name_spec) {
                    print "Analyzing data for $dsname\n";
                    $skip = 0;
                }
                else {
                    print "Skip data for $dsname\n";
                    $sarstat = $line;
                    $skip = 1;
                    next;
                }
            }
            else {
                print "Analyzing data for $dsname\n";
            }

            # the DS Name depends on the keysize:
            # Sanity check:
            $keyname = "";
            if (!exists($sarstats{$dsname})) {
                die("Unknown dsname: $dsname\n");
            }
            if ($sarstats{$dsname}{'keysize'} > 1) {
                $keyname = "-".MakeDSName($parts[2]);
            }

            # Is it a single or a multiple graph ?
            # For a multiple graph, column 1 is the item name, not a value.
            $ismultiple = $sarstats{$dsname}{'multiple'};
            my $firstcol = ($ismultiple) ? 2 : 1;
            my @columns = map { MakeDSName($_) } @parts[$firstcol..$#parts];

            $dsheartbeat = 2 * $step;
            $dsstring = join('', map { "DS:$_:GAUGE:$dsheartbeat:0:U " } @columns);
            $dsnames = join(':',@columns);
            $rras = "RRA:AVERAGE:0.5:1:$measurecount";

            if (!$ismultiple) {
                $rrdfile = "$rrddir/$dsname$keyname.rrd";
                $cmd = "$rrdtool create $rrdfile -b $startsec -s $step $dsstring $rras";
                MySystem($cmd);
                $firstvalue = 1;
            }
            else {
                # we cannot create the RRD now, we must analyse the next lines:
                $nextblock = 1;
                %graphs = ();
            }
        }
        $sarstat = $line;
        next;
    }

    if ($firstvalue) {
        $firstvalue = 0;
        next;
    }

    if ($skip) {
        next;
    }

    # did we cross over midnight ?
    if ($starttime gt $parts[0]) {
        if ($first_over) {
            # Add_Delta_Days() wants a 1-based month, timelocal() a 0-based one:
            $startmonth++;
            ($startyear,$startmonth,$startday) = Add_Delta_Days($startyear,$startmonth,$startday,1);
            $startmonth--;
            $first_over = 0;
        }
    }
    else {
        $first_over = 1;
    }

    # This is a measure line: we must update the graph
    my $stamp = timelocal(reverse(split(/:/,$parts[0])),$startday,$startmonth,$startyear);
    my $firstcol = ($ismultiple) ? 2 : 1;
    my @values = @parts[$firstcol..$#parts];
    foreach my $value (@values) {
        # sar may print a decimal comma depending on the locale:
        $value =~ s/,/\./g;
    }
    my $DATA = "$stamp:".join(':',@values);

    if ($ismultiple) {
        $rrdfile = "$rrddir/$dsname$keyname-".$parts[1].".rrd";
    }
    $cmd = "$rrdtool update $rrdfile -t $dsnames $DATA";
    MySystem($cmd);
}
close($fd);

# Dump the former graph:
if ($sarstat ne "") {
    CreateImage();
}

# ------------------------------------------------------------------------
# CheckDate($date,$label)
#   Converts a date given on the command line ("MM-DD-YYYY HH:MM:SS")
#   into epoch seconds and checks it lies within the range read from
#   the sar file ($startsec..$endsec).
#   $label ("start" or "end") is only used in error messages.
#   Returns the epoch seconds; dies if the date is malformed or out of
#   range.
# ------------------------------------------------------------------------
sub CheckDate {
    my ($date,$label) = @_;
    my @date;
    my $sec;
    my $month;

    # Fields are: month, day, year, hour, minute, second
    @date = split(/[ :-]/,$date);
    if (scalar(@date) != 6 || grep { $_ !~ /^[[:digit:]]+$/ } @date) {
        die("Incorrect $label date: '$date' (expected format is: MM-DD-YYYY HH:MM:SS)\n");
    }
    $sec = timelocal($date[5],$date[4],$date[3],$date[1],$date[0]-1,$date[2]);
    if ($sec < $startsec) {
        $month = $startmonth+1;
        die("The $label date specified on the command line: $date, is anterior to the first date read in the file: $month-$startday-$startyear $starttime\n");
    }
    if ($sec > $endsec) {
        $month = $endmonth+1;
        die("The $label date specified on the command line: $date, is posterior to the last date read in the file: $month-$endday-$endyear $endtime\n");
    }

    return $sec;
}

# ------------------------------------------------------------------------
# Time24(\@parts)
#   $parts->[0] is "HH:MM:SS" and $parts->[1] is "AM" or "PM".
#   Rewrites $parts->[0] in place as a 24-hour time.
# ------------------------------------------------------------------------
sub Time24 {
    my ($parts) = @_;

    # Add 12 hours, but 12PM is 12 and 12AM is 00:
    my @thetime = split(/:/,${$parts}[0]);
    if (${$parts}[1] eq "AM") {
        if ($thetime[0] eq "12") {
            ${$parts}[0] = "00:".$thetime[1].":".$thetime[2];
        }
    }
    else {
        if ($thetime[0] ne "12") {
            ${$parts}[0] = $thetime[0]+12;
            ${$parts}[0] .= ":".$thetime[1].":".$thetime[2];
        }
    }
    if ($verbose) {
        print "Time24: new time is: ",${$parts}[0],"\n";
    }
}

# ------------------------------------------------------------------------
# MakeDSName($name)
#   Turns a sar column name into a name usable as DS / file name:
#   '%' becomes "prct_", any other non alphanumeric char becomes '_'.
# ------------------------------------------------------------------------
sub MakeDSName {
    my ($name) = @_;

    $name =~ s/%/prct_/g;
    $name =~ s/[^[:alnum:]]/_/g;
    return $name;
}

# ------------------------------------------------------------------------
# CreateImage()
#   Renders the PNG image(s) of the statistic described by the current
#   second-pass state ($dsname, $keyname, $dsnames, $ismultiple, %graphs,
#   $rrdfile). Does nothing when the statistic is skipped.
#   Dies if no column remains after applying the '-g' column selection.
# ------------------------------------------------------------------------
sub CreateImage {
    if ($skip) {
        return;
    }

    $title = $sarstats{$dsname}{'title'};
    $vlabel = $sarstats{$dsname}{'unit'};

    # $dsnames is col1:col2:....
    # we must apply $graph_col_sign and $graph_col_spec here:
    my @ds = split(/:/,$dsnames);
    my $defs = "";
    COL:
    for ( my $idx = 0 ; $idx < scalar(@ds) ; $idx++ ) {
        my $colname = $ds[$idx];
        if ($graph_name_spec ne "") {
            my $listed = grep { $_ eq $colname } @graph_col_spec;
            if ($graph_col_sign eq "+") {
                # the column must be listed
                next COL if !$listed;
            }
            else {
                # the column must not be listed
                next COL if $listed;
            }
        }

        # The color depends on the column position, so a column keeps the
        # same color whatever the selection is.
        my $color = $colors[$idx % scalar(@colors)];
        # For multiple graphs the RRD file differs per item: use a
        # placeholder that is substituted below.
        my $source = ($ismultiple) ? "RRDFILE" : $rrdfile;
        $defs .= "DEF:v$idx=$source:".$colname.":AVERAGE LINE$line_width:v$idx#$color:".$colname." ";
    }
    if ($defs eq "") {
        die("No column selected to display the graph\n");
    }

    # Colons must be escaped inside a COMMENT:
    my $from = localtime($graph_startsec);
    my $to = localtime($graph_endsec);
    $from =~ s/:/\\:/g;
    $to =~ s/:/\\:/g;
    my $legend = '"COMMENT:From '.$from.', To '.$to.'\\c" "COMMENT:\\n"';

    if ($ismultiple) {
        foreach $graphname (keys %graphs) {
            $imgfile = "$imgdir/$dsname$keyname-$graphname.png";

            # set the good RRD file name:
            my $defs2 = $defs;
            $defs2 =~ s!RRDFILE!$rrddir/$dsname$keyname-$graphname.rrd!g;
            if ($keyname ne "") {
                my $keyname2 = substr($keyname,1);  # suppress the leading '-'
                $title = $sarstats{$dsname}{'title'}{$keyname2}." for $graphname";
            }
            else {
                $title = $sarstats{$dsname}{'title'}." $graphname";
            }
            $cmd = "$rrdtool graph $imgfile -t '$title' -s $graph_startsec -e $graph_endsec $logarithmic -S $step -v '$vlabel' -w $imgwidth -h $imgheight -a PNG $legend $defs2 >/dev/null";
            MySystem($cmd);
        }
    }
    else {
        $imgfile = "$imgdir/$dsname$keyname.png";
        $cmd = "$rrdtool graph $imgfile -t '$title' -s $graph_startsec -e $graph_endsec $logarithmic -S $step -v '$vlabel' -w $imgwidth -h $imgheight -a PNG $legend $defs >/dev/null";
        MySystem($cmd);
    }
}

# ------------------------------------------------------------------------
# MySystem($cmd)
#   Runs $cmd (echoed first in verbose mode) and dies if it fails.
#   NOTE(review): $cmd is a single string, so it goes through the shell;
#   directory names given on the command line and item names read from
#   the sar file are interpolated unquoted. Only use trusted input.
# ------------------------------------------------------------------------
sub MySystem {
    my ($cmd) = @_;
    my $status;

    if ($verbose) {
        print $cmd,"\n";
    }
    if ($status = system($cmd)) {
        die("Command '$cmd' failed with return code: $status\n");
    }
}

# ------------------------------------------------------------------------
# Usage()
#   Prints the command line help and exits with status 1.
# ------------------------------------------------------------------------
sub Usage {
    print "Usage: $0\t[-?ov] [-d rrd_dir] [-i img_dir] [-W width] [-H height]\n";
    print "\t\t\t[-s start_date] [-e end_date] [-S step]\n";
    print "\t\t\t[-g graph_spec] [-t DMY|MDY] -f sar_file\n";
    print "Options:\n";
    print "\t-? : this help\n";
    print "\t-v : verbose mode\n";
    print "\t-o : use a logarithmic scale for Y scale\n";
    print "\t-d rrd_dir : directory where RRD files must be created\n";
    print "\t-i img_dir : directory where to place PNG images\n";
    print "\t-W width : images width (in pixels)\n";
    print "\t-H height : images height (in pixels)\n";
    print "\t-s start_date : start date (MM-DD-YYYY HH:MM:SS)\n";
    print "\t-e end_date : end date (MM-DD-YYYY HH:MM:SS)\n";
    print "\t-S step : interval (in seconds) between to values in the graph\n";
    print "\t-g graph_spec: by default all possible graphs are created\n";
    print "\t\tgraph_spec syntax is: data[:(+|-)column[,column...]]\n";
    print "\t\tthis creates only the graph and the specified columns\n";
    print "\t-t MDY|DMY: indicates the format for the date displayed on the 1st output line\n";
    print "\t-f sar_file : file to analyse - create by the 'sar -f ...' command\n";

    exit(1);
}

exit(0);

# EOF