#!/usr/bin/perl

# Run this program with no parameters for help.
#
# This program is compatible both with 32-bit Perl 4.036, and Perl 5.
#
# by David A. Burton
# Cary, NC  USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/


# immediate output of debug prints
$| = 1;


$debugmode=0;  # for debug prints

# Strip leading options (arguments beginning with '-') from the front of
# @ARGV, leaving only the file-name arguments behind.
#
# The only recognized option is -d, which may be given more than once; each
# occurrence increments $debugmode.  Any other option prints an error
# message and exits with status 1.
sub parse_options {
   while (($#ARGV >= 0) && ('-' eq substr($ARGV[0],0,1))) {
      if ('-d' eq $ARGV[0]) {
         $debugmode++;  # turn on debug prints
      } else {
         printf "ERROR: unrecognized command-line option: '%s'\n", $ARGV[0];
         exit 1;
      }
      # Consume exactly one argument per option.  Shifting inside the -d
      # branch as well would also discard the argument that follows -d
      # (normally the input file name).
      shift @ARGV;
   }
}

&parse_options;


# Work out a printable Perl version number from $].  The first pattern
# looks for an RCS-style "$$Revision: n.n" stamp (probably 4.something);
# the second accepts a plain leading number (probably 5.something or
# 6.something).  If neither matches we assume a very old Perl.
if (($] =~ /\$\$Revision\:\s*([0-9.]+)\s/) || ($] =~ /([0-9][0-9.]*)/)) {
   $perlver = $1;  # capture from whichever pattern matched
} else {
   $perlver = "3 or earlier";
}
print "You are using Perl version $perlver\n" if ($debugmode);


# Exactly two arguments (input file, output file) are required once the
# options have been removed; otherwise show the help text and quit.
$num_args = @ARGV;  # an array assigned to a scalar yields its element count

if (2 != $num_args) {
   print
       "church_white_averager.pl -- massage church_white_grl_gmsl.lis (monthly data)\n",
       "to match the format of church_white_new_gmsl.lis (annual data).\n",
       "(Input files are from  http://www.pol.ac.uk/psmsl/author_archive/church_white/)\n",
       "\n",
       "Usage:\n",
       "\n",
       "   perl -w $0 church_white_grl_gmsl.lis output.lis\n",
       "or:\n",
       "   perl -w $0 church_white_grl_gmsl.lis output.csv\n",
       "\n",
       "If output file ends in \".csv\" then format will be comma-separated-values.\n",
       "\n",
       "This program can also be used to convert church_white_new_gmsl.lis to .csv format.\n",
       "\n",
       "If run under Windows, output will have CR+LF line delimiters.  If run under\n",
       "Linux, output will have LF-only line delimiters.\n";
   exit 1;
}

# The first remaining argument is the input file, the second the output file.
($inpfile, $outpfile) = @ARGV;

# Open both files up front; give up with exit status 1 if either fails.
unless (open(INP,"<$inpfile")) {
   printf "ERROR: could not open '%s', $!\n", $inpfile;
   exit 1;
}
unless (open(OUTP,">$outpfile")) {
   printf "ERROR: could not create '%s', $!\n", $outpfile;
   exit 1;
}
print "reading '$inpfile' and writing '$outpfile'...\n" if ($debugmode);

# An output name ending in ".csv" (any letter case) selects CSV output.
$output_in_CSV = ($outpfile =~ /\.csv$/i) ? 1 : 0;



# SAMPLE INPUT:
#
#   years    GMSL (mm)   SD
# ---------  ---------  -----
# 1870.0417   -75.61    22.19
# 1870.1250   -75.68    22.19
# 1870.2083   -74.13    22.19
# 1870.2917   -74.06    22.19
# 1870.3750   -73.23    22.19
# 1870.4583   -74.47    22.19
# 1870.5417   -72.76    22.19
# 1870.6250   -70.71    22.19
# 1870.7083   -71.18    22.19
# 1870.7917   -70.77    22.19
# 1870.8750   -78.15    22.19
# 1870.9583   -81.73    22.19
# 1871.0417   -86.35    22.19
# 1871.1250   -82.06    22.19
# 1871.2083   -78.42    22.19
#...
# 2001.7917   118.71     8.94
# 2001.8750   114.17     9.05
# 2001.9583   110.85     9.05


# SAMPLE OUTPUT FORMAT:
#
#  years   GMSL (mm)   SD
# -------  ---------  -----
#  1870.5  -104.53    22.73
#  1871.5  -108.49    22.73
#...
#  2001.5   115.24     5.07
#  2002.5   115.28     5.17
#  2003.5   121.73     5.38
#  2004.5   123.41     6.11
#  2005.5   128.05     6.43
#  2006.5   141.67     6.87
#  2007.5   137.64     7.22


# The year currently being accumulated; '0000' means "none seen yet".
$year = '0000';
# Running totals for the current year, from which the averages are computed:
$year_sum    = 0;
$gmsl_sum    = 0;
$sd_sum      = 0;
$month_count = 0;


# Calculate the annual averages from the accumulated monthly values,
# write one line of output, and zero the accumulating sums.
#
# Takes no arguments; works entirely on globals:
#   reads   $year_sum, $gmsl_sum, $sd_sum, $month_count, $output_in_CSV
#   writes  one line to the OUTP filehandle
#   resets  the four accumulators to 0 and increments $count_outp_lines
#
# Does nothing at all when $month_count is 0 (no data accumulated), so it
# is safe to call before the first data line and again at end of input.
sub writetheline {
   if ($month_count > 0) {
      $year_o = sprintf( "%6.1f", $year_sum / $month_count );
      $gmsl_o = sprintf( "%7.2f", $gmsl_sum / $month_count );
      $sd_o = sprintf( "%5.2f", $sd_sum / $month_count );
      # The averaged time stamp of a complete year is expected to be
      # YYYY.5; anything else is reported (but still written).
      if ($year_o !~ /\.5$/) {
         print "Warning: year '$year_o' doesn't end in '.5'\n";
      }
      if ($output_in_CSV) {
         # No blank padding is needed in .csv format.  The year is stripped
         # too, so that a year below 1000 does not get a leading blank.
         $year_o =~ s/ //g;
         $gmsl_o =~ s/ //g;
         $sd_o =~ s/ //g;
         printf OUTP "%s,%s,%s\n", $year_o, $gmsl_o, $sd_o;
      } else {
         # this format matches church_white_new_gmsl.lis
         printf OUTP " %s  %s    %s\n", $year_o, $gmsl_o, $sd_o;
      }
      $year_sum = $gmsl_sum = $sd_sum = $month_count = 0;
      $count_outp_lines++;
   }
}


# Main loop: read the input a line at a time, add each line's values to the
# running sums for its year, and write the averaged line for the previous
# year whenever the year changes.  $count_inp_lines counts every line read
# (including skipped ones); $count_outp_lines counts lines written.
$count_inp_lines = $count_outp_lines = 0;
while (<INP>) {
   $count_inp_lines++;
   $_ =~ s/^\s+//g;  # delete any leading whitespace
   if ('' eq $_) {
      # Blank line (e.g. at end of file).  It must not be accumulated:
      # it would count as a month of zeros and would set $year to '',
      # after which no later line would ever register as a new year.
      next;
   }
   ($year_i, $gmsl_i, $sd_i) = split( /\s+/, $_ );
   if ((1==$count_inp_lines) && ($year_i =~ /year/i)) {
      next;  # if there's a header line, ignore it
   }
   if ($year_i !~ /^[0-9]/) {
      # Not a data line (e.g. a row of dashes under the header); skipping
      # it avoids writing a bogus all-zero output line.
      print "Warning: ignoring line $count_inp_lines of '$inpfile' (does not start with a year)\n";
      next;
   }
   if ($year_i !~ "^$year") {
      # when we see a new year, write the averaged data for the previous year
      &writetheline;
      $year = substr($year_i,0,4);
   }
   $year_sum += $year_i;
   $gmsl_sum += $gmsl_i;
   $sd_sum += $sd_i;
   $month_count++;
}
close INP;
&writetheline;  # flush the final year
# Buffered output is only guaranteed to have been written once close
# succeeds, so a failure here (e.g. disk full) must be reported.
if (!close(OUTP)) {
   printf "ERROR: could not finish writing '%s', $!\n", $outpfile;
   exit 1;
}

print "$0 read $count_inp_lines lines from '$inpfile'\n" .
      "  and wrote $count_outp_lines lines to '$outpfile'\n";

exit 0;

__END__

