#!/usr/bin/perl


# This program is compatible with both 32-bit Perl 4.036 and Perl 5.
#
# Copyright 2010, by David A. Burton
# Cary, NC  USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/


# Turn on autoflush for the selected output handle, so that debug prints
# appear immediately.
$| = 1;


# ---- globals controlled by the command-line options ----
$debugmode = 0;           # bumped by each '-d'; >0 enables debug prints
$givehelp = 0;            # nonzero after '-?', '-h' or '--help'
$opt_sd = 0;              # nonzero after '-s': calculate & output standard deviations
$samples_per_group = 30;  # '-n=30' is the default (assume 30 days per month)

# Consume every leading argument that begins with '-'.  Whatever remains in
# @ARGV afterwards is treated as the positional argument list.
while (($#ARGV >= 0) && ($ARGV[0] =~ /^-/)) {
   $opt = shift @ARGV;
   if ($opt eq '-d') {
      # turn on (or increase the level of) debug prints
      $debugmode++;
   } elsif ($opt eq '-s') {
      # output standard deviation
      $opt_sd++;
   } elsif ($opt =~ /^\-n\=([0-9\.]+)$/) {
      # a group size below 2 is ignored and the default is kept
      $tmp = $1 + 0;
      if ($tmp >= 2) {
         $samples_per_group = $tmp;
         print "$tmp samples per group\n";
      }
   } elsif (($opt eq '-?') || ($opt eq '-h') || ($opt eq '--help')) {
      $givehelp++;
   } else {
      printf "ERROR: unrecognized command-line option: '%s'\n", $opt;
      exit 1;
   }
}

# number of positional arguments left after the options
$num_args = $#ARGV+1;

# Exactly one positional argument (the input file) is required; otherwise,
# or when help was asked for, print the usage text and exit with status 1.
if ($givehelp || ($num_args != 1)) {
   print "psmsl_monthly2yearly.pl -- Calculate annual Local Mean Sea Level data from\n",
         "PSMSL's monthly LMSL data; output is same format as PSMSL's annual LMSL data.\n",
         "(See http://www.psmsl.org/data/obtaining/notes.php )\n",
         "\n",
         "Usage:\n",
         "\n",
         "   perl -w $0 [options] psmsl_auckland_monthly_rlrdata\n",
         "or:\n",
         "   perl -w $0 [options] psmsl_auckland_monthly_rlrdata >annualized_gmsl.lis\n",
         "\n",
         "where [options] can be a combination (or none) of the following:\n",
         "  -n=xxx  Assume xxx samples per group (default is 30)\n",
         "  -d  enable Debug prints\n",
         "  -s  output standard deviations as 5th field\n",
         "  -?  print this Help message\n",
         "\n";
   exit 1;
}


# Work out which Perl is running, for the debug banner.  The first pattern
# looks for an RCS-style "$$Revision: x.y" tag in $] (presumably what Perl 4
# reports); the second accepts a plain version number.
if ($] =~ /\$\$Revision\:\s*([0-9.]+)\s/) {
   $perlver = $1;  # probably 4.something
} elsif ($] =~ /([0-9][0-9.]*)/) {
   $perlver = $1;  # probably 5.something or 6.something
} else {
   $perlver = "3 or earlier";
}
print "You are using Perl version $perlver\n" if ($debugmode);

# The helper library is searched for only in the current directory and its
# parent, because @INC is replaced rather than extended.
@INC = ('.','..');
require "composite_sd.pl";  # define &sum, &avg, &sample_SD and &composite_SD


# Parity test.
#   input:  an integer
#   result: 1 (true) if the integer is odd, null string (false) if it is even
sub odd {
   local($n) = @_;
   local($low_bit) = $n & 1;   # isolate the least-significant bit
   return ($low_bit == 1);
}


# for sorting (from the Camel Book)
# Comparison routine meant to be named in "sort numerically LIST": sort
# supplies the two elements being compared in $a and $b, and the result of
# <=> (negative, zero or positive) gives ascending numeric order.
sub numerically { $a <=> $b; }


# Median of a list of numbers.
#   input:  an array of numbers, in any order
#   result: the middle value of the sorted list, or the mean of the two
#           middle values when the list has an even number of entries
sub median {
   local(@sorted) = sort numerically @_;
   local($last) = $#sorted;                     # index of the final element
   local($mid) = int( 0.01 + ($last / 2) );     # lower-middle index
   if (&odd($last)) {
      # an odd last index means there are an even number of values,
      # so average the two that straddle the middle
      return ($sorted[ $mid ] + $sorted[ $mid+1 ]) / 2;
   }
   return $sorted[ $mid ];
}


# Weighted average.
#   input:  a flat list holding N samples followed by their N weights,
#           e.g. &weighted_avg( @samples, @weights )
#   result: sum(sample*weight) / sum(weight).  If the weights add up to zero
#           the division is skipped and the un-normalized weighted sum is
#           returned (0 for an empty list).
#   dies:   when the argument count is odd, i.e. samples and weights cannot
#           be paired up.
sub weighted_avg {
   local( $sum, $sum_of_weights );
   local( @vals, @weights );
   local( $n ) = int((1+$#_) / 2);
   local( $i );   # keep the loop counter from clobbering the caller's $i
   if (((2*$n)-1) != $#_) {
      die "ERR: odd number of values passed to sub weighted_avg\n";
   }
   # first half of the arguments are the samples, second half the weights
   @vals = splice( @_, 0, $n );
   @weights = @_;
   $sum = 0;
   $sum_of_weights = 0;
   for ($i=0; $i<$n; $i++) {
      $sum += $vals[$i] * $weights[$i];
      $sum_of_weights += $weights[$i];
   }
   if ($sum_of_weights != 0) {
      $sum = $sum / $sum_of_weights;
   }
   return $sum;
}



# Open the input file named by the first remaining command-line argument.
# On failure, report the OS error and exit with status 1.
$inpfile = $ARGV[0];
if (!open(INP,"<$inpfile")) {
   # $! is passed as a printf argument rather than interpolated into the
   # format, so a '%' in the error text cannot be taken as a conversion.
   printf "ERROR: could not open '%s', %s\n", $inpfile, $!;
   exit 1;
}


# Absolute value.
#   input:  a number
#   result: the number with any negative sign removed
sub abs {
   local( $val ) = shift;
   return ($val < 0) ? (- $val) : $val;
}



# Per-year accumulators.  They are read as inputs by &analyze_one_yr:
#   @m        the monthly means collected for the year (12 when complete)
#   @weights  one weight per entry of @m, used for the weighted average
#   @sd       monthly standard deviations (the code that would fill this
#             array is currently commented out)
#   $sum_missing_days, $sum_attn_flags   running totals for the year
@m = ();
@sd = ();
@weights = ();
$sum_missing_days = 0;
$sum_attn_flags = 0;


# Summarize one year of monthly data and print one line of annual output.
#
#   parameter: $year -- label for the year (normally four digits); an
#              undefined value is treated as '0000'
#   inputs (globals): @m (monthly means), @weights (one weight per mean),
#              $sum_missing_days, $sum_attn_flags, $opt_sd, $debugmode
#   output:    " year;mean;flag;attn" on STDOUT, with the standard deviation
#              as a 5th field when $opt_sd is set.  The mean is -99999 when
#              two or more months are missing.  The flag is 'Y' when exactly
#              one month is missing or at least 30 days are missing in
#              total, otherwise 'N'.
#   side effect: the accumulators are cleared on every path except the
#              "year 0 with no data" return, where they are already empty.
sub analyze_one_yr {
   local( $year ) = @_;
   local( $mean1, $i, $num_missing_months, $missing_days_flag, $sd );
   # local( $sd1 );

   # Default the label first, so the numeric comparison below never sees undef.
   if (!defined $year) {
      $year = '0000';
   }
   # Nothing has been collected yet (e.g. the call made before the first year).
   if ((0 == $year) && (-1 == $#m)) {
      return;
   }
   $num_missing_months = (12 - (1+$#m));
   if (11 < $#m) {
      print "ERR: analyze_one_yr($year), > 12 months.   \$\#m=$#m\n";
      # Discard the bad year completely, so that none of its means, weights
      # or counters can leak into the next year's result.
      @m = @sd = @weights = ();
      $sum_missing_days = 0;
      $sum_attn_flags = 0;
      return;
   }
   # months that are present but carry the missing-value marker count too
   foreach $i (@m) {
      if ($i == -99999) {
         $num_missing_months++;
      }
   }
   $missing_days_flag = 'N';
   if ($num_missing_months >= 2) {
      # too little data for an annual mean
      $mean1 = -99999;
      $sd = 0;
   } else {
      if ((1 == $num_missing_months) || ($sum_missing_days >= 30)) {
         $missing_days_flag = 'Y';
      }
      # Now the 12 means are in @m

      # For Auckland, the two ways of averaging give identical results, which
      # are close but aren't always exactly equal to PSMSL's annual MSL data:
      # $mean1 = &avg( @m );
      $mean1 = &weighted_avg( @m, @weights );
      # Unfortunately, PSMSL doesn't give us Standard Deviation or Confidence Interval
      # for their MSL data, so we do the best we can.
      # $sd = &composite_SD( 1+$#m, @m, @sd, $samples_per_group );  # this is what we SHOULD do
      $sd = &sample_SD( @m );  # this is what we ACTUALLY do

      if ($debugmode) {
         printf "dbg: %s;%6.0f;%s;%03o;%8.1f  ", $year, $mean1, $missing_days_flag, $sum_attn_flags, $sd;
         print " wt=(" . join(',',@weights) . ")\n";
      }
   }
   if ($opt_sd) {
      printf " %s;%6.0f;%s;%03o;%6.1f\n", $year, $mean1, $missing_days_flag, $sum_attn_flags, $sd;
   } else {
      printf " %s;%6.0f;%s;%03o\n", $year, $mean1, $missing_days_flag, $sum_attn_flags;
   }
   # start the next year with empty accumulators
   @m = @sd = @weights = ();
   $sum_missing_days = 0;
   $sum_attn_flags = 0;
} #analyze_one_yr


# SAMPLE INPUT:
#
# 1904.0416;  6923; 0;000
# 1904.1250;  6983; 0;000
# 1904.2084;  6963; 0;000
# 1904.2916;  6953; 0;000
# 1904.3750;  6963; 0;000
# 1904.4584;  6963; 0;000
# 1904.5416;  6863; 0;000
# 1904.6250;  6843; 0;000
# 1904.7084;  6893; 0;000
# 1904.7916;  6883; 0;000
# 1904.8750;  6823; 0;000
# 1904.9584;  6833; 0;000
# 1905.0416;  6883; 0;000
# 1905.1250;  6883; 0;000

# SAMPLE OUTPUT:
#
# 1904;  6907;N;000


# Main loop: read the monthly records, group them by calendar year, and hand
# each completed year to &analyze_one_yr.
#
# Each input line has the fields "decimal_year; msl; missing_days; flags".
# A monthly value of -99999 marks a missing month; such months are left out
# of @m and @weights, though their missing-day count and flags are still
# accumulated.
$year = '0000';
@m = @sd = @weights = ();
$sum_missing_days = 0;
$count_inp_lines = 0;
$sum_attn_flags = 0;
while (<INP>) {
   $count_inp_lines++;
   $_ =~ s/^\s+//g;  # delete any leading whitespace
   if ($debugmode > 1) {
      print "dbg: in = $_";
   }
   $_ =~ s/\s+$//;   # delete the line terminator and any trailing whitespace
   if ($_ eq '') {
      next;  # a blank line must not be mistaken for the start of a new year
   }
   ($year_i, $msl_i, $num_missing_days, $flagged_for_attention) = split( /\;\s*/, $_ );
   if (!defined($msl_i) || ($msl_i eq '')) {
      print STDERR "WARNING: ignoring malformed input line $count_inp_lines\n";
      next;
   }
   if (!defined($num_missing_days) || ($num_missing_days eq '')) {
      $num_missing_days = 0;
   }
   if (!defined($flagged_for_attention) || ($flagged_for_attention eq '')) {
      $flagged_for_attention = '0';
   }

   # Weight THIS month by the fraction of its days that are present.  It has
   # to be recomputed for every line; otherwise each month would inherit the
   # weight of the first month of its year.  (Every month is treated as
   # having 31 days.)
   $weight = (31 - $num_missing_days) / 31;
   if ($debugmode && ($num_missing_days > 0)) {
      print "dbg: yr=$year_i, num_missing_days=$num_missing_days, wt=$weight\n";
   }

   if ($year_i !~ "^$year") {
      # when we see a new year, process the data for the previous year
      &analyze_one_yr($year);
      # now start collecting the new year with clean accumulators
      $year = substr($year_i,0,4);
      @m = @sd = @weights = ();
      $sum_attn_flags = 0;
      $sum_missing_days = 0;
   }

   # save this month's value, unless it is the missing-value marker
   if ($msl_i != -99999) {
      push( @m, $msl_i );
      push( @weights, $weight );
   }
   # push( @sd, $sd_i );

   # The yearly flags are printed with %03o, so the monthly flag field is
   # read as octal and combined with a bitwise OR; then a flag set in any
   # month shows up unchanged in the yearly output.
   # NOTE(review): presumably each digit is an independent flag -- confirm
   # against the PSMSL format notes.
   $sum_attn_flags |= oct($flagged_for_attention);
   $sum_missing_days += $num_missing_days;
}
# at end-of-file; the final year's data still needs to be analyzed
# (&analyze_one_yr itself ignores the case where no year was ever seen)
&analyze_one_yr($year);
close INP;

__END__

