#!/usr/bin/perl

# A program to calculate, for the 1113 locations in this list,
#  http://www.pol.ac.uk/psmsl/datainfo/rlr.trends
# each station's distance from each of the other stations, and for
# each such pair of stations the difference between the two MSL trends.
#
# However, I exclude stations which operated for only a few years, or
# which showed no clear MSL trend (very high standard error), and I
# exclude station pairs which were not operating at substantially the
# same time.
#
# Note: This program is derived from distances_vs_trend_diffs.pl, and
# these two commands are equivalent:
#
#   perl -w distances_vs_trend_diffs_pmsl.pl pmsl_rlr_trends.v03.txt pmsl_trend_v_distance3.csv2 1400
#   perl -w distances_vs_trend_diffs.pl -name=11 -trend=6 -lat=9 -lon=10 pmsl_rlr_trends.v03.txt pmsl_trend_v_distance3.csv2 1400
#
# Run it with no parameters for help.
#
# (This program is compatible with both Perl4 and Perl5.)
#
# Copyright 2010, by David A. Burton
# Cary, NC  USA
# +1-919-481-0149
# Email: http://www.burtonsys.com/email/


# Turn on autoflush for the currently selected output handle (STDOUT, unless
# something has selected another one), so that debug prints show up at once.
$| = 1;

# @INC = ('c:\html\climategate','.');

# print "before 'require splitcsv.pl'\n";
require 'splitcsv.pl';

# Echo the command line which started this run.
print "dbg: cmd='perl -w ", $0, ' ', join(' ', @ARGV), "'\n";


# These (1-based) field numbers can be overridden by command-line options.
$fn_name = 11;  # 11th field is normally the name of the station
$fn_trend = 6;  # 6th field is normally the MSL trend for the station
$fn_lat = 9;  # 9th field is normally the latitude of the station
$fn_lon = 10;  # 10th field is normally the longitude of the station


# Validate the value given with a -name=, -trend=, -lat= or -lon= option.
#   $optarg  is the whole option as typed (used only in the error message)
#   $optval  is the text which followed the '='
# Returns the field number if $optval is a positive integer.  Otherwise it
# prints an error message and exits with status 1.
# (Without this check a missing or non-numeric value silently became 0, and
# since fields are fetched as $sfields[$fn-1] that selected $sfields[-1],
# i.e., the LAST field of every record.)
sub field_number_option {
   local( $optarg, $optval ) = @_;
   if (($optval !~ /^[0-9]+$/) || ($optval < 1)) {
      printf "ERROR: option '%s' needs a field number (a positive integer)\n", $optarg;
      exit 1;
   }
   return $optval + 0;
}


$dbg=0;  # for debug prints
$quiet = 0;  # we don't actually use the -q ("quiet") command line parameter, but we might someday
# Consume leading arguments which begin with '-'; whatever remains in @ARGV
# afterwards is the list of positional arguments.
while (($#ARGV >= 0) && ('-' eq substr($ARGV[0],0,1))) {
   $arg = $ARGV[0];
   if ('-d' eq $arg) {
      $dbg++;  # turn on debug prints
   } elsif ('-q' eq $arg) {
      $quiet = 1;
   } elsif ('-name=' eq substr($arg,0,6)) {
      $fn_name = &field_number_option( $arg, substr($arg,6) );
   } elsif ('-trend=' eq substr($arg,0,7)) {
      $fn_trend = &field_number_option( $arg, substr($arg,7) );
   } elsif ('-lat=' eq substr($arg,0,5)) {
      $fn_lat = &field_number_option( $arg, substr($arg,5) );
   } elsif ('-lon=' eq substr($arg,0,5)) {
      $fn_lon = &field_number_option( $arg, substr($arg,5) );
   } else {
      printf "ERROR: unrecognized command-line option: '%s'\n", $arg;
      exit 1;
   }
   shift @ARGV;
}

# $start_time = time;
# # print "before 'require ctime.pl'\n";
# require 'ctime.pl';  # see the -P and -I option in perl.man


# some needed constants:
$pi = 3.1415926535897932384626433832795;
$pi_over_180 = $pi / 180.0;  # 0.017453292519943295769236907684886;
# Radius of the spherical earth model used by &distance.  It corresponds to
# a circumference of about 40,003,200 meters.
$radius_of_earth = 6366707.0;  # in meters


$num_args = $#ARGV+1;

# After the options have been removed we need an input file name and an
# output file name, optionally followed by {maxkm}.  Anything else gets the
# help text and a failure exit status.
if (($num_args < 2) || ($num_args > 3)) {
   print "distances_vs_trend_diffs_pmsl.pl\n" .
         "\n" .
         "Usage:\n" .
         "   perl -w $0 {options} pmsl_rlr_trends.v03.csv pmsl_trend_v_distance3.csv {maxkm}\n" .
         "\n" .
         "where {maxkm} is the maximum distance (in km) between stations; more distant\n" .
         "pairs to be discarded.  (1400 is a good choice for {maxkm})\n" .
         "\n" .
         "'pmsl_rlr_trends.v03.csv' is processed as follows:\n" .
         "\n" .
         "1) Read all 1113 or 1114 records, noting the Mean Sea Level (MSL) trend for\n" .
         "   each station and the station's coordinates.\n" .
         "2) For each pair of stations, calculate the difference\n" .
         "   in MSL trend (in mm/yr), and the distance between the two stations\n" .
         "   (in km).\n" .
         "2.1) If {maxkm} is specified, then discard pairs which are more distant\n" .
         "   than the specified distance.\n" .
         "3) Write the data to the output file, for subsequent\n" .
         "   loading into a spreadsheet, and plotting.\n" .
         "\n" .
         "Then I loaded pmsl_trend_v_distance3.csv into Excel, sorted by the \"distance\"\n" .
         "column, generated an \"(XY) Scatter Chart,\" and added a red \"Trendlines,\"\n" .
         "for a 100-point moving average.\n" .
         "\n" .
         "The following options are supported:\n" .
         "  -name=xx  (where xx is field number of the station name, default xx=11)\n" .
         "  -trend=xx (where xx is field number of the MSL trend, default xx=6)\n" .
         "  -lat=xx   (where xx is field number of the latitude, default xx=9)\n" .
         "  -lon=xx   (where xx is field number of the longitude, default xx=10)\n" .
         "  -d        (turn on debug prints)\n" .
         "  -q        (quiet; accepted, but not currently used)\n" .
         "\n" .
         "Dave Burton\n" .
         "Cary, NC  USA\n" .
         "+1-919-481-0149\n" .
         "\n";
   exit 1;
}

# Positional arguments: input file, output file, and (optionally) {maxkm}.
$inpfile = $ARGV[0];
$outpfile = $ARGV[1];

# Check the arguments BEFORE the output file is created, so that a bad
# command line cannot truncate an existing file.
if ($inpfile eq $outpfile) {
   # creating the output file would empty the input file before we read it
   printf "ERROR: input and output file are both '%s'\n", $inpfile;
   exit 1;
}
if (defined $ARGV[2]) {
   # A non-numeric {maxkm} would otherwise quietly be treated as 0 (or as
   # its leading digits), giving an empty or wrongly filtered output file.
   if ($ARGV[2] !~ /^([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][-+]?[0-9]+)?$/) {
      printf "ERROR: {maxkm} must be a number (of km), not '%s'\n", $ARGV[2];
      exit 1;
   }
}

# $! is passed as a printf argument, rather than being interpolated into the
# format string, so that a '%' in the system's message cannot be mistaken
# for a conversion.
if (!open(INP,"<$inpfile")) {
   printf "ERROR: could not open '%s', %s\n", $inpfile, $!;
   exit 1;
}
if (!open(OUTP,">$outpfile")) {
   printf "ERROR: could not create '%s', %s\n", $outpfile, $!;
   exit 1;
}
print "reading '$inpfile' and writing '$outpfile'...\n";
if (defined $ARGV[2]) {
   $maxkm = 0+$ARGV[2];
   print "(Discarding points more distant than $maxkm km.)\n";
} else {
   $maxkm = int((4 * $radius_of_earth) / 1000);  # more than half the circumference of the earth, in km
   # nothing is discarded
}



# Classify a field which may hold a latitude or longitude written as
# degrees and minutes followed by a hemisphere letter, e.g. "51 30.5 N".
#   Input:  the field's text (may be undefined)
#   Result: 'LAT' if the hemisphere letter is N or S,
#           'LON' if the hemisphere letter is E or W,
#           undef if the field is undefined or is not in that form.
# Note that the numeric values are NOT range-checked.
sub is_DM {
  local( $fld ) = @_;
  local( $kind );  # stays undefined unless the field is recognized
  if (defined $fld) {
     if ($fld =~ /^([0-9]+)[ \,]([0-9][0-9\.]*)( |)([NSEW])$/) {
        # the 4th group is the hemisphere letter
        $kind = (($4 eq 'N') || ($4 eq 'S')) ? "LAT" : "LON";
     }
  }
  return $kind;
}


# Turn a latitude or longitude given as degrees and minutes plus a
# hemisphere letter (e.g. "51 30.5 N") into signed decimal degrees.
#   Input:  the field's text
#   Result: degrees + minutes/60, negated for the S and W hemispheres;
#           or undef (after a complaint on STDERR) if the text is not in
#           the expected form.
sub DegMin_to_DecimalDeg {
  local( $fld ) = @_;
  local( $decimal );  # undefined until we have a successful conversion
  # In list context the match yields (degrees, minutes, separator, letter),
  # or an empty list when there is no match.
  local( @parts ) = ($fld =~ /^([0-9]+)[ \,]([0-9][0-9\.]*)( |)([NSEW])$/);
  if (!@parts) {
     print STDERR "ERR: DegMin_to_DecimalDeg(\"$fld\")  (not a recognized latitude or longitude)\n";
     return $decimal;
  }
  $decimal = $parts[0] + ($parts[1] / 60);
  if ($parts[3] =~ /[SW]/) {
     # south and west are the negative directions
     $decimal = -$decimal;
  }
  return $decimal;
}



$num_recs = 0;
# Read the spreadsheet records.  Every non-blank input line must describe one
# station.  For each station we keep the name, the MSL trend and the position
# (converted to decimal degrees) in the parallel arrays @rec_name,
# @rec_trend, @rec_lat and @rec_lon, indexed 0 .. $num_recs-1.
$count_inp_lines = 0;
while (<INP>) {
   $count_inp_lines++;
   # Strip the line terminator, whether it is lf, cr-lf, or missing.  Perl 4
   # lacks chomp, and chop is not safe here: it would remove a character of
   # DATA from a final line which has no terminator.
   $_ =~ s/[\r\n]*$//;

   # Split into fields.  (&split_csv is not defined in this file; presumably
   # it comes from splitcsv.pl.)
   @sfields = &split_csv( $_ );
   $num_fields_with_content = 0;
   $first_field_is_only_field = 1;  # (not used anywhere else in this file)
   $fieldnumber = 0;
   foreach $fld (@sfields) {
      # $fld is an alias for the array element, so the clean-up below
      # changes @sfields itself.
      # get rid of leading and trailing whitespace in each field:
      if ($fld =~ /\s/) {
         $fld =~ s/^\s*//;  # strip leading whitespace, too.
         $fld =~ s/\s*$//;  # strip trailing blanks, tabs, cr, lf, etc.
         # get rid of any tabs, and collapse multiple spaces to one space:
         if ($fld =~ /\t|\s\s/) {
            $fld =~ s/\s+/ /g;
         }
      }
      if ('' ne $fld) {
         $num_fields_with_content++;
         if ($fieldnumber > 0) {
            $first_field_is_only_field = 0;
         }
      }

      # # upper-case each field value
      # $fld =~ tr/a-z/A-Z/;

      $fieldnumber++;
   }
   if (!$num_fields_with_content) {
      next;  # skip empty records
   }

   # The station name, MSL trend, latitude and longitude are taken from the
   # (1-based) field numbers $fn_name, $fn_trend, $fn_lat and $fn_lon, which
   # are set near the top of the program and can be overridden by
   # command-line options.

   # A short record may not have these fields at all; treat that like an
   # empty field, so that the error message below can still be printed.
   $latfld = $sfields[$fn_lat-1];
   if (!defined $latfld) {
      $latfld = '';
   }
   $lonfld = $sfields[$fn_lon-1];
   if (!defined $lonfld) {
      $lonfld = '';
   }
   # &is_DM returns undef for an unrecognized field, so test for that
   # before doing the string comparison.
   $latkind = &is_DM($latfld);
   $lonkind = &is_DM($lonfld);
   if ( !defined($latkind) || ($latkind ne 'LAT') ||
        !defined($lonkind) || ($lonkind ne 'LON') ) {
      # Report the input line number, which the user can look up directly.
      printf STDERR "ERR: input line %d ('%s') lacks Lat/Lon in fields $fn_lat-$fn_lon: F$fn_lat='%s', F$fn_lon='%s'.\n", $count_inp_lines, $sfields[0], $latfld, $lonfld;
      exit 1;
   }
   $rec_name[$num_recs] = $sfields[$fn_name-1];
   $rec_trend[$num_recs] = $sfields[$fn_trend-1] + 0.0;  # force to a number
   $rec_lat[$num_recs] = &DegMin_to_DecimalDeg($latfld);
   $rec_lon[$num_recs] = &DegMin_to_DecimalDeg($lonfld);
   $num_recs++;
}
print "$num_recs station records were read from $inpfile\n";
close INP;


# Absolute value of a number: the argument itself when it is zero or
# positive, otherwise its negation.  (Call it as &abs(...) to get this
# subroutine.)
sub abs {
   local( $x ) = @_;
   return ($x < 0) ? (- $x) : $x;
}


# Compute distance between two locations via the Haversine Formula.
# (per http://mathforum.org/library/drmath/view/51879.html)
# Input is the two (latitude,longitude) pairs, in degrees.
# Output is the distance between the two locations, in meters.
# Uses the globals $pi_over_180 and $radius_of_earth.
# Formula is:
#   dlon = lon2 - lon1
#   dlat = lat2 - lat1
#   a = (sin(dlat/2))^2 + cos(lat1) * cos(lat2) * (sin(dlon/2))^2
#   c = 2 * atan2(sqrt(a), sqrt(1-a))
#   d = R * c
sub distance {
   local( $lat1, $lon1, $lat2, $lon2 ) = @_;
   # print "dbg: calculate distance between ($lat1,$lon1) and ($lat2,$lon2)\n";
   # first, convert degrees to radians:
   $lat1 *= $pi_over_180;
   $lon1 *= $pi_over_180;
   $lat2 *= $pi_over_180;
   $lon2 *= $pi_over_180;
   # then calculate according to the Haversine Formula:
   local( $dlon ) = $lon2 - $lon1;
   local( $dlat ) = $lat2 - $lat1;
   local( $a1 ) = sin( $dlat/2.0 );
   $a1 = $a1 * $a1;
   local( $a2 ) = sin( $dlon/2.0 );
   $a2 = $a2 * $a2;
   local( $hav ) = $a1 + (cos($lat1) * cos($lat2) * $a2);
   # Mathematically 0 <= a <= 1, but rounding can leave it a hair above 1
   # for antipodal points, and a latitude outside -90..90 (the input is not
   # range-checked) can make it negative.  Either way sqrt() would be given
   # a negative number, which is a fatal error, so clamp it.
   if ($hav > 1.0) {
      $hav = 1.0;
   }
   if ($hav < 0.0) {
      $hav = 0.0;
   }
   local( $c ) = 2.0 * atan2( sqrt($hav), sqrt(1.0-$hav) );
   return $radius_of_earth * $c;
}


## Some sanity checks, to verify that &distance works:
#
# $d1 = &distance( 0.0, 0.0, 0.0, 1.0 );
# # should be 1/360 of the circumference of the earth, which is
# # (1/360) * 40,003,200 meters = 111,120 meters.
# print " $d1 should be about 111,120 meters.\n";
#
# $d1 = &distance( 0.0, -1.0, 0.0, 0.0 );
# # should be 1/360 of the circumference of the earth, which is
# # (1/360) * 40,003,200 meters = 111,120 meters.
# print " $d1 should be about 111,120 meters.\n";
#
# $d1 = &distance( 0.0, -1.0, 0.0, 1.0 );
# # should be 2/360 of the circumference of the earth, which is
# # (2/360) * 40,003,200 meters = 222,240 meters.
# print " $d1 should be about 222,240 meters.\n";
#
# $d1 = &distance( 0.0, 1.0, 0.0, -1.0 );
# # should be 2/360 of the circumference of the earth, which is
# # (2/360) * 40,003,200 meters = 222,240 meters.
# print " $d1 should be about 222,240 meters.\n";
#
# $d1 = &distance( 0.0, -90.0, 0.0, 90.0 );
# # should be 1/2 of the circumference of the earth, which is
# # (1/2) * 40,003,200 meters = 20,001,600 meters.
# print " $d1 should be about 20,001,600 meters.\n";
#
# $d1 = &distance( 90.0, 0.0, -90.0, 0.0 );
# # should be 1/2 of the circumference of the earth, which is
# # (1/2) * 40,003,200 meters = 20,001,600 meters.
# print " $d1 should be about 20,001,600 meters.\n";
#
# $d1 = &distance( 0.0, 0.0, 0.0, 0.000009 );
# # should be about 1 meter.
# print " $d1 should be about 1 meter.\n";


# For each pair of locations, generate a composite name ("name1 / name2"),
# the distance between the two locations, and the difference between the
# MSL trends at the two locations.  Then (unless the distance is $maxkm or
# more) write the three fields to the output file, in CSV format:

$count_outp_lines = 0;
print OUTP "Locations,distance (km),MSL trend difference\n";

# Prepare the short form of each station name just once, rather than once
# for every pair the station takes part in.
for ($s1 = 0; $s1 < $num_recs; $s1++) {
   $short_name[$s1] = $rec_name[$s1];
   $short_name[$s1] =~ s/[\,\/].*$//;  # trim off country names, for conciseness
   $short_name[$s1] =~ s/"/""/g;  # the name goes into a quoted CSV field, so double any quotes
}

# Visit each unordered pair of stations exactly once ($s1 < $s2).
for ($s1 = 0; $s1 < ($num_recs-1); $s1++) {
   for ($s2 = $s1+1; $s2 < $num_recs; $s2++) {
      $trenddif = &abs( $rec_trend[$s1] - $rec_trend[$s2] );
      $dist = &distance( $rec_lat[$s1], $rec_lon[$s1], $rec_lat[$s2], $rec_lon[$s2] ) / 1000.0;
      # divided by 1000 to convert meters to km
      if ($dist < $maxkm) {
         print OUTP '"' . $short_name[$s1] . ' / ' . $short_name[$s2] . '",' . $dist . ',' . $trenddif . "\n";
         $count_outp_lines++;
      }
   }
}
# Buffered output is only certain to have been written once close succeeds,
# so a failure here (a full disk, for instance) must not pass unnoticed.
if (!close(OUTP)) {
   printf "ERROR: could not finish writing '%s', %s\n", $outpfile, $!;
   exit 1;
}


print "read $count_inp_lines records from '$inpfile'\n" .
      "wrote $count_outp_lines records to '$outpfile'\n";

exit 0;

__END__

