#!/usr/bin/perl

# Subroutine to calculate the coefficients of a least-squares best-fit
# quadratic to a list of X,Y points + weights.
#
# (Written for Perl 5, but compatible with Perl 4 if you delete the "use bignum"
# and "no bignum" directives.)
#
# Copyright 2010, by David A. Burton, 2010
# Cary, NC  USA
# +1-919-481-0149
# Email: http://www.burtonsys.com/email/
#
# You can use a "require" to pull this into another Perl program, like this:
#
#   unshift( @INC, '.' );
#   require "weighted_quadratic_fit.pl";

# TLIB Version Control fills in the version information for us:
$version_str = "";
#--=>keyflag<=-- "&(#)%n, version %v, %d "
$version_str = "&(#)weighted_quadratic_fit.pl, version 2, 28-May-10 ";


$| = 1;   # predefined variable. If <> 0 then each print to the console
          # will immediatly be displayed, instead of buffered.


unshift( @INC, '.' );  # make sure we fetch these modules from the current folder
require "detect_do_or_require.pl";
require "traceback.pl";
shift( @INC );  # restore


#   Note: this function REALLY benefits from the use of bignum.  Without it,
# when using 80-100 data points the results of the 2 methods tend to differ
# in the 6th digit, which means that we only get (at best!) about 5 or 5.5
# significant digits of accuracy, and when using only a few data points
# the results are often just rediculously wrong.  But with bignum those
# problems don't seem to happen.
#   As an alternative to using bignum, you can bias the X data values to be
# near zero, which reduces the range of the numbers calculated, and seems
# to stabilize the method.  But bignum should be available with Perl 5.008
# and later.


# "use bignum" for sub weighted_quadratic_fit and sub three_unknowns
use bignum;  # See http://perldoc.perl.org/bignum.html

unshift( @INC, '.' );  # make sure we fetch this module from the current folder
require "threeunknowns.pl";
shift( @INC );  # restore


# Least-squares fit of a quadratic curve to a set of weighted points.
# Input is three arrays of identical length: @x (the X-coordinates), @y
# (the Y-coordinates), and @w (the weights).  Result is the A, B & C
# coefficients of the Least-Squares best-fit quadratic, Y = A*X^2 + B*X + C.
sub weighted_quadratic_fit {
   local( @x, @y, @w, $x, $y, $w, $N, $a1, $b1, $c1, $d1, $c2, $d2, $d3, $sumw, $A, $B, $C, $i );
   $N = 1+$#_;
   if ($N % 3) {
      die "ERR: number of values passed to sub weighted_quadratic_fit not divisible by 3\n";
   }
   $N = int(0.001+($N/3));
   if ($debugmode) {
      print "weighted_quadratic_fit( $N X,Y,W triples ):\n";
   }
   @x = splice( @_, 0, $N );
   @y = splice( @_, 0, $N );
   @w = @_;

   $a1 = $b1 = $c1 = $d1 = $c2 = $d2 = $d3 = $sumw = 0;
   for ($i=0; $i < $N; $i++) {
      $x = $x[$i];
      $y = $y[$i];
      $w = $w[$i];
      $c2 += ($x * $w);
      $c1 += ($x * $x * $w);
      $b1 += ($x * $x * $x * $w);
      $a1 += ($x * $x * $x * $x * $w);
      $d3 += ($y * $w);
      $d2 += ($x * $y * $w);
      $d1 += ($x * $x * $y * $w);
      $sumw += $w;  # compute sum of weights
   }
   ($A,$B,$C) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$sumw,$d3 );

   if ((defined $debugmode) && ($debugmode > 0)) {
      printf "dbg: [weighted_quadratic_fit/1]  Y  =  (A=%f)x^2  +  (B=%f)x  +  (C=%f)\n", $A, $B, $C;
   }

   # John Lapeyre used mathematica to invert the matrix and translated to perl
   local($m0,$v1,$v2,$v3,$m1,$m2,$m3,$m4);
   $v1=$v2=$v3=$m1=$m2=$m3=$m4=0;
   $m0 = $sumw;
   $v1 = $d1;
   $v2 = $d2;
   $v3 = $d3;
   $m1 = $c2;
   $m2 = $c1;
   $m3 = $b1;
   $m4 = $a1;
   local( $divisor ) = ($m2**3 + $m0*$m3**2 + $m1**2*$m4 - $m2*(2*$m1*$m3 + $m0*$m4));
   local( $A2, $B2, $C2 );
   if (0 == $divisor) {
      warn "ERROR: Lepeyre method, division by zero; m2**3=" . ($m2**3) .
           " m0*m3**2=" . ($m0*$m3**2) . " m1**2*m4=" . ($m1**2*$m4) .
           " m2*(2*m1*m3 + m0*m4)=" . ($m2*(2*$m1*$m3 + $m0*$m4)) . "\n";
      $A2 = $B2 = $C2 = 0;
   } else {
      $A2 = ($m1**2*$v1 - $m0*$m2*$v1 + $m0*$m3*$v2 + $m2**2*$v3 - $m1*($m2*$v2 + $m3*$v3))/
        $divisor;
      $B2 = (-($m1*$m2*$v1) + $m0*$m3*$v1 + $m2**2*$v2 - $m0*$m4*$v2 - $m2*$m3*$v3 + $m1*$m4*$v3)/
        $divisor;
      $C2 =  ($m2**2*$v1 - $m1*$m3*$v1 + $m1*$m4*$v2 + $m3**2*$v3 - $m2*($m3*$v2 + $m4*$v3))/
        $divisor;
   }

   if ((defined $debugmode) && ($debugmode > 0)) {
      printf "dbg: [weighted_quadratic_fit/2]  Y  =  (A=%f)x^2  +  (B=%f)x  +  (C=%f)\n", $A2, $B2, $C2;
   }

   # Return Dr. Rob's / my way first, then John Lapeyre's way second:
   return ($A, $B, $C, $A2, $B2, $C2);
}

no bignum;


# if invoked from command line, print an error message
if (! &invoked_via_do_or_require) {
   print "$0 is intended to be loaded via 'require'.  See comments\n" .
         "in the source code file for instructions.\n";
   exit 1;
}


1;

__END__




Given a scatter plot of X & Y cordinates, with weights for each one,
how does one find the best-fitting quadratic curve?

Let's say that Y = A*X^2 + B*X + C is the best fit curve.

How does one find A, B, and C for the best quadratic fit curve?

- - - - - - - - - - - - - - - - - - - - - - - -

Suppose the points you are given are {(X[i],Y[i]): 1 <= i <= N}.
Also suppose the weights are W[i].

Then you want the values of A, B, and C that minimize the weighted sum
of squares of the deviations of Y[i] from A*X[i]^2 + B*X[i] + C.  They
will give you the best-fitting quadratic equation.

Let the sum of the squares of the deviations be

               N
   F(A,B,C) = SUM (W[i] * (A*X[i]^2 + B*X[i] + C - Y[i])^2).
              i=1

To minimize this, take partial derivatives of F with respect to the
three variables A, B, and C, set them all equal to zero, and solve
simultaneously.

The derivative of a sum is the sum of the derivatives of the
summed terms, and the derivative of a product is, by the
"product rule," as follows:

    d/dx F(x)G(x) = F(x)G'(x) * F'(x)G(x)

The derivative of a constant like W[i] is zero, so when we take
the partial derivatives of F(A,B,C) w/r/t m and b and set them to
zero we have:


   dF/dA = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i])*X[i]^2*W[i] = 0,
   dF/dB = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i])*X[i]*W[i] = 0,
   dF/dC = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i])*W[i] = 0.

(Here all sums range over i = 1, 2, ..., N.)  Dividing by 2 and
rearranging, you can see that these are three simultaneous linear
equations in the three unknowns A, B, and C:

   (SUM X[i]^4*W[i])*A + (SUM X[i]^3*W[i])*B + (SUM X[i]^2*W[i])*C = SUM X[i]^2*Y[i]*W[i]
   (SUM X[i]^3*W[i])*A + (SUM X[i]^2*W[i])*B +   (SUM X[i]*W[i])*C = SUM X[i]*Y[i]*W[i]
   (SUM X[i]^2*W[i])*A +   (SUM X[i]*W[i])*B +        (SUM W[i])*C = SUM Y[i]*W[i]

Okay, let's solve the 3 equations:

For simplicity of notation, let:

   A1=(SUM X[i]^4*W[i])     B1=(SUM X[i]^3*W[i])     C1=(SUM X[i]^2*W[i])  D1=(SUM X[i]^2*Y[i]*W[i])
   A2=(SUM X[i]^3*W[i])=B1  B2=(SUM X[i]^2*W[i])=C1  C2=(SUM X[i]*W[i])    D2=(SUM X[i]*Y[i]*W[i])
   A3=(SUM X[i]^2*W[i])=C1  B3=(SUM X[i]*W[i])=C2    C3=(SUM W[i])         D3=(SUM Y[i]*W[i])

Then the 3 equations are in standard form (except that Dr. Rob's A,B,C
I'm calling x,y,z):

   A1*x + B1*y + C1*z = D1
   A2*x + B2*y + C2*z = D2
   A3*x + B3*y + C3*z = D3

   (x,y,z) = &three_unknowns( A1,B1,C1,D1, A2,B2,C2,D2, A3,B3,C3,D3 );
           = &three_unknowns( A1,B1,C1,D1, B1,C1,C2,D2, C1,C2,C3,D3 );

   A1=(SUM X[i]^4)
   B1=(SUM X[i]^3)
   C1=(SUM X[i]^2)
   D1=(SUM X[i]^2*Y[i])
   C2=(SUM X[i])
   D2=(SUM X[i]*Y[i])
   D3=(SUM Y[i])
   C3=(SUM W[i])

And here's the Perl code:

   $a1 = $b1 = $c1 = $d1 = $c2 = $d2 = $d3 = $c3 = 0;
   for ($i=0; $i < $N; $i++) {
      $x = $x[$i];
      $y = $y[$i];
      $w = $w[$i];
      $c2 += ($x * $w);
      $c1 += ($x * $x * $w);
      $b1 += ($x * $x * $x * $w);
      $a1 += ($x * $x * $x * $x * $w);
      $d3 += ($y * $w);
      $d2 += ($x * $y * $w);
      $d1 += ($x * $x * $y * $w);
      $c3 += $w;
   }
   ($x,$y,$z) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$c3,$d3 );

Or, reverting to Dr. Rob's A,B,C notation:

   ($A,$B,$C) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$c3,$d3 );

