#!/usr/bin/perl

# Subroutine to calculate the coefficients of a least-squares best-fit
# quadratic to a list of X,Y points.
#
# (Written for Perl 5, but compatible with Perl 4 if you delete the "use bignum"
# and "no bignum" directives.)
#
# Copyright 2010, by David A. Burton, 2010
# Cary, NC  USA
# +1-919-481-0149
# Email: http://www.burtonsys.com/email/
#
# You can use a "require" to pull this into another Perl program, like this:
#
#   unshift( @INC, '.' );
#   require "quadratic_fit.pl";

# TLIB Version Control fills in the version information for us:
$version_str = "";
#--=>keyflag<=-- "&(#)%n, version %v, %d "
$version_str = "&(#)quadratic_fit.pl, version 7, 27-May-10 ";


$| = 1;   # predefined variable. If <> 0 then each print to the console
          # will immediatly be displayed, instead of buffered.


unshift( @INC, '.' );  # make sure we fetch these modules from the current folder
require "detect_do_or_require.pl";
require "traceback.pl";
shift( @INC );  # restore


#   Note: this function REALLY benefits from the use of bignum.  Without it,
# when using 80-100 data points the results of the 2 methods tend to differ
# in the 6th digit, which means that we only get (at best!) about 5 or 5.5
# significant digits of accuracy, and when using only a few data points
# the results are often just rediculously wrong.  But with bignum those
# problems don't seem to happen.
#   As an alternative to using bignum, you can bias the X data values to be
# near zero, which reduces the range of the numbers calculated, and seems
# to stabilize the method.  But bignum should be available with Perl 5.008
# and later.


# "use bignum" for sub quadratic_fit and sub three_unknowns
use bignum;  # See http://perldoc.perl.org/bignum.html

unshift( @INC, '.' );  # make sure we fetch this module from the current folder
require "threeunknowns.pl";
shift( @INC );  # restore


# Input is a series of at least three X,Y pairs.  Result is the A, B & C
# coefficients of the Least-Squares best-fit quadratic, Y = A*X^2 + B*X + C.
sub quadratic_fit {
   local( $x, $y, $N, $a1, $b1, $c1, $d1, $c2, $d2, $d3, $A, $B, $C );
   $a1 = $b1 = $c1 = $d1 = $c2 = $d2 = $d3 = $N = 0;
   while ($#_ > 0) {
      $N++;
      $x = shift @_;
      $y = shift @_;
      $c2 += $x;
      $c1 += ($x * $x);
      $b1 += ($x * $x * $x);
      $a1 += ($x * $x * $x * $x);
      $d3 += $y;
      $d2 += ($x * $y);
      $d1 += ($x * $x * $y);
   }
   ($A,$B,$C) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$N,$d3 );

   if ((defined $debugmode) && ($debugmode > 0)) {
      printf "dbg: [quadratic_fit/1]  Y  =  (A=%f)x^2  +  (B=%f)x  +  (C=%f)\n", $A, $B, $C;
   }


   # Now, to check this, I calculate it a different way.
   # ADAPTED FROM:
   # pfit: linear least square fit to quadratic function
   # self contained, needs only perl. Should run on most any
   # unix or linux or Mac OS X machine.
   # John Lapeyre Tue Jan 28 18:45:19 MST 2003
   # lapeyre@physics.arizona.edu

   local($m0,$v1,$v2,$v3,$m1,$m2,$m3,$m4);
   $v1=$v2=$v3=$m1=$m2=$m3=$m4=0;
   $m0 = $N;  # number of data points
  # for($ii=0;$d2<$N;$d2++) {
  #     $x=$x[$ii];
  #     $y=$y[$ii];
  #     $v1 += $y*$x**2;
  #     $v2 += $y*$x;
  #     $v3 += $y;
  #     $m1 += $x;
  #     $m2 += $x**2;
  #     $m3 += $x**3;
  #     $m4 += $x**4;
  # }
   $v1 = $d1;
   $v2 = $d2;
   $v3 = $d3;
   $m1 = $c2;
   $m2 = $c1;
   $m3 = $b1;
   $m4 = $a1;

   # John Lapeyre used mathematica to invert the matrix and translated to perl

   local( $divisor ) = ($m2**3 + $m0*$m3**2 + $m1**2*$m4 - $m2*(2*$m1*$m3 + $m0*$m4));
   local( $A2, $B2, $C2 );
   if (0 == $divisor) {
      warn "ERROR: Lepeyre method, division by zero; m2**3=" . ($m2**3) .
           " m0*m3**2=" . ($m0*$m3**2) . " m1**2*m4=" . ($m1**2*$m4) .
           " m2*(2*m1*m3 + m0*m4)=" . ($m2*(2*$m1*$m3 + $m0*$m4)) . "\n";
      $A2 = $B2 = $C2 = 0;
   } else {
      $A2 = ($m1**2*$v1 - $m0*$m2*$v1 + $m0*$m3*$v2 + $m2**2*$v3 - $m1*($m2*$v2 + $m3*$v3))/
        $divisor;
      $B2 = (-($m1*$m2*$v1) + $m0*$m3*$v1 + $m2**2*$v2 - $m0*$m4*$v2 - $m2*$m3*$v3 + $m1*$m4*$v3)/
        $divisor;
      $C2 =  ($m2**2*$v1 - $m1*$m3*$v1 + $m1*$m4*$v2 + $m3**2*$v3 - $m2*($m3*$v2 + $m4*$v3))/
        $divisor;
   }

   if ((defined $debugmode) && ($debugmode > 0)) {
      printf "dbg: [quadratic_fit/2]  Y  =  (A=%f)x^2  +  (B=%f)x  +  (C=%f)\n", $A2, $B2, $C2;
   }

   # Return Dr. Rob's / my way first, then John Lapeyre's way second:
   return ($A, $B, $C, $A2, $B2, $C2);
}

no bignum;


# if invoked from command line, print an error message
if (! &invoked_via_do_or_require) {
   print "$0 is intended to be loaded via 'require'.  See comments\n" .
         "in the source code file for instructions.\n";
   exit 1;
}


1;

__END__



http://mathforum.org/library/drmath/view/53796.html
Date: 02/21/2002 at 01:57:27
From: Avin Sinanan
Subject: Best fit Quadratic Curve

Hello,

I would please like to know how, given a scatter plot of X and Y
cordinates, one finds the best-fitting quadratic curve.

Let's say that Y = A*X^2 + B*X + C is the best fit curve.

How does one find A, B, and C for the best quadratic fit curve?

I looked on many Web sites and they mention that it can be done, but
they never show how to do it.

Thanks,
Yours respectfully,
Avin Sinanan

- - - - - - - - - - - - - - - - - - - - - - - -
Date: 02/21/2002 at 07:52:20
From: Doctor Rob
Subject: Re: Best fit Quadratic Curve

Thanks for writing to Ask Dr. Math, Avin.

Suppose the points you are given are {(X[i],Y[i]): 1 <= i <= N}.
Then you want the values of A, B, and C that minimize the sum of
squares of the deviations of Y[i] from A*X[i]^2 + B*X[i] + C.  They
will give you the best-fitting quadratic equation.

Let the sum of the squares of the deviations be

               N
   F(A,B,C) = SUM (A*X[i]^2+B*X[i]+C-Y[i])^2.
              i=1

To minimize this, take partial derivatives of F with respect to the
three variables A, B, and C, set them all equal to zero, and solve
simultaneously:

   dF/dA = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i])*X[i]^2 = 0,
   dF/dB = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i])*X[i] = 0,
   dF/dC = SUM 2*(A*X[i]^2+B*X[i]+C-Y[i]) = 0.

(Here all sums range over i = 1, 2, ..., N.)  Dividing by 2 and
rearranging, you can see that these are three simultaneous linear
equations in the three unknowns A, B, and C:

   (SUM X[i]^4)*A + (SUM X[i]^3)*B + (SUM X[i]^2)*C = SUM X[i]^2*Y[i],
   (SUM X[i]^3)*A + (SUM X[i]^2)*B +   (SUM X[i])*C = SUM X[i]*Y[i],
   (SUM X[i]^2)*A +   (SUM X[i])*B +      (SUM 1)*C = SUM Y[i].

I leave it to you to solve these three equations.

Feel free to write again if I can help further.

- Doctor Rob, The Math Forum
  http://mathforum.org/dr.math/


-----------------------------------------------------------

Okay, let's solve the 3 equations:

   (SUM X[i]^4)*A + (SUM X[i]^3)*B + (SUM X[i]^2)*C = SUM X[i]^2*Y[i]
   (SUM X[i]^3)*A + (SUM X[i]^2)*B +   (SUM X[i])*C = SUM X[i]*Y[i]
   (SUM X[i]^2)*A +   (SUM X[i])*B +      (SUM 1)*C = SUM Y[i]

Note that (SUM 1) = N

For simplicity of notation, let:

   A1=(SUM X[i]^4)     B1=(SUM X[i]^3)     C1=(SUM X[i]^2)  D1=(SUM X[i]^2*Y[i])
   A2=(SUM X[i]^3)=B1  B2=(SUM X[i]^2)=C1  C2=(SUM X[i])    D2=(SUM X[i]*Y[i])
   A3=(SUM X[i]^2)=C1  B3=(SUM X[i])=C2    C3=(SUM 1)=N     D3=(SUM Y[i])

Then the 3 equations are in standard form (except that Dr. Rob's A,B,C
I'm calling x,y,z):

   A1*x + B1*y + C1*z = D1
   A2*x + B2*y + C2*z = D2
   A3*x + B3*y + C3*z = D3

   (x,y,z) = &three_unknowns( A1,B1,C1,D1, A2,B2,C2,D2, A3,B3,C3,D3 );
           = &three_unknowns( A1,B1,C1,D1, B1,C1,C2,D2, C1,C2, N,D3 );

   A1=(SUM X[i]^4)
   B1=(SUM X[i]^3)
   C1=(SUM X[i]^2)
   D1=(SUM X[i]^2*Y[i])
   C2=(SUM X[i])
   D2=(SUM X[i]*Y[i])
   D3=(SUM Y[i])

And here's the Perl code:

   $a1 = $b1 = $c1 = $d1 = $c2 = $d2 = $d3 = $N = 0;
   while ($#_ > 0) {
      $N++;
      $x = shift @_;
      $y = shift @_;
      $c2 += $x;
      $c1 += ($x * $x);
      $b1 += ($x * $x * $x);
      $a1 += ($x * $x * $x * $x);
      $d3 += $y;
      $d2 += ($x * $y);
      $d1 += ($x * $x * $y);
   }
   ($x,$y,$z) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$n,$d3 );

Or, reverting to Dr. Rob's A,B,C notation:

   ($A,$B,$C) = &three_unknowns( $a1,$b1,$c1,$d1, $b1,$c1,$c2,$d2, $c1,$c2,$n,$d3 );

