public class Regression {
static double rsquared = 0;
// do some simple tests with the regression techniques
public static void main(String argv[]) {
// do a simple equation: Y = 1 + 2X
double rawData[][] = {{1, 0}, {3, 1}, {5, 2}};
double coef[] = linear_equation(rawData, 1);
print_equation(coef);
rawData = new double[100][2];
// Second order equation: Y = 1 + 2X + 3X^2
for (int i = 0; i < rawData.length; i++) {
rawData[i][0] = 1 + 2*i + 3*i*i;
rawData[i][1] = i;
}
print_equation(linear_equation(rawData, 2));
// Third order equation: Y = 4 + 3X + 2X^2 + 1X^3
for (int i = 0; i < rawData.length; i++) {
rawData[i][0] = 4 + 3*i + 2*i*i + 1*i*i*i;
rawData[i][1] = i;
}
print_equation(linear_equation(rawData, 3));
}
// Apply least squares to raw data to determine the coefficients for
// an n-order equation: y = a0*X^0 + a1*X^1 + ... + an*X^n.
// Returns the coefficients for the solved equation, given a number
// of y and x data points. The rawData input is given in the form of
// {{y0, x0}, {y1, x1},...,{yn, xn}}. The coefficients returned by
// the regression are {a0, a1,...,an} which corresponds to
// {X^0, X^1,...,X^n}. The number of coefficients returned is the
// requested equation order (norder) plus 1.
static double[] linear_equation(double rawData[][], int norder) {
double a[][] = new double[norder+1][norder+1];
double b[] = new double[norder+1];
double term[] = new double[norder+1];
double ysquare = 0;
// step through each raw data entries
for (int i = 0; i < rawData.length; i++) {
// sum the y values
b[0] += rawData[i][0];
ysquare += rawData[i][0] * rawData[i][0];
// sum the x power values
double xpower = 1;
for (int j = 0; j < norder+1; j++) {
term[j] = xpower;
a[0][j] += xpower;
xpower = xpower * rawData[i][1];
}
// now set up the rest of rows in the matrix - multiplying each row by each term
for (int j = 1; j < norder+1; j++) {
b[j] += rawData[i][0] * term[j];
for (int k = 0; k < b.length; k++) {
a[j][k] += term[j] * term[k];
}
}
}
// solve for the coefficients
double coef[] = gauss(a, b);
// calculate the r-squared statistic
double ss = 0;
double yaverage = b[0] / rawData.length;
for (int i = 0; i < norder+1; i++) {
double xaverage = a[0][i] / rawData.length;
ss += coef[i] * (b[i] - (rawData.length * xaverage * yaverage));
}
rsquared = ss / (ysquare - (rawData.length * yaverage * yaverage));
// solve the simultaneous equations via gauss
return coef;
}
// it's been so long since I wrote this, that I don't recall the math
// logic behind it. IIRC, it's just a standard gaussian technique for
// solving simultaneous equations of the form: |A| = |B| * |C| where we
// know the values of |A| and |B|, and we are solving for the coefficients
// in |C|
static double[] gauss(double ax[][], double bx[]) {
double a[][] = new double[ax.length][ax[0].length];
double b[] = new double[bx.length];
double pivot;
double mult;
double top;
int n = b.length;
double coef[] = new double[n];
// copy over the array values - inplace solution changes values
for (int i = 0; i < ax.length; i++) {
for (int j = 0; j < ax[i].length; j++) {
a[i][j] = ax[i][j];
}
b[i] = bx[i];
}
for (int j = 0; j < (n-1); j++) {
pivot = a[j][j];
for (int i = j+1; i < n; i++) {
mult = a[i][j] / pivot;
for (int k = j+1; k < n; k++) {
a[i][k] = a[i][k] - mult * a[j][k];
}
b[i] = b[i] - mult * b[j];
}
}
coef[n-1] = b[n-1] / a[n-1][n-1];
for (int i = n-2; i >= 0; i--) {
top = b[i];
for (int k = i+1; k < n; k ++) {
top = top - a[i][k] * coef[k];
}
coef[i] = top / a[i][i];
}
return coef;
}
// simple routine to print the equation for inspection
static void print_equation(double coef[]) {
for (int i = 0; i < coef.length; i++) {
if (i == 0) {
System.out.print("Y = ");
} else {
System.out.print(" + ");
}
System.out.print(coef[i] + "*X^" + i);
}
System.out.println(" [r^2 = " + rsquared + "]");
}
}