diff --git a/src/FSharp.Stats/Fitting/LinearRegression.fs b/src/FSharp.Stats/Fitting/LinearRegression.fs index 66c50e4c..6337c1a4 100644 --- a/src/FSharp.Stats/Fitting/LinearRegression.fs +++ b/src/FSharp.Stats/Fitting/LinearRegression.fs @@ -288,6 +288,51 @@ module LinearRegression = let coefficientConstrained (xData : Vector) (yData : Vector) ((xC,yC): float*float) = (fitConstrained xData yData (xC,yC)).Coefficients + /// + /// Calculates the intercept and slope for a weighted straight line fitting the data. + /// Weighted least squares minimizes Σ w_i * (y_i - (a + b*x_i))². + /// + /// vector of non-negative weights, one per observation + /// vector of x values + /// vector of y values + /// Coefficients of [intercept; slope] + /// + /// + /// let xData = vector [|1.;2.;3.;4.;5.;6.|] + /// let yData = vector [|4.;7.;9.;10.;11.;15.|] + /// // down-weight the last observation + /// let weights = vector [|1.;1.;1.;1.;1.;0.1|] + /// let coefficients = + /// Univariable.fitWithWeighting weights xData yData + /// + /// + let fitWithWeighting (weighting: Vector) (xData: Vector) (yData: Vector) = + if xData.Length <> yData.Length || xData.Length <> weighting.Length then + raise (System.ArgumentException("Vectors x, y and weighting must have the same length!")) + // Closed-form WLS normal equations for y = a + b*x: + // [Σw Σwx ] [a] [Σwy ] + // [Σwx Σwx²] [b] = [Σwxy] + let mutable sw = 0. + let mutable swx = 0. + let mutable swy = 0. + let mutable swxx = 0. + let mutable swxy = 0. + for i = 0 to xData.Length - 1 do + let wi = weighting.[i] + let xi = xData.[i] + let yi = yData.[i] + sw <- sw + wi + swx <- swx + wi * xi + swy <- swy + wi * yi + swxx <- swxx + wi * xi * xi + swxy <- swxy + wi * xi * yi + let denom = sw * swxx - swx * swx + if abs denom < System.Double.Epsilon then + raise (System.ArgumentException("Degenerate weighting: all weight is concentrated at a single x value.")) + let slope = (sw * swxy - swx * swy) / denom + let intercept = (swy - slope * swx) / sw + Coefficients([|intercept; slope|]) + /// /// Takes intercept and slope of simple linear regression to predict the corresponding y value. /// @@ -954,7 +999,11 @@ type LinearRegression() = LinearRegression.OLS.Linear.RTO.fit xData yData | Constraint.RegressionThroughXY coordinate -> LinearRegression.OLS.Linear.Univariable.fitConstrained xData yData coordinate - | _ -> failwithf "Weighted simple linear regression is not yet implemented! Use polynomial weighted regression with degree 1 instead." + | Some w -> + match _constraint with + | Constraint.Unconstrained -> + LinearRegression.OLS.Linear.Univariable.fitWithWeighting w xData yData + | _ -> failwithf "Constrained weighted simple linear regression is not yet implemented!" | Method.Polynomial o -> match _constraint with diff --git a/tests/FSharp.Stats.Tests/Fitting.fs b/tests/FSharp.Stats.Tests/Fitting.fs index 91111c40..cbd53f57 100644 --- a/tests/FSharp.Stats.Tests/Fitting.fs +++ b/tests/FSharp.Stats.Tests/Fitting.fs @@ -55,6 +55,72 @@ let leastSquaresCholeskyTests = ) ] open FSharp.Stats.Fitting.Spline + +[] +let weightedSimpleLinearRegressionTests = + // Known exact case: y = 2 + 3x, equal weights → coefficients must be exact + let xData = vector [|1.; 2.; 3.; 4.; 5.|] + let yExact = vector [|5.; 8.; 11.; 14.; 17.|] // 2 + 3x + + testList "Weighted Simple Linear Regression" [ + + testCase "Equal unit weights reproduce unweighted fit" (fun () -> + let weights = vector [|1.; 1.; 1.; 1.; 1.|] + let wCoef = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yExact + let uCoef = LinearRegression.OLS.Linear.Univariable.fit xData yExact + Expect.floatClose Accuracy.high wCoef.Constant uCoef.Constant "Intercept should match unweighted" + Expect.floatClose Accuracy.high wCoef.Linear uCoef.Linear "Slope should match unweighted" + ) + + testCase "Exact line – intercept 2, slope 3" (fun () -> + let weights = vector [|1.; 1.; 1.; 1.; 1.|] + let coef = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yExact + Expect.floatClose Accuracy.high coef.Constant 2. "Intercept should be 2" + Expect.floatClose Accuracy.high coef.Linear 3. "Slope should be 3" + ) + + testCase "Down-weighting an outlier pulls fit toward true line" (fun () -> + // y = 2 + 3x except the last point is a large outlier + let yOutlier = vector [|5.; 8.; 11.; 14.; 100.|] + let weightsFlat = vector [|1.; 1.; 1.; 1.; 1.|] + let weightsDownLast = vector [|1.; 1.; 1.; 1.; 0.001|] + let coefFlat = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weightsFlat xData yOutlier + let coefDown = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weightsDownLast xData yOutlier + // The down-weighted fit should be closer to slope=3, intercept=2 + let errorFlat = abs (coefFlat.Linear - 3.) + abs (coefFlat.Constant - 2.) + let errorDown = abs (coefDown.Linear - 3.) + abs (coefDown.Constant - 2.) + Expect.isTrue (errorDown < errorFlat) "Down-weighting outlier should give fit closer to true line" + ) + + testCase "Doubling all weights does not change coefficients" (fun () -> + let yData = vector [|4.; 7.; 9.; 10.; 11.|] + let w1 = vector [|1.; 1.; 1.; 1.; 1.|] + let w2 = vector [|2.; 2.; 2.; 2.; 2.|] + let coef1 = LinearRegression.OLS.Linear.Univariable.fitWithWeighting w1 xData yData + let coef2 = LinearRegression.OLS.Linear.Univariable.fitWithWeighting w2 xData yData + Expect.floatClose Accuracy.high coef1.Constant coef2.Constant "Intercept invariant to weight scaling" + Expect.floatClose Accuracy.high coef1.Linear coef2.Linear "Slope invariant to weight scaling" + ) + + testCase "Agrees with Polynomial.fitWithWeighting order 1" (fun () -> + let yData = vector [|4.; 7.; 9.; 10.; 11.|] + let weights = vector [|2.; 1.; 0.5; 1.; 1.|] + let coefUni = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yData + let coefPoly = LinearRegression.OLS.Polynomial.fitWithWeighting 1 weights xData yData + Expect.floatClose Accuracy.high coefUni.Constant coefPoly.Constant "Intercept agrees with poly order-1" + Expect.floatClose Accuracy.high coefUni.Linear coefPoly.Linear "Slope agrees with poly order-1" + ) + + testCase "LinearRegressor.fit dispatches to weighted path" (fun () -> + let yData = vector [|4.; 7.; 9.; 10.; 11.|] + let weights = vector [|2.; 1.; 0.5; 1.; 1.|] + let coefDirect = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yData + let coefDispatch = LinearRegression.fit(xData, yData, FittingMethod = Method.SimpleLinear, Weighting = weights) + Expect.floatClose Accuracy.high coefDirect.Constant coefDispatch.Constant "Intercept matches dispatch" + Expect.floatClose Accuracy.high coefDirect.Linear coefDispatch.Linear "Slope matches dispatch" + ) + ] + [] let splineTests = testList "Fitting.Spline" [