Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion src/FSharp.Stats/Fitting/LinearRegression.fs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
member this.Degree = n - 1

/// <summary>Constant (first-degree) coefficient of the polynomial a + bx + cx^2 + dx^3 .. --&gt; a</summary>
member this.Constant = if n = 0 then 0. else coefficients.[0]

Check warning on line 39 in src/FSharp.Stats/Fitting/LinearRegression.fs

View workflow job for this annotation

GitHub Actions / build-and-test-windows

This construct causes code to be less generic than indicated by the type annotations. The type variable 'T has been constrained to be type 'float'.

Check warning on line 39 in src/FSharp.Stats/Fitting/LinearRegression.fs

View workflow job for this annotation

GitHub Actions / build-and-test-linux

This construct causes code to be less generic than indicated by the type annotations. The type variable 'T has been constrained to be type 'float'.

/// <summary>Linear (second-degree) coefficient of the polynomial a + bx + cx^2 + dx^3 .. --&gt; b</summary>
member this.Linear = if n < 2 then 0. else coefficients.[1]
Expand Down Expand Up @@ -288,6 +288,51 @@
let coefficientConstrained (xData : Vector<float>) (yData : Vector<float>) ((xC,yC): float*float) =
(fitConstrained xData yData (xC,yC)).Coefficients

/// <summary>
/// Calculates the intercept and slope for a weighted straight line fitting the data.
/// Weighted least squares minimizes Σ w_i * (y_i - (a + b*x_i))².
/// </summary>
/// <param name="weighting">vector of non-negative weights, one per observation</param>
/// <param name="xData">vector of x values</param>
/// <param name="yData">vector of y values</param>
/// <returns>Coefficients of [intercept; slope]</returns>
/// <example>
/// <code>
/// let xData = vector [|1.;2.;3.;4.;5.;6.|]
/// let yData = vector [|4.;7.;9.;10.;11.;15.|]
/// // down-weight the last observation
/// let weights = vector [|1.;1.;1.;1.;1.;0.1|]
/// let coefficients =
/// Univariable.fitWithWeighting weights xData yData
/// </code>
/// </example>
let fitWithWeighting (weighting: Vector<float>) (xData: Vector<float>) (yData: Vector<float>) =
if xData.Length <> yData.Length || xData.Length <> weighting.Length then
raise (System.ArgumentException("Vectors x, y and weighting must have the same length!"))
// Closed-form WLS normal equations for y = a + b*x:
// [Σw Σwx ] [a] [Σwy ]
// [Σwx Σwx²] [b] = [Σwxy]
let mutable sw = 0.
let mutable swx = 0.
let mutable swy = 0.
let mutable swxx = 0.
let mutable swxy = 0.
for i = 0 to xData.Length - 1 do
let wi = weighting.[i]
let xi = xData.[i]
let yi = yData.[i]
sw <- sw + wi
swx <- swx + wi * xi
swy <- swy + wi * yi
swxx <- swxx + wi * xi * xi
swxy <- swxy + wi * xi * yi
let denom = sw * swxx - swx * swx
if abs denom < System.Double.Epsilon then
raise (System.ArgumentException("Degenerate weighting: all weight is concentrated at a single x value."))
let slope = (sw * swxy - swx * swy) / denom
let intercept = (swy - slope * swx) / sw
Coefficients([|intercept; slope|])

/// <summary>
/// Takes intercept and slope of simple linear regression to predict the corresponding y value.
/// </summary>
Expand Down Expand Up @@ -954,7 +999,11 @@
LinearRegression.OLS.Linear.RTO.fit xData yData
| Constraint.RegressionThroughXY coordinate ->
LinearRegression.OLS.Linear.Univariable.fitConstrained xData yData coordinate
| _ -> failwithf "Weighted simple linear regression is not yet implemented! Use polynomial weighted regression with degree 1 instead."
| Some w ->
match _constraint with
| Constraint.Unconstrained ->
LinearRegression.OLS.Linear.Univariable.fitWithWeighting w xData yData
| _ -> failwithf "Constrained weighted simple linear regression is not yet implemented!"

| Method.Polynomial o ->
match _constraint with
Expand Down
66 changes: 66 additions & 0 deletions tests/FSharp.Stats.Tests/Fitting.fs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,72 @@ let leastSquaresCholeskyTests =
)
]
open FSharp.Stats.Fitting.Spline

[<Tests>]
let weightedSimpleLinearRegressionTests =
// Known exact case: y = 2 + 3x, equal weights → coefficients must be exact
let xData = vector [|1.; 2.; 3.; 4.; 5.|]
let yExact = vector [|5.; 8.; 11.; 14.; 17.|] // 2 + 3x

testList "Weighted Simple Linear Regression" [

testCase "Equal unit weights reproduce unweighted fit" (fun () ->
let weights = vector [|1.; 1.; 1.; 1.; 1.|]
let wCoef = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yExact
let uCoef = LinearRegression.OLS.Linear.Univariable.fit xData yExact
Expect.floatClose Accuracy.high wCoef.Constant uCoef.Constant "Intercept should match unweighted"
Expect.floatClose Accuracy.high wCoef.Linear uCoef.Linear "Slope should match unweighted"
)

testCase "Exact line – intercept 2, slope 3" (fun () ->
let weights = vector [|1.; 1.; 1.; 1.; 1.|]
let coef = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yExact
Expect.floatClose Accuracy.high coef.Constant 2. "Intercept should be 2"
Expect.floatClose Accuracy.high coef.Linear 3. "Slope should be 3"
)

testCase "Down-weighting an outlier pulls fit toward true line" (fun () ->
// y = 2 + 3x except the last point is a large outlier
let yOutlier = vector [|5.; 8.; 11.; 14.; 100.|]
let weightsFlat = vector [|1.; 1.; 1.; 1.; 1.|]
let weightsDownLast = vector [|1.; 1.; 1.; 1.; 0.001|]
let coefFlat = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weightsFlat xData yOutlier
let coefDown = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weightsDownLast xData yOutlier
// The down-weighted fit should be closer to slope=3, intercept=2
let errorFlat = abs (coefFlat.Linear - 3.) + abs (coefFlat.Constant - 2.)
let errorDown = abs (coefDown.Linear - 3.) + abs (coefDown.Constant - 2.)
Expect.isTrue (errorDown < errorFlat) "Down-weighting outlier should give fit closer to true line"
)

testCase "Doubling all weights does not change coefficients" (fun () ->
let yData = vector [|4.; 7.; 9.; 10.; 11.|]
let w1 = vector [|1.; 1.; 1.; 1.; 1.|]
let w2 = vector [|2.; 2.; 2.; 2.; 2.|]
let coef1 = LinearRegression.OLS.Linear.Univariable.fitWithWeighting w1 xData yData
let coef2 = LinearRegression.OLS.Linear.Univariable.fitWithWeighting w2 xData yData
Expect.floatClose Accuracy.high coef1.Constant coef2.Constant "Intercept invariant to weight scaling"
Expect.floatClose Accuracy.high coef1.Linear coef2.Linear "Slope invariant to weight scaling"
)

testCase "Agrees with Polynomial.fitWithWeighting order 1" (fun () ->
let yData = vector [|4.; 7.; 9.; 10.; 11.|]
let weights = vector [|2.; 1.; 0.5; 1.; 1.|]
let coefUni = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yData
let coefPoly = LinearRegression.OLS.Polynomial.fitWithWeighting 1 weights xData yData
Expect.floatClose Accuracy.high coefUni.Constant coefPoly.Constant "Intercept agrees with poly order-1"
Expect.floatClose Accuracy.high coefUni.Linear coefPoly.Linear "Slope agrees with poly order-1"
)

testCase "LinearRegressor.fit dispatches to weighted path" (fun () ->
let yData = vector [|4.; 7.; 9.; 10.; 11.|]
let weights = vector [|2.; 1.; 0.5; 1.; 1.|]
let coefDirect = LinearRegression.OLS.Linear.Univariable.fitWithWeighting weights xData yData
let coefDispatch = LinearRegression.fit(xData, yData, FittingMethod = Method.SimpleLinear, Weighting = weights)
Expect.floatClose Accuracy.high coefDirect.Constant coefDispatch.Constant "Intercept matches dispatch"
Expect.floatClose Accuracy.high coefDirect.Linear coefDispatch.Linear "Slope matches dispatch"
)
]

[<Tests>]
let splineTests =
testList "Fitting.Spline" [
Expand Down
Loading