summaryrefslogtreecommitdiffstats
path: root/stats
diff options
context:
space:
mode:
authorGravatar Robert C. Helling <helling@atdotde.de>2021-01-10 00:12:41 +0100
committerGravatar Robert C. Helling <helling@atdotde.de>2021-01-14 20:51:23 +0100
commitd83c9b524622286d4dcd35d1233dfc722511a5be (patch)
tree6f332b5580caa8b9e510ba8f751db6e1ffecb604 /stats
parent622e5aab692082509fcba8fba53ea2b16342e0d4 (diff)
downloadsubsurface-d83c9b524622286d4dcd35d1233dfc722511a5be.tar.gz
Indicate goodness of fit of regression line
The goodness of fit of a regression line is the percentage of the variance of the y values that is explained by the dependence on the x values. Set the alpha value of the regression line to this goodness of fit. Further, set the width of the regression line to a standard deviation of the values from the regression line valies. Signed-off-by: Robert C. Helling <helling@atdotde.de>
Diffstat (limited to 'stats')
-rw-r--r--stats/statsview.cpp76
-rw-r--r--stats/statsview.h8
2 files changed, 51 insertions, 33 deletions
diff --git a/stats/statsview.cpp b/stats/statsview.cpp
index 5583643d7..405677e56 100644
--- a/stats/statsview.cpp
+++ b/stats/statsview.cpp
@@ -723,13 +723,14 @@ void StatsView::QuartileMarker::updatePosition()
x + quartileMarkerSize / 2.0, y);
}
-StatsView::RegressionLine::RegressionLine(double a, double b, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis) :
- item(createItemPtr<QGraphicsLineItem>(scene)),
+StatsView::RegressionLine::RegressionLine(double a, double b, double width, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis) :
+ item(createItemPtr<QGraphicsPolygonItem>(scene)),
xAxis(xAxis), yAxis(yAxis),
- a(a), b(b)
+ a(a), b(b), width(width)
{
item->setZValue(ZValues::chartFeatures);
- item->setPen(pen);
+ item->setPen(Qt::NoPen);
+ item->setBrush(brush);
}
void StatsView::RegressionLine::updatePosition()
@@ -738,21 +739,16 @@ void StatsView::RegressionLine::updatePosition()
return;
auto [minX, maxX] = xAxis->minMax();
auto [minY, maxY] = yAxis->minMax();
- double y1 = a * minX + b;
- double y2 = a * maxX + b;
-
- // If not fully inside drawing region, do clipping.
- if ((y1 < minY || y1 > maxY || y2 < minY || y2 > maxY) && fabs(a) > 0.0001) {
- // Intersections with y = minY and y = maxY lines
- double intersect_x1 = (minY - b) / a;
- double intersect_x2 = (maxY - b) / a;
- if (intersect_x1 > intersect_x2)
- std::swap(intersect_x1, intersect_x2);
- minX = std::max(minX, intersect_x1);
- maxX = std::min(maxX, intersect_x2);
- }
- item->setLine(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b),
- xAxis->toScreen(maxX), yAxis->toScreen(a * maxX + b));
+
+ QPolygonF poly;
+ poly << QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b + width))
+ << QPointF(xAxis->toScreen(maxX), yAxis->toScreen(a * maxX + b + width))
+ << QPointF(xAxis->toScreen(maxX), yAxis->toScreen(a * maxX + b - width))
+ << QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b - width))
+ << QPointF(xAxis->toScreen(minX), yAxis->toScreen(a * minX + b + width));
+ QRectF box(QPoint(xAxis->toScreen(minX), yAxis->toScreen(minY)), QPoint(xAxis->toScreen(maxX), yAxis->toScreen(maxY)));
+
+ item->setPolygon(poly.intersected(box));
}
StatsView::HistogramMarker::HistogramMarker(double val, bool horizontal, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis) :
@@ -784,9 +780,15 @@ void StatsView::addHistogramMarker(double pos, const QPen &pen, bool isHorizonta
histogramMarkers.emplace_back(pos, isHorizontal, pen, &scene, xAxis, yAxis);
}
-void StatsView::addLinearRegression(double a, double b, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis)
+void StatsView::addLinearRegression(double a, double b, double res2, double r2, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis)
{
- regressionLines.emplace_back(a, b, QPen(Qt::red), &scene, xAxis, yAxis);
+ QColor red = QColor(Qt::red);
+ red.setAlphaF(r2);
+ QPen pen(red);
+ QBrush brush(red);
+ brush.setStyle(Qt::SolidPattern);
+
+ regressionLines.emplace_back(a, b, sqrt(res2), brush, &scene, xAxis, yAxis);
}
// Yikes, we get our data in different kinds of (bin, value) pairs.
@@ -1025,12 +1027,21 @@ static bool is_linear_regression(int sample_size, double cov, double sx2, double
return true; // can't happen, as we tested for sample_size above.
}
-// Returns the coefficients [a,b] of the line y = ax + b
-// If case of an undetermined regression or one with infinite slope, returns [nan, nan]
-static std::pair<double, double> linear_regression(const std::vector<StatsScatterItem> &v)
+struct regression_data {
+ double a,b;
+ double res2, r2;
+};
+
+// Returns the coefficients a,b of the line y = ax + b
+// as well as the variance of the residuals (averaged residual squared) as res2
+// and r^2 = 1.0 - variance of data / res2 which is the fraction of the variance of
+// the data that is explained by the linear regression.
+// If case of an undetermined regression or one with infinite slope, returns {nan, nan, 0.0, 0.0}
+
+static struct regression_data linear_regression(const std::vector<StatsScatterItem> &v)
{
if (v.size() < 2)
- return { NaN, NaN };
+ return { .a = NaN, .b = NaN, .res2 = 0.0, .r2 = 0.0};
// First, calculate the x and y average
double avg_x = 0.0, avg_y = 0.0;
@@ -1051,10 +1062,15 @@ static std::pair<double, double> linear_regression(const std::vector<StatsScatte
bool is_linear = is_linear_regression((int)v.size(), cov, sx2, sy2);
if (fabs(sx2) < 1e-10 || !is_linear) // If t is not statistically significant, do not plot the regression line.
- return { NaN, NaN };
+ return { .a = NaN, .b = NaN, .res2 = 0.0, .r2 = 0.0};
double a = cov / sx2;
double b = avg_y - a * avg_x;
- return { a, b };
+
+ double res2 = 0.0;
+ for (auto [x, y, d]: v)
+ res2 += (y - a * x - b) * (y - a * x - b);
+ double r2 = sy2 > 0.0 ? 1.0 - res2 / sy2 : 1.0;
+ return { .a = a, .b = b, .res2 = res2 / v.size(), .r2 = r2 };
}
void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable)
@@ -1084,10 +1100,10 @@ void StatsView::plotScatter(const std::vector<dive *> &dives, const StatsVariabl
series->append(dive, x, y);
// y = ax + b
- auto [a, b] = linear_regression(points);
- if (!std::isnan(a)) {
+ struct regression_data reg = linear_regression(points);
+ if (!std::isnan(reg.a)) {
auto [minx, maxx] = axisX->minMax();
auto [miny, maxy] = axisY->minMax();
- addLinearRegression(a, b, minx, maxx, miny, maxy, xAxis, yAxis);
+ addLinearRegression(reg.a, reg.b, reg.res2, reg.r2, minx, maxx, miny, maxy, xAxis, yAxis);
}
}
diff --git a/stats/statsview.h b/stats/statsview.h
index fbf51d46e..198ded14f 100644
--- a/stats/statsview.h
+++ b/stats/statsview.h
@@ -9,6 +9,7 @@
#include <QImage>
#include <QPainter>
#include <QQuickItem>
+#include <QGraphicsPolygonItem>
struct dive;
struct StatsBinner;
@@ -117,11 +118,12 @@ private:
// A regression line
struct RegressionLine {
- std::unique_ptr<QGraphicsLineItem> item;
+ std::unique_ptr<QGraphicsPolygonItem> item;
StatsAxis *xAxis, *yAxis;
double a, b; // y = ax + b
+ double width;
void updatePosition();
- RegressionLine(double a, double b, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
+ RegressionLine(double a, double b, double width, QBrush brush, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
};
// A line marking median or mean in histograms
@@ -134,7 +136,7 @@ private:
HistogramMarker(double val, bool horizontal, QPen pen, QGraphicsScene *scene, StatsAxis *xAxis, StatsAxis *yAxis);
};
- void addLinearRegression(double a, double b, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis);
+ void addLinearRegression(double a, double b, double res2, double r2, double minX, double maxX, double minY, double maxY, StatsAxis *xAxis, StatsAxis *yAxis);
void addHistogramMarker(double pos, const QPen &pen, bool isHorizontal, StatsAxis *xAxis, StatsAxis *yAxis);
StatsState state;