From 995100a540c2c5a49a2cfebcb245a7e964d2d988 Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Fri, 1 Jan 2021 22:43:21 +0100 Subject: statistics: implement StatsView The StatsView shows the chart described by the StatsState structure. It is based on a QML ChartView. This should make it possible to easily port to mobile. It does not include any of the UI around the chart, viz. the variable and chart selection, etc. The code checking for the statistical significance of the regression line was written by Willem. Signed-off-by: Berthold Stoeger Signed-off-by: willemferguson --- CMakeLists.txt | 3 +- stats/CMakeLists.txt | 2 + stats/qml/statsview.qml | 8 + stats/qml/statsview.qrc | 5 + stats/statsview.cpp | 984 ++++++++++++++++++++++++++++++++++++++++++++++++ stats/statsview.h | 138 +++++++ 6 files changed, 1139 insertions(+), 1 deletion(-) create mode 100644 stats/qml/statsview.qml create mode 100644 stats/qml/statsview.qrc create mode 100644 stats/statsview.cpp create mode 100644 stats/statsview.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f359aa823..d25313e96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -301,7 +301,7 @@ endif() #set up the subsurface_link_libraries variable set(SUBSURFACE_LINK_LIBRARIES ${SUBSURFACE_LINK_LIBRARIES} ${LIBDIVECOMPUTER_LIBRARIES} ${LIBGIT2_LIBRARIES} ${LIBUSB_LIBRARIES} ${LIBMTP_LIBRARIES}) if (NOT SUBSURFACE_TARGET_EXECUTABLE MATCHES "DownloaderExecutable") - qt5_add_resources(SUBSURFACE_RESOURCES subsurface.qrc map-widget/qml/map-widget.qrc) + qt5_add_resources(SUBSURFACE_RESOURCES subsurface.qrc map-widget/qml/map-widget.qrc stats/qml/statsview.qrc) endif() # hack to build successfully on LGTM @@ -391,6 +391,7 @@ elseif (SUBSURFACE_TARGET_EXECUTABLE MATCHES "DesktopExecutable") subsurface_models_desktop subsurface_commands subsurface_corelib + subsurface_stats ${SUBSURFACE_LINK_LIBRARIES} ) add_dependencies(subsurface_desktop_preferences subsurface_generated_ui) diff --git a/stats/CMakeLists.txt 
b/stats/CMakeLists.txt index 594bc6fd2..31e809270 100644 --- a/stats/CMakeLists.txt +++ b/stats/CMakeLists.txt @@ -27,6 +27,8 @@ set(SUBSURFACE_STATS_SRCS statsstate.cpp statsvariables.h statsvariables.cpp + statsview.h + statsview.cpp zvalues.h ) diff --git a/stats/qml/statsview.qml b/stats/qml/statsview.qml new file mode 100644 index 000000000..24f1fe9d3 --- /dev/null +++ b/stats/qml/statsview.qml @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +import QtQuick 2.0 +import QtCharts 2.0 + +ChartView { + antialiasing: true + localizeNumbers: true +} diff --git a/stats/qml/statsview.qrc b/stats/qml/statsview.qrc new file mode 100644 index 000000000..aeb65167e --- /dev/null +++ b/stats/qml/statsview.qrc @@ -0,0 +1,5 @@ + + + statsview.qml + + diff --git a/stats/statsview.cpp b/stats/statsview.cpp new file mode 100644 index 000000000..ba5e8c24e --- /dev/null +++ b/stats/statsview.cpp @@ -0,0 +1,984 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "statsview.h" +#include "barseries.h" +#include "boxseries.h" +#include "legend.h" +#include "pieseries.h" +#include "scatterseries.h" +#include "statsaxis.h" +#include "statsstate.h" +#include "statstranslations.h" +#include "statsvariables.h" +#include "zvalues.h" +#include "core/divefilter.h" +#include "core/subsurface-qt/divelistnotifier.h" + +#include +#include +#include +#include +#include +#include + +// Constants that control the graph layouts +static const QColor quartileMarkerColor(Qt::red); +static const double quartileMarkerSize = 15; + +static const QUrl urlStatsView = QUrl(QStringLiteral("qrc:/qml/statsview.qml")); + +// We use QtQuick's ChartView so that we can show the statistics on mobile. +// However, accessing the ChartView from C++ is maliciously cumbersome and +// the full QChart interface is not exported. Fortunately, the interface +// leaks the QChart object: We can create a dummy-series and access the chart +// object via the chart() accessor function. 
By creating a "PieSeries", the +// ChartView does not automatically add axes. +static QtCharts::QChart *getChart(QQuickItem *item) +{ + QtCharts::QAbstractSeries *abstract_series; + if (!item) + return nullptr; + if (!QMetaObject::invokeMethod(item, "createSeries", Qt::AutoConnection, + Q_RETURN_ARG(QtCharts::QAbstractSeries *, abstract_series), + Q_ARG(int, QtCharts::QAbstractSeries::SeriesTypePie), + Q_ARG(QString, QString()))) { + qWarning("Couldn't call createSeries()"); + return nullptr; + } + QtCharts::QChart *res = abstract_series->chart(); + res->removeSeries(abstract_series); + delete abstract_series; + return res; +} + +bool StatsView::EventFilter::eventFilter(QObject *o, QEvent *event) +{ + if (event->type() == QEvent::GraphicsSceneHoverMove) { + QGraphicsSceneHoverEvent *hover = static_cast(event); + view->hover(hover->pos()); + return true; + } + return QObject::eventFilter(o, event); +} + +StatsView::StatsView(QWidget *parent) : QQuickWidget(parent), + highlightedSeries(nullptr), + eventFilter(this) +{ + setResizeMode(QQuickWidget::SizeRootObjectToView); + setSource(urlStatsView); + chart = getChart(rootObject()); + connect(chart, &QtCharts::QChart::plotAreaChanged, this, &StatsView::plotAreaChanged); + connect(&diveListNotifier, &DiveListNotifier::numShownChanged, this, &StatsView::replotIfVisible); + + chart->installEventFilter(&eventFilter); + chart->setAcceptHoverEvents(true); + chart->legend()->setVisible(false); +} + +StatsView::~StatsView() +{ +} + +void StatsView::plotAreaChanged(const QRectF &) +{ + for (auto &axis: axes) + axis->updateLabels(chart); + for (auto &series: series) + series->updatePositions(); + for (QuartileMarker &marker: quartileMarkers) + marker.updatePosition(); + for (LineMarker &marker: lineMarkers) + marker.updatePosition(); + if (legend) + legend->resize(); +} + +void StatsView::replotIfVisible() +{ + if (isVisible()) + plot(state); +} + +void StatsView::hover(QPointF pos) +{ + for (auto &series: series) { + if 
(series->hover(pos)) { + if (series.get() != highlightedSeries) { + if (highlightedSeries) + highlightedSeries->unhighlight(); + highlightedSeries = series.get(); + } + return; + } + } + + // No series was highlighted -> unhighlight any previously highlighted series. + if (highlightedSeries) { + highlightedSeries->unhighlight(); + highlightedSeries = nullptr; + } +} + +template +T *StatsView::createSeries(Args&&... args) +{ + StatsAxis *xAxis = axes.size() >= 2 ? axes[0].get() : nullptr; + StatsAxis *yAxis = axes.size() >= 2 ? axes[1].get() : nullptr; + T *res = new T(chart, xAxis, yAxis, std::forward(args)...); + series.emplace_back(res); + series.back()->updatePositions(); + return res; +} + +void StatsView::setTitle(const QString &s) +{ + chart->setTitle(s); +} + +template +T *StatsView::createAxis(const QString &title, Args&&... args) +{ + T *res = new T(std::forward(args)...); + axes.emplace_back(res); + axes.back()->updateLabels(chart); + axes.back()->qaxis()->setTitleText(title); + return res; +} + +void StatsView::addAxes(StatsAxis *x, StatsAxis *y) +{ + chart->addAxis(x->qaxis(), Qt::AlignBottom); + chart->addAxis(y->qaxis(), Qt::AlignLeft); +} + +void StatsView::reset() +{ + if (!chart) + return; + highlightedSeries = nullptr; + legend.reset(); + series.clear(); + quartileMarkers.clear(); + lineMarkers.clear(); + chart->removeAllSeries(); + axes.clear(); +} + +void StatsView::plot(const StatsState &stateIn) +{ + state = stateIn; + if (!chart || !state.var1) + return; + reset(); + + const std::vector dives = DiveFilter::instance()->visibleDives(); + switch (state.type) { + case ChartType::DiscreteBar: + return plotBarChart(dives, state.subtype, state.var1, state.var1Binner, state.var2, + state.var2Binner, state.labels, state.legend); + case ChartType::DiscreteValue: + return plotValueChart(dives, state.subtype, state.var1, state.var1Binner, state.var2, + state.var2Operation, state.labels); + case ChartType::DiscreteCount: + return 
plotDiscreteCountChart(dives, state.subtype, state.var1, state.var1Binner, state.labels); + case ChartType::Pie: + return plotPieChart(dives, state.var1, state.var1Binner, state.labels, state.legend); + case ChartType::DiscreteBox: + return plotDiscreteBoxChart(dives, state.var1, state.var1Binner, state.var2); + case ChartType::DiscreteScatter: + return plotDiscreteScatter(dives, state.var1, state.var1Binner, state.var2, state.quartiles); + case ChartType::HistogramCount: + return plotHistogramCountChart(dives, state.subtype, state.var1, state.var1Binner, + state.labels, state.median, state.mean); + case ChartType::HistogramValue: + return plotHistogramValueChart(dives, state.subtype, state.var1, state.var1Binner, state.var2, + state.var2Operation, state.labels); + case ChartType::HistogramStacked: + return plotHistogramStackedChart(dives, state.subtype, state.var1, state.var1Binner, + state.var2, state.var2Binner, state.labels, state.legend); + case ChartType::HistogramBox: + return plotHistogramBoxChart(dives, state.var1, state.var1Binner, state.var2); + case ChartType::ScatterPlot: + return plotScatter(dives, state.var1, state.var2); + default: + qWarning("Unknown chart type: %d", (int)state.type); + return; + } +} + +template +CategoryAxis *StatsView::createCategoryAxis(const QString &name, const StatsBinner &binner, + const std::vector &bins, bool isHorizontal) +{ + std::vector labels; + labels.reserve(bins.size()); + for (const auto &[bin, dummy]: bins) + labels.push_back(binner.format(*bin)); + return createAxis(name, labels, isHorizontal); +} + +CountAxis *StatsView::createCountAxis(int maxVal, bool isHorizontal) +{ + return createAxis(StatsTranslations::tr("No. dives"), maxVal, isHorizontal); +} + +// For "two-dimensionally" binned plots (eg. stacked bar or grouped bar): +// Counts for each bin on the independent variable, including the total counts for that bin. 
+struct BinCounts { + StatsBinPtr bin; + std::vector counts; + int total; +}; + +// The problem with bar plots is that for different category +// bins, we might get different value bins. So we have to keep track +// of our counts and adjust accordingly. That's a bit annoying. +// Perhaps we should determine the bins of all dives first and then +// query the counts for precisely those bins? +struct BarPlotData { + std::vector hbin_counts; // For each category bin the counts for all value bins + std::vector vbins; + std::vector vbinNames; + int maxCount; // Highest count of any bin-combination + int maxCategoryCount; // Highest count of any category bin + // Attention: categoryBin argument will be consumed! + BarPlotData(std::vector &categoryBins, const StatsBinner &valuebinner); +}; + +BarPlotData::BarPlotData(std::vector &categoryBins, const StatsBinner &valueBinner) : + maxCount(0), maxCategoryCount(0) +{ + for (auto &[bin, dives]: categoryBins) { + // This moves the bin - the original pointer is invalidated + hbin_counts.push_back({ std::move(bin), std::vector(vbins.size(), 0), 0 }); + for (auto &[vbin, count]: valueBinner.count_dives(dives, false)) { + // Note: we assume that the bins are sorted! + auto it = std::lower_bound(vbins.begin(), vbins.end(), vbin, + [] (const StatsBinPtr &p, const StatsBinPtr &bin) + { return *p < *bin; }); + ssize_t pos = it - vbins.begin(); + if (it == vbins.end() || **it != *vbin) { + // Add a new value bin. + // Attn: this invalidates "vbin", which must not be used henceforth! 
+ vbins.insert(it, std::move(vbin)); + // Fix the old arrays + for (auto &[bin, v, total]: hbin_counts) + v.insert(v.begin() + pos, 0); + } + hbin_counts.back().counts[pos] = count; + hbin_counts.back().total += count; + if (count > maxCount) + maxCount = count; + } + maxCategoryCount = std::max(maxCategoryCount, hbin_counts.back().total); + } + + vbinNames.reserve(vbins.size()); + for (const auto &vbin: vbins) + vbinNames.push_back(valueBinner.formatWithUnit(*vbin)); +} + +// Formats "x (y%)" as either a single or two strings for horizontal and non-horizontal cases, respectively. +static std::vector makePercentageLabels(int count, int total, bool isHorizontal) +{ + double percentage = count * 100.0 / total; + QString countString = QString("%L1").arg(count); + QString percentageString = QString("%L1%").arg(percentage, 0, 'f', 1); + if (isHorizontal) + return { QString("%1 %2").arg(countString, percentageString) }; + else + return { countString, percentageString }; +} + +// From a list of counts, make (count, label) pairs, where the label +// formats the total number and the percentage of dives. +static std::vector>> makeCountLabels(const std::vector &counts, int total, + bool labels, bool isHorizontal) +{ + std::vector>> count_labels; + count_labels.reserve(counts.size()); + for (int count: counts) { + std::vector label = labels ? 
makePercentageLabels(count, total, isHorizontal) + : std::vector(); + count_labels.push_back(std::make_pair(count, label)); + } + return count_labels; +} + +void StatsView::plotBarChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool showLegend) +{ + if (!categoryBinner || !valueBinner) + return; + + setTitle(valueVariable->nameWithBinnerUnit(*valueBinner)); + + std::vector categoryBins = categoryBinner->bin_dives(dives, false); + + bool isStacked = subType == ChartSubType::VerticalStacked || subType == ChartSubType::HorizontalStacked; + bool isHorizontal = subType == ChartSubType::HorizontalGrouped || subType == ChartSubType::HorizontalStacked; + + // Construct the histogram axis now, because the pointers to the bins + // will be moved away when constructing BarPlotData below. + CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + + BarPlotData data(categoryBins, *valueBinner); + + int maxVal = isStacked ? data.maxCategoryCount : data.maxCount; + CountAxis *valAxis = createCountAxis(maxVal, isHorizontal); + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + // Paint legend first, because the bin-names will be moved away from. 
+ if (showLegend) + legend = std::make_unique(chart, data.vbinNames); + + std::vector items; + items.reserve(data.hbin_counts.size()); + double pos = 0.0; + for (auto &[hbin, counts, total]: data.hbin_counts) { + items.push_back({ pos - 0.5, pos + 0.5, makeCountLabels(counts, total, labels, isHorizontal), + categoryBinner->formatWithUnit(*hbin) }); + pos += 1.0; + } + + createSeries(isHorizontal, isStacked, categoryVariable->name(), valueVariable, std::move(data.vbinNames), items); +} + +const double NaN = std::numeric_limits::quiet_NaN(); + +// These templates are used to extract min and max y-values of various lists. +// A bit too convoluted for my tastes - can we make that simpler? +static std::pair getMinMaxValueBase(const std::vector &values) +{ + // Attention: this supposes that the list is sorted! + return values.empty() ? std::make_pair(NaN, NaN) : std::make_pair(values.front().v, values.back().v); +} +static std::pair getMinMaxValueBase(double v) +{ + return { v, v }; +} +static std::pair getMinMaxValueBase(const StatsQuartiles &q) +{ + return { q.min, q.max }; +} +static std::pair getMinMaxValueBase(const StatsScatterItem &s) +{ + return { s.y, s.y }; +} +template +static std::pair getMinMaxValueBase(const std::pair &p) +{ + return getMinMaxValueBase(p.second); +} +template +static std::pair getMinMaxValueBase(const StatsBinValue &v) +{ + return getMinMaxValueBase(v.value); +} + +template +static void updateMinMax(double &min, double &max, bool &found, const T &v) +{ + const auto [mi, ma] = getMinMaxValueBase(v); + if (!std::isnan(mi) && mi < min) + min = mi; + if (!std::isnan(ma) && ma > max) + max = ma; + if (!std::isnan(mi) || !std::isnan(ma)) + found = true; +} + +template +static std::pair getMinMaxValue(const std::vector &values) +{ + double min = 1e14, max = 0.0; + bool found = false; + for (const T &v: values) + updateMinMax(min, max, found, v); + return found ? 
std::make_pair(min, max) : std::make_pair(0.0, 0.0); +} + +static std::pair getMinMaxValue(const std::vector &bins, StatsOperation op) +{ + double min = 1e14, max = 0.0; + bool found = false; + for (auto &[bin, res]: bins) { + if (!res.isValid()) + continue; + updateMinMax(min, max, found, res.get(op)); + } + return found ? std::make_pair(min, max) : std::make_pair(0.0, 0.0); +} + +void StatsView::plotValueChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable, StatsOperation valueAxisOperation, + bool labels) +{ + if (!categoryBinner) + return; + + setTitle(QStringLiteral("%1 (%2)").arg(valueVariable->name(), StatsVariable::operationName(valueAxisOperation))); + + std::vector categoryBins = valueVariable->bin_operations(*categoryBinner, dives, false); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + + bool isHorizontal = subType == ChartSubType::Horizontal; + const auto [minValue, maxValue] = getMinMaxValue(categoryBins, valueAxisOperation); + int decimals = valueVariable->decimals(); + CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + ValueAxis *valAxis = createAxis(valueVariable->nameWithUnit(), + 0.0, maxValue, valueVariable->decimals(), isHorizontal); + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + std::vector items; + items.reserve(categoryBins.size()); + double pos = 0.0; + QString unit = valueVariable->unitSymbol(); + for (auto &[bin, res]: categoryBins) { + if (res.isValid()) { + double height = res.get(valueAxisOperation); + QString value = QString("%L1").arg(height, 0, 'f', decimals); + std::vector label = labels ? 
std::vector { value } + : std::vector(); + items.push_back({ pos - 0.5, pos + 0.5, height, label, + categoryBinner->formatWithUnit(*bin), res }); + } + pos += 1.0; + } + + createSeries(isHorizontal, categoryVariable->name(), valueVariable, items); +} + +static int getTotalCount(const std::vector &bins) +{ + int total = 0; + for (const auto &[bin, count]: bins) + total += count; + return total; +} + +template +static int getMaxCount(const std::vector &bins) +{ + int res = 0; + for (auto const &[dummy, val]: bins) { + if (val > res) + res = val; + } + return res; +} + +void StatsView::plotDiscreteCountChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + bool labels) +{ + if (!categoryBinner) + return; + + setTitle(categoryVariable->nameWithBinnerUnit(*categoryBinner)); + + std::vector categoryBins = categoryBinner->count_dives(dives, false); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + int total = getTotalCount(categoryBins); + bool isHorizontal = subType != ChartSubType::Vertical; + + CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + + int maxCount = getMaxCount(categoryBins); + CountAxis *valAxis = createCountAxis(maxCount, isHorizontal); + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + std::vector items; + items.reserve(categoryBins.size()); + double pos = 0.0; + for (auto const &[bin, count]: categoryBins) { + std::vector label = labels ? 
makePercentageLabels(count, total, isHorizontal) + : std::vector(); + items.push_back({ pos - 0.5, pos + 0.5, count, label, + categoryBinner->formatWithUnit(*bin), total }); + pos += 1.0; + } + + createSeries(isHorizontal, categoryVariable->name(), items); +} + +void StatsView::plotPieChart(const std::vector &dives, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + bool labels, bool showLegend) +{ + if (!categoryBinner) + return; + + setTitle(categoryVariable->nameWithBinnerUnit(*categoryBinner)); + + std::vector categoryBins = categoryBinner->count_dives(dives, false); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + std::vector> data; + data.reserve(categoryBins.size()); + for (auto const &[bin, count]: categoryBins) + data.emplace_back(categoryBinner->formatWithUnit(*bin), count); + + bool keepOrder = categoryVariable->type() != StatsVariable::Type::Discrete; + PieSeries *series = createSeries(categoryVariable->name(), data, keepOrder, labels); + + if (showLegend) + legend = std::make_unique(chart, series->binNames()); +} + +void StatsView::plotDiscreteBoxChart(const std::vector &dives, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable) +{ + if (!categoryBinner) + return; + + setTitle(valueVariable->name()); + + std::vector categoryBins = valueVariable->bin_quartiles(*categoryBinner, dives, false); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, true); + + auto [minY, maxY] = getMinMaxValue(categoryBins); + ValueAxis *valueAxis = createAxis(valueVariable->nameWithUnit(), + minY, maxY, valueVariable->decimals(), false); + + addAxes(catAxis, valueAxis); + + BoxSeries *series = createSeries(valueVariable->name(), valueVariable->unitSymbol(), valueVariable->decimals()); + + 
double pos = 0.0; + for (auto &[bin, q]: categoryBins) { + if (q.isValid()) + series->append(pos - 0.5, pos + 0.5, q, categoryBinner->formatWithUnit(*bin)); + pos += 1.0; + } +} + +void StatsView::plotDiscreteScatter(const std::vector &dives, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable, bool quartiles) +{ + if (!categoryBinner) + return; + + setTitle(valueVariable->name()); + + std::vector categoryBins = valueVariable->bin_values(*categoryBinner, dives, false); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + CategoryAxis *catAxis = createCategoryAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, true); + + auto [minValue, maxValue] = getMinMaxValue(categoryBins); + + ValueAxis *valAxis = createAxis(valueVariable->nameWithUnit(), + minValue, maxValue, valueVariable->decimals(), false); + + addAxes(catAxis, valAxis); + ScatterSeries *series = createSeries(*categoryVariable, *valueVariable); + + double x = 0.0; + for (const auto &[bin, array]: categoryBins) { + for (auto [v, d]: array) + series->append(d, x, v); + if (quartiles) { + StatsQuartiles quartiles = StatsVariable::quartiles(array); + if (quartiles.isValid()) { + quartileMarkers.emplace_back(x, quartiles.q1, series); + quartileMarkers.emplace_back(x, quartiles.q2, series); + quartileMarkers.emplace_back(x, quartiles.q3, series); + } + } + x += 1.0; + } +} + +StatsView::QuartileMarker::QuartileMarker(double pos, double value, QtCharts::QAbstractSeries *series) : + item(new QGraphicsLineItem(series->chart())), + series(series), + pos(pos), + value(value) +{ + item->setZValue(ZValues::chartFeatures); + item->setPen(QPen(quartileMarkerColor, 2.0)); + updatePosition(); +} + +void StatsView::QuartileMarker::updatePosition() +{ + QtCharts::QChart *chart = series->chart(); + QPointF center = chart->mapToPosition(QPointF(pos, value), series); + item->setLine(center.x() - 
quartileMarkerSize / 2.0, center.y(), + center.x() + quartileMarkerSize / 2.0, center.y()); +} + +StatsView::LineMarker::LineMarker(QPointF from, QPointF to, QPen pen, QtCharts::QAbstractSeries *series) : + item(new QGraphicsLineItem(series->chart())), + series(series), from(from), to(to) +{ + item->setZValue(ZValues::chartFeatures); + item->setPen(pen); + updatePosition(); +} + +void StatsView::LineMarker::updatePosition() +{ + QtCharts::QChart *chart = series->chart(); + item->setLine(QLineF(chart->mapToPosition(from, series), + chart->mapToPosition(to, series))); +} + +void StatsView::addLinearRegression(double a, double b, double minX, double maxX, QtCharts::QAbstractSeries *series) +{ + lineMarkers.emplace_back(QPointF(minX, a * minX + b), QPointF(maxX, a * maxX + b), QPen(Qt::red), series); +} + +void StatsView::addHistogramMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal, QtCharts::QAbstractSeries *series) +{ + QPointF from = isHorizontal ? QPointF(low, pos) : QPointF(pos, low); + QPointF to = isHorizontal ? QPointF(high, pos) : QPointF(pos, high); + lineMarkers.emplace_back(from, to, pen, series); +} + +// Yikes, we get our data in different kinds of (bin, value) pairs. +// To create a category axis from this, we have to templatify the function. 
+template +HistogramAxis *StatsView::createHistogramAxis(const QString &name, const StatsBinner &binner, + const std::vector &bins, bool isHorizontal) +{ + std::vector labels; + for (auto const &[bin, dummy]: bins) { + QString label = binner.formatLowerBound(*bin); + double lowerBound = binner.lowerBoundToFloat(*bin); + bool prefer = binner.preferBin(*bin); + labels.push_back({ label, lowerBound, prefer }); + } + + const StatsBin &lastBin = *bins.back().bin; + QString lastLabel = binner.formatUpperBound(lastBin); + double upperBound = binner.upperBoundToFloat(lastBin); + labels.push_back({ lastLabel, upperBound, false }); + + return createAxis(name, std::move(labels), isHorizontal); +} + +void StatsView::plotHistogramCountChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + bool labels, bool showMedian, bool showMean) +{ + if (!categoryBinner) + return; + + setTitle(categoryVariable->name()); + + std::vector categoryBins = categoryBinner->count_dives(dives, true); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + bool isHorizontal = subType == ChartSubType::Horizontal; + HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + + int maxCategoryCount = getMaxCount(categoryBins); + int total = getTotalCount(categoryBins); + + StatsAxis *valAxis = createCountAxis(maxCategoryCount, isHorizontal); + double chartHeight = valAxis->minMax().second; + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + std::vector items; + items.reserve(categoryBins.size()); + + for (auto const &[bin, count]: categoryBins) { + double lowerBound = categoryBinner->lowerBoundToFloat(*bin); + double upperBound = categoryBinner->upperBoundToFloat(*bin); + std::vector label = labels ? 
makePercentageLabels(count, total, isHorizontal) + : std::vector(); + + items.push_back({ lowerBound, upperBound, count, label, + categoryBinner->formatWithUnit(*bin), total }); + } + + BarSeries *series = createSeries(isHorizontal, categoryVariable->name(), items); + + if (categoryVariable->type() == StatsVariable::Type::Numeric) { + if (showMean) { + double mean = categoryVariable->mean(dives); + QPen meanPen(Qt::green); + meanPen.setWidth(2); + if (!std::isnan(mean)) + addHistogramMarker(mean, 0.0, chartHeight, meanPen, isHorizontal, series); + } + if (showMedian) { + double median = categoryVariable->quartiles(dives).q2; + QPen medianPen(Qt::red); + medianPen.setWidth(2); + if (!std::isnan(median)) + addHistogramMarker(median, 0.0, chartHeight, medianPen, isHorizontal, series); + } + } +} + +void StatsView::plotHistogramValueChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable, StatsOperation valueAxisOperation, + bool labels) +{ + if (!categoryBinner) + return; + + setTitle(QStringLiteral("%1 (%2)").arg(valueVariable->name(), StatsVariable::operationName(valueAxisOperation))); + + std::vector categoryBins = valueVariable->bin_operations(*categoryBinner, dives, true); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + bool isHorizontal = subType == ChartSubType::Horizontal; + HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + + const auto [minValue, maxValue] = getMinMaxValue(categoryBins, valueAxisOperation); + + int decimals = valueVariable->decimals(); + ValueAxis *valAxis = createAxis(valueVariable->nameWithUnit(), + 0.0, maxValue, decimals, isHorizontal); + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + std::vector items; + items.reserve(categoryBins.size()); + + 
QString unit = valueVariable->unitSymbol(); + for (auto const &[bin, res]: categoryBins) { + if (!res.isValid()) + continue; + double height = res.get(valueAxisOperation); + double lowerBound = categoryBinner->lowerBoundToFloat(*bin); + double upperBound = categoryBinner->upperBoundToFloat(*bin); + QString value = QString("%L1").arg(height, 0, 'f', decimals); + std::vector label = labels ? std::vector { value } + : std::vector(); + items.push_back({ lowerBound, upperBound, height, label, + categoryBinner->formatWithUnit(*bin), res }); + } + + createSeries(isHorizontal, categoryVariable->name(), valueVariable, items); +} + +void StatsView::plotHistogramStackedChart(const std::vector &dives, + ChartSubType subType, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool showLegend) +{ + if (!categoryBinner || !valueBinner) + return; + + setTitle(valueVariable->nameWithBinnerUnit(*valueBinner)); + + std::vector categoryBins = categoryBinner->bin_dives(dives, true); + + // Construct the histogram axis now, because the pointers to the bins + // will be moved away when constructing BarPlotData below. 
+ bool isHorizontal = subType == ChartSubType::HorizontalStacked; + HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, !isHorizontal); + + BarPlotData data(categoryBins, *valueBinner); + if (showLegend) + legend = std::make_unique(chart, data.vbinNames); + + CountAxis *valAxis = createCountAxis(data.maxCategoryCount, isHorizontal); + + if (isHorizontal) + addAxes(valAxis, catAxis); + else + addAxes(catAxis, valAxis); + + std::vector items; + items.reserve(data.hbin_counts.size()); + + for (auto &[hbin, counts, total]: data.hbin_counts) { + double lowerBound = categoryBinner->lowerBoundToFloat(*hbin); + double upperBound = categoryBinner->upperBoundToFloat(*hbin); + items.push_back({ lowerBound, upperBound, makeCountLabels(counts, total, labels, isHorizontal), + categoryBinner->formatWithUnit(*hbin) }); + } + + createSeries(isHorizontal, true, categoryVariable->name(), valueVariable, std::move(data.vbinNames), items); +} + +void StatsView::plotHistogramBoxChart(const std::vector &dives, + const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, + const StatsVariable *valueVariable) +{ + if (!categoryBinner) + return; + + setTitle(valueVariable->name()); + + std::vector categoryBins = valueVariable->bin_quartiles(*categoryBinner, dives, true); + + // If there is nothing to display, quit + if (categoryBins.empty()) + return; + + HistogramAxis *catAxis = createHistogramAxis(categoryVariable->nameWithBinnerUnit(*categoryBinner), + *categoryBinner, categoryBins, true); + + auto [minY, maxY] = getMinMaxValue(categoryBins); + ValueAxis *valueAxis = createAxis(valueVariable->nameWithUnit(), + minY, maxY, valueVariable->decimals(), false); + + addAxes(catAxis, valueAxis); + + BoxSeries *series = createSeries(valueVariable->name(), valueVariable->unitSymbol(), valueVariable->decimals()); + + for (auto &[bin, q]: categoryBins) { + if (!q.isValid()) + continue; + double lowerBound 
= categoryBinner->lowerBoundToFloat(*bin); + double upperBound = categoryBinner->upperBoundToFloat(*bin); + series->append(lowerBound, upperBound, q, categoryBinner->formatWithUnit(*bin)); + } +} + +static bool is_linear_regression(int sample_size, double cov, double sx2, double sy2) +{ + // One point never, two points always form a line + if (sample_size < 2) + return false; + if (sample_size <= 2) + return true; + + const double tval[] = { 12.709, 4.303, 3.182, 2.776, 2.571, 2.447, 2.201, 2.120, 2.080, 2.056, 2.021, 1.960, 1.960 }; + const int t_df[] = { 1, 2, 3, 4, 5, 6, 11, 16, 21, 26, 40, 100, 100000 }; + int df = sample_size - 2; // Following is the two-tailed t-value at p < 0.05 and [sample_size - 2] degrees of freedom for the dive data: + double t = (cov / sx2) / sqrt(((sy2 - cov * cov / sx2) / (double)df) / sx2); + for (int i = std::size(tval) - 2; i >= 0; i--) { // We do linear interpolation rather than having a large lookup table. + if (df >= t_df[i]) { // Look up the appropriate reference t-value at p < 0.05 and df degrees of freedom + double t_lookup = tval[i] - (tval[i] - tval[i+1]) * (df - t_df[i]) / (t_df[i+1] - t_df[i]); + return abs(t) >= t_lookup; + } + } + + return true; // can't happen, as we tested for sample_size above. 
+} + +// Returns the coefficients [a,b] of the line y = ax + b +// If case of an undetermined regression or one with infinite slope, returns [nan, nan] +static std::pair linear_regression(const std::vector &v) +{ + if (v.size() < 2) + return { NaN, NaN }; + + // First, calculate the x and y average + double avg_x = 0.0, avg_y = 0.0; + for (auto [x, y, d]: v) { + avg_x += x; + avg_y += y; + } + avg_x /= (double)v.size(); + avg_y /= (double)v.size(); + + double cov = 0.0, sx2 = 0.0, sy2 = 0.0; + for (auto [x, y, d]: v) { + cov += (x - avg_x) * (y - avg_y); + sx2 += (x - avg_x) * (x - avg_x); + sy2 += (y - avg_y) * (y - avg_y); + } + + bool is_linear = is_linear_regression((int)v.size(), cov, sx2, sy2); + + if (fabs(sx2) < 1e-10 || !is_linear) // If t is not statistically significant, do not plot the regression line. + return { NaN, NaN }; + double a = cov / sx2; + double b = avg_y - a * avg_x; + return { a, b }; +} + +void StatsView::plotScatter(const std::vector &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable) +{ + setTitle(StatsTranslations::tr("%1 vs. %2").arg(valueVariable->name(), categoryVariable->name())); + + std::vector points = categoryVariable->scatter(*valueVariable, dives); + if (points.empty()) + return; + + double minX = points.front().x; + double maxX = points.back().x; + auto [minY, maxY] = getMinMaxValue(points); + + StatsAxis *axisX = categoryVariable->type() == StatsVariable::Type::Continuous ? 
+		static_cast(createAxis(categoryVariable->nameWithUnit(),
+						  minX, maxX, true)) :
+		static_cast(createAxis(categoryVariable->nameWithUnit(),
+						  minX, maxX, categoryVariable->decimals(), true));
+
+	StatsAxis *axisY = createAxis(valueVariable->nameWithUnit(), minY, maxY, valueVariable->decimals(), false);
+
+	addAxes(axisX, axisY);
+	ScatterSeries *series = createSeries(*categoryVariable, *valueVariable);
+
+	for (auto [x, y, dive]: points)
+		series->append(dive, x, y);
+
+	// y = ax + b
+	// linear_regression() returns NaN coefficients when no line should be drawn.
+	auto [a, b] = linear_regression(points);
+	if (!std::isnan(a)) {
+		// The line is drawn over the range of the x-axis, not only over the range of the data.
+		auto [minx, maxx] = axisX->minMax();
+		addLinearRegression(a, b, minx, maxx, series);
+	}
+}
diff --git a/stats/statsview.h b/stats/statsview.h
new file mode 100644
index 000000000..c65a4232a
--- /dev/null
+++ b/stats/statsview.h
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef STATS_VIEW_H
+#define STATS_VIEW_H
+
+#include "statsstate.h"
+#include
+#include
+
+struct dive;
+struct StatsBinner;
+struct StatsBin;
+struct StatsState;
+struct StatsVariable;
+
+namespace QtCharts {
+	class QAbstractSeries;
+	class QChart;
+}
+class QGraphicsLineItem;
+class StatsSeries;
+class CategoryAxis;
+class CountAxis;
+class HistogramAxis;
+class StatsAxis;
+class Legend;
+
+enum class ChartSubType : int;
+enum class StatsOperation : int;
+
+// Widget that shows the chart described by a StatsState structure (see plot()).
+// It only draws the chart; the UI for selecting variables and chart types is
+// not part of this class. The chart object itself is allocated by QML.
+class StatsView : public QQuickWidget {
+	Q_OBJECT
+public:
+	StatsView(QWidget *parent = NULL);
+	~StatsView();
+
+	void plot(const StatsState &state); // show the chart described by state
+private slots:
+	void plotAreaChanged(const QRectF &plotArea);
+	void replotIfVisible();
+private:
+	void reset(); // clears all series and axes
+	void addAxes(StatsAxis *x, StatsAxis *y); // Add new x- and y-axis
+
+	// One plotting routine per chart type, implemented in statsview.cpp.
+	// The routines taking a categoryBinner plot binned data; plotScatter() takes none.
+	void plotBarChart(const std::vector &dives,
+			  ChartSubType subType,
+			  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+			  const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool legend);
+	void plotValueChart(const std::vector &dives,
+			    ChartSubType subType,
+			    const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+			    const StatsVariable *valueVariable, StatsOperation valueAxisOperation, bool labels);
+	void plotDiscreteCountChart(const std::vector &dives,
+				    ChartSubType subType,
+				    const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, bool labels);
+	void plotPieChart(const std::vector &dives,
+			  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, bool labels, bool legend);
+	void plotDiscreteBoxChart(const std::vector &dives,
+				  const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, const StatsVariable *valueVariable);
+	void plotDiscreteScatter(const std::vector &dives,
+				 const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+				 const StatsVariable *valueVariable, bool quartiles);
+	void plotHistogramCountChart(const std::vector &dives,
+				     ChartSubType subType,
+				     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+				     bool labels, bool showMedian, bool showMean);
+	void plotHistogramValueChart(const std::vector &dives,
+				     ChartSubType subType,
+				     const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+				     const StatsVariable *valueVariable, StatsOperation valueAxisOperation, bool labels);
+	void plotHistogramStackedChart(const std::vector &dives,
+				       ChartSubType subType,
+				       const StatsVariable *categoryVariable, const StatsBinner *categoryBinner,
+				       const StatsVariable *valueVariable, const StatsBinner *valueBinner, bool labels, bool legend);
+	void plotHistogramBoxChart(const std::vector &dives,
+				   const StatsVariable *categoryVariable, const StatsBinner *categoryBinner, const StatsVariable *valueVariable);
+	void plotScatter(const std::vector &dives, const StatsVariable *categoryVariable, const StatsVariable *valueVariable);
+	void setTitle(const QString &);
+
+	// Factory helpers for series and axes; extra arguments are forwarded.
+	// NOTE(review): presumably the created objects end up in `series` / `axes` below - confirm.
+	template
+	T *createSeries(Args&&... args);
+
+	template
+	T *createAxis(const QString &title, Args&&...
+		      args);
+
+	template
+	CategoryAxis *createCategoryAxis(const QString &title, const StatsBinner &binner,
+					 const std::vector &bins, bool isHorizontal);
+	template
+	HistogramAxis *createHistogramAxis(const QString &title, const StatsBinner &binner,
+					   const std::vector &bins, bool isHorizontal);
+	CountAxis *createCountAxis(int maxVal, bool isHorizontal);
+
+	// Helper functions to add features to the chart
+	void addLineMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal);
+
+	// A short line used to mark quartiles
+	struct QuartileMarker {
+		std::unique_ptr item;
+		QtCharts::QAbstractSeries *series; // In case we ever support charts with multiple axes
+		double pos, value;
+		QuartileMarker(double pos, double value, QtCharts::QAbstractSeries *series);
+		void updatePosition();
+	};
+
+	// A general line marker
+	struct LineMarker {
+		std::unique_ptr item;
+		QtCharts::QAbstractSeries *series; // In case we ever support charts with multiple axes
+		QPointF from, to; // In local coordinates
+		void updatePosition();
+		LineMarker(QPointF from, QPointF to, QPen pen, QtCharts::QAbstractSeries *series);
+	};
+
+	// Draws the line y = a * x + b; plotScatter() passes the range of the x-axis as minX / maxX.
+	void addLinearRegression(double a, double b, double minX, double maxX, QtCharts::QAbstractSeries *series);
+	void addHistogramMarker(double pos, double low, double high, const QPen &pen, bool isHorizontal, QtCharts::QAbstractSeries *series);
+
+	StatsState state; // NOTE(review): presumably the state of the last plot() call, kept for replotIfVisible() - confirm
+	QtCharts::QChart *chart; // allocated by QML (see below), not by this class
+	std::vector> axes;
+	std::vector> series;
+	std::unique_ptr legend; // set by the plotting routines when a legend is requested
+	std::vector quartileMarkers;
+	std::vector lineMarkers;
+	StatsSeries *highlightedSeries; // NOTE(review): presumably the series under the cursor, maintained by hover() - confirm
+
+	// This is unfortunate: we can't derive from QChart, because the chart is allocated by QML.
+	// Therefore, we have to listen to hover events using an events-filter.
+	// Probably we should try to get rid of the QML ChartView.
+	struct EventFilter : public QObject {
+		StatsView *view; // the view this filter belongs to
+		EventFilter(StatsView *view) : view(view) {}
+	private:
+		bool eventFilter(QObject *o, QEvent *event); // same signature as QObject::eventFilter()
+	} eventFilter;
+	friend EventFilter;
+	void hover(QPointF pos);
+};
+
+#endif
--
cgit v1.2.3-70-g09d2