summaryrefslogtreecommitdiffstats
path: root/stats
diff options
context:
space:
mode:
authorGravatar Berthold Stoeger <bstoeger@mail.tuwien.ac.at>2021-01-01 22:37:55 +0100
committerGravatar Dirk Hohndel <dirk@hohndel.org>2021-01-02 11:04:03 -0800
commita034014a6aaf807119feac0638461f5c95990b5e (patch)
tree11ade95a8e40abf896b58efd7cb648ce6ed40220 /stats
parentcccc0abc0c8ddf791d09fe514375230a5609e7c7 (diff)
downloadsubsurface-a034014a6aaf807119feac0638461f5c95990b5e.tar.gz
statistics: implement a structure representing the chart state
The StatsState structure fully describes the current state of the chart: the selected axes, operations and additional chart features, such as legend or labels. The code implements sanity checks and reacts accordingly, if an invalid combination of variables and charts is chosen. The chart and variable lists to be displayed can be queried and are encapsulated in the StatsState::UIState structure. Some variable / chart combinations are possible, but not recommended, which is represented by a warning flag. Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
Diffstat (limited to 'stats')
-rw-r--r--stats/CMakeLists.txt2
-rw-r--r--stats/statsstate.cpp496
-rw-r--r--stats/statsstate.h120
3 files changed, 618 insertions, 0 deletions
diff --git a/stats/CMakeLists.txt b/stats/CMakeLists.txt
index 2065dada3..594bc6fd2 100644
--- a/stats/CMakeLists.txt
+++ b/stats/CMakeLists.txt
@@ -23,6 +23,8 @@ set(SUBSURFACE_STATS_SRCS
statscolors.cpp
statsseries.h
statsseries.cpp
+ statsstate.h
+ statsstate.cpp
statsvariables.h
statsvariables.cpp
zvalues.h
diff --git a/stats/statsstate.cpp b/stats/statsstate.cpp
new file mode 100644
index 000000000..e26a805eb
--- /dev/null
+++ b/stats/statsstate.cpp
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "statsstate.h"
+#include "statstranslations.h"
+#include "statsvariables.h"
+
+// Untranslated display names of the chart subtypes.
+// Attn: The order must correspond to the ChartSubType enum in statsstate.h,
+// because the table is indexed with the integer value of the subtype.
+static const char *chart_subtype_names[] = {
+	QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
+};
+
+// ChartSubType::Count is the last enumerator and therefore equals the number
+// of subtypes preceding it. Fail the build if a subtype is added to the enum
+// without a matching name, instead of reading past the end of the table.
+static_assert((int)(sizeof(chart_subtype_names) / sizeof(chart_subtype_names[0])) == (int)ChartSubType::Count,
+	      "chart_subtype_names needs exactly one entry per ChartSubType");
+
+// Kind of variable a chart type accepts on one of its two axes
+// (see ChartTypeDesc::var1 and ChartTypeDesc::var2). variableValidity()
+// matches these against the StatsVariable::Type of the selected variable.
+enum class SupportedVariable {
+ Count, // No variable: the number of dives is plotted (var2 == nullptr)
+ Categorical, // Implies that the variable is binned
+ Continuous, // Implies that the variable is binned
+ Numeric
+};
+
+// Optional chart features, one bit each. The flags are or-ed together in
+// ChartTypeDesc::features and StatsState::chartFeatures. A single flag also
+// serves as the id of a StatsState::Feature passed to featureChanged().
+static const int ChartFeatureLabels = 1 << 0;
+static const int ChartFeatureLegend = 1 << 1;
+static const int ChartFeatureMedian = 1 << 2;
+static const int ChartFeatureMean = 1 << 3;
+static const int ChartFeatureQuartiles = 1 << 4;
+
+// Static description of every chart type: which kinds of variables it can
+// plot, which subtypes it offers and which optional features it supports.
+// validCharts() walks this table in order; newChartType() picks the first
+// suitable entry, so the order of the entries determines the default chart.
+static const struct ChartTypeDesc {
+ ChartType id;
+ const char *name; // Untranslated; passed through StatsTranslations::tr() for display
+ SupportedVariable var1; // Kind of variable accepted as first (independent) variable
+ SupportedVariable var2; // Kind of variable accepted as second (dependent) variable
+ bool var2HasOperations; // If true, an operation of the second variable can be selected
+ const std::vector<ChartSubType> subtypes; // First entry is the fallback subtype in validate()
+ int features; // Bitwise or of the ChartFeature* flags
+} chart_types[] = {
+ {
+ ChartType::ScatterPlot,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Dots },
+ 0
+ },
+ {
+ ChartType::HistogramCount,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
+ },
+ {
+ ChartType::HistogramValue,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ true,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::HistogramBox,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Box },
+ 0
+ },
+ {
+ ChartType::HistogramStacked,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Categorical,
+ false,
+ { ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
+ ChartFeatureLabels | ChartFeatureLegend
+ },
+ {
+ ChartType::DiscreteScatter,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Dots },
+ ChartFeatureQuartiles
+ },
+ {
+ ChartType::DiscreteValue,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ true,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::DiscreteCount,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::DiscreteBox,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Box },
+ 0
+ },
+ {
+ ChartType::Pie,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Pie },
+ ChartFeatureLabels | ChartFeatureLegend
+ },
+ {
+ ChartType::DiscreteBar,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Categorical,
+ false,
+ { ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
+ ChartFeatureLabels | ChartFeatureLegend
+ }
+};
+
+// Some charts are valid, but not preferable. For example, a numeric variable
+// is better plotted in a histogram than in a categorical bar chart. To
+// describe this, use an enum: good, undesired, invalid. Default to "good"
+// charts first, but ultimately let the user decide.
+enum ChartValidity {
+ Good, // Recommended combination of chart and variables
+ Undesired, // Possible, but flagged with a warning in the chart list
+ Invalid // Not possible; the chart is not offered at all
+};
+
+// Id of the "Count" entry that createVariableList() adds to a variable list.
+// var2Changed() maps this id to a nullptr variable. Real variables use their
+// non-negative index in stats_variables as id, so -1 cannot clash with them.
+static const int count_idx = -1; // Special index for the count variable
+
+// Sets up the default state: the first entry of stats_variables as first
+// variable and "count" (nullptr) as second variable. The chart type, subtype,
+// binners and operation given here are only starting values; validate() is
+// run at the end and replaces whatever does not fit the chosen variables.
+// chartFeatures is not listed in the initializers; it is assigned by validate().
+StatsState::StatsState() :
+ var1(stats_variables[0]),
+ var2(nullptr),
+ type(ChartType::DiscreteBar),
+ subtype(ChartSubType::Vertical),
+ labels(true),
+ legend(true),
+ median(false),
+ mean(false),
+ quartiles(true),
+ var1Binner(nullptr),
+ var2Binner(nullptr),
+ var2Operation(StatsOperation::Invalid),
+ var1Binned(false),
+ var2Binned(false),
+ var2HasOperations(false)
+{
+ validate(true);
+}
+
+// Builds the list of variables offered for one axis.
+//  selected: currently chosen variable; nullptr stands for "count".
+//  addCount: if true, a "Count" entry with id count_idx is put first.
+//  omit:     variable to leave out (e.g. the one used on the other axis).
+// The "selected" field of the result is the position of the chosen entry
+// in the list, or -1 if the chosen variable is not part of the list.
+static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addCount, const StatsVariable *omit)
+{
+	StatsState::VariableList list;
+	list.selected = -1;
+	list.variables.reserve(stats_variables.size() + addCount);
+
+	if (addCount) {
+		// A null selection means that "count" is the active entry
+		if (!selected)
+			list.selected = (int)list.variables.size();
+		list.variables.push_back({ StatsTranslations::tr("Count"), count_idx });
+	}
+
+	// The id of a real variable is its index in stats_variables
+	const int numVariables = (int)stats_variables.size();
+	for (int idx = 0; idx != numVariables; ++idx) {
+		const StatsVariable *candidate = stats_variables[idx];
+		if (candidate != omit) {
+			if (candidate == selected)
+				list.selected = (int)list.variables.size();
+			list.variables.push_back({ candidate->name(), idx });
+		}
+	}
+	return list;
+}
+
+// Chart type and subtype are handed to the UI as one integer id: the type
+// sits in the upper and the subtype in the lower 16 bits of a 32 bit
+// integer. Admittedly not the most elegant interface.
+static int toInt(ChartType type, ChartSubType subtype)
+{
+	const int upperHalf = (int)type << 16;
+	const int lowerHalf = (int)subtype;
+	return upperHalf | lowerHalf;
+}
+
+// Inverse of toInt(): splits an id into the chart type (upper 16 bits)
+// and the chart subtype (lower 16 bits).
+static std::pair<ChartType, ChartSubType> fromInt(int id)
+{
+	// Mask the complete lower half. toInt() reserves 16 bits for the
+	// subtype, so a mask of 0xff would truncate any subtype above 255.
+	return { (ChartType)(id >> 16), (ChartSubType)(id & 0xffff) };
+}
+
+// Rates how well a variable of the given type fits an axis that expects
+// the given kind of variable.
+static ChartValidity variableValidity(StatsVariable::Type type, SupportedVariable var)
+{
+	if (var == SupportedVariable::Categorical) {
+		// Number-like variables can be binned into categories, but
+		// there are better ways of plotting them.
+		const bool numberLike = type == StatsVariable::Type::Continuous ||
+					type == StatsVariable::Type::Numeric;
+		return numberLike ? ChartValidity::Undesired : ChartValidity::Good;
+	}
+	if (var == SupportedVariable::Continuous)
+		return type == StatsVariable::Type::Discrete ? ChartValidity::Invalid : ChartValidity::Good;
+	if (var == SupportedVariable::Numeric)
+		return type == StatsVariable::Type::Numeric ? ChartValidity::Good : ChartValidity::Invalid;
+
+	// SupportedVariable::Count (or an unknown value): count has been
+	// special cased outside of this function, so this is never a match.
+	return ChartValidity::Invalid;
+}
+
+// Rates a chart type for a pair of variables. var2 == nullptr stands for
+// "count". The chart is invalid as soon as one variable is invalid, and
+// undesired as soon as one variable is undesired.
+static ChartValidity chartValidity(const ChartTypeDesc &desc, const StatsVariable *var1, const StatsVariable *var2)
+{
+	// Count is not supported as independent variable
+	if (var1 == nullptr)
+		return ChartValidity::Invalid;
+
+	const ChartValidity first = variableValidity(var1->type(), desc.var1);
+	if (first == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	// No second variable is our special marker for "count": only the
+	// rating of the first variable matters then.
+	if (!var2) {
+		if (desc.var2 == SupportedVariable::Count)
+			return first;
+		return ChartValidity::Invalid;
+	}
+
+	const ChartValidity second = variableValidity(var2->type(), desc.var2);
+	if (second == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	const bool anyUndesired = first == ChartValidity::Undesired ||
+				  second == ChartValidity::Undesired;
+	return anyUndesired ? ChartValidity::Undesired : ChartValidity::Good;
+}
+
+// Collects all chart types that can plot the given pair of variables, in
+// the order of the chart_types table. Each entry is a (chart-type, warning)
+// pair; the warning is set for charts that are possible but undesired.
+const std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2)
+{
+	std::vector<std::pair<const ChartTypeDesc &, bool>> accepted;
+	accepted.reserve(std::size(chart_types));
+	for (const ChartTypeDesc &candidate: chart_types) {
+		const ChartValidity verdict = chartValidity(candidate, var1, var2);
+		if (verdict != ChartValidity::Invalid)
+			accepted.emplace_back(candidate, verdict == ChartValidity::Undesired);
+	}
+	return accepted;
+}
+
+// Builds the list of charts offered for the given variables: one entry per
+// subtype of every valid chart type. Note that, unlike in the variable and
+// binner lists, "selected" is the id (see toInt()) of the currently chosen
+// chart and not a position in the list; it is -1 if that chart is not offered.
+static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2, ChartType selectedType, ChartSubType selectedSubType)
+{
+	StatsState::ChartList list;
+	list.selected = -1;
+	for (auto [desc, warn]: validCharts(var1, var2)) {
+		const QString typeName = StatsTranslations::tr(desc.name);
+		for (ChartSubType sub: desc.subtypes) {
+			const int id = toInt(desc.id, sub);
+			const bool isCurrent = desc.id == selectedType && sub == selectedSubType;
+			if (isCurrent)
+				list.selected = id;
+			const QString subName = StatsTranslations::tr(chart_subtype_names[(int)sub]);
+			list.charts.push_back({ typeName, subName, sub, id, warn });
+		}
+	}
+
+	// If none of the charts is recommended, a warning on every single
+	// entry is of no use - remove the flags. This can happen if the first
+	// variable is numerical, but the second is categorical.
+	bool anyRecommended = false;
+	for (const StatsState::Chart &chart: list.charts) {
+		if (!chart.warning) {
+			anyRecommended = true;
+			break;
+		}
+	}
+	if (!anyRecommended) {
+		for (StatsState::Chart &chart: list.charts)
+			chart.warning = false;
+	}
+
+	return list;
+}
+
+// Builds the list of binner names offered for a variable. The list stays
+// empty (selected == -1) if the variable is not binned, if there is no
+// variable, or if there is at most one binner to choose from. Otherwise
+// "selected" is the position of the given binner, or -1 if it is not found.
+static StatsState::BinnerList createBinnerList(bool binned, const StatsVariable *var, const StatsBinner *binner)
+{
+	StatsState::BinnerList list;
+	list.selected = -1;
+	if (binned && var) {
+		const std::vector<const StatsBinner *> available = var->binners();
+		// Don't show combo boxes for single binners
+		if (available.size() > 1) {
+			list.binners.reserve(available.size());
+			for (const StatsBinner *candidate: available) {
+				if (candidate == binner)
+					list.selected = (int)list.binners.size();
+				list.binners.push_back(candidate->name());
+			}
+		}
+	}
+	return list;
+}
+
+// Builds the list of operations offered for a variable. This reuses the
+// variable list type: the name is the operation name and the id is the
+// integer value of the StatsOperation. The list stays empty if the chart
+// has no operations or if there is no variable. "selected" is the position
+// of the given operation, or -1 if it is not in the list.
+static StatsState::VariableList createOperationsList(bool hasOperations, const StatsVariable *var, StatsOperation operation)
+{
+	StatsState::VariableList list;
+	list.selected = -1;
+	if (hasOperations && var) {
+		const std::vector<StatsOperation> supported = var->supportedOperations();
+		list.variables.reserve(supported.size());
+		for (StatsOperation candidate: supported) {
+			if (candidate == operation)
+				list.selected = (int)list.variables.size();
+			list.variables.push_back({ StatsVariable::operationName(candidate), (int)candidate });
+		}
+	}
+	return list;
+}
+
+// Builds the list of optional features supported by the current chart.
+// chartFeatures is the bitmask of supported ChartFeature* flags; the
+// booleans are the current on/off state of the respective feature.
+// The id of each entry is its ChartFeature* flag.
+static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
+{
+	const auto supports = [chartFeatures] (int flag) {
+		return (chartFeatures & flag) != 0;
+	};
+
+	std::vector<StatsState::Feature> features;
+	if (supports(ChartFeatureLabels))
+		features.push_back({ StatsTranslations::tr("labels"), ChartFeatureLabels, labels });
+	if (supports(ChartFeatureLegend))
+		features.push_back({ StatsTranslations::tr("legend"), ChartFeatureLegend, legend });
+	if (supports(ChartFeatureMedian))
+		features.push_back({ StatsTranslations::tr("median"), ChartFeatureMedian, median });
+	if (supports(ChartFeatureMean))
+		features.push_back({ StatsTranslations::tr("mean"), ChartFeatureMean, mean });
+	if (supports(ChartFeatureQuartiles))
+		features.push_back({ StatsTranslations::tr("quartiles"), ChartFeatureQuartiles, quartiles });
+	return features;
+}
+
+// Assembles everything the UI needs to display the current state: the
+// variable, chart, binner, operation and feature lists with their
+// respective selections, plus the names of the two variables.
+StatsState::UIState StatsState::getUIState() const
+{
+	UIState state;
+
+	// Names stay empty if there is no variable (i.e. "count" for var2)
+	if (var1)
+		state.var1Name = var1->name();
+	if (var2)
+		state.var2Name = var2->name();
+
+	// The second list offers "count", but not the first variable
+	state.var1 = createVariableList(var1, false, nullptr);
+	state.var2 = createVariableList(var2, true, var1);
+
+	state.charts = createChartList(var1, var2, type, subtype);
+	state.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
+	state.binners1 = createBinnerList(var1Binned, var1, var1Binner);
+	state.binners2 = createBinnerList(var2Binned, var2, var2Binner);
+	state.operations2 = createOperationsList(var2HasOperations, var2, var2Operation);
+	return state;
+}
+
+// Returns the binner at position idx in the binner list of the variable,
+// or nullptr if there is no variable or idx is out of range.
+static const StatsBinner *idxToBinner(const StatsVariable *v, int idx)
+{
+	if (!v)
+		return nullptr;
+	const auto binners = v->binners();
+	if (idx < 0 || idx >= (int)binners.size())
+		return nullptr;
+	return binners[idx];
+}
+
+// Selects the first variable by its index in stats_variables. Out-of-range
+// ids are clamped to the nearest valid index. Since a variable changed,
+// validate() may switch away from a non-recommended chart type.
+void StatsState::var1Changed(int id)
+{
+	// The bounds of std::clamp are inclusive: the last valid index is
+	// size() - 1, not size(). An empty list leaves no variable to select.
+	const int lastIdx = (int)stats_variables.size() - 1;
+	var1 = lastIdx < 0 ? nullptr : stats_variables[std::clamp(id, 0, lastIdx)];
+	validate(true);
+}
+
+// Selects the binner of the first variable by its position in the
+// variable's binner list. An invalid position yields nullptr, which
+// validate() then replaces if the variable is binned.
+void StatsState::binner1Changed(int idx)
+{
+	const StatsBinner *chosen = idxToBinner(var1, idx);
+	var1Binner = chosen;
+	validate(false);
+}
+
+// Selects the second variable: either "count" (id == count_idx) or a
+// variable given by its index in stats_variables. Out-of-range ids are
+// clamped to the nearest valid index. Since a variable changed,
+// validate() may switch away from a non-recommended chart type.
+void StatsState::var2Changed(int id)
+{
+	// The bounds of std::clamp are inclusive: the last valid index is
+	// size() - 1, not size().
+	const int lastIdx = (int)stats_variables.size() - 1;
+
+	// The "count" variable is represented by a nullptr. Fall back to it
+	// as well if there are no variables to choose from.
+	if (id == count_idx || lastIdx < 0)
+		var2 = nullptr;
+	else
+		var2 = stats_variables[std::clamp(id, 0, lastIdx)];
+	validate(true);
+}
+
+// Selects the binner of the second variable by its position in the
+// variable's binner list. An invalid position yields nullptr, which
+// validate() then replaces if the variable is binned.
+void StatsState::binner2Changed(int idx)
+{
+	const StatsBinner *chosen = idxToBinner(var2, idx);
+	var2Binner = chosen;
+	validate(false);
+}
+
+// Selects the operation of the second variable. The id is the integer
+// value of a StatsOperation, as handed out in the operations list.
+// validate() replaces it if the variable does not support the operation.
+void StatsState::var2OperationChanged(int id)
+{
+	var2Operation = static_cast<StatsOperation>(id);
+	validate(false);
+}
+
+// Selects chart type and subtype from a combined id as handed out in the
+// chart list (see toInt()). The user chose the chart explicitly, so
+// validate() keeps it even if it is not recommended.
+void StatsState::chartChanged(int id)
+{
+	const std::pair<ChartType, ChartSubType> unpacked = fromInt(id);
+	type = unpacked.first;
+	subtype = unpacked.second;
+	validate(false);
+}
+
+// Switches an optional chart feature on or off. The id is one of the
+// ChartFeature* flags; unknown ids are ignored.
+void StatsState::featureChanged(int id, bool state)
+{
+	switch (id) {
+	case ChartFeatureLabels:
+		labels = state;
+		break;
+	case ChartFeatureLegend:
+		legend = state;
+		break;
+	case ChartFeatureMedian:
+		median = state;
+		break;
+	case ChartFeatureMean:
+		mean = state;
+		break;
+	case ChartFeatureQuartiles:
+		quartiles = state;
+		break;
+	default:
+		break;
+	}
+}
+
+// Determines the new chart-type from the current chart-type and the list of
+// possible chart types. The current type is kept if it is in the list, unless
+// the flag "varChanged" is true and the current type is undesired. Otherwise
+// the first non-undesired chart is chosen, then the first chart of the list
+// and, if the list is empty, the first entry of the chart_types table.
+const ChartTypeDesc &newChartType(ChartType type, std::vector<std::pair<const ChartTypeDesc &, bool>> charts,
+				  bool varChanged)
+{
+	for (const auto &entry: charts) {
+		if (entry.first.id != type)
+			continue;
+		// Found it, but if the axis was changed, we change anyway if the chart is "undesired"
+		const bool keep = !varChanged || !entry.second;
+		if (keep)
+			return entry.first;
+		break;
+	}
+
+	// Find the first non-undesired chart
+	for (const auto &entry: charts) {
+		if (!entry.second)
+			return entry.first;
+	}
+
+	if (charts.empty())
+		return chart_types[0];
+	return charts[0].first;
+}
+
+// Makes sure that the binner fits the variable: it is reset to nullptr if
+// there is no variable or the variable is not binned. A binner that belongs
+// to the variable is kept; any other is replaced by the variable's first
+// binner (or nullptr if the variable has none).
+static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool isBinned)
+{
+	if (!isBinned || !var) {
+		binner = nullptr;
+		return;
+	}
+
+	const auto available = var->binners();
+	for (const StatsBinner *candidate: available) {
+		if (candidate == binner)
+			return; // Still valid - nothing to do
+	}
+
+	// For now choose the first binner. However, we might try to be smarter here
+	// and adapt to the given screen size and the estimated number of bins.
+	if (available.empty())
+		binner = nullptr;
+	else
+		binner = available[0];
+}
+
+// Makes sure that the operation fits the variable: it is reset to
+// StatsOperation::Invalid if the chart has no operations or there is no
+// variable. An operation supported by the variable is kept; any other is
+// replaced by the variable's first supported operation (or
+// StatsOperation::Invalid if the variable supports none).
+static void validateOperation(StatsOperation &operation, const StatsVariable *var, bool hasOperation)
+{
+	// Guard against a null variable ("count") like createOperationsList()
+	// and validateBinner() do, instead of dereferencing it below.
+	if (!hasOperation || !var) {
+		operation = StatsOperation::Invalid;
+		return;
+	}
+	std::vector<StatsOperation> ops = var->supportedOperations();
+	if (std::find(ops.begin(), ops.end(), operation) != ops.end())
+		return;
+
+	operation = ops.empty() ? StatsOperation::Invalid : ops[0];
+}
+
+// Brings the state into a consistent form after any change.
+// The "varChanged" flag tells whether this function is called after a
+// variable change or after a change of the chart type. In the former case,
+// the chart type is switched if it is not recommended. In the latter case,
+// the user explicitly chose a non-recommended type, so let's use that.
+void StatsState::validate(bool varChanged)
+{
+	// Take care that we don't plot a variable against itself.
+	// By default plot the count of the first variable. Is that sensible?
+	if (var2 == var1)
+		var2 = nullptr;
+
+	// Keep the currently selected chart if it is one of the valid charts,
+	// otherwise switch to a valid one.
+	const ChartTypeDesc &chosen = newChartType(type, validCharts(var1, var2), varChanged);
+	type = chosen.id;
+
+	// Keep the current subtype only if the chart supports it
+	bool subtypeSupported = false;
+	for (ChartSubType candidate: chosen.subtypes) {
+		if (candidate == subtype) {
+			subtypeSupported = true;
+			break;
+		}
+	}
+	if (!subtypeSupported)
+		subtype = chosen.subtypes.empty() ? ChartSubType::Horizontal : chosen.subtypes[0];
+
+	// Everything but the scatter plot bins the first variable; the second
+	// variable is binned if the chart treats it as categorical or continuous.
+	var1Binned = !(type == ChartType::ScatterPlot);
+	switch (chosen.var2) {
+	case SupportedVariable::Categorical:
+	case SupportedVariable::Continuous:
+		var2Binned = true;
+		break;
+	default:
+		var2Binned = false;
+		break;
+	}
+	var2HasOperations = chosen.var2HasOperations;
+
+	// Median and mean currently only if first variable is numeric
+	const bool firstIsNumeric = var1 && var1->type() == StatsVariable::Type::Numeric;
+	const int meanAndMedian = ChartFeatureMedian | ChartFeatureMean;
+	chartFeatures = firstIsNumeric ? chosen.features : chosen.features & ~meanAndMedian;
+
+	// Check that the binners and operation are valid
+	validateBinner(var1Binner, var1, var1Binned);
+	validateBinner(var2Binner, var2, var2Binned);
+	validateOperation(var2Operation, var2, var2HasOperations);
+}
diff --git a/stats/statsstate.h b/stats/statsstate.h
new file mode 100644
index 000000000..d4713d414
--- /dev/null
+++ b/stats/statsstate.h
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+// Describes the current state of the statistics widget
+// (selected variables, chart type, etc.) and is the
+// interface between UI and plotting code.
+#ifndef STATS_STATE_H
+#define STATS_STATE_H
+
+#include <vector>
+#include <QString>
+
+// The kinds of charts that can be plotted. Which variables, subtypes and
+// features each of them supports is described by the chart_types table in
+// statsstate.cpp. The integer value is part of the chart id passed to
+// StatsState::chartChanged().
+enum class ChartType {
+ DiscreteBar,
+ DiscreteValue,
+ DiscreteCount,
+ DiscreteBox,
+ DiscreteScatter,
+ Pie,
+ HistogramCount,
+ HistogramValue,
+ HistogramBox,
+ HistogramStacked,
+ ScatterPlot
+};
+
+// Variants of a chart type (orientation, grouping, etc.).
+// Attn: the order must correspond to the chart_subtype_names table in
+// statsstate.cpp, which is indexed with the integer value of the subtype.
+enum class ChartSubType {
+ Vertical = 0,
+ VerticalGrouped,
+ VerticalStacked,
+ Horizontal,
+ HorizontalGrouped,
+ HorizontalStacked,
+ Dots,
+ Box,
+ Pie,
+ Count // Last enumerator; not listed as subtype of any chart in chart_types
+};
+
+struct StatsVariable;
+struct StatsBinner;
+enum class StatsOperation : int;
+
+// Full description of the chart to be plotted plus the logic that keeps
+// the description consistent. The UI reports changes via the "state
+// changer" functions, which call validate(), and reads back what to
+// display via getUIState().
+struct StatsState {
+public:
+ StatsState();
+ // NOTE(review): no definition of the following two functions is visible
+ // in statsstate.cpp - confirm whether they are still needed.
+ int setFirstAxis();
+ int setSecondAxis();
+
+ // An entry of a variable (or operation) list
+ struct Variable {
+ QString name;
+ int id; // Passed back to var1Changed() / var2Changed() / var2OperationChanged()
+ };
+ struct VariableList {
+ std::vector<Variable> variables;
+ int selected; // Position of the selected entry in variables; -1: none
+ };
+ // An entry of the chart list: one subtype of one chart type
+ struct Chart {
+ QString name;
+ QString subtypeName;
+ ChartSubType subtype;
+ int id; // Combined chart type / subtype; passed back to chartChanged()
+ bool warning; // Not recommended for that combination
+ };
+ struct ChartList {
+ std::vector<Chart> charts;
+ int selected; // Id (not position) of the selected chart; -1: none
+ };
+ struct BinnerList {
+ std::vector<QString> binners;
+ int selected; // Position of the selected entry in binners; -1: none
+ };
+ // An optional chart feature that can be switched on or off
+ struct Feature {
+ QString name;
+ int id; // Passed back to featureChanged()
+ bool selected;
+ };
+ // Everything the UI needs to display the current state
+ struct UIState {
+ VariableList var1;
+ VariableList var2;
+ QString var1Name;
+ QString var2Name;
+ ChartList charts;
+ std::vector<Feature> features;
+ BinnerList binners1;
+ BinnerList binners2;
+ // Currently, operations are only supported on the second variable
+ // This reuses the variable list - not very nice.
+ VariableList operations2;
+ };
+ UIState getUIState() const;
+
+ // State changers
+ void var1Changed(int id);
+ void var2Changed(int id);
+ void chartChanged(int id);
+ void binner1Changed(int id);
+ void binner2Changed(int id);
+ void var2OperationChanged(int id);
+ void featureChanged(int id, bool state);
+
+ const StatsVariable *var1; // Independent variable
+ const StatsVariable *var2; // Dependent variable (nullptr: count)
+ ChartType type;
+ ChartSubType subtype;
+ bool labels;
+ bool legend;
+ bool median;
+ bool mean;
+ bool quartiles;
+ const StatsBinner *var1Binner; // nullptr: undefined
+ const StatsBinner *var2Binner; // nullptr: undefined
+ StatsOperation var2Operation;
+private:
+ // Makes chart type, subtype, binners and operation consistent with the
+ // selected variables. varChanged: called after a change of variables.
+ void validate(bool varChanged);
+ bool var1Binned; // Set by validate()
+ bool var2Binned; // Set by validate()
+ bool var2HasOperations; // Set by validate()
+ int chartFeatures; // Features supported by the current chart; set by validate()
+};
+
+#endif