summaryrefslogtreecommitdiffstats
path: root/stats
diff options
context:
space:
mode:
Diffstat (limited to 'stats')
-rw-r--r--stats/CMakeLists.txt2
-rw-r--r--stats/statsstate.cpp496
-rw-r--r--stats/statsstate.h120
3 files changed, 618 insertions, 0 deletions
diff --git a/stats/CMakeLists.txt b/stats/CMakeLists.txt
index 2065dada3..594bc6fd2 100644
--- a/stats/CMakeLists.txt
+++ b/stats/CMakeLists.txt
@@ -23,6 +23,8 @@ set(SUBSURFACE_STATS_SRCS
statscolors.cpp
statsseries.h
statsseries.cpp
+ statsstate.h
+ statsstate.cpp
statsvariables.h
statsvariables.cpp
zvalues.h
diff --git a/stats/statsstate.cpp b/stats/statsstate.cpp
new file mode 100644
index 000000000..e26a805eb
--- /dev/null
+++ b/stats/statsstate.cpp
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "statsstate.h"
+#include "statstranslations.h"
+#include "statsvariables.h"
+
+// Translatable display names of the chart subtypes.
+// Attn: The order must correspond to the ChartSubType enum in statsstate.h,
+// because the array is indexed with the integer value of the subtype.
+static const char *chart_subtype_names[] = {
+	QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
+};
+
+// ChartSubType::Count is the last enumerator, so its value equals the number
+// of real subtypes. Fail the build if a subtype is added without a name.
+static_assert(std::size(chart_subtype_names) == (size_t)ChartSubType::Count,
+	      "chart_subtype_names needs exactly one entry per ChartSubType");
+
+// The kind of variable a chart type accepts on one of its axes.
+// variableValidity() matches these against the StatsVariable::Type of a variable.
+enum class SupportedVariable {
+ Count, // Not a real variable: the number of entries (second variable only in chart_types)
+ Categorical, // Implies that the variable is binned
+ Continuous, // Implies that the variable is binned
+ Numeric // Only variables of type StatsVariable::Type::Numeric are accepted
+};
+
+// Optional chart features, one bit each. The bits are or-ed together in
+// ChartTypeDesc::features and are also used as the ids of the features
+// that are passed to the UI.
+static constexpr int ChartFeatureLabels = 0x01;
+static constexpr int ChartFeatureLegend = 0x02;
+static constexpr int ChartFeatureMedian = 0x04;
+static constexpr int ChartFeatureMean = 0x08;
+static constexpr int ChartFeatureQuartiles = 0x10;
+
+// Table of all supported chart types. For every chart type it lists which
+// kinds of variables are accepted, which subtypes exist and which optional
+// features can be toggled. validCharts() walks this table in order, so the
+// order of the entries is also the order in which charts are offered.
+static const struct ChartTypeDesc {
+ ChartType id;
+ const char *name; // Untranslated name; translated with StatsTranslations::tr() on use
+ SupportedVariable var1; // Requirement on the first variable
+ SupportedVariable var2; // Requirement on the second variable (Count: no second variable)
+ bool var2HasOperations; // If true, an operation on the second variable can be chosen
+ const std::vector<ChartSubType> subtypes;
+ int features; // Bitwise or of the ChartFeature* flags
+} chart_types[] = {
+ {
+ ChartType::ScatterPlot,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Dots },
+ 0
+ },
+ {
+ ChartType::HistogramCount,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
+ },
+ {
+ ChartType::HistogramValue,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ true,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::HistogramBox,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Box },
+ 0
+ },
+ {
+ ChartType::HistogramStacked,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+ SupportedVariable::Continuous,
+ SupportedVariable::Categorical,
+ false,
+ { ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
+ ChartFeatureLabels | ChartFeatureLegend
+ },
+ {
+ ChartType::DiscreteScatter,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Dots },
+ ChartFeatureQuartiles
+ },
+ {
+ ChartType::DiscreteValue,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ true,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::DiscreteCount,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Vertical, ChartSubType::Horizontal },
+ ChartFeatureLabels
+ },
+ {
+ ChartType::DiscreteBox,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Numeric,
+ false,
+ { ChartSubType::Box },
+ 0
+ },
+ {
+ ChartType::Pie,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Count,
+ false,
+ { ChartSubType::Pie },
+ ChartFeatureLabels | ChartFeatureLegend
+ },
+ {
+ ChartType::DiscreteBar,
+ QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
+ SupportedVariable::Categorical,
+ SupportedVariable::Categorical,
+ false,
+ { ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
+ ChartFeatureLabels | ChartFeatureLegend
+ }
+};
+
+// Some charts are valid, but not preferable. For example, a numeric variable
+// is better plotted in a histogram than in a categorical bar chart. To
+// describe this, use an enum: good, undesired, invalid. Default to "good"
+// charts first, but ultimately let the user decide.
+enum ChartValidity {
+ Good,
+ Undesired, // Possible, but flagged with a warning in the chart list
+ Invalid // The chart cannot be used for the given variables
+};
+
+// Id of the "Count" entry in the variable list of the second axis.
+// var2Changed() translates it into a nullptr variable.
+static const int count_idx = -1; // Special index for the count variable
+
+// Sets up the default state: the first entry of stats_variables is plotted
+// against the count (var2 == nullptr). The chart type and subtype given here
+// are only starting values; validate() replaces them if they do not fit the
+// variables. chartFeatures is not initialized here, validate() assigns it.
+// NOTE(review): stats_variables[0] is accessed unconditionally - presumably
+// the list of variables is never empty; confirm.
+StatsState::StatsState() :
+ var1(stats_variables[0]),
+ var2(nullptr),
+ type(ChartType::DiscreteBar),
+ subtype(ChartSubType::Vertical),
+ labels(true),
+ legend(true),
+ median(false),
+ mean(false),
+ quartiles(true),
+ var1Binner(nullptr),
+ var2Binner(nullptr),
+ var2Operation(StatsOperation::Invalid),
+ var1Binned(false),
+ var2Binned(false),
+ var2HasOperations(false)
+{
+ validate(true);
+}
+
+// Builds the list of variables that is shown for one axis.
+//   selected: the currently chosen variable (nullptr means "Count")
+//   addCount: prepend the pseudo-variable "Count" with the id count_idx
+//   omit:     variable that must not appear in the list (may be nullptr)
+// The id of a real variable is its index in stats_variables. The "selected"
+// member of the result is the position in the generated list, or -1 if the
+// chosen variable is not part of the list.
+static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addCount, const StatsVariable *omit)
+{
+	const int numVariables = static_cast<int>(stats_variables.size());
+
+	StatsState::VariableList list;
+	list.selected = -1;
+	list.variables.reserve(stats_variables.size() + addCount);
+
+	if (addCount) {
+		// "Count" is always the first entry and is selected if there is no real variable
+		if (!selected)
+			list.selected = static_cast<int>(list.variables.size());
+		list.variables.push_back({ StatsTranslations::tr("Count"), count_idx });
+	}
+
+	for (int idx = 0; idx < numVariables; ++idx) {
+		const StatsVariable *candidate = stats_variables[idx];
+		if (candidate != omit) {
+			if (candidate == selected)
+				list.selected = static_cast<int>(list.variables.size());
+			list.variables.push_back({ candidate->name(), idx });
+		}
+	}
+	return list;
+}
+
+// This is a bit lame: the UI gets chart type and subtype as one integer.
+// The type is stored in the upper and the subtype in the lower 16 bits
+// of a 32 bit integer.
+static int toInt(ChartType type, ChartSubType subtype)
+{
+	const int upper = static_cast<int>(type) << 16;
+	const int lower = static_cast<int>(subtype);
+	return upper | lower;
+}
+
+// Inverse of toInt(): splits an id received from the UI into chart type
+// (upper 16 bits) and chart subtype (lower 16 bits).
+static std::pair<ChartType, ChartSubType> fromInt(int id)
+{
+	// Mask the full lower half, so that this mirrors the layout used by toInt()
+	return { static_cast<ChartType>(id >> 16), static_cast<ChartSubType>(id & 0xffff) };
+}
+
+// Rates how well a variable of the given type fits the requirement "var"
+// of a chart axis:
+//   Categorical axis: everything is possible, but continuous and numeric
+//                     variables are undesired.
+//   Continuous axis:  everything but discrete variables.
+//   Numeric axis:     numeric variables only.
+// Count (and any unknown value) is reported as invalid, because the count
+// is special cased outside of this function.
+static ChartValidity variableValidity(StatsVariable::Type type, SupportedVariable var)
+{
+	const bool isNumeric = type == StatsVariable::Type::Numeric;
+
+	if (var == SupportedVariable::Categorical) {
+		if (isNumeric || type == StatsVariable::Type::Continuous)
+			return ChartValidity::Undesired;
+		return ChartValidity::Good;
+	}
+	if (var == SupportedVariable::Continuous) {
+		if (type == StatsVariable::Type::Discrete)
+			return ChartValidity::Invalid;
+		return ChartValidity::Good;
+	}
+	if (var == SupportedVariable::Numeric)
+		return isNumeric ? ChartValidity::Good : ChartValidity::Invalid;
+
+	return ChartValidity::Invalid;
+}
+
+// Rates a chart type for a pair of variables. var2 == nullptr stands for
+// the count. The chart is invalid if one of the variables does not fit,
+// undesired if at least one variable is undesired and good otherwise.
+static ChartValidity chartValidity(const ChartTypeDesc &desc, const StatsVariable *var1, const StatsVariable *var2)
+{
+	// Huh? We don't support count as independent variable
+	if (var1 == nullptr)
+		return ChartValidity::Invalid;
+
+	const ChartValidity first = variableValidity(var1->type(), desc.var1);
+	if (first == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	// No second variable means "count": only the first variable decides
+	if (!var2) {
+		if (desc.var2 != SupportedVariable::Count)
+			return ChartValidity::Invalid;
+		return first;
+	}
+
+	const ChartValidity second = variableValidity(var2->type(), desc.var2);
+	if (second == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	const bool anyUndesired = first == ChartValidity::Undesired || second == ChartValidity::Undesired;
+	return anyUndesired ? ChartValidity::Undesired : ChartValidity::Good;
+}
+
+// Collects all chart types that can display the given pair of variables.
+// Returns (chart-type, warning) pairs in the order of the chart_types table;
+// the warning flag is set for charts that are possible but undesired.
+const std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2)
+{
+	std::vector<std::pair<const ChartTypeDesc &, bool>> usable;
+	usable.reserve(std::size(chart_types));
+	for (const ChartTypeDesc &chart: chart_types) {
+		const ChartValidity validity = chartValidity(chart, var1, var2);
+		if (validity != ChartValidity::Invalid)
+			usable.emplace_back(chart, validity == ChartValidity::Undesired);
+	}
+	return usable;
+}
+
+// Builds the list of charts (one entry per valid chart type / subtype
+// combination) that is offered for the given pair of variables.
+// NOTE(review): "selected" is set to the id of the chosen chart (see toInt()),
+// whereas the other lists store a position - confirm that the UI expects an id.
+static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2, ChartType selectedType, ChartSubType selectedSubType)
+{
+	StatsState::ChartList list;
+	list.selected = -1;
+	for (auto [desc, warn]: validCharts(var1, var2)) {
+		QString typeName = StatsTranslations::tr(desc.name);
+		for (ChartSubType sub: desc.subtypes) {
+			const int id = toInt(desc.id, sub);
+			if (desc.id == selectedType && sub == selectedSubType)
+				list.selected = id;
+			QString subName = StatsTranslations::tr(chart_subtype_names[(int)sub]);
+			list.charts.push_back({ typeName, subName, sub, id, warn });
+		}
+	}
+
+	// If none of the charts are recommended - remove the warning flag.
+	// This can happen if the first variable is numerical, but the second is categorical.
+	const bool noneRecommended = std::none_of(list.charts.begin(), list.charts.end(),
+						  [] (const StatsState::Chart &chart) { return !chart.warning; });
+	if (noneRecommended) {
+		for (StatsState::Chart &chart: list.charts)
+			chart.warning = false;
+	}
+
+	return list;
+}
+
+// Builds the list of binner names that is shown for a variable. The list
+// stays empty if the variable is not binned, if there is no variable or
+// if there is no choice, i.e. at most one binner. "selected" is the
+// position of the current binner or -1 if it is not in the list.
+static StatsState::BinnerList createBinnerList(bool binned, const StatsVariable *var, const StatsBinner *binner)
+{
+	StatsState::BinnerList list;
+	list.selected = -1;
+	if (!var || !binned)
+		return list;
+
+	std::vector<const StatsBinner *> available = var->binners();
+	const size_t count = available.size();
+	if (count <= 1)
+		return list; // Don't show combo boxes for single binners
+
+	list.binners.reserve(count);
+	for (size_t pos = 0; pos < count; ++pos) {
+		if (available[pos] == binner)
+			list.selected = static_cast<int>(pos);
+		list.binners.push_back(available[pos]->name());
+	}
+	return list;
+}
+
+// Builds the list of operations that can be applied to a variable. The
+// variable list type is reused: the id of an entry is the integer value of
+// the operation. The list is empty if the chart has no operations or if
+// there is no variable. "selected" is the position of the current operation
+// or -1 if it is not in the list.
+static StatsState::VariableList createOperationsList(bool hasOperations, const StatsVariable *var, StatsOperation operation)
+{
+	StatsState::VariableList list;
+	list.selected = -1;
+	if (!var || !hasOperations)
+		return list;
+
+	std::vector<StatsOperation> supported = var->supportedOperations();
+	const size_t count = supported.size();
+	list.variables.reserve(count);
+	for (size_t pos = 0; pos < count; ++pos) {
+		const StatsOperation candidate = supported[pos];
+		if (candidate == operation)
+			list.selected = static_cast<int>(pos);
+		list.variables.push_back({ StatsVariable::operationName(candidate), static_cast<int>(candidate) });
+	}
+	return list;
+}
+
+// Builds the list of features that can be toggled for the current chart.
+// chartFeatures is a bitwise or of ChartFeature* flags; only features whose
+// flag is set are listed. The flag doubles as the id of the feature and the
+// bool parameters provide the current on/off state.
+static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
+{
+	std::vector<StatsState::Feature> features;
+	const auto addIfSupported = [&features, chartFeatures] (int flag, const QString &name, bool enabled) {
+		if (chartFeatures & flag)
+			features.push_back({ name, flag, enabled });
+	};
+
+	addIfSupported(ChartFeatureLabels, StatsTranslations::tr("labels"), labels);
+	addIfSupported(ChartFeatureLegend, StatsTranslations::tr("legend"), legend);
+	addIfSupported(ChartFeatureMedian, StatsTranslations::tr("median"), median);
+	addIfSupported(ChartFeatureMean, StatsTranslations::tr("mean"), mean);
+	addIfSupported(ChartFeatureQuartiles, StatsTranslations::tr("quartiles"), quartiles);
+	return features;
+}
+
+// Creates a snapshot of everything the UI has to display: the selectable
+// variables, charts, binners, operations and features, each with the
+// currently selected entry.
+StatsState::UIState StatsState::getUIState() const
+{
+ UIState res;
+ res.var1 = createVariableList(var1, false, nullptr);
+ // The second list additionally offers "Count" and omits the first variable
+ res.var2 = createVariableList(var2, true, var1);
+ res.var1Name = var1 ? var1->name() : QString();
+ res.var2Name = var2 ? var2->name() : QString();
+ res.charts = createChartList(var1, var2, type, subtype);
+ res.binners1 = createBinnerList(var1Binned, var1, var1Binner);
+ res.binners2 = createBinnerList(var2Binned, var2, var2Binner);
+ res.operations2 = createOperationsList(var2HasOperations, var2, var2Operation);
+ res.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
+ return res;
+}
+
+// Returns the binner at position idx in the list of binners of variable v.
+// Returns nullptr if there is no variable or if idx is out of range.
+static const StatsBinner *idxToBinner(const StatsVariable *v, int idx)
+{
+	if (!v)
+		return nullptr;
+	auto binners = v->binners();
+	if (idx < 0 || idx >= (int)binners.size())
+		return nullptr;
+	return binners[idx];
+}
+
+// Called when a new first variable was chosen. "id" is an index into
+// stats_variables (see createVariableList()); values outside of the valid
+// range are clamped to the first or last variable.
+void StatsState::var1Changed(int id)
+{
+	// The upper bound of std::clamp is inclusive, so it has to be the
+	// last valid index and not the size of the list.
+	// NOTE(review): like the constructor, this presumes a non-empty stats_variables.
+	const int lastIdx = (int)stats_variables.size() - 1;
+	var1 = stats_variables[std::clamp(id, 0, lastIdx)];
+	validate(true);
+}
+
+// Called when a new binner was chosen for the first variable. idx is the
+// position in the binner list of var1; an out-of-range idx yields nullptr,
+// which validate() then replaces if the variable is binned.
+void StatsState::binner1Changed(int idx)
+{
+ var1Binner = idxToBinner(var1, idx);
+ validate(false);
+}
+
+// Called when a new second variable was chosen. "id" is either count_idx
+// or an index into stats_variables (see createVariableList()); any other
+// value is clamped to the first or last variable.
+void StatsState::var2Changed(int id)
+{
+	if (id == count_idx) {
+		// The "count" variable is represented by a nullptr
+		var2 = nullptr;
+	} else {
+		// The upper bound of std::clamp is inclusive, so it has to be the
+		// last valid index and not the size of the list.
+		// NOTE(review): like the constructor, this presumes a non-empty stats_variables.
+		const int lastIdx = (int)stats_variables.size() - 1;
+		var2 = stats_variables[std::clamp(id, 0, lastIdx)];
+	}
+	validate(true);
+}
+
+// Called when a new binner was chosen for the second variable. idx is the
+// position in the binner list of var2; an out-of-range idx or a missing
+// second variable yields nullptr, which validate() then sorts out.
+void StatsState::binner2Changed(int idx)
+{
+ var2Binner = idxToBinner(var2, idx);
+ validate(false);
+}
+
+// Called when a new operation was chosen for the second variable. id is the
+// integer value of a StatsOperation, as handed out by createOperationsList().
+// The value is not checked here; validate() replaces operations that the
+// variable does not support.
+void StatsState::var2OperationChanged(int id)
+{
+ var2Operation = (StatsOperation)id;
+ validate(false);
+}
+
+// Called when a new chart was chosen. id is a combined chart type / subtype
+// id as generated by toInt(). validate() is called with varChanged == false,
+// so that an explicitly chosen, non-recommended chart is kept.
+void StatsState::chartChanged(int id)
+{
+ std::tie(type, subtype) = fromInt(id); // use std::tie to assign two values at once
+ validate(false);
+}
+
+// Called when a chart feature was switched on or off. id is one of the
+// ChartFeature* flags; unknown ids are ignored. No validation is needed,
+// since features don't influence the rest of the state.
+void StatsState::featureChanged(int id, bool state)
+{
+	switch (id) {
+	case ChartFeatureLabels:
+		labels = state;
+		break;
+	case ChartFeatureLegend:
+		legend = state;
+		break;
+	case ChartFeatureMedian:
+		median = state;
+		break;
+	case ChartFeatureMean:
+		mean = state;
+		break;
+	case ChartFeatureQuartiles:
+		quartiles = state;
+		break;
+	default:
+		break;
+	}
+}
+
+// Determines the new chart-type from the current chart-type and the list of
+// possible chart types (as returned by validCharts()).
+// The current type is kept if it is in the list, unless "varChanged" is true
+// and the type is flagged as undesired. Otherwise, the first chart without
+// warning is used. If there is none, the first chart of the list is used and
+// if the list is empty, the first entry of the chart_types table.
+const ChartTypeDesc &newChartType(ChartType type, std::vector<std::pair<const ChartTypeDesc &, bool>> charts,
+				  bool varChanged)
+{
+	const ChartTypeDesc *current = nullptr;		// Entry of the currently selected type, if listed
+	bool currentUndesired = false;
+	const ChartTypeDesc *recommended = nullptr;	// First entry without warning
+
+	for (const auto &[desc, warn]: charts) {
+		if (!current && desc.id == type) {
+			current = &desc;
+			currentUndesired = warn;
+		}
+		if (!recommended && !warn)
+			recommended = &desc;
+	}
+
+	// Found it, but if the axis was changed, we change anyway if the chart is "undesired"
+	if (current && (!varChanged || !currentUndesired))
+		return *current;
+
+	if (recommended)
+		return *recommended;
+
+	return charts.empty() ? chart_types[0] : charts[0].first;
+}
+
+// Makes sure that "binner" is one of the binners of the variable. Without a
+// variable or if the variable is not binned, the binner is reset to nullptr.
+// A binner that does not belong to the variable is replaced by the first
+// binner of the variable (nullptr if the variable has no binners).
+static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool isBinned)
+{
+	if (!(var && isBinned)) {
+		binner = nullptr;
+		return;
+	}
+
+	auto available = var->binners();
+	const bool stillValid = std::find(available.begin(), available.end(), binner) != available.end();
+	if (stillValid)
+		return;
+
+	// For now choose the first binner. However, we might try to be smarter here
+	// and adapt to the given screen size and the estimated number of bins.
+	if (available.empty())
+		binner = nullptr;
+	else
+		binner = available[0];
+}
+
+// Makes sure that "operation" is supported by the variable. If the chart has
+// no operations or if there is no variable, the operation is reset to Invalid.
+// An unsupported operation is replaced by the first supported operation
+// (Invalid if the variable supports none).
+static void validateOperation(StatsOperation &operation, const StatsVariable *var, bool hasOperation)
+{
+	// Check var as well, like createOperationsList() does: it is nullptr
+	// for the count and must not be dereferenced.
+	if (!hasOperation || !var) {
+		operation = StatsOperation::Invalid;
+		return;
+	}
+	std::vector<StatsOperation> ops = var->supportedOperations();
+	if (std::find(ops.begin(), ops.end(), operation) != ops.end())
+		return;
+
+	operation = ops.empty() ? StatsOperation::Invalid : ops[0];
+}
+
+// Brings the state into a consistent form after any change: chart type,
+// subtype, binners, operation and the available features are adapted to
+// the currently selected variables.
+// The varChanged parameter indicates whether this function is called
+// after a variable change or a change of the chart type. In the
+// former case, the chart type is switched, if it is not recommended.
+// In the latter case, the user explicitly chose a non-recommended type,
+// so let's use that.
+void StatsState::validate(bool varChanged)
+{
+ // Take care that we don't plot a variable against itself.
+ // By default plot the count of the first variable. Is that sensible?
+ if (var1 == var2)
+ var2 = nullptr;
+
+ // Let's see if the currently selected chart is one of the valid charts
+ auto charts = validCharts(var1, var2);
+ const ChartTypeDesc &desc = newChartType(type, charts, varChanged);
+ type = desc.id;
+
+ // Check if the current subtype is supported by the chart
+ if (std::find(desc.subtypes.begin(), desc.subtypes.end(), subtype) == desc.subtypes.end())
+ subtype = desc.subtypes.empty() ? ChartSubType::Horizontal : desc.subtypes[0];
+
+ // Everything below is derived from the chart type that was chosen above
+ // The first variable is binned in every chart but the scatter plot
+ var1Binned = type != ChartType::ScatterPlot;
+ var2Binned = desc.var2 == SupportedVariable::Categorical || desc.var2 == SupportedVariable::Continuous;
+ var2HasOperations = desc.var2HasOperations;
+
+ chartFeatures = desc.features;
+ // Median and mean currently only if first variable is numeric
+ if (!var1 || var1->type() != StatsVariable::Type::Numeric)
+ chartFeatures &= ~(ChartFeatureMedian | ChartFeatureMean);
+
+ // Check that the binners and operation are valid
+ validateBinner(var1Binner, var1, var1Binned);
+ validateBinner(var2Binner, var2, var2Binned);
+ validateOperation(var2Operation, var2, var2HasOperations);
+}
diff --git a/stats/statsstate.h b/stats/statsstate.h
new file mode 100644
index 000000000..d4713d414
--- /dev/null
+++ b/stats/statsstate.h
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+// Describes the current state of the statistics widget
+// (selected variables, chart type, etc.) and is the
+// interface between UI and plotting code.
+#ifndef STATS_STATE_H
+#define STATS_STATE_H
+
+#include <vector>
+#include <QString>
+
+// The kinds of charts that can be plotted. Each value is described by one
+// entry of the chart_types table in statsstate.cpp. The integer value is
+// passed to the UI as part of the chart id.
+enum class ChartType {
+ DiscreteBar,
+ DiscreteValue,
+ DiscreteCount,
+ DiscreteBox,
+ DiscreteScatter,
+ Pie,
+ HistogramCount,
+ HistogramValue,
+ HistogramBox,
+ HistogramStacked,
+ ScatterPlot
+};
+
+// The variants in which a chart can be drawn. Which subtypes a chart type
+// supports is listed in the chart_types table in statsstate.cpp.
+// Attn: the values are used as index into chart_subtype_names in
+// statsstate.cpp, so the order of both has to be kept in sync.
+enum class ChartSubType {
+ Vertical = 0,
+ VerticalGrouped,
+ VerticalStacked,
+ Horizontal,
+ HorizontalGrouped,
+ HorizontalStacked,
+ Dots,
+ Box,
+ Pie,
+ Count // Not a subtype (it has no name in chart_subtype_names); presumably the number of subtypes
+};
+
+struct StatsVariable;
+struct StatsBinner;
+enum class StatsOperation : int;
+
+// The state of the statistics widget. The UI reports changes with the
+// *Changed() functions, which keep the state consistent, and then fetches
+// what it has to display with getUIState(). The public members describe
+// what has to be plotted.
+struct StatsState {
+public:
+ StatsState();
+ // NOTE(review): the following two functions are not defined in the
+ // accompanying statsstate.cpp - confirm that they are still needed.
+ int setFirstAxis();
+ int setSecondAxis();
+
+ // An entry of a variable (or operation) list
+ struct Variable {
+ QString name;
+ int id; // Value to be passed back to var1Changed() / var2Changed() / var2OperationChanged()
+ };
+ struct VariableList {
+ std::vector<Variable> variables;
+ int selected; // Position of the selected entry in variables, -1 if none
+ };
+ struct Chart {
+ QString name;
+ QString subtypeName;
+ ChartSubType subtype;
+ int id; // Combined chart type / subtype; to be passed back to chartChanged()
+ bool warning; // Not recommended for that combination
+ };
+ struct ChartList {
+ std::vector<Chart> charts;
+ int selected; // Set to the id of the selected chart, -1 if none
+ };
+ struct BinnerList {
+ std::vector<QString> binners; // Empty if there is nothing to choose from
+ int selected; // Position of the selected entry in binners, -1 if none
+ };
+ struct Feature {
+ QString name;
+ int id; // Value to be passed back to featureChanged()
+ bool selected; // Current on/off state
+ };
+ // Everything the UI has to display, see getUIState()
+ struct UIState {
+ VariableList var1;
+ VariableList var2;
+ QString var1Name;
+ QString var2Name;
+ ChartList charts;
+ std::vector<Feature> features;
+ BinnerList binners1;
+ BinnerList binners2;
+ // Currently, operations are only supported on the second variable
+ // This reuses the variable list - not very nice.
+ VariableList operations2;
+ };
+ UIState getUIState() const;
+
+ // State changers
+ void var1Changed(int id);
+ void var2Changed(int id);
+ void chartChanged(int id);
+ void binner1Changed(int id);
+ void binner2Changed(int id);
+ void var2OperationChanged(int id);
+ void featureChanged(int id, bool state);
+
+ const StatsVariable *var1; // Independent variable
+ const StatsVariable *var2; // Dependent variable (nullptr: count)
+ ChartType type;
+ ChartSubType subtype;
+ bool labels;
+ bool legend;
+ bool median;
+ bool mean;
+ bool quartiles;
+ const StatsBinner *var1Binner; // nullptr: undefined
+ const StatsBinner *var2Binner; // nullptr: undefined
+ StatsOperation var2Operation;
+private:
+ // Makes the state consistent; called by the constructor and most state changers
+ void validate(bool varChanged);
+ // The following members are derived from the chart type by validate()
+ bool var1Binned;
+ bool var2Binned;
+ bool var2HasOperations;
+ int chartFeatures; // Bitwise or of the features supported by the current chart
+};
+
+#endif