diff options
-rw-r--r-- | stats/CMakeLists.txt | 2 | ||||
-rw-r--r-- | stats/statsstate.cpp | 530 | ||||
-rw-r--r-- | stats/statsstate.h | 122 |
3 files changed, 654 insertions, 0 deletions
diff --git a/stats/CMakeLists.txt b/stats/CMakeLists.txt
index 2065dada3..594bc6fd2 100644
--- a/stats/CMakeLists.txt
+++ b/stats/CMakeLists.txt
@@ -23,6 +23,8 @@ set(SUBSURFACE_STATS_SRCS
 	statscolors.cpp
 	statsseries.h
 	statsseries.cpp
+	statsstate.h
+	statsstate.cpp
 	statsvariables.h
 	statsvariables.cpp
 	zvalues.h
diff --git a/stats/statsstate.cpp b/stats/statsstate.cpp
new file mode 100644
index 000000000..e26a805eb
--- /dev/null
+++ b/stats/statsstate.cpp
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "statsstate.h"
+#include "statstranslations.h"
+#include "statsvariables.h"
+
+#include <algorithm>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+// Attn: The order must correspond to the ChartSubType enum in statsstate.h
+static const char *chart_subtype_names[] = {
+	QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
+	QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
+};
+static_assert((int)std::size(chart_subtype_names) == (int)ChartSubType::Count,
+	      "chart_subtype_names needs one entry per chart subtype");
+
+// The kind of variable a chart type accepts for its first or second variable
+enum class SupportedVariable {
+	Count,
+	Categorical,	// Implies that the variable is binned
+	Continuous,	// Implies that the variable is binned
+	Numeric
+};
+
+// Bit flags for the optional features of a chart (see ChartTypeDesc::features)
+static const int ChartFeatureLabels = 1 << 0;
+static const int ChartFeatureLegend = 1 << 1;
+static const int ChartFeatureMedian = 1 << 2;
+static const int ChartFeatureMean = 1 << 3;
+static const int ChartFeatureQuartiles = 1 << 4;
+
+// Table of all chart types with the variable kinds they accept,
+// the subtypes they offer and the features they support.
+static const struct ChartTypeDesc {
+	ChartType id;
+	const char *name;			// Untranslated, marked with QT_TRANSLATE_NOOP
+	SupportedVariable var1;			// Accepted kind of first variable
+	SupportedVariable var2;			// Accepted kind of second variable
+	bool var2HasOperations;
+	const std::vector<ChartSubType> subtypes;
+	int features;				// Combination of ChartFeature* flags
+} chart_types[] = {
+	{
+		ChartType::ScatterPlot,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
+		SupportedVariable::Continuous,
+		SupportedVariable::Numeric,
+		false,
+		{ ChartSubType::Dots },
+		0
+	},
+	{
+		ChartType::HistogramCount,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+		SupportedVariable::Continuous,
+		SupportedVariable::Count,
+		false,
+		{ ChartSubType::Vertical, ChartSubType::Horizontal },
+		ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
+	},
+	{
+		ChartType::HistogramValue,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+		SupportedVariable::Continuous,
+		SupportedVariable::Numeric,
+		true,
+		{ ChartSubType::Vertical, ChartSubType::Horizontal },
+		ChartFeatureLabels
+	},
+	{
+		ChartType::HistogramBox,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+		SupportedVariable::Continuous,
+		SupportedVariable::Numeric,
+		false,
+		{ ChartSubType::Box },
+		0
+	},
+	{
+		ChartType::HistogramStacked,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
+		SupportedVariable::Continuous,
+		SupportedVariable::Categorical,
+		false,
+		{ ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
+		ChartFeatureLabels | ChartFeatureLegend
+	},
+	{
+		ChartType::DiscreteScatter,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Numeric,
+		false,
+		{ ChartSubType::Dots },
+		ChartFeatureQuartiles
+	},
+	{
+		ChartType::DiscreteValue,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Numeric,
+		true,
+		{ ChartSubType::Vertical, ChartSubType::Horizontal },
+		ChartFeatureLabels
+	},
+	{
+		ChartType::DiscreteCount,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Count,
+		false,
+		{ ChartSubType::Vertical, ChartSubType::Horizontal },
+		ChartFeatureLabels
+	},
+	{
+		ChartType::DiscreteBox,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Numeric,
+		false,
+		{ ChartSubType::Box },
+		0
+	},
+	{
+		ChartType::Pie,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Count,
+		false,
+		{ ChartSubType::Pie },
+		ChartFeatureLabels | ChartFeatureLegend
+	},
+	{
+		ChartType::DiscreteBar,
+		QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
+		SupportedVariable::Categorical,
+		SupportedVariable::Categorical,
+		false,
+		{ ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
+		ChartFeatureLabels | ChartFeatureLegend
+	}
+};
+
+// Some charts are valid, but not preferable. For example a numeric variable
+// is better plotted in a histogram than in a categorical bar chart. To
+// describe this use an enum: good, bad, invalid. Default to "good" charts
+// first, but ultimately let the user decide.
+enum ChartValidity {
+	Good,
+	Undesired,
+	Invalid
+};
+
+static const int count_idx = -1; // Special index for the count variable
+
+// Start out with the first variable plotted against the count and let
+// validate() pick a suitable chart type, binners and features.
+StatsState::StatsState() :
+	var1(stats_variables[0]),
+	var2(nullptr),
+	type(ChartType::DiscreteBar),
+	subtype(ChartSubType::Vertical),
+	labels(true),
+	legend(true),
+	median(false),
+	mean(false),
+	quartiles(true),
+	var1Binner(nullptr),
+	var2Binner(nullptr),
+	var2Operation(StatsOperation::Invalid),
+	var1Binned(false),
+	var2Binned(false),
+	var2HasOperations(false),
+	chartFeatures(0)
+{
+	validate(true);
+}
+
+// Create the list of variables shown for one axis. If addCount is true, the
+// special "Count" entry is prepended. The variable omit, if any, is skipped.
+// res.selected is the list-index of the selected variable (-1 if not found).
+static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addCount, const StatsVariable *omit)
+{
+	StatsState::VariableList res;
+	res.variables.reserve(stats_variables.size() + addCount);
+	res.selected = -1;
+	if (addCount) {
+		if (selected == nullptr)
+			res.selected = (int)res.variables.size();
+		res.variables.push_back({ StatsTranslations::tr("Count"), count_idx });
+	}
+	for (int i = 0; i < (int)stats_variables.size(); ++i) {
+		const StatsVariable *variable = stats_variables[i];
+		if (variable == omit)
+			continue;
+		if (variable == selected)
+			res.selected = (int)res.variables.size();
+		res.variables.push_back({ variable->name(), i });
+	}
+	return res;
+}
+
+// This is a bit lame: we pass Chart/SubChart as an integer to the UI,
+// by placing one in the lower and one in the upper 16 bit of a 32 bit integer.
+static int toInt(ChartType type, ChartSubType subtype)
+{
+	return ((int)type << 16) | (int)subtype;
+}
+
+// Inverse of toInt()
+static std::pair<ChartType, ChartSubType> fromInt(int id)
+{
+	return { (ChartType)(id >> 16), (ChartSubType)(id & 0xffff) };
+}
+
+// Checks whether a variable of the given type fits what a chart expects
+static ChartValidity variableValidity(StatsVariable::Type type, SupportedVariable var)
+{
+	switch (var) {
+	default:
+	case SupportedVariable::Count:
+		return ChartValidity::Invalid; // Count has been special cased outside of this function
+	case SupportedVariable::Categorical:
+		return type == StatsVariable::Type::Continuous || type == StatsVariable::Type::Numeric ?
+			ChartValidity::Undesired : ChartValidity::Good;
+	case SupportedVariable::Continuous:
+		return type == StatsVariable::Type::Discrete ? ChartValidity::Invalid : ChartValidity::Good;
+	case SupportedVariable::Numeric:
+		return type != StatsVariable::Type::Numeric ? ChartValidity::Invalid : ChartValidity::Good;
+	}
+}
+
+// Checks whether the chart described by desc can plot var1 against var2
+// (nullptr for var2 means count). Undesired if any variable is undesired.
+static ChartValidity chartValidity(const ChartTypeDesc &desc, const StatsVariable *var1, const StatsVariable *var2)
+{
+	if (!var1)
+		return ChartValidity::Invalid; // Huh? We don't support count as independent variable
+
+	// Check the first variable
+	ChartValidity valid1 = variableValidity(var1->type(), desc.var1);
+	if (valid1 == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	// Then, check the second variable
+	if (var2 == nullptr) // Our special marker for "count"
+		return desc.var2 == SupportedVariable::Count ? valid1 : ChartValidity::Invalid;
+
+	ChartValidity valid2 = variableValidity(var2->type(), desc.var2);
+	if (valid2 == ChartValidity::Invalid)
+		return ChartValidity::Invalid;
+
+	return valid1 == ChartValidity::Undesired || valid2 == ChartValidity::Undesired ?
+		ChartValidity::Undesired : ChartValidity::Good;
+}
+
+// Returns a list of (chart-type, warning) pairs
+static std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2)
+{
+	std::vector<std::pair<const ChartTypeDesc &, bool>> res;
+	res.reserve(std::size(chart_types));
+	for (const ChartTypeDesc &desc: chart_types) {
+		ChartValidity valid = chartValidity(desc, var1, var2);
+		if (valid == ChartValidity::Invalid)
+			continue;
+		res.emplace_back(desc, valid == ChartValidity::Undesired);
+	}
+
+	return res;
+}
+
+// Create the list of all valid chart type / subtype combinations for the UI.
+// res.selected is the id (see toInt()) of the current chart or -1.
+static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2, ChartType selectedType, ChartSubType selectedSubType)
+{
+	StatsState::ChartList res;
+	res.selected = -1;
+	for (auto [desc, warn]: validCharts(var1, var2)) {
+		QString name = StatsTranslations::tr(desc.name);
+		for (ChartSubType subtype: desc.subtypes) {
+			int id = toInt(desc.id, subtype);
+			if (selectedType == desc.id && selectedSubType == subtype)
+				res.selected = id;
+			QString subtypeName = StatsTranslations::tr(chart_subtype_names[(int)subtype]);
+			res.charts.push_back({ name, subtypeName, subtype, toInt(desc.id, subtype), warn });
+		}
+	}
+
+	// If none of the charts are recommended - remove the warning flag.
+	// This can happen if the first variable is numerical, but the second is categorical.
+	if (std::all_of(res.charts.begin(), res.charts.end(), [] (const StatsState::Chart &c) { return c.warning; })) {
+		for (StatsState::Chart &c: res.charts)
+			c.warning = false;
+	}
+
+	return res;
+}
+
+// Create the list of binner names for a variable. The list is empty if the
+// variable is not binned or there is at most one binner to choose from.
+static StatsState::BinnerList createBinnerList(bool binned, const StatsVariable *var, const StatsBinner *binner)
+{
+	StatsState::BinnerList res;
+	res.selected = -1;
+	if (!binned || !var)
+		return res;
+	std::vector<const StatsBinner *> binners = var->binners();
+	if (binners.size() <= 1)
+		return res; // Don't show combo boxes for single binners
+	res.binners.reserve(binners.size());
+	for (const StatsBinner *bin: binners) {
+		if (bin == binner)
+			res.selected = (int)res.binners.size();
+		res.binners.push_back(bin->name());
+	}
+	return res;
+}
+
+// Create the list of operations supported by a variable (empty if the chart
+// has no operations). The id of each entry is the StatsOperation as int.
+static StatsState::VariableList createOperationsList(bool hasOperations, const StatsVariable *var, StatsOperation operation)
+{
+	StatsState::VariableList res;
+	res.selected = -1;
+	if (!hasOperations || !var)
+		return res;
+	std::vector<StatsOperation> operations = var->supportedOperations();
+	res.variables.reserve(operations.size());
+	for (StatsOperation op: operations) {
+		if (op == operation)
+			res.selected = (int)res.variables.size();
+		res.variables.push_back({ StatsVariable::operationName(op), (int)op });
+	}
+	return res;
+}
+
+// Create the list of features supported by the chart with their current state
+static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
+{
+	std::vector<StatsState::Feature> res;
+	if (chartFeatures & ChartFeatureLabels)
+		res.push_back({ StatsTranslations::tr("labels"), ChartFeatureLabels, labels });
+	if (chartFeatures & ChartFeatureLegend)
+		res.push_back({ StatsTranslations::tr("legend"), ChartFeatureLegend, legend });
+	if (chartFeatures & ChartFeatureMedian)
+		res.push_back({ StatsTranslations::tr("median"), ChartFeatureMedian, median });
+	if (chartFeatures & ChartFeatureMean)
+		res.push_back({ StatsTranslations::tr("mean"), ChartFeatureMean, mean });
+	if (chartFeatures & ChartFeatureQuartiles)
+		res.push_back({ StatsTranslations::tr("quartiles"), ChartFeatureQuartiles, quartiles });
+	return res;
+}
+
+// Collect everything the UI needs to display the current state
+StatsState::UIState StatsState::getUIState() const
+{
+	UIState res;
+	res.var1 = createVariableList(var1, false, nullptr);
+	res.var2 = createVariableList(var2, true, var1);
+	res.var1Name = var1 ? var1->name() : QString();
+	res.var2Name = var2 ? var2->name() : QString();
+	res.charts = createChartList(var1, var2, type, subtype);
+	res.binners1 = createBinnerList(var1Binned, var1, var1Binner);
+	res.binners2 = createBinnerList(var2Binned, var2, var2Binner);
+	res.operations2 = createOperationsList(var2HasOperations, var2, var2Operation);
+	res.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
+	return res;
+}
+
+// Translate a binner index from the UI into a binner (nullptr if out of range)
+static const StatsBinner *idxToBinner(const StatsVariable *v, int idx)
+{
+	if (!v)
+		return nullptr;
+	auto binners = v->binners();
+	return idx >= 0 && idx < (int)binners.size() ? binners[idx] : nullptr;
+}
+
+void StatsState::var1Changed(int id)
+{
+	// Clamp to the last valid index so that a bogus id can't read past the end
+	var1 = stats_variables[std::clamp(id, 0, (int)stats_variables.size() - 1)];
+	validate(true);
+}
+
+void StatsState::binner1Changed(int idx)
+{
+	var1Binner = idxToBinner(var1, idx);
+	validate(false);
+}
+
+void StatsState::var2Changed(int id)
+{
+	// The "count" variable is represented by a nullptr
+	var2 = id == count_idx ? nullptr
+			       : stats_variables[std::clamp(id, 0, (int)stats_variables.size() - 1)];
+	validate(true);
+}
+
+void StatsState::binner2Changed(int idx)
+{
+	var2Binner = idxToBinner(var2, idx);
+	validate(false);
+}
+
+void StatsState::var2OperationChanged(int id)
+{
+	var2Operation = (StatsOperation)id;
+	validate(false);
+}
+
+void StatsState::chartChanged(int id)
+{
+	std::tie(type, subtype) = fromInt(id); // use std::tie to assign two values at once
+	validate(false);
+}
+
+// Toggle a single feature; id is one of the ChartFeature* flags
+void StatsState::featureChanged(int id, bool state)
+{
+	if (id == ChartFeatureLabels)
+		labels = state;
+	else if (id == ChartFeatureLegend)
+		legend = state;
+	else if (id == ChartFeatureMedian)
+		median = state;
+	else if (id == ChartFeatureMean)
+		mean = state;
+	else if (id == ChartFeatureQuartiles)
+		quartiles = state;
+}
+
+// Creates the new chart-type from the current chart-type and a list of possible chart types.
+// If the flag "varChanged" is true, the current chart-type will be changed if the
+// current chart-type is undesired.
+static const ChartTypeDesc &newChartType(ChartType type, const std::vector<std::pair<const ChartTypeDesc &, bool>> &charts,
+					 bool varChanged)
+{
+	for (auto [desc, warn]: charts) {
+		// Found it, but if the axis was changed, we change anyway if the chart is "undesired"
+		if (type == desc.id) {
+			if (!varChanged || !warn)
+				return desc;
+			break;
+		}
+	}
+
+	// Find the first non-undesired chart
+	for (auto [desc, warn]: charts) {
+		if (!warn)
+			return desc;
+	}
+
+	return charts.empty() ? chart_types[0] : charts[0].first;
+}
+
+// Make sure that binner is one of the binners of var (or nullptr if not binned)
+static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool isBinned)
+{
+	if (!var || !isBinned) {
+		binner = nullptr;
+		return;
+	}
+	auto binners = var->binners();
+	if (std::find(binners.begin(), binners.end(), binner) != binners.end())
+		return;
+
+	// For now choose the first binner. However, we might try to be smarter here
+	// and adapt to the given screen size and the estimated number of bins.
+	binner = binners.empty() ? nullptr : binners[0];
+}
+
+// Make sure that operation is supported by var (or Invalid if not applicable)
+static void validateOperation(StatsOperation &operation, const StatsVariable *var, bool hasOperation)
+{
+	if (!var || !hasOperation) {
+		operation = StatsOperation::Invalid;
+		return;
+	}
+	std::vector<StatsOperation> ops = var->supportedOperations();
+	if (std::find(ops.begin(), ops.end(), operation) != ops.end())
+		return;
+
+	operation = ops.empty() ? StatsOperation::Invalid : ops[0];
+}
+
+// The var changed variable indicates whether this function is called
+// after a variable change or a change of the chart type. In the
+// former case, the chart type is switched, if it is not recommended.
+// In the latter case, the user explicitly chose a non-recommended type,
+// so let's use that.
+void StatsState::validate(bool varChanged)
+{
+	// Take care that we don't plot a variable against itself.
+	// By default plot the count of the first variable. Is that sensible?
+	if (var1 == var2)
+		var2 = nullptr;
+
+	// Let's see if the currently selected chart is one of the valid charts
+	auto charts = validCharts(var1, var2);
+	const ChartTypeDesc &desc = newChartType(type, charts, varChanged);
+	type = desc.id;
+
+	// Check if the current subtype is supported by the chart
+	if (std::find(desc.subtypes.begin(), desc.subtypes.end(), subtype) == desc.subtypes.end())
+		subtype = desc.subtypes.empty() ? ChartSubType::Horizontal : desc.subtypes[0];
+
+	var1Binned = type != ChartType::ScatterPlot;
+	var2Binned = desc.var2 == SupportedVariable::Categorical || desc.var2 == SupportedVariable::Continuous;
+	var2HasOperations = desc.var2HasOperations;
+
+	chartFeatures = desc.features;
+	// Median and mean currently only if first variable is numeric
+	if (!var1 || var1->type() != StatsVariable::Type::Numeric)
+		chartFeatures &= ~(ChartFeatureMedian | ChartFeatureMean);
+
+	// Check that the binners and operation are valid
+	validateBinner(var1Binner, var1, var1Binned);
+	validateBinner(var2Binner, var2, var2Binned);
+	validateOperation(var2Operation, var2, var2HasOperations);
+}
diff --git a/stats/statsstate.h b/stats/statsstate.h
new file mode 100644
index 000000000..d4713d414
--- /dev/null
+++ b/stats/statsstate.h
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+// Describes the current state of the statistics widget
+// (selected variables, chart type, etc.) and is the
+// interface between UI and plotting code.
+#ifndef STATS_STATE_H
+#define STATS_STATE_H
+
+#include <vector>
+#include <QString>
+
+enum class ChartType {
+	DiscreteBar,
+	DiscreteValue,
+	DiscreteCount,
+	DiscreteBox,
+	DiscreteScatter,
+	Pie,
+	HistogramCount,
+	HistogramValue,
+	HistogramBox,
+	HistogramStacked,
+	ScatterPlot
+};
+
+enum class ChartSubType {
+	Vertical = 0,
+	VerticalGrouped,
+	VerticalStacked,
+	Horizontal,
+	HorizontalGrouped,
+	HorizontalStacked,
+	Dots,
+	Box,
+	Pie,
+	Count	// Number of subtypes; keep last (size of the name table in statsstate.cpp)
+};
+
+struct StatsVariable;
+struct StatsBinner;
+enum class StatsOperation : int;
+
+struct StatsState {
+public:
+	StatsState();
+	// NOTE(review): the following two are not defined in statsstate.cpp - confirm they are still needed
+	int setFirstAxis();
+	int setSecondAxis();
+
+	// Data handed to the UI, see getUIState()
+	struct Variable {
+		QString name;
+		int id;
+	};
+	struct VariableList {
+		std::vector<Variable> variables;
+		int selected;	// Index into variables or -1
+	};
+	struct Chart {
+		QString name;
+		QString subtypeName;
+		ChartSubType subtype;
+		int id;
+		bool warning; // Not recommended for that combination
+	};
+	struct ChartList {
+		std::vector<Chart> charts;
+		int selected;	// id of the selected chart or -1
+	};
+	struct BinnerList {
+		std::vector<QString> binners;
+		int selected;	// Index into binners or -1
+	};
+	struct Feature {
+		QString name;
+		int id;
+		bool selected;
+	};
+	struct UIState {
+		VariableList var1;
+		VariableList var2;
+		QString var1Name;
+		QString var2Name;
+		ChartList charts;
+		std::vector<Feature> features;
+		BinnerList binners1;
+		BinnerList binners2;
+		// Currently, operations are only supported on the second variable
+		// This reuses the variable list - not very nice.
+		VariableList operations2;
+	};
+	UIState getUIState() const;
+
+	// State changers
+	void var1Changed(int id);
+	void var2Changed(int id);
+	void chartChanged(int id);
+	void binner1Changed(int id);
+	void binner2Changed(int id);
+	void var2OperationChanged(int id);
+	void featureChanged(int id, bool state);
+
+	const StatsVariable *var1; // Independent variable
+	const StatsVariable *var2; // Dependent variable (nullptr: count)
+	ChartType type;
+	ChartSubType subtype;
+	bool labels;
+	bool legend;
+	bool median;
+	bool mean;
+	bool quartiles;
+	const StatsBinner *var1Binner; // nullptr: undefined
+	const StatsBinner *var2Binner; // nullptr: undefined
+	StatsOperation var2Operation;
+private:
+	void validate(bool varChanged);
+	bool var1Binned;
+	bool var2Binned;
+	bool var2HasOperations;
+	int chartFeatures;
+};
+
+#endif