summaryrefslogblamecommitdiffstats
path: root/stats/statsstate.cpp
blob: 130a0e5a8bc732179a66e02b4de886d9feab0be5 (plain) (tree)

















                                                                     
             














                                                                      
                                                       






                                                                       
                                    





                                                                    
                                        






                                                                          
                                  






                                                                     
                                   






                                                                    
                                  






                                                                                   
                                   






                                                                      
                                  





                                                                      
                                        






                                                                      
                                   





                                                                      
                                        






                                                                   
                                  













                                                                                                                                                   
                                                                           











                                        
                                              


                       
                                                                                                                          
                                     
                                                                
                          
                      
                                                                 
                                                                                     






















                                                                               
                                                                                                                              
 
                                              
                      
                                                                                                       








                                                                                                            


                                                                                         
                  
                                                                                               
                                   
                                                                                                   


                                              
                                                                                              
 
                                                                                                   

                                              


                                                                             



                                                                                          

                                                                                                                             


                                                                
                                                                                                   






                                                                          


                                                                                                    

                                  
                                                                                       


















                                                                                                                       








                                                                                                                                     

                                   
                                    
                                                                  









                                                                                






                                                               
                                                                                                                                       

                                     
                                                                   
                                                                            







                                                                                                 





























                                                                                                                                               



                                                                                                       


                                                                                                  
                                                                                   

                               






                                                                                  










                                                                               









                                                                                                         



                                    

                                                                                                



                                        




                                                                     




                                             



                                                                     













































                                                                                                             







                                                                                             
 
                   

                                 



                                                    


                                                                               


                                                                                 
 
                                                                                                                 
 

                                                                       

                                                    
 


                                                                      
                                            







                                                                        


                                          
                                                                  

                               
                                                
                                                                       
                                                                                   





                                                                                                          

                                                           
                                                                               






                                                                                                





                                                                                                  
                                      
                                                                          
                                                                          
 
// SPDX-License-Identifier: GPL-2.0
#include "statsstate.h"
#include "statstranslations.h"
#include "statsvariables.h"

// Untranslated display names of the chart subtypes. createChartList() indexes
// this table with the integer value of a ChartSubType and passes the entry
// to StatsTranslations::tr().
// Attn: The order must correspond to the ChartSubType enum, which is not
// defined in this file (presumably in statsstate.h - TODO confirm).
static const char *chart_subtype_names[] = {
	QT_TRANSLATE_NOOP("StatsTranslations", "vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "grouped vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "stacked vertical"),
	QT_TRANSLATE_NOOP("StatsTranslations", "horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "grouped horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "stacked horizontal"),
	QT_TRANSLATE_NOOP("StatsTranslations", "data points"),
	QT_TRANSLATE_NOOP("StatsTranslations", "box-whisker"),
	QT_TRANSLATE_NOOP("StatsTranslations", "piechart"),
};

// Kind of variable a chart type accepts for its first or second variable.
// variableValidity() matches this against the StatsVariable::Type of the
// chosen variable. Whether the variable has to be binned is stored separately
// in ChartTypeDesc (var1Binned / var2Binned).
enum class SupportedVariable {
	None,			// No variable; only meaningful for the second variable (handled in chartValidity())
	Categorical,		// Any type is accepted, but continuous / numeric types are flagged as undesired. Binned in all chart_types entries
	Continuous,		// Any type except discrete. Binned for the histograms, not binned for the scatter plot
	Numeric			// Only numeric types are accepted
};

// Bit flags describing the optional features of a chart. They are or-ed
// together in ChartTypeDesc::features and also serve as the ids of the
// entries generated by createFeaturesList(), which are passed back to
// StatsState::featureChanged().
static const int ChartFeatureLabels =	 1 << 0;
static const int ChartFeatureLegend =	 1 << 1;
static const int ChartFeatureMedian =	 1 << 2;
static const int ChartFeatureMean =	 1 << 3;
static const int ChartFeatureQuartiles = 1 << 4;

// Static description of every chart type: which kinds of variables it can
// plot, whether these have to be binned, which subtypes are offered and
// which optional features can be toggled. chartValidity() checks the
// current selection of variables / binners / operation against these entries.
// Note that several chart types share the same name.
static const struct ChartTypeDesc {
	ChartType id;				// Chart type described by this entry
	const char *name;			// Untranslated name; translated with StatsTranslations::tr() in createChartList()
	SupportedVariable var1;			// Accepted kind of first variable
	SupportedVariable var2;			// Accepted kind of second variable; None if the chart takes only one variable
	// var1Binned / var2Binned: the chart is only valid if the respective variable has (true) or has no (false) binner.
	// var2HasOperations: the chart is only valid if an operation is (true) or is not (false) set on the second variable.
	bool var1Binned, var2Binned, var2HasOperations;
	const std::vector<ChartSubType> subtypes;	// Subtypes offered to the user; the first one is the fallback chosen in validate()
	int features;				// Or-ed ChartFeature* flags
} chart_types[] = {
	{
		ChartType::ScatterPlot,
		QT_TRANSLATE_NOOP("StatsTranslations", "Scattergraph"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		false, false, false,
		{ ChartSubType::Dots },
		0
	},
	{
		ChartType::HistogramCount,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::None,
		true, false, false,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels | ChartFeatureMedian | ChartFeatureMean
	},
	{
		ChartType::HistogramValue,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		true, false, true,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::HistogramBox,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Numeric,
		true, false, false,
		{ ChartSubType::Box },
		0
	},
	{
		ChartType::HistogramStacked,
		QT_TRANSLATE_NOOP("StatsTranslations", "Histogram"),
		SupportedVariable::Continuous,
		SupportedVariable::Categorical,
		true, true, false,
		{ ChartSubType::VerticalStacked, ChartSubType::HorizontalStacked },
		ChartFeatureLabels | ChartFeatureLegend
	},
	{
		ChartType::DiscreteScatter,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		true, false, false,
		{ ChartSubType::Dots },
		ChartFeatureQuartiles
	},
	{
		ChartType::DiscreteValue,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		true, false, true,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::DiscreteCount,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::None,
		true, false, false,
		{ ChartSubType::Vertical, ChartSubType::Horizontal },
		ChartFeatureLabels
	},
	{
		ChartType::DiscreteBox,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::Numeric,
		true, false, false,
		{ ChartSubType::Box },
		0
	},
	{
		ChartType::Pie,
		QT_TRANSLATE_NOOP("StatsTranslations", "Categorical"),
		SupportedVariable::Categorical,
		SupportedVariable::None,
		true, false, false,
		{ ChartSubType::Pie },
		ChartFeatureLabels | ChartFeatureLegend
	},
	{
		ChartType::DiscreteBar,
		QT_TRANSLATE_NOOP("StatsTranslations", "Barchart"),
		SupportedVariable::Categorical,
		SupportedVariable::Categorical,
		true, true, false,
		{ ChartSubType::VerticalGrouped, ChartSubType::VerticalStacked, ChartSubType::HorizontalGrouped, ChartSubType::HorizontalStacked },
		ChartFeatureLabels | ChartFeatureLegend
	}
};

// Some charts are valid, but not preferable. For example, a numeric variable
// is better plotted in a histogram than in a categorical bar chart. To
// describe this, use an enum: good, undesired, invalid. Default to "good"
// charts first, but ultimately let the user decide.
enum ChartValidity {
	Good,		// Chart is valid and recommended
	Undesired,	// Chart is valid, but shown with a warning
	Invalid		// Chart cannot be used with the current settings
};

// Special index meaning no second variable: createVariableList() uses it as
// id of the "none" entry and StatsState::var2Changed() maps it to nullptr.
static const int none_idx = -1;

// Sets up the default state: the first entry of stats_variables as first
// variable and no second variable. The remaining initial values are only
// starting points - the final validate() call replaces binners, operation,
// chart type and subtype if they are not valid for this combination.
StatsState::StatsState() :
	var1(stats_variables[0]),
	var2(nullptr),
	type(ChartType::DiscreteBar),
	subtype(ChartSubType::Vertical),
	labels(true),
	legend(true),
	median(false),
	mean(false),
	quartiles(true),
	var1Binner(nullptr),
	var2Binner(nullptr),
	var2Operation(StatsOperation::Invalid)
{
	// Treat the initialization like a variable change, so that a
	// recommended chart type is chosen.
	validate(true);
}

// Builds the list of selectable variables for one of the variable combo boxes.
//   selected: currently chosen variable (nullptr means "none")
//   addNone:  if true, the list starts with a "none" entry with id none_idx
//   omit:     variable that is left out of the list (may be nullptr)
// Every entry consists of the variable name and its index in stats_variables.
// The "selected" field is the position of the chosen entry in the generated
// list or -1 if the chosen variable is not part of the list.
static StatsState::VariableList createVariableList(const StatsVariable *selected, bool addNone, const StatsVariable *omit)
{
	StatsState::VariableList list;
	list.selected = -1;
	list.variables.reserve(stats_variables.size() + (addNone ? 1 : 0));

	// Position that the next appended entry is going to have
	const auto nextPos = [&list] { return static_cast<int>(list.variables.size()); };

	if (addNone) {
		if (!selected)
			list.selected = nextPos();
		list.variables.push_back({ StatsTranslations::tr("none"), none_idx });
	}

	int id = 0;
	for (const StatsVariable *candidate: stats_variables) {
		// The id is the index in stats_variables, also for omitted entries
		const int candidateId = id++;
		if (candidate == omit)
			continue;
		if (candidate == selected)
			list.selected = nextPos();
		list.variables.push_back({ candidate->name(), candidateId });
	}
	return list;
}

// This is a bit lame: the UI gets chart type and subtype as a single integer.
// The type is stored in the upper 16 bits and the subtype in the lower 16 bits
// of a 32 bit integer. fromInt() is the inverse operation.
static int toInt(ChartType type, ChartSubType subtype)
{
	const int upper = static_cast<int>(type) << 16;
	const int lower = static_cast<int>(subtype);
	return upper | lower;
}

// Inverse of toInt(): splits an id passed to / from the UI into chart type
// (upper 16 bits) and chart subtype (lower 16 bits).
static std::pair<ChartType, ChartSubType> fromInt(int id)
{
	// Mask the full lower half, since toInt() stores the subtype in 16 bits.
	return { (ChartType)(id >> 16), (ChartSubType)(id & 0xffff) };
}

// Checks whether a variable of the given type, with or without binner, can be
// used where a chart expects a variable of kind "var".
//   type:   type of the variable chosen by the user
//   binner: chosen binner or nullptr if the variable is not binned
//   var:    kind of variable the chart supports
//   binned: whether the chart expects the variable to be binned
// Returns Invalid on mismatch, Undesired if a continuous or numeric variable
// is used as categorical variable and Good otherwise.
static ChartValidity variableValidity(StatsVariable::Type type, const StatsBinner *binner, SupportedVariable var, bool binned)
{
	// The chart dictates whether the variable has to be binned or not
	const bool hasBinner = binner != nullptr;
	if (hasBinner != binned)
		return ChartValidity::Invalid;

	if (var == SupportedVariable::Categorical) {
		const bool nonDiscrete = type == StatsVariable::Type::Continuous ||
					 type == StatsVariable::Type::Numeric;
		return nonDiscrete ? ChartValidity::Undesired : ChartValidity::Good;
	}
	if (var == SupportedVariable::Continuous)
		return type != StatsVariable::Type::Discrete ? ChartValidity::Good : ChartValidity::Invalid;
	if (var == SupportedVariable::Numeric)
		return type == StatsVariable::Type::Numeric ? ChartValidity::Good : ChartValidity::Invalid;

	// None (special cased outside of this function) or an unknown value
	return ChartValidity::Invalid;
}

// Determines whether the chart described by "desc" can plot the given
// combination of variables, binners and operation.
// var2 may be nullptr ("none"); in that case binner2 and operation are not
// considered and the chart is only valid if it takes no second variable.
// Returns Undesired if the chart is valid, but at least one of the variables
// is not a good fit.
static ChartValidity chartValidity(const ChartTypeDesc &desc,
				  const StatsVariable *var1, const StatsVariable *var2,
				  const StatsBinner *binner1, const StatsBinner *binner2,
				  StatsOperation operation)
{
	// Huh? We don't support no independent variable
	if (var1 == nullptr)
		return ChartValidity::Invalid;

	// First variable
	const ChartValidity first = variableValidity(var1->type(), binner1, desc.var1, desc.var1Binned);
	if (first == ChartValidity::Invalid)
		return first;

	// A nullptr is our special marker for "none"
	if (!var2) {
		if (desc.var2 != SupportedVariable::None)
			return ChartValidity::Invalid;
		return first;
	}

	// Second variable
	const ChartValidity second = variableValidity(var2->type(), binner2, desc.var2, desc.var2Binned);
	if (second == ChartValidity::Invalid)
		return second;

	// An operation must be set if and only if the chart supports operations
	const bool hasOperation = operation != StatsOperation::Invalid;
	if (hasOperation != desc.var2HasOperations)
		return ChartValidity::Invalid;

	if (first == ChartValidity::Undesired || second == ChartValidity::Undesired)
		return ChartValidity::Undesired;
	return ChartValidity::Good;
}

// Returns a list of (chart-type, warning) pairs for all entries of chart_types
// that are valid for the given variables, binners and operation. The list is
// in the order of chart_types. The warning flag is set for charts that are
// valid, but undesired.
const std::vector<std::pair<const ChartTypeDesc &, bool>> validCharts(const StatsVariable *var1, const StatsVariable *var2,
								      const StatsBinner *binner1, const StatsBinner *binner2,
								      StatsOperation operation)
{
	std::vector<std::pair<const ChartTypeDesc &, bool>> accepted;
	accepted.reserve(std::size(chart_types));
	for (const ChartTypeDesc &candidate: chart_types) {
		const ChartValidity verdict = chartValidity(candidate, var1, var2, binner1, binner2, operation);
		if (verdict != ChartValidity::Invalid) {
			const bool warn = verdict == ChartValidity::Undesired;
			accepted.emplace_back(candidate, warn);
		}
	}

	return accepted;
}

// Builds the list of charts offered to the user: one entry per subtype of
// every chart type that is valid for the given settings. Each entry carries
// the translated type name, the translated subtype name, the subtype, the
// combined id (see toInt()) and the warning flag.
// The "selected" field is set to the id (not the position) of the entry that
// corresponds to selectedType / selectedSubType or -1 if there is no such entry.
static StatsState::ChartList createChartList(const StatsVariable *var1, const StatsVariable *var2,
					     const StatsBinner *binner1, const StatsBinner *binner2,
					     StatsOperation operation,
					     ChartType selectedType, ChartSubType selectedSubType)
{
	StatsState::ChartList list;
	list.selected = -1;
	for (const auto &entry: validCharts(var1, var2, binner1, binner2, operation)) {
		const ChartTypeDesc &desc = entry.first;
		const bool warn = entry.second;
		const QString name = StatsTranslations::tr(desc.name);
		for (ChartSubType subtype: desc.subtypes) {
			const int id = toInt(desc.id, subtype);
			const bool isSelected = selectedType == desc.id && selectedSubType == subtype;
			if (isSelected)
				list.selected = id;
			const QString subtypeName = StatsTranslations::tr(chart_subtype_names[(int)subtype]);
			list.charts.push_back({ name, subtypeName, subtype, id, warn });
		}
	}

	// If none of the charts are recommended - remove the warning flag.
	// This can happen if the first variable is numerical, but the second is categorical.
	const bool anyRecommended = std::any_of(list.charts.begin(), list.charts.end(),
						[] (const StatsState::Chart &c) { return !c.warning; });
	if (!anyRecommended) {
		for (StatsState::Chart &c: list.charts)
			c.warning = false;
	}

	return list;
}

// Tells whether a "no-binning" option is proposed for the given variable:
// never for discrete variables, always for numeric variables and for all
// other types only if this is the first variable.
// The variable must not be nullptr.
static bool noBinningAllowed(const StatsVariable *var, bool second)
{
	const auto kind = var->type();
	if (kind == StatsVariable::Type::Numeric)
		return true;
	return kind != StatsVariable::Type::Discrete && !second;
}

// Builds the list of binner names for a binner combo box.
//   var:            variable whose binners are listed (may be nullptr)
//   binner:         currently chosen binner (nullptr means not binned)
//   binningAllowed: if false, an empty list is returned
//   second:         true if this is the second variable
// The list is empty for discrete variables with at most one binner. If the
// variable does not have to be binned (see noBinningAllowed()), the list
// starts with a "none" entry. The "selected" field is the position of the
// chosen binner in the list or -1.
static StatsState::BinnerList createBinnerList(const StatsVariable *var, const StatsBinner *binner, bool binningAllowed, bool second)
{
	StatsState::BinnerList list;
	list.selected = -1;
	if (var == nullptr || !binningAllowed)
		return list;

	const std::vector<const StatsBinner *> available = var->binners();
	list.binners.reserve(available.size() + 1);

	const bool discrete = var->type() == StatsVariable::Type::Discrete;

	// Don't show combo boxes for single binners
	if (discrete && available.size() <= 1)
		return list;

	if (!discrete && noBinningAllowed(var, second)) {
		if (binner == nullptr)
			list.selected = static_cast<int>(list.binners.size());
		list.binners.push_back(StatsTranslations::tr("none"));
	}

	for (const StatsBinner *candidate: available) {
		if (candidate == binner)
			list.selected = static_cast<int>(list.binners.size());
		list.binners.push_back(candidate->name());
	}
	return list;
}

// Builds the list of operations that can be applied to the second variable.
// The list is empty if there is no variable, if the first variable is not
// binned or if the variable supports no operations. Otherwise it starts with
// a "none" entry (id StatsOperation::Invalid), followed by the supported
// operations with their integer value as id. The "selected" field is the
// position of the current operation in the list or -1.
static StatsState::VariableList createOperationsList(const StatsVariable *var, StatsOperation operation, const StatsBinner *var1Binner)
{
	StatsState::VariableList list;
	list.selected = -1;

	// Operations only possible if the first variable is binned
	if (var == nullptr || var1Binner == nullptr)
		return list;

	const std::vector<StatsOperation> supported = var->supportedOperations();
	if (supported.empty())
		return list;

	list.variables.reserve(supported.size() + 1);

	// Position that the next appended entry is going to have
	const auto nextPos = [&list] { return static_cast<int>(list.variables.size()); };

	// The "none" entry comes first
	if (operation == StatsOperation::Invalid)
		list.selected = nextPos();
	list.variables.push_back({ StatsTranslations::tr("none"), static_cast<int>(StatsOperation::Invalid) });

	for (const StatsOperation candidate: supported) {
		if (candidate == operation)
			list.selected = nextPos();
		list.variables.push_back({ StatsVariable::operationName(candidate), static_cast<int>(candidate) });
	}
	return list;
}

static std::vector<StatsState::Feature> createFeaturesList(int chartFeatures, bool labels, bool legend, bool median, bool mean, bool quartiles)
{
	std::vector<StatsState::Feature> res;
	if (chartFeatures & ChartFeatureLabels)
		res.push_back({ StatsTranslations::tr("labels"), ChartFeatureLabels, labels });
	if (chartFeatures & ChartFeatureLegend)
		res.push_back({ StatsTranslations::tr("legend"), ChartFeatureLegend, legend });
	if (chartFeatures & ChartFeatureMedian)
		res.push_back({ StatsTranslations::tr("median"), ChartFeatureMedian, median });
	if (chartFeatures & ChartFeatureMean)
		res.push_back({ StatsTranslations::tr("mean"), ChartFeatureMean, mean });
	if (chartFeatures & ChartFeatureQuartiles)
		res.push_back({ StatsTranslations::tr("quartiles"), ChartFeatureQuartiles, quartiles });
	return res;
}

// Collects everything needed to display the current state: the lists of
// variables, binners, operations, charts and features with their respective
// selections, as well as the names of the two variables (empty if not set).
StatsState::UIState StatsState::getUIState() const
{
	// Second variable can only be binned if first variable is binned.
	const bool secondBinnable = var1Binner != nullptr;

	UIState state;

	// Variables: the first variable is not offered as second variable
	state.var1Name = var1 ? var1->name() : QString();
	state.var2Name = var2 ? var2->name() : QString();
	state.var1 = createVariableList(var1, false, nullptr);
	state.var2 = createVariableList(var2, true, var1);

	// Binners and operations
	state.binners1 = createBinnerList(var1, var1Binner, true, false);
	state.binners2 = createBinnerList(var2, var2Binner, secondBinnable, true);
	state.operations2 = createOperationsList(var2, var2Operation, var1Binner);

	// Charts and their features
	state.charts = createChartList(var1, var2, var1Binner, var2Binner, var2Operation, type, subtype);
	state.features = createFeaturesList(chartFeatures, labels, legend, median, mean, quartiles);
	return state;
}

// Translates a position in the list generated by createBinnerList() back
// into a binner of the given variable. Returns nullptr if there is no
// variable, if the "none" entry was chosen or if the position is out of range.
static const StatsBinner *idxToBinner(const StatsVariable *v, int idx, bool second)
{
	if (v == nullptr)
		return nullptr;

	// Special case: if binning is optional, the first entry means "none"
	// and the binners are shifted by one position.
	int pos = idx;
	if (noBinningAllowed(v, second)) {
		if (pos == 0)
			return nullptr;
		pos -= 1;
	}

	const auto binners = v->binners();
	if (pos < 0 || pos >= (int)binners.size())
		return nullptr;
	return binners[pos];
}

// Called when the user selects a new first variable.
//   id: index of the variable in stats_variables; out-of-range values are
//       clamped to the first / last entry.
void StatsState::var1Changed(int id)
{
	// The last valid index is size() - 1. Clamping to size() would allow
	// an access one past the end. stats_variables is assumed to be non-empty
	// (the constructor already accesses its first entry).
	var1 = stats_variables[std::clamp(id, 0, (int)stats_variables.size() - 1)];
	validate(true);
}

void StatsState::binner1Changed(int idx)
{
	var1Binner = idxToBinner(var1, idx, false);

	// If the first variable is not binned, the second variable must be of the "numeric" type.
	if(!var1Binner && (!var2 || var2->type() != StatsVariable::Type::Numeric)) {
		// Find first variable that is numeric, but not the same as the first
		auto it = std::find_if(stats_variables.begin(), stats_variables.end(),
				       [v1 = var1] (const StatsVariable *v)
				       { return v != v1 && v->type() == StatsVariable::Type::Numeric; });
		var2 = it != stats_variables.end() ? *it : nullptr;
	}

	validate(false);
}

// Called when the user selects a new second variable.
//   id: none_idx for "none" or the index of the variable in stats_variables;
//       other out-of-range values are clamped to the first / last entry.
void StatsState::var2Changed(int id)
{
	if (id == none_idx) {
		// The "none" variable is represented by a nullptr
		var2 = nullptr;
	} else {
		// The last valid index is size() - 1. Clamping to size() would
		// allow an access one past the end. stats_variables is assumed
		// to be non-empty (the constructor accesses its first entry).
		var2 = stats_variables[std::clamp(id, 0, (int)stats_variables.size() - 1)];
	}
	validate(true);
}

void StatsState::binner2Changed(int idx)
{
	var2Binner = idxToBinner(var2, idx, true);

	// We do not support operations and binning at the same time.
	if (var2Binner)
		var2Operation = StatsOperation::Invalid;

	validate(false);
}

void StatsState::var2OperationChanged(int id)
{
	var2Operation = (StatsOperation)id;

	// We do not support operations and binning at the same time.
	if (var2Operation != StatsOperation::Invalid)
		var2Binner = nullptr;

	validate(false);
}

// Called when the user selects a new chart.
//   id: combined chart type / subtype as generated by toInt()
void StatsState::chartChanged(int id)
{
	const auto [newType, newSubtype] = fromInt(id);
	type = newType;
	subtype = newSubtype;
	validate(false);
}

// Called when the user toggles a chart feature.
//   id:    ChartFeature* flag identifying the feature (see createFeaturesList())
//   state: new state of the feature
// Unknown ids are ignored.
void StatsState::featureChanged(int id, bool state)
{
	switch (id) {
	case ChartFeatureLabels:
		labels = state;
		break;
	case ChartFeatureLegend:
		legend = state;
		break;
	case ChartFeatureMedian:
		median = state;
		break;
	case ChartFeatureMean:
		mean = state;
		break;
	case ChartFeatureQuartiles:
		quartiles = state;
		break;
	default:
		break;
	}
}

// Determines the new chart-type from the current chart-type and a list of
// possible chart types with their warning flags.
// The current chart-type is kept if it is in the list, unless "varChanged" is
// true and the chart-type is undesired. Otherwise, the first chart-type
// without warning is used and if there is none, the first entry of the list.
// For an empty list, the first entry of chart_types is returned.
const ChartTypeDesc &newChartType(ChartType type, std::vector<std::pair<const ChartTypeDesc &, bool>> charts,
			      bool varChanged)
{
	// Look for the current chart-type
	for (const auto &entry: charts) {
		if (entry.first.id != type)
			continue;
		// Found it, but if the axis was changed, we change anyway if the chart is "undesired"
		const bool keep = !varChanged || !entry.second;
		if (keep)
			return entry.first;
		break;
	}

	// Find the first non-undesired chart
	for (const auto &entry: charts) {
		if (entry.second)
			continue;
		return entry.first;
	}

	if (charts.empty())
		return chart_types[0];
	return charts.front().first;
}

// Returns the first binner of the given variable or nullptr if there is no
// variable or the variable has no binners.
static const StatsBinner *getFirstBinner(const StatsVariable *var)
{
	if (var == nullptr)
		return nullptr;

	const auto binners = var->binners();
	if (binners.empty())
		return nullptr;
	return binners.front();
}

// Makes sure that "binner" is a valid choice for the given variable and
// replaces it if it is not.
//   binner: in/out - the binner to be checked (nullptr means not binned)
//   var:    the variable to be binned (may be nullptr)
//   second: true if this is the second variable
static void validateBinner(const StatsBinner *&binner, const StatsVariable *var, bool second)
{
	// No variable, no binner
	if (var == nullptr) {
		binner = nullptr;
		return;
	}

	// "Not binned" is fine if the variable does not have to be binned
	const bool noneOk = noBinningAllowed(var, second);
	if (binner == nullptr && noneOk)
		return;

	// The binner is fine if it is one of the binners of the variable
	const auto binners = var->binners();
	const bool known = std::find(binners.begin(), binners.end(), binner) != binners.end();
	if (known)
		return;

	// For now choose the first binner or no binner if this is a non-discrete
	// variable. However, we might try to be smarter here and adapt to the
	// given screen size and the estimated number of bins.
	if (noneOk || binners.empty())
		binner = nullptr;
	else
		binner = binners[0];
}

// Makes sure that "operation" is valid for the given second variable and
// resets it to StatsOperation::Invalid (i.e. "none") if it is not.
//   operation:  in/out - the operation to be checked
//   var:        the second variable (may be nullptr)
//   var1Binner: binner of the first variable (nullptr means not binned)
static void validateOperation(StatsOperation &operation, const StatsVariable *var, const StatsBinner *var1Binner)
{
	// 1) No variable, no operation.
	// 2) We allow operations only if the first variable is binned.
	if (!var || !var1Binner) {
		operation = StatsOperation::Invalid;
		return;
	}

	// 3) The operation must be supported by the variable.
	const std::vector<StatsOperation> ops = var->supportedOperations();
	if (std::find(ops.begin(), ops.end(), operation) == ops.end())
		operation = StatsOperation::Invalid;
}

// Brings variables, binners, operation, chart type, subtype and features
// into a consistent state after any of them was changed.
// The varChanged parameter indicates whether this function is called
// after a variable change or a change of the chart type. In the
// former case, the chart type is switched, if it is not recommended.
// In the latter case, the user explicitly chose a non-recommended type,
// so let's use that.
// If no chart is valid for the current settings, the type is set to
// ChartType::Invalid and no features are offered.
void StatsState::validate(bool varChanged)
{
	// We need at least one variable
	if (!var1)
		var1 = stats_variables[0];

	// Take care that we don't plot a variable against itself.
	if (var1 == var2)
		var2 = nullptr;

	validateBinner(var1Binner, var1, false);

	// If there is no second variable or the second variable is not
	// "numeric", the first variable must be binned.
	if ((!var2 || var2->type() != StatsVariable::Type::Numeric) && !var1Binner)
		var1Binner = getFirstBinner(var1);

	// Check that the binners and operation are valid
	if (!var1Binner) {
		var2Binner = nullptr;	// Second variable can only be binned if first variable is binned.
		var2Operation = StatsOperation::Invalid;
	}
	validateBinner(var2Binner, var2, true);
	validateOperation(var2Operation, var2, var1Binner);

	// Let's see if the currently selected chart is one of the valid charts
	auto charts = validCharts(var1, var2, var1Binner, var2Binner, var2Operation);

	if (charts.empty()) {
		// Ooops. No valid chart with these settings. The validation should be improved.
		type = ChartType::Invalid;
		// Don't leave stale (or, on the first call from the constructor,
		// uninitialized) feature flags behind: getUIState() reads them.
		chartFeatures = 0;
		return;
	}

	const ChartTypeDesc &desc = newChartType(type, charts, varChanged);
	type = desc.id;

	// Check if the current subtype is supported by the chart.
	// If not, fall back to the first subtype of the chart.
	if (std::find(desc.subtypes.begin(), desc.subtypes.end(), subtype) == desc.subtypes.end())
		subtype = desc.subtypes.empty() ? ChartSubType::Horizontal : desc.subtypes[0];

	chartFeatures = desc.features;
	// Median and mean currently only if the first variable is numeric
	if (!var1 || var1->type() != StatsVariable::Type::Numeric)
		chartFeatures &= ~(ChartFeatureMedian | ChartFeatureMean);
}