% Conference paper from the 2022 IEEE International Conference on Big Data.
% The proceedings title lives in booktitle (required for inproceedings entries);
% publicationstatus is a repository-export field that standard styles ignore.
@inproceedings{mansilla2022bundle,
  author            = {Mansilla, Roberto and Smith, Gavin and Smith, Andrew and Goulding, James},
  title             = {Bundle entropy as an optimized measure of consumers' systematic product choice combinations in mass transactional data},
  booktitle         = {2022 {IEEE} International Conference on Big Data (Big Data)},
  pages             = {1044--1053},
  publisher         = {IEEE},
  year              = {2022},
  doi               = {10.1109/BigData55660.2022.10021062},
  isbn              = {9781665480468},
  url               = {https://nottingham-repository.worktribe.com/output/18230088},
  publicationstatus = {Published},
  abstract          = {Understanding and measuring the predictability of consumer purchasing (basket) behaviour is of significant value. While predictability measures such as entropy have been well studied and leveraged in other sectors, their development and application to very large multi-dimensional data sets present in the retailing sector are less common. While a small number of methods exist, we demonstrate they fail to accord with intuition, leading to the potential for misunderstandings between those who conduct the analysis and those who act on the insights. We delineate the requirements for such a measure in this domain to demonstrate these issues in context. A novel measure is then developed based on entropy to directly measure the predictability of basket composition. The measure is designated as bundle entropy (zero denotes a bundle's total predictability, one the total unpredictability). We empirically compare the proposed bundle entropy against existing measures using two large-scale real-world transactional data sets, each including more than 2,000 households (frequent shoppers) over two years. First, we demonstrate how the proposed measure is the only measure that behaves according to the desired properties. Second, we show empirically that bundle entropy differs noticeably from the other measures. Finally, we consider some use case analyses and discuss the utility of the proposed measure in practice.},
}