27 #ifndef SCIMATH_CLASSICALSTATS_H 28 #define SCIMATH_CLASSICALSTATS_H 30 #include <casacore/casa/aips.h> 32 #include <casacore/scimath/Mathematics/StatisticsAlgorithm.h> 34 #include <casacore/scimath/Mathematics/StatisticsTypes.h> 35 #include <casacore/scimath/Mathematics/StatisticsUtilities.h> 59 template <
class AccumType,
class DataIterator,
class MaskIterator=const Bool*,
class WeightsIterator=DataIterator>
139 std::map<Double, AccumType>& quantiles,
const std::set<Double>& fractions,
142 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
150 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
159 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
169 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
186 virtual void reset();
208 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
213 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
219 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
220 const MaskIterator& maskBegin,
uInt maskStride
225 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
226 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
232 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
238 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
244 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
245 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
251 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
252 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
275 void _doMinMax(AccumType& vmin, AccumType& vmax);
283 vector<vector<uInt64> >& binCounts,
285 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
287 const vector<AccumType>& maxLimit
291 vector<vector<uInt64> >& binCounts,
293 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
299 vector<vector<uInt64> >& binCounts,
301 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
302 const MaskIterator& maskBegin,
uInt maskStride,
307 vector<vector<uInt64> >& binCounts,
309 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
310 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
316 vector<vector<uInt64> >& binCounts,
318 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
324 vector<vector<uInt64> >& binCounts,
326 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
332 vector<vector<uInt64> >& binCounts,
334 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
335 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
341 vector<vector<uInt64> >& binCounts,
343 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
344 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
368 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
373 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
379 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
380 const MaskIterator& maskBegin,
uInt maskStride
385 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
386 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
392 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
398 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
404 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
405 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
411 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
412 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
420 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
425 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
430 vector<AccumType>& ary,
const DataIterator& dataBegin,
431 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
437 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
438 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
444 vector<AccumType>& ary,
const DataIterator& dataBegin,
445 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride
450 vector<AccumType>& ary,
const DataIterator& dataBegin,
451 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
457 vector<AccumType>& ary,
const DataIterator& dataBegin,
458 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
459 const MaskIterator& maskBegin,
uInt maskStride
464 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
465 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
477 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
478 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
483 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
485 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
489 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
490 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
492 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
497 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
Int64 nr,
498 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
500 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
505 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
506 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
507 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
512 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
513 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
515 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
520 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
521 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
522 const MaskIterator& maskBegin,
uInt maskStride,
523 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
528 vector<vector<AccumType> >& arys,
uInt64& currentCount,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
529 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
531 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt64 maxCount
538 vector<AccumType>& ary,
const DataIterator& dataBegin,
544 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
551 vector<AccumType>& ary,
const DataIterator& dataBegin,
552 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
558 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
559 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
565 vector<AccumType>& ary,
const DataIterator& dataBegin,
566 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
572 vector<AccumType>& ary,
const DataIterator& dataBegin,
573 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
579 vector<AccumType>& ary,
const DataIterator& dataBegin,
580 const WeightsIterator& weightBegin,
Int64 nr,
581 uInt dataStride,
const MaskIterator& maskBegin,
587 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
588 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
598 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
604 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
610 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
611 const MaskIterator& maskBegin,
uInt maskStride
616 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
617 const MaskIterator& maskBegin,
uInt maskStride,
630 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
636 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
642 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
643 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
648 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
649 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
661 mutable typename vector<DataIterator>::const_iterator
_dend,
_diter;
662 mutable vector<Int64>::const_iterator
_citer;
664 mutable std::map<uInt, MaskIterator>
_masks;
693 vector<Bool>& allSame, DataIterator dataIter, MaskIterator maskIter,
694 WeightsIterator weightsIter,
uInt64 count,
696 const vector<AccumType>& maxLimit
700 vector<AccumType>& ary, DataIterator dataIter,
701 MaskIterator maskIter, WeightsIterator weightsIter,
706 vector<vector<AccumType> >& arys,
uInt64& currentCount,
707 DataIterator dataIter, MaskIterator maskIter,
708 WeightsIterator weightsIter,
uInt64 dataCount,
709 const vector<std::pair<AccumType, AccumType> >& includeLimits,
715 DataIterator dataIter, MaskIterator maskIter,
716 WeightsIterator weightsIter,
uInt64 dataCount
721 DataIterator dataIter, MaskIterator maskIter,
722 WeightsIterator weightsIter,
uInt64 count
732 vector<AccumType>&
array 736 vector<vector<AccumType> >& arrays,
737 const vector<std::pair<AccumType, AccumType> > &includeLimits,
749 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
753 const vector<uInt64>& binNpts,
uInt64 maxArraySize,
754 const vector<std::pair<AccumType, AccumType> >& binLimits,
755 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
765 DataIterator& dataIter, MaskIterator& maskIter,
766 WeightsIterator& weightsIter,
uInt64& offset,
uInt nthreads
773 const std::set<uInt64>& dataIndices,
Bool persistSortedArray,
803 vector<Bool>& allSame,
const PtrHolder<vector<vector<uInt64> > >& tBins,
825 const std::set<uInt64>& indices,
uInt64 maxArraySize,
826 Bool persistSortedArray
832 #ifndef CASACORE_NO_AUTO_TEMPLATES 833 #include <casacore/scimath/Mathematics/ClassicalStatistics.tcc> 834 #endif //# CASACORE_NO_AUTO_TEMPLATES void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
static const uInt BLOCK_SIZE
vector< std::map< uInt64, AccumType > > _dataFromMultipleBins(const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, uInt64 maxArraySize, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
extract data from multiple histograms given by binDesc.
Bool _valuesFromSortedArray(std::map< uInt64, AccumType > &values, CountedPtr< uInt64 > knownNpts, const std::set< uInt64 > &indices, uInt64 maxArraySize, Bool persistSortedArray)
get values from sorted array if the array is small enough to be held in memory.
vector< DataIterator >::const_iterator _dend
mutables, used to mitigate repeated code
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
void _computeMinMax(CountedPtr< AccumType > &mymax, CountedPtr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
LatticeExprNode median(const LatticeExprNode &expr)
AccumType _getStatistic(StatisticsData::STATS stat)
ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > & operator=(const ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > &other)
copy semantics
vector< DataIterator >::const_iterator _diter
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
StatsData< AccumType > _getStatistics()
virtual StatsData< AccumType > & _getStatsData()
retrieve stats structure.
TableExprNode array(const TableExprNode &values, const TableExprNodeSet &shape)
Create an array of the given shape and fill it with the values.
void _createDataArray(vector< AccumType > &array)
Create an unsorted array of the complete data set.
unsigned long long uInt64
std::set< uInt64 > _medianIndices(CountedPtr< uInt64 > knownNpts)
get the index (for odd npts) or indices (for even npts) of the median of the sorted array...
PtrHolder(const PtrHolder< T > &other)
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
std::map< uInt64, AccumType > _indicesToValues(CountedPtr< uInt64 > knownNpts, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax, uInt64 maxArraySize, const std::set< uInt64 > &dataIndices, Bool persistSortedArray, uInt64 nBins)
get the values for the specified indices in the sorted array of all good data
Bool _getDoMaxMin() const
std::pair< Int64, Int64 > LocationType
std::map< uInt, DataRanges > _ranges
void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
Abstract base class which defines interface for providing "datasets" to the statistics framework when...
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
void _computeBins(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit)
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
Hold and delete pointers not deleted by object destructors.
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
Int64 _getIDataset() const
vector< vector< uInt64 > > _binCounts(vector< CountedPtr< AccumType > > &sameVal, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc)
tally the number of data points that fall into each bin provided by binDesc. Any points that are less ...
std::map< uInt, WeightsIterator > _weights
void _computeDataArray(vector< AccumType > &ary, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount)
WeightsIterator _myWeights
ALGORITHM
implemented algorithms
virtual const StatsData< AccumType > & _getStatsData() const
virtual void _populateArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount) const
Create a vector of unsorted arrays, one array for each bin defined by includeLimits.
void _addData()
Allows derived classes to do things after data is set or added.
Referenced counted pointer for constant data.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
std::map< uInt, Bool > _isIncludeRanges
Bool hasData() const
Has any data been added to this object? Will return False if the object has been reset and no data ha...
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const LocationType &location)
virtual void _findBins(vector< vector< uInt64 > > &binCounts, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit) const
Get the counts of data within the specified histogram bins.
static void _convertToAbsDevMedArray(vector< AccumType > &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the vector and the median ...
void _computeDataArrays(vector< vector< AccumType > > &arys, uInt64 &currentCount, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
void _initThreadVars(uInt &nBlocks, uInt64 &extra, uInt &nthreads, PtrHolder< DataIterator > &dataIter, PtrHolder< MaskIterator > &maskIter, PtrHolder< WeightsIterator > &weightsIter, PtrHolder< uInt64 > &offset, uInt nThreadsMax) const
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
virtual Bool _populateTestArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride, uInt maxElements) const
no weights, no mask, no ranges
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
void setDataProvider(StatsDataProvider< AccumType, DataIterator, MaskIterator, WeightsIterator > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
uInt _nThreadsMax() const
std::map< uInt, MaskIterator > _masks
void _createDataArrays(vector< vector< AccumType > > &arrays, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt64 maxCount)
static void _makeBins(typename StatisticsUtilities< AccumType >::BinDesc &bins, AccumType minData, AccumType maxData, uInt maxBins, Bool allowPad)
If allowPad is True, then pad the lower side of the lowest bin and the higher side of the highest bin...
virtual ~ClassicalStatistics()
vector< uInt >::const_iterator _dsiter
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
static const uInt CACHE_PADDING
StatsData< AccumType > _statsData
vector< std::map< uInt64, AccumType > > _dataFromSingleBins(const vector< uInt64 > &binNpts, uInt64 maxArraySize, const vector< std::pair< AccumType, AccumType > > &binLimits, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
Bool _increment(Bool includeIDataset)
increment the relevant loop counters
virtual void reset()
reset object to initial state.
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
vector< Int64 >::const_iterator _citer
const Double c
Fundamental physical constants (SI units):
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
static void _mergeResults(vector< vector< uInt64 > > &bins, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const PtrHolder< vector< vector< uInt64 > > > &tBins, const PtrHolder< vector< CountedPtr< AccumType > > > &tSameVal, const PtrHolder< vector< Bool > > &tAllSame, uInt nThreadsMax)
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
void _incrementThreadIters(DataIterator &dataIter, MaskIterator &maskIter, WeightsIterator &weightsIter, uInt64 &offset, uInt nthreads) const
increment thread-based iterators
Bool _isNptsSmallerThan(vector< AccumType > &arrayToSort, uInt maxArraySize)
Determine by scanning the dataset if the number of good points is smaller than maxArraySize.
virtual void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
virtual void _populateArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
populate an unsorted array with valid data.
virtual StatsData< AccumType > _getInitialStats() const
void _computeStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count)
Base class of statistics algorithm class hierarchy.
this file contains all the compiler specific defines
description of regularly spaced bins with the first bin having lower limit of minLimit and having n...