36 #ifndef VIGRA_SAMPLING_HXX 37 #define VIGRA_SAMPLING_HXX 39 #include "array_vector.hxx" 70 double sample_proportion;
71 unsigned int sample_size;
72 bool sample_with_replacement;
73 bool stratified_sampling;
76 : sample_proportion(1.0),
78 sample_with_replacement(
true),
79 stratified_sampling(
false)
88 sample_with_replacement = in;
98 sample_with_replacement = !in;
128 vigra_precondition(proportion >= 0.0,
129 "SamplerOptions::sampleProportion(): argument must not be negative.");
130 sample_proportion = proportion;
146 stratified_sampling = in;
232 template<
class Random = MersenneTwister >
250 typedef std::map<IndexType, IndexArrayType> StrataIndicesType;
251 typedef std::map<IndexType, int> StrataSizesType;
255 static const int oobInvalid = -1;
257 int total_count_, sample_size_;
258 mutable int current_oob_count_;
259 StrataIndicesType strata_indices_;
260 StrataSizesType strata_sample_size_;
261 IndexArrayType current_sample_;
262 mutable IndexArrayType current_oob_sample_;
263 IsUsedArrayType is_used_;
264 Random default_random_;
265 Random
const & random_;
// Decides how many samples each stratum contributes, filling strata_sample_size_
// for every key of strata_indices_.
268 void initStrataCount()
// Per-stratum upper bound: ceil(sample_size_ / strataCount()).
272 int strata_sample_size =
static_cast<int>(
std::ceil(
double(sample_size_) / strataCount()));
// Number of samples that would be drawn if every stratum took the upper bound.
273 int strata_total_count = strata_sample_size * strataCount();
275 for(StrataIndicesType::iterator i = strata_indices_.begin();
276 i != strata_indices_.end(); ++i)
// While the running total still exceeds sample_size_, this stratum gets one
// sample fewer and the surplus is reduced by one.
278 if(strata_total_count > sample_size_)
280 strata_sample_size_[i->first] = strata_sample_size - 1;
281 --strata_total_count;
// NOTE(review): braces and else are not visible in this listing; presumably this
// assignment is the else-branch (stratum keeps the full upper bound) -- confirm.
285 strata_sample_size_[i->first] = strata_sample_size;
// Remainder of the Sampler constructor that takes a plain element count
// (totalCount), the options object (opt) and an optional random generator (rnd).
// NOTE(review): the first signature line and some initializer lines are not
// visible in this listing.
299 Random
const * rnd = 0)
300 : total_count_(totalCount),
// When opt.sample_size is 0 the sample size is derived from the proportion:
// ceil(total_count_ * opt.sample_proportion). Reading total_count_ here relies on
// it being declared before sample_size_ (members are initialized in declaration
// order). The other branch of the conditional is not visible in this listing.
301 sample_size_(opt.sample_size == 0
302 ? static_cast<int>((
std::
ceil(total_count_ * opt.sample_proportion)))
// oobInvalid marks the out-of-bag list as not yet computed.
304 current_oob_count_(oobInvalid),
305 current_sample_(sample_size_),
306 current_oob_sample_(total_count_),
307 is_used_(total_count_),
// Own generator constructed from RandomSeed; used only when the caller passes
// no generator (rnd == 0).
308 default_random_(RandomSeed),
309 random_(rnd ? *rnd : default_random_),
// Without replacement, at most total_count_ distinct indices can be drawn.
312 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
313 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// This overload receives no strata labels, so stratified sampling is rejected.
315 vigra_precondition(!opt.stratified_sampling,
316 "Sampler(): Stratified sampling requested, but no strata given.");
// All indices 0 .. total_count_-1 are placed in a single stratum with key 0.
319 strata_indices_[0].resize(total_count_);
320 for(
int i=0; i<total_count_; ++i)
321 strata_indices_[0][i] = i;
// Remainder of the Sampler constructor that takes a range of stratum labels
// [strataBegin, strataEnd), the options object (opt) and an optional random
// generator (rnd).
// NOTE(review): the first signature line and some initializer lines are not
// visible in this listing.
337 template <
class Iterator>
339 Random
const * rnd = 0)
// The number of elements is the iterator distance, so Iterator must support
// subtraction.
340 : total_count_(strataEnd - strataBegin),
// When opt.sample_size is 0 the sample size is derived from the proportion:
// ceil(total_count_ * opt.sample_proportion). Reading total_count_ here relies on
// it being declared before sample_size_ (members are initialized in declaration
// order). The other branch of the conditional is not visible in this listing.
341 sample_size_(opt.sample_size == 0
342 ? static_cast<int>((
std::
ceil(total_count_ * opt.sample_proportion)))
// oobInvalid marks the out-of-bag list as not yet computed.
344 current_oob_count_(oobInvalid),
345 current_sample_(sample_size_),
346 current_oob_sample_(total_count_),
347 is_used_(total_count_),
// Own generator constructed from RandomSeed; used only when the caller passes
// no generator (rnd == 0).
348 default_random_(RandomSeed),
349 random_(rnd ? *rnd : default_random_),
// Without replacement, at most total_count_ distinct indices can be drawn.
352 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
353 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// Stratified: group element positions by label, i.e. position i is appended to
// the index list stored under the label *strataBegin.
356 if(opt.stratified_sampling)
358 for(
int i = 0; strataBegin != strataEnd; ++i, ++strataBegin)
360 strata_indices_[*strataBegin].push_back(i);
// NOTE(review): presumably the non-stratified (else) branch -- the else itself is
// not visible in this listing. All indices go into a single stratum with key 0.
365 strata_indices_[0].resize(total_count_);
366 for(
int i=0; i<total_count_; ++i)
367 strata_indices_[0][i] = i;
// Fewer samples than strata is rejected.
370 vigra_precondition(sample_size_ >= static_cast<int>(strata_indices_.size()),
371 "Sampler(): Requested sample count must be at least as large as the number of strata.");
382 return current_sample_[k];
416 return strata_indices_.size();
424 return options_.stratified_sampling;
431 return options_.sample_with_replacement;
438 return current_sample_;
446 if(current_oob_count_ == oobInvalid)
448 current_oob_count_ = 0;
449 for(
int i = 0; i<total_count_; ++i)
453 current_oob_sample_[current_oob_count_] = i;
454 ++current_oob_count_;
458 return current_oob_sample_.
subarray(0, current_oob_count_);
460 IsUsedArrayType
const & is_used()
const 467 template<
class Random>
// Body of the routine that draws a new sample into current_sample_.
// NOTE(review): the function header and the declaration of j (write position in
// current_sample_) are not visible in this listing.
// Invalidate the cached out-of-bag list and clear all "used" flags first.
470 current_oob_count_ = oobInvalid;
471 is_used_.
init(
false);
// With replacement: each draw picks a position in the stratum independently, so
// the same index may be chosen more than once.
473 if(options_.sample_with_replacement)
477 StrataIndicesType::iterator iter;
478 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
481 int stratum_size = iter->second.size();
482 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
// presumably uniformInt(n) yields a value in [0, n) -- TODO confirm
484 current_sample_[j] = iter->second[random_.uniformInt(stratum_size)];
485 is_used_[current_sample_[j]] =
true;
// NOTE(review): presumably the without-replacement (else) branch -- the else is
// not visible in this listing.
493 StrataIndicesType::iterator iter;
494 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
497 int stratum_size = iter->second.
size();
498 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
// Swap slot i with a randomly chosen slot at offset i + uniformInt(stratum_size - i),
// then take slot i as the i-th draw of this stratum; earlier slots are never
// touched again, so no index is drawn twice within a stratum.
// NOTE(review): if a stratum holds fewer elements than its strata_sample_size_
// entry, stratum_size - i reaches 0 and slot i lies past the stratum; no
// per-stratum guard is visible in this listing -- confirm.
500 std::swap(iter->second[i], iter->second[i+ random_.uniformInt(stratum_size - i)]);
501 current_sample_[j] = iter->second[i];
502 is_used_[current_sample_[j]] =
true;
508 template<
class Random =RandomTT800 >
513 typedef Int32 IndexType;
515 IndexArrayType used_indices_;
520 PoissonSampler(
double lambda,IndexType minIndex,IndexType maxIndex)
528 used_indices_.clear();
530 for(i=minIndex;i<maxIndex;++i)
535 double L=
exp(-lambda);
539 p*=randfloat.uniform53();
546 used_indices_.push_back(i);
// Returns a const reference to entry number `in` of used_indices_.
// No range check on `in` is visible here.
552 IndexType
const & operator[](
int in)
const 554 return used_indices_[in];
// Returns the number of entries currently stored in used_indices_.
// NOTE(review): the container's size() result is implicitly converted to int here;
// presumably the size type is unsigned -- confirm the conversion is intended.
557 int numOfSamples()
const 559 return used_indices_.
size();
ArrayVectorView< IndexType > IndexArrayViewType
Definition: sampling.hxx:247
IndexType operator[](int k) const
Definition: sampling.hxx:380
int strataCount() const
Definition: sampling.hxx:414
Sampler(UInt32 totalCount, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:298
Create random samples from a sequence of indices.
Definition: sampling.hxx:233
SamplerOptions & sampleProportion(double proportion)
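Determine the number of samples to draw as a proportion of the total number. That is, ceil(totalCount * proportion) samples are drawn when no explicit sample size is given (sample size 0).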
Definition: sampling.hxx:126
linalg::TemporaryMatrix< T > exp(MultiArrayView< 2, T, C > const &v)
Definition: array_vector.hxx:954
void sample()
Definition: sampling.hxx:468
bool withReplacement() const
Definition: sampling.hxx:429
Definition: accessor.hxx:43
Sampler(Iterator strataBegin, Iterator strataEnd, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:338
SamplerOptions & sampleSize(unsigned int size)
Draw the given number of samples. If stratifiedSampling is true, the size is equally distributed across all strata (subject to rounding).
Definition: sampling.hxx:109
int sampleSize() const
Definition: sampling.hxx:398
int size() const
Definition: sampling.hxx:405
bool stratifiedSampling() const
Definition: sampling.hxx:422
detail::SelectIntegerType< 32, detail::SignedIntTypes >::type Int32
32-bit signed int
Definition: sized_int.hxx:175
void init(U const &initial)
Definition: array_vector.hxx:146
IndexArrayViewType sampledIndices() const
Definition: sampling.hxx:436
int totalCount() const
Definition: sampling.hxx:391
this_type subarray(size_type begin, size_type end) const
Definition: array_vector.hxx:200
Int32 IndexType
Definition: sampling.hxx:240
SamplerOptions & stratified(bool in=true)
Draw equally many samples from each "stratum". A stratum is a group of like entities, e.g. pixels belonging to the same object class. This is useful to create balanced samples when the class probabilities are very unbalanced (e.g. when there are many background and few foreground pixels). Stratified sampling thus prevents a trained classifier from being biased towards the majority class.
Definition: sampling.hxx:144
SamplerOptions & withReplacement(bool in=true)
Sample from training population with replacement.
Definition: sampling.hxx:86
detail::SelectIntegerType< 32, detail::UnsignedIntTypes >::type UInt32
32-bit unsigned int
Definition: sized_int.hxx:183
size_type size() const
Definition: array_vector.hxx:358
int ceil(FixedPoint< IntBits, FracBits > v)
rounding up.
Definition: fixedpoint.hxx:675
Options object for the Sampler class.
Definition: sampling.hxx:66
IndexArrayViewType oobIndices() const
Definition: sampling.hxx:444
SamplerOptions & withoutReplacement(bool in=true)
Sample from training population without replacement.
Definition: sampling.hxx:96