parallel_scan.h

00001 /*
00002     Copyright 2005-2009 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028 
00029 namespace tbb {
00030 
00032 
// Tag type for the pre-scan phase: is_final_scan() always returns false.
// start_scan::execute() passes a temporary of this type as the second
// argument of the user's Body when it runs the body over a range that is
// not being scanned in final mode.
00033 struct pre_scan_tag {
00034     static bool is_final_scan() {return false;}
00035 };
00036 
00038 
// Tag type for the final-scan phase: is_final_scan() always returns true.
// final_sum::execute() and start_scan::execute() pass a temporary of this
// type as the second argument of the user's Body when the range is scanned
// in final mode.
00039 struct final_scan_tag {
00040     static bool is_final_scan() {return true;}
00041 };
00042 
00044 namespace internal {
00045 
00047 
// Task that owns a split-off copy of a user Body plus storage for one Range.
// When executed it applies the body to the stored range with final_scan_tag
// and, if stuff_last is non-NULL, assigns the resulting body to *stuff_last.
//
// Construction is two-phase: the constructor only builds `body`; the Range
// and stuff_last are supplied later through finish_construction().
00048     template<typename Range, typename Body>
00049     class final_sum: public task {
00050     public:
              // Built with Body's splitting constructor from the Body passed to
              // the final_sum constructor.
00051         Body body;
00052     private:
              // Raw storage for a single Range; the Range object itself is
              // placement-constructed in finish_construction().
00053         aligned_space<Range,1> range;
              // Destination that receives `body` (via assign) after execute();
              // NULL means "do not copy the result anywhere".
00055         Body* stuff_last;
00056     public:
00057         final_sum( Body& body_ ) :
00058             body(body_,split())
00059         {
                  // stuff_last is not given a real value here; it is only set by
                  // finish_construction(). poison_pointer() is defined elsewhere
                  // (presumably a debug aid that marks the pointer as invalid).
00060             poison_pointer(stuff_last);
00061         }
00062         ~final_sum() {
                  // Explicit destructor call for the placement-constructed Range.
                  // NOTE(review): this runs unconditionally, even for an object on
                  // which finish_construction() was never called (e.g. a zombie
                  // allocated in start_scan::execute() and destroyed by
                  // finish_scan::execute()). Confirm this is acceptable for Range
                  // types with a non-trivial destructor.
00063             range.begin()->~Range();
00064         }     
              // Second construction phase: copy-constructs range_ into the
              // internal storage and records where the final body should be
              // assigned (stuff_last_ may be NULL).
00065         void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066             new( range.begin() ) Range(range_);
00067             stuff_last = stuff_last_;
00068         }
00069     private:
              // Runs the final scan over the stored range, optionally publishes
              // the body through stuff_last, and returns NULL (no successor task
              // is handed back to the scheduler).
00070         /*override*/ task* execute() {
00071             body( *range.begin(), final_scan_tag() );
00072             if( stuff_last )
00073                 stuff_last->assign(body);
00074             return NULL;
00075         }
00076     };       
00077 
00079 
// Interior node of the tree that start_scan::execute() builds whenever it
// splits a range. The node remembers the (unsplit) range, the final_sum that
// summarises its left part (left_sum) and links to child nodes (left/right).
//
// The fields `incoming`, `body` and `stuff_last` are filled in later, either
// by start_scan::run() (for the root) or by a parent node's create_child(),
// before the node is executed to drive the final scan of its subtree.
00080     template<typename Range, typename Body>
00081     class sum_node: public task {
00082         typedef final_sum<Range,Body> final_sum_type;
00083     public:
              // final_sum whose body is reverse_join'ed into left_sum before the
              // right part is processed; NULL when there is nothing to join.
00084         final_sum_type *incoming; 
              // final_sum used for the left part; also serves as the
              // "not yet processed" flag tested by execute().
00085         final_sum_type *body;
              // Forwarded to the right-hand child so the last leaf can assign
              // its body there; NULL is forwarded to the left-hand child.
00086         Body *stuff_last;
00087     private:
              // Filled in through the `sum` slot of the left start_scan task.
00088         final_sum_type *left_sum;
              // Child nodes; NULL when that half was handled without a sum_node.
00089         sum_node *left;
00090         sum_node *right;     
              // True when the left part needs no further processing by execute().
00091         bool left_is_final;
00092         Range range;
              // Takes the range by const reference (the member initializer makes
              // the one required copy), so no extra temporary Range is created.
00093         sum_node( const Range& range_, bool left_is_final_ ) : 
00094             left_sum(NULL), 
00095             left(NULL), 
00096             right(NULL), 
00097             left_is_final(left_is_final_), 
00098             range(range_)
00099         {
00100             // Poison fields that will be set by second pass.
00101             poison_pointer(body);
00102             poison_pointer(incoming);
00103         }
              // Prepares the task that will process one half of this node.
              //  - If there is no child node `n`, the final_sum `f` itself becomes
              //    the child: it is recycled as a child of *this and completed
              //    with the given range and stuff_last.
              //  - Otherwise the existing child node `n` is primed with `f`,
              //    `incoming` and `stuff_last` and returned; the `range` argument
              //    is not used in that case.
              // Note that the parameters shadow the members of the same name.
00104         task* create_child( const Range& range, final_sum_type& f, sum_node* n, final_sum_type* incoming, Body* stuff_last ) {
00105             if( !n ) {
00106                 f.recycle_as_child_of( *this );
00107                 f.finish_construction( range, stuff_last );
00108                 return &f;
00109             } else {
00110                 n->body = &f;
00111                 n->incoming = incoming;
00112                 n->stuff_last = stuff_last;
00113                 return n;
00114             }
00115         }
              // First invocation (body != NULL): folds `incoming` into left_sum,
              // turns *this into the continuation of its children, creates the
              // right child (always) and the left child (unless left_is_final),
              // and hands one of them back to the scheduler.
              // Later invocation (body == NULL): nothing left to do.
00116         /*override*/ task* execute() {
00117             if( body ) {
00118                 if( incoming )
00119                     left_sum->body.reverse_join( incoming->body );
00120                 recycle_as_continuation();
00121                 sum_node& c = *this;
                      // b is created first: Range(range,split()) invokes Range's
                      // splitting constructor on the member `range`, so the `range`
                      // passed for the left child below is the already-split value.
00122                 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00123                 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
                      // One reference per child that was actually created.
00124                 set_ref_count( (a!=NULL)+(b!=NULL) );
                      // Cleared so that a re-execution of this task takes the
                      // else-branch below.
00125                 body = NULL; 
                      // With two children, spawn b and return a; with only b,
                      // return b directly instead of spawning it.
00126                 if( a ) spawn(*b);
00127                 else a = b;
00128                 return a;
00129             } else {
00130                 return NULL;
00131             }
00132         }
              // start_scan and finish_scan build and wire the tree, so they need
              // access to the private constructor and link fields.
00133         template<typename Range_,typename Body_,typename Partitioner_>
00134         friend class start_scan;
00135 
00136         template<typename Range_,typename Body_>
00137         friend class finish_scan;
00138     };
00139 
00141 
// Continuation task created by start_scan::execute() after it has split a
// range. It runs once both halves of the split have been processed, combines
// partial results where required and decides whether the sum_node created
// for the split (`result`) is kept (published through return_slot) or
// destroyed.
00142     template<typename Range, typename Body>
00143     class finish_scan: public task {
00144         typedef sum_node<Range,Body> sum_node_type;
00145         typedef final_sum<Range,Body> final_sum_type;
              // Slot of the splitting task's `sum` pointer at the time of the
              // split; may be NULL.
00146         final_sum_type** const sum;
              // Pointer slot that receives &result when the node is kept.
00147         sum_node_type*& return_slot;
00148     public:
              // Set by the right-hand start_scan child when it allocates its own
              // final_sum (see start_scan::execute()); NULL otherwise.
00149         final_sum_type* right_zombie;
              // The sum_node allocated for this split.
00150         sum_node_type& result;
00151 
00152         /*override*/ task* execute() {
                  // result's reference count must equal its number of child nodes.
00153             __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
                  // A left child node means the left half still has to be
                  // processed when `result` is executed.
00154             if( result.left )
00155                 result.left_is_final = false;
                  // The right half used a separate body: fold the left summary
                  // into the body published through *sum.
00156             if( right_zombie && sum ) 
00157                 ((*sum)->body).reverse_join(result.left_sum->body);
00158             __TBB_ASSERT( !return_slot, NULL );
                  // Keep the node if a second pass over this subtree is needed
                  // (separate right body or a right child node); otherwise free it.
00159             if( right_zombie || result.right ) {
00160                 return_slot = &result;
00161             } else {
00162                 destroy( result );
00163             }
                  // The zombie is destroyed here only when nobody refers to it:
                  // there is no `sum` slot and no right child node.
00164             if( right_zombie && !sum && !result.right ) {
00165                 destroy(*right_zombie);
00166                 right_zombie = NULL;
00167             }
00168             return NULL;
00169         }
00170 
              // return_slot_ must refer to a NULL pointer on entry (asserted).
00171         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
00172             sum(sum_),
00173             return_slot(return_slot_), 
00174             right_zombie(NULL),
00175             result(result_)
00176         {
00177             __TBB_ASSERT( !return_slot, NULL );
00178         }
              // With __TBB_EXCEPTIONS enabled and the task cancelled, releases
              // `result` (only if its reference count is zero) and any
              // remaining right_zombie. Does nothing otherwise.
00179         ~finish_scan(){
00180 #if __TBB_EXCEPTIONS
00181             if (is_cancelled()) {
00182                 if (result.ref_count() == 0) destroy(result);
00183                 if (right_zombie) destroy(*right_zombie);
00184             }
00185 #endif
00186         }
00187     };
00188 
00190 
// Task that performs the first pass of parallel_scan over a range: it either
// runs the body over its whole range or splits the range, building sum_node /
// finish_scan tasks as it goes (see execute()). The static run() function is
// the entry point used by the parallel_scan overloads and also drives the
// second pass.
00191     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00192     class start_scan: public task {
00193         typedef sum_node<Range,Body> sum_node_type;
00194         typedef final_sum<Range,Body> final_sum_type;
              // final_sum whose body is applied to `range`.
00195         final_sum_type* body;
              // Slot where `body` is published once the range has been
              // processed; NULL for the root task.
00197         final_sum_type** sum; 
              // Slot that a finish_scan continuation fills with the sum_node
              // created for this task's range, if one is kept.
00198         sum_node_type** return_slot;
              // sum_node created by the split that produced this task; NULL for
              // the root task.
00200         sum_node_type* parent_sum;
              // True while this task may run the body with final_scan_tag.
00201         bool is_final;
              // True for the task split off as the right half in execute().
00202         bool is_right_child;
00203         Range range;
00204         typename Partitioner::partition_type partition;
00205         /*override*/ task* execute();
00206 #if __TBB_EXCEPTIONS
              // Context used when this task allocates further root tasks.
00207         tbb::task_group_context &my_context;
00208 #endif
00209     public:
              // Splitting constructor: shares body/sum/is_final with `parent` and
              // takes part of parent's range and partition via their splitting
              // constructors. is_right_child starts out false; the caller sets
              // it afterwards. *return_slot_ must be NULL (asserted).
00210         start_scan( sum_node_type*& return_slot_, start_scan& parent, sum_node_type* parent_sum_ 
00211 #if __TBB_EXCEPTIONS
00212             , tbb::task_group_context &_context
00213 #endif
00214             ) :
00215             body(parent.body),
00216             sum(parent.sum),
00217             return_slot(&return_slot_),
00218             parent_sum(parent_sum_),
00219             is_final(parent.is_final),
00220             is_right_child(false),
00221             range(parent.range,split()),
00222             partition(parent.partition,split())
00223 #if __TBB_EXCEPTIONS
00224         , my_context (_context)
00225 #endif
00226         {
00227             __TBB_ASSERT( !*return_slot, NULL );
00228         }
00229 
              // Root constructor: covers the whole range_, has no sum slot and
              // no parent node, and starts in final-scan mode.
              // *return_slot_ must be NULL (asserted).
00230         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_
00231 #if __TBB_EXCEPTIONS
00232         , tbb::task_group_context &_context
00233 #endif
00234             ) :
00235             body(&body_),
00236             sum(NULL),
00237             return_slot(&return_slot_),
00238             parent_sum(NULL),
00239             is_final(true),
00240             is_right_child(false),
00241             range(range_),
00242             partition(partitioner_)
00243 #if __TBB_EXCEPTIONS
00244             , my_context (_context)
00245 #endif
00246         {
00247             __TBB_ASSERT( !*return_slot, NULL );
00248         }
00249 
              // Entry point for parallel_scan. Does nothing for an empty range.
              // Otherwise:
              //   1. wraps `body` in a root final_sum (temp_body) and runs the
              //      first pass (pass1) to completion;
              //   2. if the first pass left a sum_node tree in `root`, runs that
              //      tree as the second pass with temp_body as its body and
              //      &body as the place to store the final result;
              //   3. if no tree was produced, lets task_cleaner copy the result
              //      from temp_body into `body` and destroy temp_body.
00250         static void run(  const Range& range, Body& body, const Partitioner& partitioner 
00251 #if __TBB_EXCEPTIONS
00252         , task_group_context& context
00253 #endif
00254             ) {
00255             if( !range.empty() ) {
00256                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
                      // Filled in by the first pass if a sum_node tree is kept.
00257                 internal::sum_node<Range,Body>* root = NULL;
00258                 typedef internal::final_sum<Range,Body> final_sum_type;
00259 #if __TBB_EXCEPTIONS
00260                 final_sum_type* temp_body = new(task::allocate_root(context)) final_sum_type( body );
00261                 start_pass1_type& pass1 = *new(task::allocate_root(context)) start_pass1_type(
00262                     /*return_slot=*/root,
00263                     range,
00264                     *temp_body,
00265                     partitioner,
00266                     context
00267                     );
00268 #else
00269                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00270                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00271                     /*return_slot=*/root,
00272                     range,
00273                     *temp_body,
00274                     partitioner );
00275 #endif
00276                 // The class is intended to destroy allocated tasks if exception occurs
                      // Its destructor acts only while do_clean is true. my_root and
                      // my_pass1 are stored but not read by the destructor.
00277                 class task_cleaner: internal::no_copy {
00278                     internal::sum_node<Range,Body>* my_root;
00279                     final_sum_type* my_temp_body;
00280                     const Range& my_range;
00281                     Body& my_body;
00282                     start_pass1_type* my_pass1;
00283                 public:
00284                     bool do_clean; // Set to true if cleanup is required.
00285                     task_cleaner(internal::sum_node<Range,Body>* _root, final_sum_type* _temp_body, const Range& _range, Body& _body, start_pass1_type* _pass1)
00286                         : my_root(_root), my_temp_body(_temp_body), my_range(_range), my_body(_body), my_pass1(_pass1), do_clean(true) {}
00287                     ~task_cleaner(){
00288                         if (do_clean) {
                                  // Copy the accumulated state back to the caller's body.
00289                             my_body.assign(my_temp_body->body);
                                  // ~final_sum() destroys its Range, so construct one
                                  // before destroying the task.
00290                             my_temp_body->finish_construction( my_range, NULL );
00291                             my_temp_body->destroy(*my_temp_body);
00292                         }
00293                     }
00294                 };
00295                 task_cleaner my_cleaner(root, temp_body, range, body, &pass1);
00296 
                      // First pass. do_clean is still true here, so the cleaner runs
                      // if this call exits via an exception.
00297                 task::spawn_root_and_wait( pass1 );
00298                 my_cleaner.do_clean = false;
00299                 if( root ) {
                          // Second pass: temp_body is handed over to the tree rooted
                          // at `root`, and the final body is assigned to `body`
                          // through stuff_last.
00300                     root->body = temp_body;
00301                     root->incoming = NULL;
00302                     root->stuff_last = &body;
00303                     task::spawn_root_and_wait( *root );
00304                 } else {
                          // No tree was produced: temp_body already holds the final
                          // result, so let the cleaner copy it out and free temp_body.
00305                     my_cleaner.do_clean = true;
00306                 }
00307             }
00308         }
00309     };
00310 
// First-pass step for one range. Either
//   * processes the whole range as a leaf (running the body in final or
//     pre-scan mode and publishing the body through *sum), or
//   * splits the range: allocates a sum_node for it, a finish_scan
//     continuation and a right-hand start_scan child, then re-uses *this as
//     the left-hand child and returns it for immediate re-execution.
// Returns the next task to run, or NULL for a leaf.
00311     template<typename Range, typename Body, typename Partitioner>
00312     task* start_scan<Range,Body,Partitioner>::execute() {
00313         typedef internal::finish_scan<Range,Body> finish_pass1_type;
              // A task with a parent_sum was made a child of a finish_scan
              // continuation by the split branch below, hence the downcast.
00314         finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00315         // Inspecting p->result.left_sum would ordinarily be a race condition.
00316         // But we inspect it only if we are not a stolen task, in which case we
00317         // know that task assigning to p->result.left_sum has completed.
00318         bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00319         if( treat_as_stolen ) {
00320             // Invocation is for right child that has been really stolen or needs to be virtually stolen
                  // Give this subtree its own final_sum (split from the current
                  // body) and register it with the parent continuation. From here
                  // on this task no longer runs in final-scan mode.
00321 #if __TBB_EXCEPTIONS
00322             p->right_zombie = body = new( allocate_root(my_context) ) final_sum_type(body->body);
00323 #else
00324             p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00325 #endif
00326             is_final = false;
00327         }
00328         task* next_task = NULL;
              // Leaf case: a right child that was not treated as stolen is always
              // processed whole; otherwise the range is processed whole when it
              // cannot be divided or the partitioner asks for it.
00329         if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00330             if( is_final )
00331                 (body->body)( range, final_scan_tag() );
00332             else if( sum )
00333                 (body->body)( range, pre_scan_tag() );
                  // Publish the body used for this range to whoever owns the slot.
00334             if( sum ) 
00335                 *sum = body;
00336             __TBB_ASSERT( !*return_slot, NULL );
00337         } else {
                  // Split case: create the tree node for this range, either as an
                  // additional child of the enclosing node or as a new root.
00338             sum_node_type* result;
00339             if( parent_sum ) 
00340                 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
00341             else
00342 #if __TBB_EXCEPTIONS
00343                 result = new(task::allocate_root(my_context)) sum_node_type(range,/*left_is_final=*/is_final);
00344 #else
00345                 result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
00346 #endif
                  // Continuation that runs after both halves have finished; it
                  // inherits this task's current return_slot and sum slot.
00347             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00348             // Split off right child
00349 #if __TBB_EXCEPTIONS
00350             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result, my_context );
00351 #else
00352             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
00353 #endif
00354             b.is_right_child = true;    
00355             // Left child is recycling of *this.  Must recycle this before spawning b, 
00356             // otherwise b might complete and decrement c.ref_count() to zero, which
00357             // would cause c.execute() to run prematurely.
00358             recycle_as_child_of(c);
00359             c.set_ref_count(2);
00360             c.spawn(b);
                  // Re-target *this as the left half: its result slots now live
                  // inside the freshly created node.
00361             sum = &result->left_sum;
00362             return_slot = &result->left;
00363             is_right_child = false;
00364             next_task = this;
00365             parent_sum = result; 
00366             __TBB_ASSERT( !*return_slot, NULL );
00367         }
00368         return next_task;
00369     } 
00370 } // namespace internal
00372 
00373 // Requirements on Range concept are documented in blocked_range.h
00374 
00392 
00394 
// Parallel scan of `range` with `body`, using a default-constructed
// __TBB_DEFAULT_PARTITIONER. When __TBB_EXCEPTIONS is enabled, a local
// task_group_context is created and passed along. Forwards to
// internal::start_scan::run().
00395 template<typename Range, typename Body>
00396 void parallel_scan( const Range& range, Body& body ) {
00397 #if __TBB_EXCEPTIONS
00398     task_group_context context;
00399 #endif // __TBB_EXCEPTIONS
00400     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()
00401 #if __TBB_EXCEPTIONS
00402         , context
00403 #endif
00404         );
00405 }
00406 
00408 
// Parallel scan of `range` with `body`, using the supplied
// simple_partitioner. When __TBB_EXCEPTIONS is enabled, a local
// task_group_context is created and passed along. Forwards to
// internal::start_scan::run().
00409 template<typename Range, typename Body>
00410 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00411 #if __TBB_EXCEPTIONS
00412     task_group_context context;
00413 #endif // __TBB_EXCEPTIONS
00414     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner
00415 #if __TBB_EXCEPTIONS
00416         , context
00417 #endif
00418         );
00419 }
00420 
00422 
// Parallel scan of `range` with `body`, using the supplied
// auto_partitioner. When __TBB_EXCEPTIONS is enabled, a local
// task_group_context is created and passed along. Forwards to
// internal::start_scan::run().
00423 template<typename Range, typename Body>
00424 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00425 #if __TBB_EXCEPTIONS
00426     task_group_context context;
00427 #endif // __TBB_EXCEPTIONS
00428     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner
00429 #if __TBB_EXCEPTIONS
00430         , context
00431 #endif
00432         );
00433 }
00434 #if __TBB_EXCEPTIONS
00436 
// Parallel scan of `range` with `body`, using the supplied
// simple_partitioner and the caller-provided task_group_context.
// Only declared when __TBB_EXCEPTIONS is enabled.
00437 template<typename Range, typename Body>
00438 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner, tbb::task_group_context & context ) {
00439     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner,context);
00440 }
00441 
00443 
// Parallel scan of `range` with `body`, using the supplied
// auto_partitioner and the caller-provided task_group_context.
// Only declared when __TBB_EXCEPTIONS is enabled.
00444 template<typename Range, typename Body>
00445 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner, tbb::task_group_context & context ) {
00446     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner,context);
00447 }
00448 
00450 
// Parallel scan of `range` with `body`, using a default-constructed
// __TBB_DEFAULT_PARTITIONER and the caller-provided task_group_context.
// Only declared when __TBB_EXCEPTIONS is enabled.
00451 template<typename Range, typename Body>
00452 void parallel_scan( const Range& range, Body& body, tbb::task_group_context & context ) {
00453     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER(),context);
00454 }
00455 #endif
00456 
00458 
00459 } // namespace tbb
00460 
00461 #endif /* __TBB_parallel_scan_H */
00462 

Copyright © 2005-2009 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.