31 #ifndef SHARK_LINALG_BLAS_KERNELS_DEFAULT_TRMV_HPP 32 #define SHARK_LINALG_BLAS_KERNELS_DEFAULT_TRMV_HPP 34 #include "../../matrix_proxy.hpp" 35 #include "../../vector_expression.hpp" 36 #include <boost/mpl/bool.hpp> 38 namespace shark{
namespace blas{
namespace bindings{
41 template<
bool Unit,
class TriangularA,
class V>
43 matrix_expression<TriangularA>
const& A,
44 vector_expression<V> &b,
45 boost::mpl::false_, row_major
47 typedef typename TriangularA::value_type value_typeA;
48 typedef typename V::value_type value_typeV;
49 std::size_t
size = A().size1();
50 std::size_t
const blockSize = 128;
51 std::size_t numBlocks = size/blockSize;
52 if(numBlocks*blockSize < size) ++numBlocks;
61 value_typeV valueStorage[blockSize];
63 for(std::size_t bi = 1; bi <= numBlocks; ++bi){
64 std::size_t startbi = blockSize*(numBlocks-bi);
65 std::size_t sizebi =
std::min(blockSize,size-startbi);
66 dense_vector_adaptor<value_typeA> values(valueStorage,sizebi);
72 for (std::size_t i = 0; i != sizebi; ++i) {
73 std::size_t posi = startbi+i;
75 for(std::size_t j = 0; j < i; ++j){
76 b()(posi) += A()(posi,startbi+j)*values(j);
78 b()(posi) += values(i)*(Unit? value_typeA(1):A()(posi,posi));
81 for(std::size_t posi = startbi+sizebi; posi !=
size; ++posi){
88 template<
bool Unit,
class TriangularA,
class V>
90 matrix_expression<TriangularA>
const& A,
91 vector_expression<V>& b,
92 boost::mpl::true_, row_major
94 std::size_t size = A().size1();
95 for (std::size_t i = 0; i <
size; ++ i) {
104 template<
bool Unit,
class TriangularA,
class V>
106 matrix_expression<TriangularA>
const& A,
107 vector_expression<V> &b,
108 boost::mpl::false_, column_major
111 std::size_t size = A().size1();
112 for (std::size_t n = 1; n <=
size; ++n) {
113 std::size_t i = size-n;
123 template<
bool Unit,
class TriangularA,
class V>
125 matrix_expression<TriangularA>
const& A,
126 vector_expression<V>& b,
127 boost::mpl::true_, column_major
129 typedef typename TriangularA::value_type value_typeA;
130 typedef typename V::value_type value_typeV;
131 std::size_t size = A().size1();
132 std::size_t
const blockSize = 128;
133 std::size_t numBlocks = size/blockSize;
134 if(numBlocks*blockSize < size) ++numBlocks;
143 value_typeV valueStorage[blockSize];
145 for(std::size_t bj = 0; bj != numBlocks; ++bj){
146 std::size_t startbj = blockSize*bj;
147 std::size_t sizebj =
std::min(blockSize,size-startbj);
148 dense_vector_adaptor<value_typeA> values(valueStorage,sizebj);
152 subrange(b,startbj,startbj+sizebj).clear();
154 for (std::size_t j = 0; j != sizebj; ++j) {
155 std::size_t posj = startbj+j;
156 for(std::size_t i = 0; i < j; ++i){
157 b()(startbj+i) += A()(startbj+i,posj)*values(j);
159 b()(posj) += values(j)*(Unit? 1.0:A()(posj,posj));
162 for(std::size_t posj = startbj+sizebj; posj !=
size; ++posj){
169 template <
bool Upper,
bool Unit,
typename TriangularA,
typename V>
171 matrix_expression<TriangularA>
const& A,
172 vector_expression<V> & b,
177 trmv_impl<Unit>(A, b, boost::mpl::bool_<Upper>(),
typename TriangularA::orientation());