# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif // WIN32_LEAN_AND_MEAN
template< class T > int SparseMatrix<T>::UseAlloc=0;
internalAllocator.set( blockSize );
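// A minimal usage sketch (not part of the original file): SetAllocator with a
// positive block size routes row storage through the pooled internalAllocator,
// while SetAllocator(0) falls back to per-row malloc/free. The block size here
// is an arbitrary illustrative value.
//
//   SparseMatrix<float>::SetAllocator( 1<<12 ); // pool entries in 4096-element blocks
//   SparseMatrix<float> M;
//   M.Resize( 8 );
//   M.SetRowSize( 0 , 4 ); // row storage now comes from the pool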
_maxEntriesPerRow = 0;
if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
for( int i=0 ; i<rows ; i++ )
for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
else Resize( M.rows );
for( int i=0 ; i<rows ; i++ )
FILE* fp = fopen( fileName , "wb" );
if( !fp ) return false;
bool ret = write( fp );
FILE* fp = fopen( fileName , "rb" );
if( !fp ) return false;
bool ret = read( fp );
if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
for( int i=0 ; i<rows ; i++ )
  if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
for( int i=0 ; i<rows ; i++ )
  if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
free( m_ppElements );
rowSizes = ( int* )malloc( sizeof( int ) * r );
memset( rowSizes , 0 , sizeof( int ) * r );
_maxEntriesPerRow = 0;
if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
free( m_ppElements );
rowSizes = ( int* )malloc( sizeof( int ) * r );
memset( rowSizes , 0 , sizeof( int ) * r );
for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
_maxEntriesPerRow = e;
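// Note on the contiguous Resize path above: a single allocation backs all
// rows, and each row pointer is offset from the previous one by the fixed
// per-row capacity e, so m_ppElements[i] == m_ppElements[0] + i*e.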
if( count>_maxEntriesPerRow )
  POISSON_THROW_EXCEPTION( pcl::poisson::PoissonBadArgumentException,
    "Attempted to set row size on contiguous matrix larger than max row size: (requested) " << count << " > (maximum) " << _maxEntriesPerRow );
rowSizes[row] = count;
else if( row>=0 && row<rows )
if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements( count );
if( rowSizes[row] ) free( m_ppElements[row] );
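// SetRowSize thus has two regimes: on a contiguous matrix it only validates
// the request against the fixed per-row capacity, while on a ragged matrix it
// (re)allocates the row, via the pooled allocator when UseAlloc is set and via
// malloc/free otherwise.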
Resize( this->m_N , this->m_M );
for( int ij=0 ; ij<Min( this->Rows() , this->Columns() ) ; ij++ )
(*this)(ij,ij) = T(1);
for( int i=0 ; i<this->Rows() ; i++ )
for( int ii=0 ; ii<rowSizes[i] ; ii++ ){ m_ppElements[i][ii].Value *= V; }
for( int i=0 ; i<R.Rows() ; i++ ){
for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
int N = m_ppElements[i][ii].N;
T Value = m_ppElements[i][ii].Value;
for( int i=0 ; i<rows ; i++ )
for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
temp += m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
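// The inner loop above accumulates row i's dot product with V:
// temp = sum over ii of Value(i,ii) * V[column(i,ii)], i.e. out = M * V.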
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<rows ; i++ )
for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
for( int i=0 ; i<this->Rows() ; i++ )
for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
M( m_ppElements[i][ii].N , i ) = m_ppElements[i][ii].Value;
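// Transpose simply mirrors each stored entry: the value at (i, N) in this
// matrix is written to (N, i) in M.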
double delta_new = 0 , delta_0;
for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
if( delta_new<eps ) return 0;
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
double dDotQ = 0 , alpha = 0;
for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
alpha = delta_new / dDotQ;
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
M.Multiply( solution , r , threads );
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2( alpha );
double delta_old = delta_new , beta;
delta_new = 0;
for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
beta = delta_new / delta_old;
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
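// Summary of the conjugate-gradient update implemented above (standard CG,
// stated here for reference): with residual r and search direction d,
//   alpha = (r . r) / (d . Ad)      -- step length
//   x    += alpha * d               -- solution update
//   r    -= alpha * Ad              -- residual update
//   beta  = (r_new . r_new) / (r_old . r_old)
//   d     = r + beta * d            -- new conjugate direction
// Iteration stops once (r . r) falls below eps * delta_0.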
solution.Resize( M.Columns() );
for( i=0 ; i<iters && rDotR>eps ; i++ ){
alpha = rDotR / d.Dot( Md );
for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ){
for( int ii=0 ; ii<SparseMatrix<T>::rowSizes[i] ; ii++ ){
const T2* in = &In[0];
for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
const T2& in_i_ = in[i];
for( ; temp!=end ; temp++ )
if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
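// In this symmetric multiply only one triangle of the matrix is stored, so
// (judging by the surrounding loop structure) each stored entry contributes
// in both directions, to out[i] and to out[temp->N]. When addDCTerm is set,
// the mean of the input (dcTerm) is folded into every output entry, pinning
// the constant (DC) mode that a Laplacian-like operator leaves undetermined.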
const T2* in = &In[0];
int threads = OutScratch.threads();
#pragma omp parallel for num_threads( threads ) reduction( + : dcTerm )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
memset( out , 0 , sizeof( T2 ) * dim );
const T2& in_i_ = in[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
memset( out , 0 , sizeof( T2 ) * dim );
const T2& in_i_ = in[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
const T2* in = &In[0];
int threads = OutScratch.size();
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
const T2& in_i_ = in[i];
for( ; temp!=end ; temp++ )
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
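// Pattern used by the scratch-based Multiply overloads above: each OpenMP
// thread scatters into its own zero-initialized scratch row (OutScratch[t]),
// so no two threads ever write the same output entry; a second parallel pass
// then reduces the per-thread rows into the final output. This trades
// dim*threads extra storage for race-free accumulation without atomics.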
#if defined _WIN32 && !defined __MINGW32__
#ifndef _AtomicIncrement_
#define _AtomicIncrement_
float newValue = *ptr;
LONG& _newValue = *( (LONG*)&newValue );
_oldValue = _newValue;
_newValue = InterlockedCompareExchange( (LONG*)ptr , _newValue , _oldValue );
if( _newValue==_oldValue ) break;
double newValue = *ptr;
LONGLONG& _newValue = *( (LONGLONG*)&newValue );
_oldValue = _newValue;
_newValue = InterlockedCompareExchange64( (LONGLONG*)ptr , _newValue , _oldValue );
while( _newValue!=_oldValue );
#endif // _AtomicIncrement_
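// Both AtomicIncrement variants above implement a classic compare-and-swap
// retry loop: reinterpret the float/double bits as a LONG/LONGLONG, attempt
// InterlockedCompareExchange(64) against the value observed at the start of
// the iteration, and retry if another thread changed *ptr in the meantime.
// The exchange succeeded exactly when the returned value equals _oldValue.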
const float* in = &In[0];
float* out = &Out[0];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=partition[t] ; i<partition[t+1] ; i++ )
const float& in_i = in[i];
for( ; temp!=end ; temp++ )
float v = temp->Value;
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<A.rows ; i++ )
const float& in_i = in[i];
for( ; temp!=end ; temp++ )
float v = temp->Value;
const double* in = &In[0];
double* out = &Out[0];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=partition[t] ; i<partition[t+1] ; i++ )
const double& in_i = in[i];
for( ; temp!=end ; temp++ )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<A.rows ; i++ )
const double& in_i = in[i];
for( ; temp!=end ; temp++ )
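// The float and double overloads above follow the same scheme: rows are
// either split across threads by an explicit partition or iterated directly,
// and concurrent writes to the shared output are presumably reconciled via
// the AtomicIncrement helpers defined above, rather than per-thread scratch
// buffers.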
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
std::vector< int > partition( threads+1 );
for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=0 ; i<A.rows ; i++ )
if( _eCount*threads>=eCount*(t+1) )
partition[threads] = A.rows;
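// The partition built above balances work by nonzero count rather than by row
// count: thread t's range ends at the first row where the running entry total
// _eCount reaches t+1 threads' fair share of eCount, so rows with many
// entries are spread evenly across threads.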
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
double delta_new = 0 , delta_0;
for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
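// Every 50th iteration (ii%50 == 49) the residual is recomputed from scratch
// as r = b - Ax instead of the cheap update r -= alpha*q, limiting the
// floating-point drift that accumulates in the incrementally updated residual.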
double delta_old = delta_new;
delta_new = 0;
for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
#endif // _WIN32 && !__MINGW32__
int threads = scratch.threads();
if( reset ) x.Resize( dim );
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
double delta_new = 0 , delta_0;
A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
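// When solveNormal is set the solver works on the normal equations
// A^T A x = A^T b (A is symmetric here, so A A x = A b): the three Multiply
// calls above form A(Ax) in r and A b in temp, and the initial residual and
// search direction are taken as temp - r = Ab - AAx.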
A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
else A.Multiply( d , q , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
double delta_old = delta_new;
delta_new = 0;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
else A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
if( threads<1 ) threads = 1;
if( threads>1 ) outScratch.resize( threads , dim );
if( reset ) x.Resize( dim );
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
double delta_new = 0 , delta_0;
if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
else A.Multiply( d , q , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
double delta_old = delta_new;
delta_new = 0;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
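// This overload mirrors the scratch-based solver above but degrades
// gracefully: with a single thread it calls the serial Multiply overloads and
// skips allocating the per-thread outScratch buffers entirely.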
for( int i=0 ; i<iters ; i++ )
for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += ( b[j]-Md[j] ) / diagonal[j];
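// The loop above is a Jacobi-style relaxation: on each sweep (with Md
// presumably refreshed as M*solution between sweeps) every coordinate moves
// by its diagonally scaled residual, solution[j] += (b[j] - Md[j]) /
// diagonal[j]. It converges more slowly than the conjugate-gradient solvers
// but needs no search directions.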
for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )