31 # ifndef WIN32_LEAN_AND_MEAN
32 # define WIN32_LEAN_AND_MEAN
33 # endif // WIN32_LEAN_AND_MEAN
// Per-instantiation flag selecting the row-storage strategy: when non-zero,
// rows are carved from the shared internalAllocator pool (see SetRowSize,
// which calls internalAllocator.newElements when UseAlloc is set); when zero,
// rows are managed with malloc/free. Defaults to malloc/free.
53 template<
class T>
int SparseMatrix<T>::UseAlloc=0;
// Shared pool allocator for MatrixEntry<T> rows, one per template
// instantiation. Configured via SetAllocator (internalAllocator.set(blockSize))
// and used by SetRowSize when UseAlloc is non-zero. NOTE(review): as a static
// member it is shared by all matrices of the same T — presumably not
// thread-safe across concurrent row allocations; verify before parallel use.
54 template<
class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
61 internalAllocator.set(blockSize);
73 _maxEntriesPerRow = 0;
85 if( M._contiguous )
Resize( M.
rows , M._maxEntriesPerRow );
87 for(
int i=0 ; i<
rows ; i++ )
97 for(
int i=0 ; i<rows ; i++ ) e +=
int( rowSizes[i] );
103 if( M._contiguous ) Resize( M.
rows , M._maxEntriesPerRow );
104 else Resize( M.
rows );
105 for(
int i=0 ; i<rows ; i++ )
119 FILE* fp = fopen( fileName ,
"wb" );
120 if( !fp )
return false;
121 bool ret =
write( fp );
128 FILE* fp = fopen( fileName ,
"rb" );
129 if( !fp )
return false;
130 bool ret =
read( fp );
137 if( fwrite( &rows ,
sizeof(
int ) , 1 , fp )!=1 )
return false;
138 if( fwrite( rowSizes ,
sizeof(
int ) , rows , fp )!=rows )
return false;
139 for(
int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] )
return false;
146 if( fread( &r ,
sizeof(
int ) , 1 , fp )!=1 )
return false;
148 if( fread( rowSizes ,
sizeof(
int ) , rows , fp )!=rows )
return false;
149 for(
int i=0 ; i<rows ; i++ )
154 if( fread( (*
this)[i] ,
sizeof(
MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] )
return false;
167 if( _contiguous ){
if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
168 else for(
int i=0 ; i<rows ; i++ ){
if( rowSizes[i] ) free( m_ppElements[i] ); }
169 free( m_ppElements );
175 rowSizes = (
int* )malloc(
sizeof(
int ) * r );
176 memset( rowSizes , 0 ,
sizeof(
int ) * r );
180 _maxEntriesPerRow = 0;
188 if( _contiguous ){
if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
189 else for(
int i=0 ; i<rows ; i++ ){
if( rowSizes[i] ) free( m_ppElements[i] ); }
190 free( m_ppElements );
196 rowSizes = (
int* )malloc(
sizeof(
int ) * r );
197 memset( rowSizes , 0 ,
sizeof(
int ) * r );
200 for(
int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
203 _maxEntriesPerRow = e;
211 if( count>_maxEntriesPerRow ) fprintf( stderr ,
"[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
212 rowSizes[row] = count;
214 else if( row>=0 && row<rows )
216 if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
219 if( rowSizes[row] ) free( m_ppElements[row] );
229 Resize(this->m_N, this->m_M);
236 for(
int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
237 (*
this)(ij,ij) = T(1);
251 for (
int i=0; i<this->Rows(); i++)
253 for(
int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;}
262 for(
int i=0; i<R.Rows(); i++){
263 for(
int ii=0;ii<m_ppElements[i].size();ii++){
264 int N=m_ppElements[i][ii].N;
265 T Value=m_ppElements[i][ii].Value;
280 for (
int i=0; i<rows; i++)
283 for(
int ii=0;ii<rowSizes[i];ii++){
284 temp+=m_ppElements[i][ii].Value * V.
m_pV[m_ppElements[i][ii].N];
295 #pragma omp parallel for num_threads( threads ) schedule( static )
296 for(
int i=0 ; i<rows ; i++ )
300 for(
int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.
m_pV[m_ppElements[i][j].N];
322 for (
int i=0; i<this->Rows(); i++)
324 for(
int ii=0;ii<m_ppElements[i].size();ii++){
325 M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
345 double delta_new , delta_0;
346 for(
int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.
m_pV[i] * r.m_pV[i];
348 if( delta_new<eps )
return 0;
352 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
355 double dDotQ = 0 , alpha = 0;
357 alpha = delta_new / dDotQ;
358 #pragma omp parallel
for num_threads( threads ) schedule(
static )
359 for(
int i=0 ; i<r.Dimensions() ; i++ ) solution.
m_pV[i] += d.
m_pV[i] * T2( alpha );
363 M.
Multiply( solution , r , threads );
367 #pragma omp parallel for num_threads( threads ) schedule( static )
368 for(
int i=0 ; i<r.Dimensions() ; i++ ) r.
m_pV[i] = r.m_pV[i] - q.
m_pV[i] * T2(alpha);
370 double delta_old = delta_new , beta;
372 for(
int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
373 beta = delta_new / delta_old;
374 #pragma omp parallel
for num_threads( threads ) schedule(
static )
375 for(
int i=0 ; i<d.
Dimensions() ; i++ ) d.
m_pV[i] = r.m_pV[i] + d.
m_pV[i] * T2( beta );
389 solution.
Resize(M.Columns());
394 for(i=0;i<iters && rDotR>eps;i++){
397 alpha=rDotR/d.
Dot(Md);
423 for(
int i=0; i<SparseMatrix<T>::rows; i++){
424 for(
int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
438 const T2* in = &In[0];
443 for(
int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
450 const T2& in_i_ = in[i];
452 for( ; temp!=end ; temp++ )
461 if( addDCTerm )
for(
int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
468 const T2* in = &In[0];
469 int threads = OutScratch.
threads();
473 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
474 for(
int t=0 ; t<threads ; t++ )
476 T2* out = OutScratch[t];
477 memset( out , 0 ,
sizeof( T2 ) * dim );
480 const T2& in_i_ = in[i];
495 #pragma omp parallel for num_threads( threads ) schedule( static )
496 for(
int i=0 ; i<dim ; i++ )
499 for(
int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
505 #pragma omp parallel for num_threads( threads )
506 for(
int t=0 ; t<threads ; t++ )
508 T2* out = OutScratch[t];
509 memset( out , 0 ,
sizeof( T2 ) * dim );
512 const T2& in_i_ = in[i];
525 #pragma omp parallel for num_threads( threads ) schedule( static )
526 for(
int i=0 ; i<dim ; i++ )
529 for(
int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
539 const T2* in = &In[0];
540 int threads = OutScratch.size();
541 #pragma omp parallel for num_threads( threads )
542 for(
int t=0 ; t<threads ; t++ )
543 for(
int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
544 #pragma omp parallel for num_threads( threads )
545 for(
int t=0 ; t<threads ; t++ )
547 T2* out = OutScratch[t];
548 for(
int i=bounds[t] ; i<bounds[t+1] ; i++ )
552 const T2& in_i_ = in[i];
554 for( ; temp!=end ; temp++ )
564 #pragma omp parallel for num_threads( threads ) schedule( static )
569 for(
int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
572 #if defined _WIN32 && !defined __MINGW32__
573 #ifndef _AtomicIncrement_
574 #define _AtomicIncrement_
575 inline void AtomicIncrement(
volatile float* ptr ,
float addend )
577 float newValue = *ptr;
578 LONG& _newValue = *( (LONG*)&newValue );
582 _oldValue = _newValue;
584 _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
585 if( _newValue==_oldValue )
break;
588 inline void AtomicIncrement(
volatile double* ptr ,
double addend )
591 double newValue = *ptr;
592 LONGLONG& _newValue = *( (LONGLONG*)&newValue );
596 _oldValue = _newValue;
598 _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
600 while( _newValue!=_oldValue );
602 #endif // _AtomicIncrement_
604 void MultiplyAtomic(
const SparseSymmetricMatrix< T >& A ,
const Vector< float >& In , Vector< float >& Out ,
int threads ,
const int* partition=NULL )
607 const float* in = &In[0];
608 float* out = &Out[0];
610 #pragma omp parallel for num_threads( threads )
611 for(
int t=0 ; t<threads ; t++ )
612 for(
int i=partition[t] ; i<partition[t+1] ; i++ )
614 const MatrixEntry< T >* temp = A[i];
615 const MatrixEntry< T >* end = temp + A.rowSizes[i];
616 const float& in_i = in[i];
618 for( ; temp!=end ; temp++ )
621 float v = temp->Value;
623 AtomicIncrement( out+j , v * in_i );
625 AtomicIncrement( out+i , out_i );
628 #pragma omp parallel for num_threads( threads )
629 for(
int i=0 ; i<A.rows ; i++ )
631 const MatrixEntry< T >* temp = A[i];
632 const MatrixEntry< T >* end = temp + A.rowSizes[i];
633 const float& in_i = in[i];
635 for( ; temp!=end ; temp++ )
638 float v = temp->Value;
640 AtomicIncrement( out+j , v * in_i );
642 AtomicIncrement( out+i , out_i );
646 void MultiplyAtomic(
const SparseSymmetricMatrix< T >& A ,
const Vector< double >& In , Vector< double >& Out ,
int threads ,
const int* partition=NULL )
649 const double* in = &In[0];
650 double* out = &Out[0];
653 #pragma omp parallel for num_threads( threads )
654 for(
int t=0 ; t<threads ; t++ )
655 for(
int i=partition[t] ; i<partition[t+1] ; i++ )
657 const MatrixEntry< T >* temp = A[i];
658 const MatrixEntry< T >* end = temp + A.rowSizes[i];
659 const double& in_i = in[i];
661 for( ; temp!=end ; temp++ )
666 AtomicIncrement( out+j , v * in_i );
668 AtomicIncrement( out+i , out_i );
671 #pragma omp parallel for num_threads( threads )
672 for(
int i=0 ; i<A.rows ; i++ )
674 const MatrixEntry< T >* temp = A[i];
675 const MatrixEntry< T >* end = temp + A.rowSizes[i];
676 const double& in_i = in[i];
678 for( ; temp!=end ; temp++ )
683 AtomicIncrement( out+j , v * in_i );
685 AtomicIncrement( out+i , out_i );
691 int SparseSymmetricMatrix< T >::SolveAtomic(
const SparseSymmetricMatrix< T >& A ,
const Vector< T2 >& b ,
int iters , Vector< T2 >& x , T2 eps ,
int reset ,
int threads ,
bool solveNormal )
694 int dim = b.Dimensions();
700 Vector< T2 > r( dim ) , d( dim ) , q( dim );
702 if( solveNormal ) temp.Resize( dim );
703 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
704 const T2* _b = &b[0];
706 std::vector< int > partition( threads+1 );
709 for(
int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
711 #pragma omp parallel
for num_threads( threads )
712 for(
int t=0 ; t<threads ; t++ )
715 for(
int i=0 ; i<A.rows ; i++ )
717 _eCount += A.rowSizes[i];
718 if( _eCount*threads>=eCount*(t+1) )
725 partition[threads] = A.rows;
729 MultiplyAtomic( A , x , temp , threads , &partition[0] );
730 MultiplyAtomic( A , temp , r , threads , &partition[0] );
731 MultiplyAtomic( A , b , temp , threads , &partition[0] );
732 #pragma omp parallel for num_threads( threads ) schedule( static )
733 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
737 MultiplyAtomic( A , x , r , threads , &partition[0] );
738 #pragma omp parallel for num_threads( threads ) schedule( static )
739 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
741 double delta_new = 0 , delta_0;
742 for(
size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
746 fprintf( stderr ,
"[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
750 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
752 if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
753 else MultiplyAtomic( A , d , q , threads , &partition[0] );
755 for(
int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
756 T2 alpha = T2( delta_new / dDotQ );
757 #pragma omp parallel for num_threads( threads ) schedule( static )
758 for(
int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
759 if( (ii%50)==(50-1) )
762 if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
763 else MultiplyAtomic( A , x , r , threads , &partition[0] );
764 #pragma omp parallel for num_threads( threads ) schedule( static )
765 for(
int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
768 #pragma omp parallel for num_threads( threads ) schedule( static )
769 for(
int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
771 double delta_old = delta_new;
773 for(
size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
774 T2 beta = T2( delta_new / delta_old );
775 #pragma omp parallel for num_threads( threads ) schedule( static )
776 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
780 #endif // _WIN32 && !__MINGW32__
785 int threads = scratch.
threads();
789 if( reset ) x.
Resize( dim );
790 if( solveNormal ) temp.Resize( dim );
791 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
792 const T2* _b = &b[0];
794 double delta_new = 0 , delta_0;
797 A.
Multiply( x , temp , scratch , addDCTerm ) , A.
Multiply( temp , r , scratch , addDCTerm ) , A.
Multiply( b , temp , scratch , addDCTerm );
798 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
799 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
803 A.
Multiply( x , r , scratch , addDCTerm );
804 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
805 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
810 fprintf( stderr ,
"[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
814 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
816 if( solveNormal ) A.
Multiply( d , temp , scratch , addDCTerm ) , A.
Multiply( temp , q , scratch , addDCTerm );
817 else A.
Multiply( d , q , scratch , addDCTerm );
819 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
820 for(
int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
821 T2 alpha = T2( delta_new / dDotQ );
822 double delta_old = delta_new;
824 if( (ii%50)==(50-1) )
826 #pragma omp parallel for num_threads( threads )
827 for(
int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
829 if( solveNormal ) A.
Multiply( x , temp , scratch , addDCTerm ) , A.
Multiply( temp , r , scratch , addDCTerm );
830 else A.
Multiply( x , r , scratch , addDCTerm );
831 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
832 for(
int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
835 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
836 for(
int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
838 T2 beta = T2( delta_new / delta_old );
839 #pragma omp parallel for num_threads( threads )
840 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
851 if( threads<1 ) threads = 1;
852 if( threads>1 ) outScratch.
resize( threads , dim );
853 if( reset ) x.
Resize( dim );
856 if( solveNormal ) temp.
Resize( dim );
857 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
858 const T2* _b = &b[0];
860 double delta_new = 0 , delta_0;
864 if( threads>1 ) A.
Multiply( x , temp , outScratch , addDCTerm ) , A.
Multiply( temp , r , outScratch , addDCTerm ) , A.
Multiply( b , temp , outScratch , addDCTerm );
866 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
867 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
871 if( threads>1 ) A.
Multiply( x , r , outScratch , addDCTerm );
872 else A.
Multiply( x , r , addDCTerm );
873 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
874 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
880 fprintf( stderr ,
"[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
884 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
888 if( threads>1 ) A.
Multiply( d , temp , outScratch , addDCTerm ) , A.
Multiply( temp , q , outScratch , addDCTerm );
889 else A.
Multiply( d , temp , addDCTerm ) , A.
Multiply( temp , q , addDCTerm );
893 if( threads>1 ) A.
Multiply( d , q , outScratch , addDCTerm );
894 else A.
Multiply( d , q , addDCTerm );
897 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
898 for(
int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
899 T2 alpha = T2( delta_new / dDotQ );
900 double delta_old = delta_new;
903 if( (ii%50)==(50-1) )
905 #pragma omp parallel for num_threads( threads )
906 for(
int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
910 if( threads>1 ) A.
Multiply( x , temp , outScratch , addDCTerm ) , A.
Multiply( temp , r , outScratch , addDCTerm );
911 else A.
Multiply( x , temp , addDCTerm ) , A.
Multiply( temp , r , addDCTerm );
915 if( threads>1 ) A.
Multiply( x , r , outScratch , addDCTerm );
916 else A.
Multiply( x , r , addDCTerm );
918 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
919 for(
int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
923 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
924 for(
int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
927 T2 beta = T2( delta_new / delta_old );
928 #pragma omp parallel for num_threads( threads )
929 for(
int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
946 for(
int i=0 ; i<iters ; i++ )
953 for(
int j=0 ; j<int(M.
rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
962 for(
int i=0 ; i<SparseMatrix<T>::rows ; i++ )
SparseMatrix< T > & operator*=(const T &V)
void SetRowSize(int row, int count)
SparseMatrix< T > Transpose() const
static int UseAllocator(void)
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
MatrixEntry< T > ** m_ppElements
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
T Dot(const Vector &V) const
void getDiagonal(Vector< T2 > &diagonal) const
SparseMatrix< T > operator*(const T &V) const
void write(std::ostream &stream, Type value)
Function for writing data to a stream.
void resize(int threads, int dim)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
bool write(FILE *fp) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
size_t Dimensions() const
Vector< T2 > operator*(const Vector< T2 > &V) const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)