Point Cloud Library (PCL)  1.3.1
entropy_range_coder.hpp
Go to the documentation of this file.
00001 /*
00002  * Software License Agreement (BSD License)
00003  *
00004  *  Copyright (c) 2011, Willow Garage, Inc.
00005  *  All rights reserved.
00006  *
00007  *  Redistribution and use in source and binary forms, with or without
00008  *  modification, are permitted provided that the following conditions
00009  *  are met:
00010  *
00011  *   * Redistributions of source code must retain the above copyright
00012  *     notice, this list of conditions and the following disclaimer.
00013  *   * Redistributions in binary form must reproduce the above
00014  *     copyright notice, this list of conditions and the following
00015  *     disclaimer in the documentation and/or other materials provided
00016  *     with the distribution.
00017  *   * Neither the name of Willow Garage, Inc. nor the names of its
00018  *     contributors may be used to endorse or promote products derived
00019  *     from this software without specific prior written permission.
00020  *
00021  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00024  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00025  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00026  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00027  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00028  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00029  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00030  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00031  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00032  *  POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  *
00035  * range coder based on Dmitry Subbotin's carry-less implementation (http://www.compression.ru/ds/)
00036  * Added optimized symbol lookup and fixed/static frequency tables
00037  *
00038  * Author: Julius Kammerl (julius@kammerl.de)
00039  */
00040 
00041 #ifndef __PCL_IO_RANGECODING__HPP
00042 #define __PCL_IO_RANGECODING__HPP
00043 
00044 #include "pcl/compression/entropy_range_coder.h"
00045 
00046 #include <map>
00047 #include <iostream>
00048 #include <vector>
00049 #include <string.h>
00050 #include <algorithm>
00051 #include <stdio.h>
00052 
00053 namespace pcl
00054 {
00055 
00056   using namespace std;
00057 
00059   unsigned long
00060   AdaptiveRangeCoder::encodeCharVectorToStream (const std::vector<char>& inputByteVector_arg,
00061                                                 std::ostream& outputByteStream_arg)
00062   {
00063     DWord freq[257];
00064     uint8_t ch;
00065     unsigned int i, j, f;
00066     char out;
00067 
00068     // define limits
00069     const DWord top = (DWord)1 << 24;
00070     const DWord bottom = (DWord)1 << 16;
00071     const DWord maxRange = (DWord)1 << 16;
00072 
00073     DWord low, range;
00074 
00075     unsigned int readPos;
00076     unsigned int input_size;
00077 
00078     input_size = inputByteVector_arg.size ();
00079 
00080     // init output vector
00081     outputCharVector_.clear();
00082     outputCharVector_.reserve(sizeof(char) * input_size);
00083 
00084     readPos = 0;
00085 
00086     low = 0;
00087     range = (DWord)-1;
00088 
00089     // initialize cumulative frequency table
00090     for (i = 0; i < 257; i++)
00091       freq[i] = i;
00092 
00093     // scan input
00094     while (readPos < input_size)
00095     {
00096 
00097       // read byte
00098       ch = inputByteVector_arg[readPos++];
00099 
00100       // map range
00101       low += freq[ch] * (range /= freq[256]);
00102       range *= freq[ch + 1] - freq[ch];
00103 
00104       // check range limits
00105       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00106       {
00107         out = low >> 24;
00108         range <<= 8;
00109         low <<= 8;
00110         outputCharVector_.push_back(out);
00111       }
00112 
00113       // update frequency table
00114       for (j = ch + 1; j < 257; j++)
00115         freq[j]++;
00116 
00117       // detect overflow
00118       if (freq[256] >= maxRange)
00119       {
00120         // rescale
00121         for (f = 1; f <= 256; f++)
00122         {
00123           freq[f] /= 2;
00124           if (freq[f] <= freq[f - 1])
00125             freq[f] = freq[f - 1] + 1;
00126         }
00127       }
00128 
00129     }
00130 
00131     // flush remaining data
00132     for (i = 0; i < 4; i++)
00133     {
00134       out = low >> 24;
00135       outputCharVector_.push_back(out);
00136       low <<= 8;
00137     }
00138 
00139     // write to stream
00140     outputByteStream_arg.write (&outputCharVector_[0], outputCharVector_.size());
00141 
00142     return (unsigned long)outputCharVector_.size();
00143 
00144   }
00145 
00147   unsigned long
00148   AdaptiveRangeCoder::decodeStreamToCharVector (std::istream& inputByteStream_arg,
00149                                                 std::vector<char>& outputByteVector_arg)
00150   {
00151     uint8_t ch;
00152     DWord freq[257];
00153     unsigned int i, j, f;
00154 
00155     // define limits
00156     const DWord top = (DWord)1 << 24;
00157     const DWord bottom = (DWord)1 << 16;
00158     const DWord maxRange = (DWord)1 << 16;
00159 
00160     DWord low, range;
00161     DWord code;
00162 
00163     unsigned int outputBufPos;
00164     unsigned int output_size = outputByteVector_arg.size ();
00165 
00166     unsigned long streamByteCount;
00167 
00168     streamByteCount = 0;
00169 
00170     outputBufPos = 0;
00171 
00172     code = 0;
00173     low = 0;
00174     range = (DWord)-1;
00175 
00176     // init decoding
00177     for (i = 0; i < 4; i++)
00178     {
00179       inputByteStream_arg.read ((char*)&ch, sizeof(char));
00180       streamByteCount += sizeof(char);
00181       code = (code << 8) | ch;
00182     }
00183 
00184     // init cumulative frequency table
00185     for (i = 0; i <= 256; i++)
00186       freq[i] = i;
00187 
00188     // decoding loop
00189     for (i = 0; i < output_size; i++)
00190     {
00191       uint8_t symbol = 0;
00192       uint8_t sSize = 256 / 2;
00193 
00194       // map code to range
00195       DWord count = (code - low) / (range /= freq[256]);
00196 
00197       // find corresponding symbol
00198       while (sSize > 0)
00199       {
00200         if (freq[symbol + sSize] <= count)
00201         {
00202           symbol += sSize;
00203         }
00204         sSize /= 2;
00205       }
00206 
00207       // output symbol
00208       outputByteVector_arg[outputBufPos++] = symbol;
00209 
00210       // update range limits
00211       low += freq[symbol] * range;
00212       range *= freq[symbol + 1] - freq[symbol];
00213 
00214       // decode range limits
00215       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00216       {
00217         inputByteStream_arg.read ((char*)&ch, sizeof(char));
00218         streamByteCount += sizeof(char);
00219         code = code << 8 | ch;
00220         range <<= 8;
00221         low <<= 8;
00222       }
00223 
00224       // update cumulative frequency table
00225       for (j = symbol + 1; j < 257; j++)
00226         freq[j]++;
00227 
00228       // detect overflow
00229       if (freq[256] >= maxRange)
00230       {
00231         // rescale
00232         for (f = 1; f <= 256; f++)
00233         {
00234           freq[f] /= 2;
00235           if (freq[f] <= freq[f - 1])
00236             freq[f] = freq[f - 1] + 1;
00237         }
00238       }
00239     }
00240 
00241     return streamByteCount;
00242 
00243   }
00244 
00246   unsigned long
00247   StaticRangeCoder::encodeIntVectorToStream (std::vector<unsigned int>& inputIntVector_arg,
00248                                              std::ostream& outputByteStream_arg)
00249   {
00250 
00251     unsigned int inputsymbol;
00252     unsigned int i, f;
00253     char out;
00254 
00255     unsigned long frequencyTableSize;
00256     uint8_t frequencyTableByteSize;
00257 
00258     // define numerical limits
00259     const uint64_t top = (uint64_t)1 << 56;
00260     const uint64_t bottom = (uint64_t)1 << 48;
00261     const uint64_t maxRange = (uint64_t)1 << 48;
00262 
00263     unsigned long input_size = inputIntVector_arg.size ();
00264     uint64_t low, range;
00265 
00266     unsigned int inputSymbol;
00267 
00268     unsigned int readPos;
00269 
00270     unsigned long streamByteCount;
00271 
00272     streamByteCount = 0;
00273 
00274     // init output vector
00275     outputCharVector_.clear();
00276     outputCharVector_.reserve(sizeof(char) * input_size * 2);
00277 
00278     frequencyTableSize = 1;
00279 
00280     readPos = 0;
00281 
00282     // calculate frequency table
00283     cFreqTable_[0] = cFreqTable_[1] = 0;
00284     while (readPos < input_size)
00285     {
00286       inputSymbol = inputIntVector_arg[readPos++];
00287 
00288       if (inputSymbol + 1 >= frequencyTableSize)
00289       {
00290         // frequency table is to small -> adaptively extend it
00291         unsigned long oldfrequencyTableSize;
00292         oldfrequencyTableSize = frequencyTableSize;
00293 
00294         do
00295         {
00296           // increase frequency table size by factor 2
00297           frequencyTableSize <<= 1;
00298         } while (inputSymbol + 1 > frequencyTableSize);
00299 
00300         if (cFreqTable_.size () < frequencyTableSize + 1)
00301         {
00302           // resize frequency vector
00303           cFreqTable_.resize (frequencyTableSize + 1);
00304         }
00305 
00306         // init new frequency range with zero
00307         memset (&cFreqTable_[oldfrequencyTableSize + 1], 0,
00308                 sizeof(uint64_t) * (frequencyTableSize - oldfrequencyTableSize));
00309       }
00310       cFreqTable_[inputSymbol + 1]++;
00311     }
00312     frequencyTableSize++;
00313 
00314     // convert to cumulative frequency table
00315     for (f = 1; f < frequencyTableSize; f++)
00316     {
00317       cFreqTable_[f] = cFreqTable_[f - 1] + cFreqTable_[f];
00318       if (cFreqTable_[f] <= cFreqTable_[f - 1])
00319         cFreqTable_[f] = cFreqTable_[f - 1] + 1;
00320     }
00321 
00322     // rescale if numerical limits are reached
00323     while (cFreqTable_[frequencyTableSize - 1] >= maxRange)
00324     {
00325       for (f = 1; f < cFreqTable_.size (); f++)
00326       {
00327         cFreqTable_[f] /= 2;
00328         ;
00329         if (cFreqTable_[f] <= cFreqTable_[f - 1])
00330           cFreqTable_[f] = cFreqTable_[f - 1] + 1;
00331       }
00332     }
00333 
00334     // calculate amount of bytes per frequeny table entry
00335     frequencyTableByteSize = (uint8_t)ceil (Log2 (cFreqTable_[frequencyTableSize - 1]) / 8.0);
00336 
00337     // write size of frequency table to output stream
00338     outputByteStream_arg.write ((const char *)&frequencyTableSize, sizeof(frequencyTableSize));
00339     outputByteStream_arg.write ((const char *)&frequencyTableByteSize, sizeof(frequencyTableByteSize));
00340 
00341     streamByteCount += sizeof(frequencyTableSize)+sizeof(frequencyTableByteSize);
00342 
00343     // write cumulative  frequency table to output stream
00344     for (f = 1; f < frequencyTableSize; f++)
00345     {
00346       outputByteStream_arg.write ((const char *)&cFreqTable_[f], frequencyTableByteSize);
00347       streamByteCount += frequencyTableByteSize;
00348     }
00349 
00350     readPos = 0;
00351     low = 0;
00352     range = (uint64_t)-1;
00353 
00354     // start encoding
00355     while (readPos < input_size)
00356     {
00357 
00358       // read symol
00359       inputsymbol = inputIntVector_arg[readPos++];
00360 
00361       // map to range
00362       low += cFreqTable_[inputsymbol] * (range /= cFreqTable_[frequencyTableSize - 1]);
00363       range *= cFreqTable_[inputsymbol + 1] - cFreqTable_[inputsymbol];
00364 
00365       // check range limits
00366       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00367       {
00368         out = low >> 56;
00369         range <<= 8;
00370         low <<= 8;
00371         outputCharVector_.push_back(out);
00372       }
00373 
00374     }
00375 
00376     // flush remaining data
00377     for (i = 0; i < 8; i++)
00378     {
00379       out = low >> 56;
00380       outputCharVector_.push_back(out);
00381       low <<= 8;
00382     }
00383 
00384     // write encoded data to stream
00385     outputByteStream_arg.write (&outputCharVector_[0], outputCharVector_.size());
00386 
00387     streamByteCount += outputCharVector_.size();
00388 
00389     return streamByteCount;
00390 
00391   }
00392 
00394   unsigned long
00395   StaticRangeCoder::decodeStreamToIntVector (std::istream& inputByteStream_arg,
00396                                              std::vector<unsigned int>& outputIntVector_arg)
00397   {
00398     uint8_t ch;
00399     unsigned int i, f;
00400 
00401     // define range limits
00402     const uint64_t top = (uint64_t)1 << 56;
00403     const uint64_t bottom = (uint64_t)1 << 48;
00404 
00405     uint64_t low, range;
00406     uint64_t code;
00407 
00408     unsigned int outputBufPos;
00409     unsigned long output_size;
00410 
00411     unsigned long frequencyTableSize;
00412     unsigned char frequencyTableByteSize;
00413 
00414     unsigned long streamByteCount;
00415 
00416     streamByteCount = 0;
00417 
00418     outputBufPos = 0;
00419     output_size = outputIntVector_arg.size ();
00420 
00421     // read size of cumulative frequency table from stream
00422     inputByteStream_arg.read ((char*)&frequencyTableSize, sizeof(frequencyTableSize));
00423     inputByteStream_arg.read ((char*)&frequencyTableByteSize, sizeof(frequencyTableByteSize));
00424 
00425     streamByteCount += sizeof(frequencyTableSize)+sizeof(frequencyTableByteSize);
00426 
00427     // check size of frequency table vector
00428     if (cFreqTable_.size () < frequencyTableSize)
00429     {
00430       cFreqTable_.resize (frequencyTableSize);
00431     }
00432 
00433     // init with zero
00434     memset (&cFreqTable_[0], 0, sizeof(uint64_t) * frequencyTableSize);
00435 
00436     // read cumulative frequency table
00437     for (f = 1; f < frequencyTableSize; f++)
00438     {
00439       inputByteStream_arg.read ((char *)&cFreqTable_[f], frequencyTableByteSize);
00440       streamByteCount += frequencyTableByteSize;
00441     }
00442 
00443     // initialize range & code
00444     code = 0;
00445     low = 0;
00446     range = (uint64_t)-1;
00447 
00448     // init code vector
00449     for (i = 0; i < 8; i++)
00450     {
00451       inputByteStream_arg.read ((char*)&ch, sizeof(char));
00452       streamByteCount += sizeof(char);
00453       code = (code << 8) | ch;
00454     }
00455 
00456     // decoding
00457     for (i = 0; i < output_size; i++)
00458     {
00459       uint64_t count = (code - low) / (range /= cFreqTable_[frequencyTableSize - 1]);
00460 
00461       // sybmol lookup in cumulative frequency table
00462       uint64_t symbol = 0;
00463       uint64_t sSize = (frequencyTableSize - 1) / 2;
00464       while (sSize > 0)
00465       {
00466         if (cFreqTable_[symbol + sSize] <= count)
00467         {
00468           symbol += sSize;
00469         }
00470         sSize /= 2;
00471       }
00472 
00473       // write symbol to output stream
00474       outputIntVector_arg[outputBufPos++] = symbol;
00475 
00476       // map to range
00477       low += cFreqTable_[symbol] * range;
00478       range *= cFreqTable_[symbol + 1] - cFreqTable_[symbol];
00479 
00480       // check range limits
00481       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00482       {
00483         inputByteStream_arg.read ((char*)&ch, sizeof(char));
00484         streamByteCount += sizeof(char);
00485         code = code << 8 | ch;
00486         range <<= 8;
00487         low <<= 8;
00488       }
00489 
00490     }
00491 
00492     return streamByteCount;
00493 
00494 
00495   }
00496 
00498   unsigned long
00499   StaticRangeCoder::encodeCharVectorToStream (const std::vector<char>& inputByteVector_arg,
00500                                               std::ostream& outputByteStream_arg)
00501   {
00502     DWord freq[257];
00503     uint8_t ch;
00504     int i, f;
00505     char out;
00506 
00507     // define numerical limits
00508     const DWord top = (DWord)1 << 24;
00509     const DWord bottom = (DWord)1 << 16;
00510     const DWord maxRange = (DWord)1 << 16;
00511 
00512     DWord low, range;
00513 
00514     unsigned int input_size;
00515     input_size = inputByteVector_arg.size ();
00516 
00517     unsigned int readPos;
00518 
00519     unsigned long streamByteCount;
00520 
00521     streamByteCount = 0;
00522 
00523     // init output vector
00524     outputCharVector_.clear();
00525     outputCharVector_.reserve(sizeof(char) * input_size);
00526 
00527     uint64_t FreqHist[257];
00528 
00529     // calculate frequency table
00530     memset (FreqHist, 0, sizeof(FreqHist));
00531     readPos = 0;
00532     while (readPos < input_size)
00533     {
00534       uint8_t symbol = (uint8_t)inputByteVector_arg[readPos++];
00535       FreqHist[symbol + 1]++;
00536     }
00537 
00538     // convert to cumulative frequency table
00539     freq[0] = 0;
00540     for (f = 1; f <= 256; f++)
00541     {
00542       freq[f] = freq[f - 1] + (DWord)FreqHist[f];
00543       if (freq[f] <= freq[f - 1])
00544         freq[f] = freq[f - 1] + 1;
00545     }
00546 
00547     // rescale if numerical limits are reached
00548     while (freq[256] >= maxRange)
00549     {
00550       for (f = 1; f <= 256; f++)
00551       {
00552         freq[f] /= 2;
00553         ;
00554         if (freq[f] <= freq[f - 1])
00555           freq[f] = freq[f - 1] + 1;
00556       }
00557     }
00558 
00559     // write cumulative  frequency table to output stream
00560     outputByteStream_arg.write ((const char *)&freq[0], sizeof(freq));
00561     streamByteCount += sizeof(freq);
00562 
00563     readPos = 0;
00564 
00565     low = 0;
00566     range = (DWord)-1;
00567 
00568     // start encoding
00569     while (readPos < input_size)
00570     {
00571 
00572       // read symol
00573       ch = inputByteVector_arg[readPos++];
00574 
00575       // map to range
00576       low += freq[ch] * (range /= freq[256]);
00577       range *= freq[ch + 1] - freq[ch];
00578 
00579       // check range limits
00580       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00581       {
00582         out = low >> 24;
00583         range <<= 8;
00584         low <<= 8;
00585         outputCharVector_.push_back(out);
00586       }
00587 
00588     }
00589 
00590     // flush remaining data
00591     for (i = 0; i < 4; i++)
00592     {
00593       out = low >> 24;
00594       outputCharVector_.push_back(out);
00595       low <<= 8;
00596     }
00597 
00598     // write encoded data to stream
00599     outputByteStream_arg.write (&outputCharVector_[0], outputCharVector_.size());
00600 
00601     streamByteCount += outputCharVector_.size();
00602 
00603     return streamByteCount;
00604 
00605   }
00606 
00608   unsigned long
00609   StaticRangeCoder::decodeStreamToCharVector (std::istream& inputByteStream_arg,
00610                                               std::vector<char>& outputByteVector_arg)
00611   {
00612     uint8_t ch;
00613     DWord freq[257];
00614     unsigned int i;
00615 
00616     // define range limits
00617     const DWord top = (DWord)1 << 24;
00618     const DWord bottom = (DWord)1 << 16;
00619 
00620     DWord low, range;
00621     DWord code;
00622 
00623     unsigned int outputBufPos;
00624     unsigned int output_size;
00625 
00626     unsigned long streamByteCount;
00627 
00628     streamByteCount = 0;
00629 
00630     output_size = outputByteVector_arg.size ();
00631 
00632     outputBufPos = 0;
00633 
00634     // read cumulative frequency table
00635     inputByteStream_arg.read ((char*)&freq[0], sizeof(freq));
00636     streamByteCount += sizeof(freq);
00637 
00638     code = 0;
00639     low = 0;
00640     range = (DWord)-1;
00641 
00642     // init code
00643     for (i = 0; i < 4; i++)
00644     {
00645       inputByteStream_arg.read ((char*)&ch, sizeof(char));
00646       streamByteCount += sizeof(char);
00647       code = (code << 8) | ch;
00648     }
00649 
00650     // decoding
00651     for (i = 0; i < output_size; i++)
00652     {
00653       // symbol lookup in cumulative frequency table
00654       uint8_t symbol = 0;
00655       uint8_t sSize = 256 / 2;
00656 
00657       DWord count = (code - low) / (range /= freq[256]);
00658 
00659       while (sSize > 0)
00660       {
00661         if (freq[symbol + sSize] <= count)
00662         {
00663           symbol += sSize;
00664         }
00665         sSize /= 2;
00666       }
00667 
00668       // write symbol to output stream
00669       outputByteVector_arg[outputBufPos++] = symbol;
00670 
00671       low += freq[symbol] * range;
00672       range *= freq[symbol + 1] - freq[symbol];
00673 
00674       // check range limits
00675       while ((low ^ (low + range)) < top || ((range < bottom) && ((range = -low & (bottom - 1)), 1)))
00676       {
00677         inputByteStream_arg.read ((char*)&ch, sizeof(char));
00678         streamByteCount += sizeof(char);
00679         code = code << 8 | ch;
00680         range <<= 8;
00681         low <<= 8;
00682       }
00683 
00684     }
00685 
00686     return streamByteCount;
00687 
00688   }
00689 
00690 }
00691 
00692 #endif
00693 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines