FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

IndexInput.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2005/12/9
00013 //
00014 #ifndef __INPUTSTREAM_H
00015 #define __INPUTSTREAM_H
00016 
00017 #include "../utility/StdHeader.h"
00018 #include "../utility/FXString.h"
00019 
00020 
00021 #define INDEXINPUT_BUFFSIZE             32768// buffer size constant (presumably bytes; not referenced in this header); 4096 is the earlier value
00022 
00023 namespace firtex
00024 {
00025         namespace store
00026         {
00027                 class CIndexInput
00028                 {       
                      // Abstract buffered reader. The primitive readers below (readByte, readInt, readVInt, ...)
                      // consume bytes from m_buffer; when the buffer is exhausted refill() asks the subclass,
                      // through readInternal(), for the next chunk. Subclasses must implement readInternal(),
                      // seekInternal(), both clone() overloads and close().
00029                 public:
                              // Construct with a caller-supplied buffer of buffsize bytes.
                              // NOTE(review): defined outside this header; presumably the buffer is not owned (see m_bOwnBuff) - confirm.
00030                         CIndexInput(char* buffer,size_t buffsize);
                              // Construct with a buffer size only.
                              // NOTE(review): defined outside this header; the meaning of buffsize == 0 is not visible here - confirm.
00031                         CIndexInput(size_t buffsize=0);                 
00032                         virtual ~CIndexInput(void);
00033                 public:
                              // Read `length` raw bytes into `data`. NOTE(review): defined outside this header.
00039                         void read(char* data, size_t length);
00040 
                              // Return the next byte, calling refill() first if all buffered bytes were consumed.
00044                         byte readByte();
00045 
                              // Read `len` bytes into `b` starting at `offset` (presumably). NOTE(review): defined outside this header.
00052                         void  readBytes(byte* b, size_t offset, size_t len);
00053                         
                              // Read four bytes and combine them into a 32-bit value, first byte most significant.
00057                         int32_t readInt();
00058 
                              // Read a variable-length 32-bit integer: 7 payload bits per byte, lowest group first;
                              // a set high bit (0x80) means another byte follows.
00062                         int32_t readVInt();
00063                         
                              // Read two readInt() values; the first supplies the high 32 bits, the second the low 32 bits.
00067                         int64_t readLong();
00068                         
                              // 64-bit counterpart of readVInt(), same byte layout.
00072                         int64_t readVLong();
00073 
                              // Read a VInt character count, then that many characters via readChars(), into `s`.
00077                         void readString(string& s);
00078 
                              // Same as above for CFXString; a count of zero is reported through FIRTEX_THROW3(INDEX_COLLAPSE_ERROR, ...).
00082                         void readString(CFXString& s);
00083 
                              // Decode `length` characters into buffer[start .. start+length). Each character occupies
                              // one, two or three stream bytes depending on its lead byte; the decoded value is narrowed to char.
00091                         void  readChars(char* buffer, size_t start, size_t length);
00092 
                              // Read and discard nNum VInts.
00097                         void skipVInt(size_t nNum);
00098 
                              // Current read position: m_bufferStart + m_bufferPosition.
00102                         int64_t getFilePointer();
00103 
                              // Move the read position to `pos`. Positions beyond m_length are rejected via FIRTEX_THROW3(FILEIO_ERROR, ...);
                              // a position inside the currently buffered range only moves m_bufferPosition, otherwise the buffer
                              // is invalidated and seekInternal(pos) is called.
00108                         void  seek(int64_t pos);
00109 
                              // True when the current read position is at or past m_length.
00113                         bool    isEof();
00114 
                              // Return m_length.
00118                         int64_t length()const;
00119 
                              // Set m_length; FIRTEX_ASSERT is given the condition newLen > 0.
00123                         void    setLength(int64_t newLen);
00124 
                              // Replace the internal buffer (presumably). NOTE(review): defined outside this header; ownership rules not visible.
00130                         void    setBuffer(char* buf,size_t bufSize);
00131 
00132                 public:
                              // Subclass hook that fetches data from the underlying source.
                              // refill() calls it as readInternal(m_buffer, 0, n) to load n bytes into the buffer.
00139                         virtual void    readInternal(char* b, size_t offset, size_t length) = 0;
00140                         
                              // Subclass hook: create a copy of this input using the given buffer (presumably; semantics defined by subclasses).
00146                         virtual CIndexInput*    clone(char* buffer,size_t buffsize) = 0;
00147                         
                              // Subclass hook: create a copy of this input (semantics defined by subclasses).
00151                         virtual CIndexInput*    clone() = 0;
00152 
                              // Subclass hook: release the underlying source (presumably; semantics defined by subclasses).
00156                         virtual void    close() = 0;
00157                 protected:                              
                              // Load the next chunk (at most m_bufferSize bytes, clipped to m_length) starting at the
                              // current read position; allocates m_buffer on first use; reports FILEIO_ERROR when nothing is left.
00161                         void                    refill();
00162                 protected:
                              // Subclass hook called by seek() when the target position lies outside the buffered range.
00167                         virtual void    seekInternal(int64_t pos) = 0;
00168                 protected:
00169                         char*           m_buffer;
00170                         size_t          m_bufferSize;                   
00171 
00172                         int64_t         m_bufferStart; // position in file of m_buffer
00173                         size_t          m_bufferLength; // end of valid bytes
00174                         size_t          m_bufferPosition; // next byte to read
00175 
00176                         int64_t         m_length; // set by subclasses
                              // presumably true when this object allocated m_buffer itself - not used in this header, confirm in the .cpp
00177                         bool            m_bOwnBuff;
00178 
00179                         friend class CIndexOutput;
00180                 };
00181 
00183                 //
00184                 inline byte CIndexInput::readByte()
00185                 {
00186                         if (m_bufferPosition >= m_bufferLength)
00187                                 refill();
00188                         return m_buffer[m_bufferPosition++];
00189                 }
00190                 inline int32_t CIndexInput:: readInt()
00191                 {
00192                         uint8_t b1 = readByte();
00193                         uint8_t b2 = readByte();
00194                         uint8_t b3 = readByte();
00195                         uint8_t b4 = readByte();
00196                         return ((b1 & 0xFF) << 24) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) <<  8)
00197                                 | (b4 & 0xFF);
00198                         //return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF);
00199                 }
00200 
00201                 inline int32_t CIndexInput::readVInt()
00202                 {
00203                         uint8_t b = readByte();
00204                         int32_t i = b & 0x7F;
00205                         for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) {
00206                                 b = readByte();
00207                                 i |= (b & 0x7FL) << shift;
00208                         }
00209                         return i;
00210                 }
00211         
00212                 inline int64_t CIndexInput::readLong()
00213                 {
00214                         int32_t i1 = readInt();
00215                         int32_t i2 = readInt();
00216                         return (((int64_t)i1) << 32) | (i2 & 0xFFFFFFFFL);              
00217                 }
00218 
00219                 inline int64_t CIndexInput::readVLong()
00220                 {
00221                         uint8_t b = readByte();
00222                         int64_t i = b & 0x7F;
00223                         for (int32_t shift = 7; (b & 0x80) != 0; shift += 7)
00224                         {
00225                                 b = readByte();
00226                                 i |= (b & 0x7FLL) << shift;
00227                         }
00228                         return i;
00229                 }
00230 
00231                 inline void CIndexInput::readString(string& s)
00232                 {                               
00233                         size_t length = (size_t)readVInt();                             
00234                         char* chars = new char[length + 1];
00235                         readChars(chars, 0, length);                            
00236                         chars[length] = '\0';           
00237                         s = chars;
00238                         delete chars;
00239                 }
00240 
00241                 inline void CIndexInput::readString(CFXString& s)
00242                 {                               
00243                         size_t length = (size_t)readVInt();
00244                         if(length <=0 )
00245                                 FIRTEX_THROW3(INDEX_COLLAPSE_ERROR,"CIndexInput::readString():the length of string is invalid.");
00246                         s.reserve(length);                              
00247                         readChars(s.data(),0,length);
00248                         s.resize(length);
00249                 }
00250                 
00251                 inline void CIndexInput::readChars(char* buffer, size_t start, size_t length)
00252                 {
00253                         size_t end = start + length;
00254                         for (size_t i = start; i < end; i++)
00255                         {
00256                                 byte b = readByte();
00257                                 if ((b & 0x80) == 0)
00258                                         buffer[i] = (char) (b & 0x7F);
00259                                 else if ((b & 0xE0) != 0xE0)
00260                                 {
00261                                         buffer[i] = (char) (((b & 0x1F) << 6) | (readByte() & 0x3F));
00262                                 }
00263                                 else
00264                                         buffer[i] = (char) (((b & 0x0F) << 12) | ((readByte() & 0x3F) << 6) | (readByte() & 0x3F));
00265                         }
00266                 }
00267                 inline void CIndexInput::refill()
00268                 {
00269                         int64_t start = m_bufferStart + (int64_t)m_bufferPosition;
00270                         int64_t end = start + m_bufferSize;
00271                         if (end > m_length)     //超过了结尾                                    
00272                                 end = m_length;
00273                         m_bufferLength = (size_t)(end - start);
00274                         if (m_bufferLength <= 0)
00275                                 FIRTEX_THROW2(FILEIO_ERROR,"IndexInput:read past EOF.");
00276 
00277                         if (m_buffer == NULL)
00278                                 m_buffer = new char[m_bufferSize]; // allocate m_buffer lazily
00279                         readInternal(m_buffer, 0, m_bufferLength);
00280 
00281                         m_bufferStart = start;
00282                         m_bufferPosition = 0;
00283                 }
00284                 inline void CIndexInput::skipVInt(size_t nNum)
00285                 {
00286                         for (int64_t i = 0;i<nNum;i++)
00287                         {
00288                                 readVInt();
00289                         }
00290                 }
00291 
00292                 inline int64_t CIndexInput::getFilePointer()
00293                 {
00294                         return m_bufferStart + (int64_t)m_bufferPosition;
00295                 }
00296 
00297                 inline void CIndexInput::seek(int64_t pos)
00298                 {
00299                         if(pos > m_length)
00300                                 FIRTEX_THROW3(FILEIO_ERROR,_T("CIndexInput.seek():pos>m_length"));
00301                         if (pos >= m_bufferStart && pos < (m_bufferStart + (int64_t)m_bufferLength))
00302                                 m_bufferPosition = (size_t) (pos - m_bufferStart);//新位置在缓冲区间                            
00303                         else
00304                         {
00305                                 m_bufferStart = pos;
00306                                 m_bufferPosition = 0;
00307                                 m_bufferLength = 0; // trigger refill() on read()
00308                                 seekInternal(pos);
00309                         }
00310                 }
00311                 
00312                 inline bool CIndexInput::isEof()
00313                 {
00314                         return ( (m_bufferStart + (int64_t )m_bufferPosition) >= m_length);
00315                 }
00316                         
00317                 inline int64_t CIndexInput::length()const
00318                 {
00319                         return m_length;
00320                 }
00321                 
00322                 inline void CIndexInput::setLength(int64_t newLen)
00323                 {
                              // Stores newLen as the total length used by seek(), refill() and isEof().
                              // FIRTEX_ASSERT is given the condition newLen > 0; what happens when it fails
                              // depends on that macro, which is defined outside this header.
00324                         FIRTEX_ASSERT((newLen > 0),_T("CIndexInput.setLength():illegal parameter."));
00325                         m_length = newLen;
00326                 }
00327         }
00328 }
00329 
00330 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex