FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2005/12/9 00013 // 00014 #ifndef __INPUTSTREAM_H 00015 #define __INPUTSTREAM_H 00016 00017 #include "../utility/StdHeader.h" 00018 #include "../utility/FXString.h" 00019 00020 00021 #define INDEXINPUT_BUFFSIZE 32768//4096 00022 00023 namespace firtex 00024 { 00025 namespace store 00026 { 00027 class CIndexInput 00028 { 00029 public: 00030 CIndexInput(char* buffer,size_t buffsize); 00031 CIndexInput(size_t buffsize=0); 00032 virtual ~CIndexInput(void); 00033 public: 00039 void read(char* data, size_t length); 00040 00044 byte readByte(); 00045 00052 void readBytes(byte* b, size_t offset, size_t len); 00053 00057 int32_t readInt(); 00058 00062 int32_t readVInt(); 00063 00067 int64_t readLong(); 00068 00072 int64_t readVLong(); 00073 00077 void readString(string& s); 00078 00082 void readString(CFXString& s); 00083 00091 void readChars(char* buffer, size_t start, size_t length); 00092 00097 void skipVInt(size_t nNum); 00098 00102 int64_t getFilePointer(); 00103 00108 void seek(int64_t pos); 00109 00113 bool isEof(); 00114 00118 int64_t length()const; 00119 00123 void setLength(int64_t newLen); 00124 00130 void setBuffer(char* buf,size_t bufSize); 00131 00132 public: 00139 virtual void readInternal(char* b, size_t offset, size_t length) = 0; 00140 00146 virtual CIndexInput* clone(char* buffer,size_t buffsize) = 0; 00147 00151 virtual CIndexInput* clone() = 0; 00152 00156 virtual void close() = 0; 00157 protected: 00161 void refill(); 00162 protected: 00167 virtual void seekInternal(int64_t pos) = 0; 00168 protected: 00169 char* m_buffer; 00170 size_t m_bufferSize; 00171 00172 int64_t m_bufferStart; // position in file of m_buffer 00173 size_t m_bufferLength; // end of valid bytes 00174 size_t m_bufferPosition; // next byte to read 00175 00176 int64_t m_length; // set by subclasses 00177 bool m_bOwnBuff; 00178 00179 friend class CIndexOutput; 00180 }; 00181 00183 // 00184 inline byte CIndexInput::readByte() 00185 { 00186 if (m_bufferPosition >= m_bufferLength) 00187 refill(); 00188 return m_buffer[m_bufferPosition++]; 00189 } 00190 inline int32_t CIndexInput:: readInt() 00191 { 00192 uint8_t b1 = readByte(); 00193 uint8_t b2 = readByte(); 00194 uint8_t b3 = readByte(); 00195 uint8_t b4 = readByte(); 00196 return ((b1 & 0xFF) << 24) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 8) 00197 | (b4 & 0xFF); 00198 //return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); 00199 } 00200 00201 inline int32_t CIndexInput::readVInt() 00202 { 00203 uint8_t b = readByte(); 00204 int32_t i = b & 0x7F; 00205 for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { 00206 b = readByte(); 00207 i |= (b & 0x7FL) << shift; 00208 } 00209 return i; 00210 } 00211 00212 inline int64_t CIndexInput::readLong() 00213 { 00214 int32_t i1 = readInt(); 00215 int32_t i2 = readInt(); 00216 return (((int64_t)i1) << 32) | (i2 & 0xFFFFFFFFL); 00217 } 00218 00219 inline int64_t CIndexInput::readVLong() 00220 { 00221 uint8_t b = readByte(); 00222 int64_t i = b & 0x7F; 00223 for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) 00224 { 00225 b = readByte(); 00226 i |= (b & 0x7FLL) << shift; 00227 } 00228 return i; 00229 } 00230 00231 inline void CIndexInput::readString(string& s) 00232 { 00233 size_t length = (size_t)readVInt(); 00234 char* chars = new char[length + 1]; 00235 readChars(chars, 0, length); 00236 chars[length] = '\0'; 00237 s = chars; 00238 delete chars; 00239 } 00240 00241 inline void CIndexInput::readString(CFXString& s) 00242 { 00243 size_t length = (size_t)readVInt(); 00244 if(length <=0 ) 00245 FIRTEX_THROW3(INDEX_COLLAPSE_ERROR,"CIndexInput::readString():the length of string is invalid."); 00246 s.reserve(length); 00247 readChars(s.data(),0,length); 00248 s.resize(length); 00249 } 00250 00251 inline void CIndexInput::readChars(char* buffer, size_t start, size_t length) 00252 { 00253 size_t end = start + length; 00254 for (size_t i = start; i < end; i++) 00255 { 00256 byte b = readByte(); 00257 if ((b & 0x80) == 0) 00258 buffer[i] = (char) (b & 0x7F); 00259 else if ((b & 0xE0) != 0xE0) 00260 { 00261 buffer[i] = (char) (((b & 0x1F) << 6) | (readByte() & 0x3F)); 00262 } 00263 else 00264 buffer[i] = (char) (((b & 0x0F) << 12) | ((readByte() & 0x3F) << 6) | (readByte() & 0x3F)); 00265 } 00266 } 00267 inline void CIndexInput::refill() 00268 { 00269 int64_t start = m_bufferStart + (int64_t)m_bufferPosition; 00270 int64_t end = start + m_bufferSize; 00271 if (end > m_length) //超过了结尾 00272 end = m_length; 00273 m_bufferLength = (size_t)(end - start); 00274 if (m_bufferLength <= 0) 00275 FIRTEX_THROW2(FILEIO_ERROR,"IndexInput:read past EOF."); 00276 00277 if (m_buffer == NULL) 00278 m_buffer = new char[m_bufferSize]; // allocate m_buffer lazily 00279 readInternal(m_buffer, 0, m_bufferLength); 00280 00281 m_bufferStart = start; 00282 m_bufferPosition = 0; 00283 } 00284 inline void CIndexInput::skipVInt(size_t nNum) 00285 { 00286 for (int64_t i = 0;i<nNum;i++) 00287 { 00288 readVInt(); 00289 } 00290 } 00291 00292 inline int64_t CIndexInput::getFilePointer() 00293 { 00294 return m_bufferStart + (int64_t)m_bufferPosition; 00295 } 00296 00297 inline void CIndexInput::seek(int64_t pos) 00298 { 00299 if(pos > m_length) 00300 FIRTEX_THROW3(FILEIO_ERROR,_T("CIndexInput.seek():pos>m_length")); 00301 if (pos >= m_bufferStart && pos < (m_bufferStart + (int64_t)m_bufferLength)) 00302 m_bufferPosition = (size_t) (pos - m_bufferStart);//新位置在缓冲区间 00303 else 00304 { 00305 m_bufferStart = pos; 00306 m_bufferPosition = 0; 00307 m_bufferLength = 0; // trigger refill() on read() 00308 seekInternal(pos); 00309 } 00310 } 00311 00312 inline bool CIndexInput::isEof() 00313 { 00314 return ( (m_bufferStart + (int64_t )m_bufferPosition) >= m_length); 00315 } 00316 00317 inline int64_t CIndexInput::length()const 00318 { 00319 return m_length; 00320 } 00321 00322 inline void CIndexInput::setLength(int64_t newLen) 00323 { 00324 FIRTEX_ASSERT((newLen > 0),_T("CIndexInput.setLength():illegal parameter.")); 00325 m_length = newLen; 00326 } 00327 } 00328 } 00329 00330 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex