FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2005/12/9 00013 // 00014 #ifndef __INDEXOUTPUT_H 00015 #define __INDEXOUTPUT_H 00016 00017 #include "../utility/StdHeader.h" 00018 #include "IndexInput.h" 00019 00020 #define INDEXOUTPUT_BUFFSIZE 32768//4096 00021 00022 00023 namespace firtex 00024 { 00025 namespace store 00026 { 00027 class CIndexOutput 00028 { 00029 public: 00030 CIndexOutput(char* buffer,size_t buffsize); 00031 CIndexOutput(size_t buffsize=0); 00032 virtual ~CIndexOutput(void); 00033 public: 00038 void write(CIndexInput* pInput,int64_t length); 00039 00045 void write(const char* data,size_t length); 00046 00050 void writeByte(byte b); 00051 00055 void writeBytes(byte* b, size_t length); 00056 00060 void writeInt(int32_t i); 00061 00065 void writeVInt(int32_t i); 00066 00070 void writeLong(int64_t i); 00071 00075 void writeVLong(int64_t i); 00076 00080 void writeString(const CFXString& s); 00081 00088 void writeChars(const char* s, size_t start, size_t length); 00089 00093 byte getVIntLength(int32_t i); 00094 00098 void setBuffer(char* buf,size_t bufSize); 00099 00103 int64_t getFilePointer(); 00104 protected: 00105 void flush(); 00106 public: 00112 virtual void flushBuffer(char* b,size_t len) = 0; 00113 00117 virtual int64_t length() = 0; 00118 00122 virtual void close(); 00123 00128 virtual void seek(int64_t pos); 00129 protected: 00130 char* m_buffer; 00131 size_t m_buffersize; 00132 int64_t m_bufferStart; // m_buffer 在文件中的偏移量 00133 size_t m_bufferPosition; // m_buffer 中的位置 00134 bool m_bOwnBuff; 00135 }; 00137 // 00138 inline void CIndexOutput::write(const char* data,size_t length) 00139 { 00140 if ((m_bufferPosition>0) && ( (int64_t)(m_bufferPosition + length) >= m_buffersize) ) 00141 flush(); 00142 if(m_buffersize < (int64_t)length) 00143 { 00144 flushBuffer((char*)data,length);//直接写 00145 m_bufferStart+=length; 00146 } 00147 else 00148 { 00149 memcpy(m_buffer + m_bufferPosition,data,length); 00150 m_bufferPosition += length; 00151 } 00152 } 00153 inline void CIndexOutput::writeByte(byte b) 00154 { 00155 if (m_bufferPosition >= m_buffersize) 00156 flush(); 00157 m_buffer[m_bufferPosition++] = b; 00158 } 00159 00160 inline void CIndexOutput::writeBytes(byte* b, size_t length) 00161 { 00162 for (size_t i= 0; i < length; i++) 00163 writeByte(b[i]); 00164 } 00165 00166 inline void CIndexOutput::writeInt(int32_t i) 00167 { 00168 writeByte((byte) (i >> 24)); 00169 writeByte((byte) (i >> 16)); 00170 writeByte((byte) (i >> 8)); 00171 writeByte((byte) i); 00172 } 00173 00174 inline void CIndexOutput::writeVInt(int32_t i) 00175 { 00176 uint32_t ui = i; 00177 while ((ui & ~0x7F) != 0) 00178 { 00179 writeByte((uint8_t)((ui & 0x7f) | 0x80)); 00180 ui >>= 7; 00181 } 00182 writeByte( (uint8_t)ui ); 00183 } 00184 00185 inline void CIndexOutput::writeLong(int64_t i) 00186 { 00187 writeInt((int32_t) (i >> 32)); 00188 writeInt((int32_t) i); 00189 } 00190 00191 inline void CIndexOutput::writeVLong(int64_t i) 00192 { 00193 uint64_t ui = i; 00194 while ((ui & ~0x7F) != 0) 00195 { 00196 writeByte((uint8_t)((ui & 0x7f) | 0x80)); 00197 ui >>= 7; 00198 } 00199 writeByte((uint8_t)ui); 00200 } 00201 inline void CIndexOutput::writeString(const CFXString& s) 00202 { 00203 int32_t length = (int32_t)s.length(); 00204 writeVInt(length); 00205 writeChars(s.c_str(), 0, length); 00206 } 00207 inline void CIndexOutput::writeChars(const char* s, size_t start, size_t length) 00208 { 00209 uint64_t end = start + length; 00210 for (size_t i = start; i < end; i++) 00211 { 00212 int32_t code = (int32_t) s[i]; 00213 if (code >= 0x01 && code <= 0x7F) 00214 writeByte((byte) code); 00215 else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) 00216 { 00217 writeByte((byte) (0xC0 | (code >> 6))); 00218 writeByte((byte) (0x80 | (code & 0x3F))); 00219 } 00220 else 00221 { 00222 writeByte((byte) (0xE0 | (((uint32_t) code) >> 12))); 00223 writeByte((byte) (0x80 | ((code >> 6) & 0x3F))); 00224 writeByte((byte) (0x80 | (code & 0x3F))); 00225 } 00226 } 00227 } 00228 00229 inline byte CIndexOutput::getVIntLength(int32_t i) 00230 { 00231 byte l = 1; 00232 uint32_t ui = i; 00233 while ((ui & ~0x7F) != 0) 00234 { 00235 l++; 00236 ui >>= 7; //doing unsigned shift 00237 } 00238 return l; 00239 } 00240 00241 inline void CIndexOutput::setBuffer(char* buf,size_t bufSize) 00242 { 00243 if(m_bufferStart!=0 || m_bufferPosition != 0) 00244 { 00245 FIRTEX_THROW2(UNSUPPORTED_ERROR," void CIndexOutput::setBuffer(char* buf,size_t bufSize):you must call setBuffer() before reading any data."); 00246 } 00247 if(m_bOwnBuff && m_buffer) 00248 { 00249 delete[] m_buffer; 00250 } 00251 m_buffer = buf; 00252 m_buffersize = bufSize; 00253 m_bOwnBuff = false; 00254 } 00255 inline void CIndexOutput:: flush() 00256 { 00257 flushBuffer(m_buffer, m_bufferPosition); 00258 m_bufferStart += m_bufferPosition; 00259 m_bufferPosition = 0; 00260 } 00261 inline int64_t CIndexOutput::getFilePointer() 00262 { 00263 return m_bufferStart + (int64_t)m_bufferPosition; 00264 } 00265 } 00266 } 00267 00268 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex