FirteX - 高性能全文索引和检索平台 API Documentation
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn 00012 // Created : 2005/12/26 00013 // 00014 00015 #ifndef _BARRELTERMDOCS_H 00016 #define _BARRELTERMDOCS_H 00017 00018 #include "../store/IndexInput.h" 00019 #include "TermDocs.h" 00020 #include "TermInfo.h" 00021 #include "TermReader.h" 00022 00023 using namespace firtex::store; 00024 00025 #define PAGE_SIZE 32768 //32K 00026 00027 namespace firtex 00028 { 00029 namespace index 00030 { 00031 class CBarrelTermDocs : public CTermDocs 00032 { 00033 public: 00034 CBarrelTermDocs(); 00035 CBarrelTermDocs(CTermReader* pReader,CIndexInput* pDfiStream,CTermInfo& ti,CBitVector* deletedDocs); 00036 virtual ~CBarrelTermDocs(void); 00037 public: 00042 bool seek(CTerm* term); 00043 00047 freq_t docFreq(){return m_termInfo.docFreq();} 00048 00055 count_t next(docid_t*& docs, count_t*& freqs); 00056 00064 bool skipTo(docid_t target,docid_t& nearTarget); 00065 00070 bool next(); 00071 00076 docid_t doc(); 00077 00082 count_t freq(); 00083 00087 void close(); 00088 protected: 00089 bool decode(); 00090 00101 int bsearch(docid_t docs[],int start,int end,docid_t key,docid_t& keyFound); 00102 protected: 00103 CTermInfo m_termInfo; 00104 CIndexInput* m_pDocFreqStream; 00105 CTermReader* m_pTermReader; 00106 CBitVector* m_deletedDods; 00107 00108 docid_t m_docPage[PAGE_SIZE]; 00109 count_t m_freqPage[PAGE_SIZE]; 00110 int m_pageSize; //页的实际使用大小 00111 int m_length; //.dfi block长度 00112 int m_pagePosition; //页在docfreqs中的位置 00113 int m_docPointer; //在页中的当前位置 00114 00115 docid_t m_lastDecodeDocID; 
//解压缩的最后一个docid 00116 docid_t m_lastDocID; //term的最后一个docid 00117 int m_count; //已经解压的doc freqs 00118 }; 00120 // 00121 inline int CBarrelTermDocs::bsearch(docid_t docs[],int start,int end,docid_t key,docid_t& keyFound) 00122 { 00123 int k; 00124 int nk = end; 00125 keyFound = docs[end]; 00126 while (start<=end) 00127 { 00128 k = (start + end)/2; 00129 if(key == docs[k])//找到 00130 { 00131 keyFound = key; 00132 return k; 00133 } 00134 if(key < docs[k])//查找左半边 00135 { 00136 end = k - 1; 00137 if(k >= start) 00138 { 00139 keyFound = docs[k]; 00140 nk =k; 00141 } 00142 } 00143 else //查找右半边 00144 { 00145 start = k + 1; 00146 /*if(k <= end) 00147 { 00148 keyFound = docs[k]; 00149 nk =k; 00150 }*/ 00151 if(start <= end) 00152 { 00153 if(docs[start] > key) 00154 { 00155 keyFound = docs[start]; 00156 nk = start; 00157 } 00158 } 00159 } 00160 } 00161 return nk; 00162 } 00163 00164 } 00165 } 00166 00167 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex