FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

BarrelTermDocs.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn
00012 // Created      : 2005/12/26
00013 //
00014 
00015 #ifndef _BARRELTERMDOCS_H
00016 #define _BARRELTERMDOCS_H
00017 
00018 #include "../store/IndexInput.h"
00019 #include "TermDocs.h"
00020 #include "TermInfo.h"
00021 #include "TermReader.h"
00022 
00023 using namespace firtex::store;
00024 
00025 #define PAGE_SIZE       32768           // 32K: number of entries in each of the m_docPage / m_freqPage buffers of CBarrelTermDocs
00026 
00027 namespace firtex
00028 {
00029         namespace index
00030         {
00031                 class CBarrelTermDocs : public CTermDocs // CTermDocs implementation that buffers doc ids and frequencies in fixed-size pages; presumably serves one index barrel (inferred from the name - confirm)
00032                 {
00033                 public:
00034                         CBarrelTermDocs();
00035                         CBarrelTermDocs(CTermReader* pReader,CIndexInput* pDfiStream,CTermInfo& ti,CBitVector* deletedDocs); // presumably stores these in m_pTermReader / m_pDocFreqStream / m_termInfo / m_deletedDods - confirm in the .cpp
00036                         virtual ~CBarrelTermDocs(void);
00037                 public:
00042                         bool    seek(CTerm* term);      // presumably positions this object on the postings of term - body not visible here
00043 
00047                         freq_t  docFreq(){return m_termInfo.docFreq();} // document frequency as recorded in m_termInfo
00048 
00055                         count_t next(docid_t*& docs, count_t*& freqs); // bulk variant of next(); presumably exposes a page of doc ids / freqs and returns their count - confirm
00056 
00064                         bool skipTo(docid_t target,docid_t& nearTarget); // presumably advances to target, reporting the nearest doc id reached through nearTarget - confirm
00065                         
00070                         bool    next(); // presumably advances to the next document - confirm
00071 
00076                         docid_t doc(); // presumably the current document id - confirm
00077 
00082                         count_t freq(); // presumably the term frequency in the current document - confirm
00083 
00087                         void    close();
00088                 protected:                      
00089                         bool    decode(); // presumably decompresses the next page from the doc-freq stream - confirm
00090 
00101                         int             bsearch(docid_t docs[],int start,int end,docid_t key,docid_t& keyFound); // binary search over docs[start..end]; defined inline after the class
00102                 protected:
00103                         CTermInfo               m_termInfo;
00104                         CIndexInput*    m_pDocFreqStream;
00105                         CTermReader*    m_pTermReader;
00106                         CBitVector*             m_deletedDods; // presumably the deleted-documents bit vector (the name is spelled "Dods" in the source)
00107 
00108                         docid_t                 m_docPage[PAGE_SIZE]; // embedded array, so every instance carries PAGE_SIZE doc ids
00109                         count_t                 m_freqPage[PAGE_SIZE]; // embedded array, so every instance carries PAGE_SIZE counts
00110                         int                             m_pageSize;                             // number of page entries actually in use
00111                         int                             m_length;                               // length of the .dfi block
00112                         int                             m_pagePosition;                 // position of the page within the doc freqs
00113                         int                             m_docPointer;                   // current position within the page
00114 
00115                         docid_t                 m_lastDecodeDocID;              // last doc id that was decompressed
00116                         docid_t                 m_lastDocID;                    // last doc id of the term
00117                         int                             m_count;                                // number of doc freqs decompressed so far
00118                 };
00120                 //
00121                 // Binary search over docs[start..end] (both bounds inclusive); the
00122                 // range is assumed to be sorted in ascending order.
00123                 //
00124                 // docs     : array of document ids to search
00125                 // start    : index of the first element of the range
00126                 // end      : index of the last element of the range
00127                 // key      : document id to look for
00128                 // keyFound : receives key on an exact match, otherwise the
00129                 //            smallest element greater than key, or docs[end]
00130                 //            when every element is smaller than key
00131                 // return   : index of the element stored in keyFound
00132                 //
00133                 // NOTE: docs[end] is read unconditionally, so callers must pass
00134                 // a valid end index.
00135                 inline int      CBarrelTermDocs::bsearch(docid_t docs[],int start,int end,docid_t key,docid_t& keyFound)
00136                 {
00137                         // Fallback when no element >= key exists: the last element.
00138                         int nk = end;
00139                         keyFound = docs[end];
00140                         while (start <= end)
00141                         {
00142                                 // start + (end - start)/2 cannot overflow, unlike (start + end)/2.
00143                                 const int k = start + (end - start)/2;
00144                                 if(key == docs[k])// exact match
00145                                 {
00146                                         keyFound = key;
00147                                         return k;
00148                                 }
00149                                 if(key < docs[k])// continue in the left half
00150                                 {
00151                                         // docs[k] is the closest element above key seen so far.
00152                                         keyFound = docs[k];
00153                                         nk = k;
00154                                         end = k - 1;
00155                                 }
00156                                 else // continue in the right half
00157                                 {
00158                                         start = k + 1;
00159                                 }
00160                         }
00161                         return nk;
00162                 }
00163 
00164         }
00165 }
00166 
00167 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex