FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

WordFieldMerger.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2005/12/3
00013 //
00014 #ifndef _WORDFIELDMERGER_H
00015 #define _WORDFIELDMERGER_H
00016 
00017 #include "../utility/StdHeader.h"
00018 #include <vector>
00019 #include "FieldMerger.h"
00020 #include "../store/IndexInput.h"
00021 #include "TermInfo.h"
00022 using namespace std;
00023 using namespace firtex::store;
00024 
00025 
00026 namespace firtex
00027 {
00028         namespace index
00029         {
00030                 struct CWordFieldMergeEntry:public CFieldMergeEntry
00031                 {
00032                 public:                 
00033                         CWordFieldMergeEntry(CBarrelInfo* pBarrelInfo,CFieldInfo* pFieldInfo):CFieldMergeEntry(pBarrelInfo,pFieldInfo)
00034                         {
00035                                 count = -1;             
00036                                 tid = -1;               
00037                                 firstDocID = -1;        
00038                                 lastDocID = -1; 
00039 
00040                                 tdiReader = NULL;
00041                                 dfiReader = NULL;
00042                                 ptiReader = NULL;
00043 
00044                                 dfiLen = 0;
00045                                 ptiLen = 0;
00046                                 cur = 0;
00047                         }
00048                         ~CWordFieldMergeEntry()
00049                         {
00050                                 if(tdiReader != NULL)
00051                                 {
00052                                         tdiReader->close();
00053                                         delete tdiReader;
00054                                 }
00055                                 if(dfiReader != NULL)
00056                                 {
00057                                         dfiReader->close();
00058                                         delete dfiReader;
00059                                 }
00060                                 if(ptiReader != NULL)
00061                                 {
00062                                         ptiReader->close();
00063                                         delete ptiReader;
00064                                 }       
00065                         }
00066                         bool open(CDirectory* pDirectory,char* buf,size_t bufsize)
00067                         {                                                       
00068                                 if(bufsize > 3*INDEXINPUT_BUFFSIZE)
00069                                 {
00070                                         size_t len = bufsize/3;
00071                                         tdiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".tdi",buf,len);
00072                                         buf += len;
00073                                         dfiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".dfi",buf,len);
00074                                         buf += len;
00075                                         ptiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".pti",buf,len);
00076                                 }
00077                                 else 
00078                                 {                                       
00079                                         tdiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".tdi");                                      
00080                                         dfiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".dfi");                                      
00081                                         ptiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".pti");
00082                                 }                       
00083                                 int64_t tLen,dLen,pLen;
00084                                 m_pFieldInfo->getLength(&tLen,&dLen,&pLen);
00085                                 if(tLen <= 0)
00086                                 {
00087                                         count = 0;
00088                                         delete tdiReader;
00089                                         tdiReader = NULL;
00090                                         delete dfiReader;
00091                                         dfiReader = NULL;
00092                                         delete ptiReader;
00093                                         ptiReader = NULL;
00094                                         return false;
00095                                 }
00096                                 
00097 
00098                                 tdiReader->seek(m_pFieldInfo->getIndexOffset());
00099                                 tdiReader->setLength(tdiReader->getFilePointer() + tLen);                               
00100                                 count = tdiReader->readInt();   //读Term总数
00101                                 if(count <= 0)
00102                                         return false;
00104                                 tid = tdiReader->readVInt();
00105                                 count_t df = tdiReader->readVInt();
00106                                 fileoffset_t of1 = tdiReader->readVLong();
00107                                 fileoffset_t of2 = tdiReader->readVLong();
00108                                 ti.set(df,of1,of2);
00109 
00110                                 dfiReader->seek(of1);
00111                                 ptiReader->seek(of2);
00112 
00113                                 dfiReader->setLength(dfiReader->getFilePointer() + dLen);
00114                                 ptiReader->setLength(ptiReader->getFilePointer() + pLen);
00115 
00116                                 ptiLen = ptiReader->readVInt();
00117                                 lastDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt();
00118                                 dfiLen = dfiReader->readVInt();
00119                                 firstDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt();
00120                                 cur++;
00121                                 return true;
00122                         }
00123                         bool next()
00124                         {
00125                                 if(cur >= count)
00126                                 {
00127                                         return false;
00128                                         tid = -1;
00129                                 }
00130                                 tid = tdiReader->readVInt();
00131                                 count_t df = tdiReader->readVInt();
00132                                 fileoffset_t of1 = tdiReader->readVLong();
00133                                 fileoffset_t of2 = tdiReader->readVLong();
00134                                 ti.set(df,of1,of2);
00135                                 dfiReader->seek(of1);
00136                                 ptiReader->seek(of2);
00137                                 
00138                                 ptiLen = ptiReader->readVInt();
00139                                 lastDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt();
00140                                 dfiLen = dfiReader->readVInt();
00141                                 firstDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt();
00142                                 cur++;
00143                                 return true;
00144                         }
00145                 protected:
00146                         CWordFieldMergeEntry()
00147                         {
00148                         }
00149                 public:
00150                         count_t                 count;          //Term Count
00151                         termid_t                tid;            //Term ID
00152                         CTermInfo               ti;                     //Term Info     
00153                         docid_t                 firstDocID;     //Term的第一个文档编号 
00154                         docid_t                 lastDocID;      //Term的最后一个文档编号                        
00155                         
00156                         CIndexInput*    tdiReader;
00157                         CIndexInput*    dfiReader;
00158                         CIndexInput*    ptiReader;
00159 
00160                         int                             dfiLen;         //dfi文件Block长度
00161                         int                             ptiLen;         //pti文件Block长度
00162                         
00163                         int                             cur;            //当前处理的Term位置
00164                         friend class CWordFieldMerger;
00165                 };
00166 
00167                 class CWordFieldMerger :        public CFieldMerger
00168                 {
00169                 public:
00170                         CWordFieldMerger(void);
00171                         virtual ~CWordFieldMerger(void);
00172                 public:
00173                         void    addField(CBarrelInfo* pBarrelInfo,CFieldInfo* pFieldInfo);
00174                         count_t merge(CDirectory* pDirectory,CIndexOutputDescriptor* pDesc);
00175                 protected:
00176                         void    close();
00177                 protected:
00178                         vector<CWordFieldMergeEntry*>   m_mergeFields;
00179                 };
00180         }
00181 }
00182 
00183 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex