FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn 00012 // Created : 2006/6/10 00013 // 00014 00015 #ifndef _DATEFIELDMERGER_H 00016 #define _DATEFIELDMERGER_H 00017 00018 #include "../utility/StdHeader.h" 00019 #include <vector> 00020 #include "FieldMerger.h" 00021 #include "../store/IndexInput.h" 00022 #include "TermInfo.h" 00023 using namespace std; 00024 using namespace firtex::store; 00025 00026 00027 namespace firtex 00028 { 00029 namespace index 00030 { 00031 struct CDateFieldMergeEntry:public CFieldMergeEntry 00032 { 00033 public: 00034 CDateFieldMergeEntry(CBarrelInfo* pBarrelInfo,CFieldInfo* pFieldInfo):CFieldMergeEntry(pBarrelInfo,pFieldInfo) 00035 { 00036 count = -1; 00037 tid = -1; 00038 firstDocID = -1; 00039 lastDocID = -1; 00040 00041 tdiReader = NULL; 00042 dfiReader = NULL; 00043 00044 dfiLen = 0; 00045 cur = 0; 00046 } 00047 ~CDateFieldMergeEntry() 00048 { 00049 if(tdiReader != NULL) 00050 { 00051 tdiReader->close(); 00052 delete tdiReader; 00053 } 00054 if(dfiReader != NULL) 00055 { 00056 dfiReader->close(); 00057 delete dfiReader; 00058 } 00059 } 00060 bool open(CDirectory* pDirectory,char* buf,size_t bufsize) 00061 { 00062 if(bufsize > 3*INDEXINPUT_BUFFSIZE) 00063 { 00064 size_t len = bufsize/2; 00065 tdiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".tdi",buf,len); 00066 buf += len; 00067 dfiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".dfi",buf,len); 00068 } 00069 else 00070 { 00071 tdiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".tdi"); 00072 dfiReader = pDirectory->openInput(m_pBarrelInfo->name() + ".dfi"); 00073 } 00074 int64_t tLen,dLen,pLen; 00075 m_pFieldInfo->getLength(&tLen,&dLen,&pLen); 00076 if(tLen <= 0) 00077 { 00078 count = 0; 00079 delete tdiReader; 00080 tdiReader = NULL; 00081 delete dfiReader; 00082 dfiReader = NULL; 00083 return false; 00084 } 00085 00086 00087 tdiReader->seek(m_pFieldInfo->getIndexOffset()); 00088 tdiReader->setLength(tdiReader->getFilePointer() + tLen); 00089 count = tdiReader->readInt(); //读Term总数 00090 if(count <= 0) 00091 return false; 00093 tid = tdiReader->readVInt(); 00094 count_t df = tdiReader->readVInt(); 00095 fileoffset_t of1 = tdiReader->readVLong(); 00096 ti.set(df,of1,0); 00097 00098 dfiReader->seek(of1); 00099 00100 dfiReader->setLength(dfiReader->getFilePointer() + dLen); 00101 00102 lastDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt(); 00103 dfiLen = dfiReader->readVInt(); 00104 firstDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt(); 00105 cur++; 00106 return true; 00107 } 00108 bool next() 00109 { 00110 if(cur >= count) 00111 { 00112 return false; 00113 tid = -1; 00114 } 00115 tid = tdiReader->readVInt(); 00116 count_t df = tdiReader->readVInt(); 00117 fileoffset_t of1 = tdiReader->readVLong(); 00118 ti.set(df,of1,0); 00119 dfiReader->seek(of1); 00120 00121 lastDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt(); 00122 dfiLen = dfiReader->readVInt(); 00123 firstDocID = m_pBarrelInfo->minDocID() + dfiReader->readVInt(); 00124 cur++; 00125 return true; 00126 } 00127 protected: 00128 CDateFieldMergeEntry() 00129 { 00130 } 00131 public: 00132 count_t count; //Term Count 00133 termid_t tid; //Term ID 00134 CTermInfo ti; //Term Info 00135 docid_t firstDocID; //Term的第一个文档编号 00136 docid_t lastDocID; //Term的最后一个文档编号 00137 00138 CIndexInput* tdiReader; 00139 CIndexInput* dfiReader; 00140 00141 int dfiLen; //dfi文件Block长度 00142 00143 int cur; //当前处理的Term位置 00144 friend class CDateFieldMerger; 00145 }; 00146 00147 class CDateFieldMerger : public CFieldMerger 00148 { 00149 public: 00150 CDateFieldMerger(void); 00151 virtual ~CDateFieldMerger(void); 00152 public: 00153 void addField(CBarrelInfo* pBarrelInfo,CFieldInfo* pFieldInfo); 00154 count_t merge(CDirectory* pDirectory,CIndexOutputDescriptor* pDesc); 00155 protected: 00156 void close(); 00157 protected: 00158 vector<CDateFieldMergeEntry*> m_mergeFields; 00159 }; 00160 } 00161 } 00162 00163 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex