FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn 00012 // Created : 2005/12/09 00013 // 00014 00015 #ifndef _INDEXMERGER_H 00016 #define _INDEXMERGER_H 00017 00018 #include "BarrelsInfo.h" 00019 #include "FieldsInfo.h" 00020 #include "../store/IndexInput.h" 00021 #include "../store/IndexOutput.h" 00022 #include "../store/Directory.h" 00023 00024 #include "FieldMerger.h" 00025 00026 #include <vector> 00027 #include <queue> 00028 using namespace std; 00029 using namespace firtex::store; 00030 00031 namespace firtex 00032 { 00033 namespace index 00034 { 00035 class CBarrelMergeEntry 00036 { 00037 CBarrelMergeEntry() 00038 { 00039 m_pBarrelInfo = NULL; 00040 m_pFieldsInfo = NULL; 00041 m_level = 0; 00042 } 00043 00044 CBarrelMergeEntry(CBarrelInfo* i,int l) 00045 { 00046 m_pBarrelInfo = new CBarrelInfo(*i); 00047 m_pFieldsInfo = new CFieldsInfo(); 00048 m_level = l; 00049 00050 } 00051 CBarrelMergeEntry(const string& name,docid_t minDID,count_t docCount,int l) 00052 { 00053 m_pBarrelInfo = new CBarrelInfo(name,minDID,docCount); 00054 m_pFieldsInfo = new CFieldsInfo(); 00055 m_level = l; 00056 00057 } 00058 ~CBarrelMergeEntry() 00059 { 00060 if(m_pFieldsInfo) 00061 { 00062 delete m_pFieldsInfo; 00063 m_pFieldsInfo = NULL; 00064 } 00065 if(m_pBarrelInfo) 00066 { 00067 delete m_pBarrelInfo; 00068 m_pBarrelInfo = NULL; 00069 } 00070 } 00071 public: 00072 00073 static bool less (CBarrelMergeEntry* pElem1, CBarrelMergeEntry* pElem2 ) 00074 { 00075 return pElem1->m_pBarrelInfo->docCount() < pElem2->m_pBarrelInfo->docCount(); 00076 } 00077 00078 void load(CDirectory* pDirectory) 00079 { 00080 CIndexInput* fdiStream = pDirectory->openInput(m_pBarrelInfo->name() + ".fdi"); 00081 m_pFieldsInfo->read(fdiStream);//读取域信息 00082 fdiStream->close(); 00083 delete fdiStream; 00084 } 00085 int size() 00086 { 00087 return (m_pBarrelInfo?m_pBarrelInfo->docCount():0); 00088 } 00089 protected: 00090 CBarrelInfo* m_pBarrelInfo; //索引信息 00091 CFieldsInfo* m_pFieldsInfo; // 00092 int m_level; //合并级别 00093 00094 friend class CIndexMerger; 00095 }; 00096 00097 00098 typedef vector<CBarrelMergeEntry*>BarrelVector; 00099 00100 class CIndexMerger 00101 { 00102 public: 00103 CIndexMerger(); 00104 CIndexMerger(char* buffer,size_t bufsize); 00105 virtual~CIndexMerger(void); 00106 public: 00111 bool merge(CDirectory* pDirectory); 00112 00119 void merge(CDirectory* pDirectoryDest,CDirectory* pDirectorySrc,bool bDeleteSrc = false); 00120 00126 bool optimizeMerge(CDirectory* pDirectory,bool bDeleteDocs = false); 00127 00128 protected: 00129 CBarrelMergeEntry* mergeBarrels(CDirectory* pDirectory,BarrelVector* barrels,int level,int order); 00130 bool mergeStoredFields(CDirectory* pDirectory,BarrelVector* barrels,int level,int order); 00131 void mergeNorms(CDirectory* pDirectory,BarrelVector* barrels,int level,int order); 00132 void mergeDeletedDocs(CDirectory* pDirectory,BarrelVector* barrels,int level,int order); 00133 void mergeTermVector(CDirectory* pDirectory,BarrelVector* barrels,int level,int order); 00134 00135 void updateBarrelDocID(CBarrelMergeEntry*pStart,BarrelVector bLevel[],int level); 00136 void renameBarrels(CDirectory* pDirectory,BarrelVector bLevel[],int numLevels,const tchar* suffix ,bool bClear); 00137 protected: 00138 char* m_buffer; 00139 size_t m_bufsize; 00140 }; 00141 } 00142 } 00143 00144 00145 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex