FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

IndexMerger.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn
00012 // Created      : 2005/12/09
00013 //
00014 
00015 #ifndef _INDEXMERGER_H
00016 #define _INDEXMERGER_H
00017 
00018 #include "BarrelsInfo.h"
00019 #include "FieldsInfo.h"
00020 #include "../store/IndexInput.h"
00021 #include "../store/IndexOutput.h"
00022 #include "../store/Directory.h"
00023 
00024 #include "FieldMerger.h"
00025 
00026 #include <vector>
00027 #include <queue>
00028 using namespace std;
00029 using namespace firtex::store;
00030 
00031 namespace firtex
00032 {
00033         namespace index
00034         {               
00035                 class CBarrelMergeEntry
00036                 {
00037                         CBarrelMergeEntry()
00038                         {
00039                                 m_pBarrelInfo = NULL;
00040                                 m_pFieldsInfo = NULL;
00041                                 m_level = 0;
00042                         }
00043 
00044                         CBarrelMergeEntry(CBarrelInfo* i,int l)
00045                         {
00046                                 m_pBarrelInfo = new CBarrelInfo(*i);
00047                                 m_pFieldsInfo = new CFieldsInfo();
00048                                 m_level = l;
00049                                 
00050                         }
00051                         CBarrelMergeEntry(const string& name,docid_t minDID,count_t docCount,int l)
00052                         {
00053                                 m_pBarrelInfo = new CBarrelInfo(name,minDID,docCount);
00054                                 m_pFieldsInfo = new CFieldsInfo();
00055                                 m_level = l;
00056 
00057                         }
00058                         ~CBarrelMergeEntry()
00059                         {
00060                                 if(m_pFieldsInfo)
00061                                 {
00062                                         delete m_pFieldsInfo;
00063                                         m_pFieldsInfo = NULL;
00064                                 }
00065                                 if(m_pBarrelInfo)
00066                                 {
00067                                         delete m_pBarrelInfo;
00068                                         m_pBarrelInfo = NULL;
00069                                 }
00070                         }
00071                 public:
00072                         
00073                         static bool less (CBarrelMergeEntry* pElem1, CBarrelMergeEntry* pElem2 )
00074                         {
00075                                 return pElem1->m_pBarrelInfo->docCount() < pElem2->m_pBarrelInfo->docCount();
00076                         }       
00077 
00078                         void load(CDirectory* pDirectory)
00079                         {
00080                                 CIndexInput* fdiStream = pDirectory->openInput(m_pBarrelInfo->name() + ".fdi");
00081                                 m_pFieldsInfo->read(fdiStream);//读取域信息
00082                                 fdiStream->close();
00083                                 delete fdiStream;                               
00084                         }
00085                         int size()
00086                         {
00087                                 return (m_pBarrelInfo?m_pBarrelInfo->docCount():0);
00088                         }
00089                 protected:
00090                         CBarrelInfo*    m_pBarrelInfo;          //索引信息
00091                         CFieldsInfo*    m_pFieldsInfo;          //
00092                         int                             m_level;                        //合并级别
00093 
00094                         friend class CIndexMerger;
00095                 };
00096                                                 
00097 
00098                 typedef vector<CBarrelMergeEntry*>BarrelVector;
00099 
00100                 class CIndexMerger
00101                 {
00102                 public:
00103                         CIndexMerger();
00104                         CIndexMerger(char* buffer,size_t bufsize);
00105                         virtual~CIndexMerger(void);
00106                 public:                 
00111                         bool    merge(CDirectory* pDirectory);
00112                         
00119                         void    merge(CDirectory* pDirectoryDest,CDirectory* pDirectorySrc,bool bDeleteSrc = false);
00120 
00126                         bool    optimizeMerge(CDirectory* pDirectory,bool bDeleteDocs = false);
00127                 
00128                 protected:
00129                         CBarrelMergeEntry*      mergeBarrels(CDirectory* pDirectory,BarrelVector* barrels,int level,int order);
00130                         bool                            mergeStoredFields(CDirectory* pDirectory,BarrelVector* barrels,int level,int order);
00131                         void                            mergeNorms(CDirectory* pDirectory,BarrelVector* barrels,int level,int order);
00132                         void                            mergeDeletedDocs(CDirectory* pDirectory,BarrelVector* barrels,int level,int order);
00133                         void                            mergeTermVector(CDirectory* pDirectory,BarrelVector* barrels,int level,int order);
00134 
00135                         void                            updateBarrelDocID(CBarrelMergeEntry*pStart,BarrelVector bLevel[],int level);
00136                         void                            renameBarrels(CDirectory* pDirectory,BarrelVector bLevel[],int numLevels,const tchar* suffix ,bool bClear);
00137                 protected:
00138                         char*   m_buffer;
00139                         size_t  m_bufsize;                      
00140                 };
00141         }
00142 }
00143 
00144 
00145 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex