FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

IndexWriter.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn
00012 // Created      : 2005/11/20
00013 //
00014 
00015 #ifndef __INDEXWRITER_H
00016 #define __INDEXWRITER_H
00017 #include <string>
00018 #include <iostream>
00019 #include <fstream>
00020 
00021 using namespace std;
00022 
00023 #include "Index.h"
00024 #include "../analyzer/Analyzer.h"
00025 #include "../utility/Reader.h"
00026 #include "../document/Document.h"
00027 #include "../store/Directory.h"
00028 #include "IndexParameter.h"
00029 #include "../utility/Argument.h"
00030 //#include "../collection/Collection.h"
00031 
00032 
00033 using namespace std;
00034 using namespace firtex::analyzer;
00035 using namespace firtex::utility;
00036 using namespace firtex;
00037 using namespace firtex::store;
00038 
00039 namespace firtex
00040 {
00041         namespace collection
00042         {
00043                 class CCollection; // forward declaration only; this header uses it solely as a pointer return type (prepareBuildIndex)
00044         }
00045 }
00046 
00047 namespace firtex
00048 {
00049         namespace index
00050         {               
00051                 
00052                 class CIndexWriter // Writer front-end that pairs a CAnalyzer with a CIndex and adds documents to that index
00053                 {
00054                 public:
00055                         enum MergeMode
00056                         {
00057                                 NO_MERGE                        = 0,    // do not merge
00058                                 MERGE                           = 1,    // merge
00059                                 OPTIMIZE                        = 2,    // optimize: ends up with a single index; this increases index merge time
00060                                                                                 // and the cost of incremental indexing, but makes searching faster
00061                                 DELDOCS_OPTIMIZE        = 3,    // same as OPTIMIZE, but also removes documents flagged as deleted; slower than an OPTIMIZE merge
00062                         };
00063                 public:
00064                         CIndexWriter(const tchar* indexName,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); // construct from an index name; same parameters as open(const tchar*, ...)
00065                         CIndexWriter(CDirectory* pDirectory,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); // construct from a CDirectory; same parameters as open(CDirectory*, ...)
00066                         CIndexWriter(CAnalyzer* analyzer,CIndex* pIndex); // construct from an analyzer and an already existing CIndex
00067                         virtual~CIndexWriter(); // virtual destructor; NOTE(review): whether it releases the analyzer/index is not visible here - confirm in the .cpp
00068                 public:
00069                         
00078                         void            open(const tchar* indexName,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); // index identified by name; bCreate defaults to true, merge mode defaults to MERGE
00079 
00088                         void            open(CDirectory* pDirectory,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); // same as above, but the index location is given as a CDirectory
00094                         bool            addDocument(const tchar* filename); // add a document given by file name; presumably returns true on success - confirm
00095 
00101                         bool            addDocument(CIndexParameter* pIndexParam); // add using a CIndexParameter; exact semantics are defined in the .cpp
00102 
00108                         bool            addDocument(document::CDocument* pDoc); // defined inline below: analyzes pDoc, then passes the result to the index
00109 
00114                         bool            addIndex(CDirectory* pDirectory); // presumably adds an existing index found in pDirectory - confirm
00115 
00119                         void            mergeIndex(); // NOTE(review): presumably merges the index - confirm against the .cpp
00120 
00125                         void            optimizeIndex(bool bDeleteDocs = false); // NOTE(review): bDeleteDocs presumably selects DELDOCS_OPTIMIZE-style behavior - confirm
00126 
00130                         void            close();                // NOTE(review): presumably finishes writing; merge behavior on close is not visible here
00131                 public:
00136                         void            attachAnalyzer(CAnalyzer* pAnalyzer); // sets the analyzer used by this writer (presumably stored in m_pAnalyzer)
00137 
00142                         CAnalyzer*      detachAnalyzer(); // returns a CAnalyzer*; presumably the previously attached one - confirm
00143 
00148                         void            attachIndex(CIndex* pIndex); // sets the index used by this writer (presumably stored in m_pIndexer)
00149 
00154                         CIndex*         detachIndex(); // returns a CIndex*; presumably the previously attached one - confirm
00155 
00161                         CIndex*         getIndex(){return m_pIndexer;}          // returns the stored m_pIndexer pointer
00162                 public:
00168                         static void     buildIndex(const tchar* argFile); // static entry point taking an argument file name
00169 
00174                         static void buildIndex(CArgument* pArgument); // static entry point taking an already built CArgument
00175 
00183                         static firtex::collection::CCollection* prepareBuildIndex(const tchar* argFile); // returns a CCollection*; ownership of the result is not visible here - confirm
00184 
00192                         static firtex::collection::CCollection* prepareBuildIndex(CArgument* pArgument); // CArgument overload of the above
00193                 protected:                      
00194                         CAnalyzer*              m_pAnalyzer; // analyzer applied to documents in addDocument(CDocument*)
00195                         CIndex*                 m_pIndexer; // index that receives analyzed documents; also returned by getIndex()
00196                         MergeMode               m_eMergeMode; // one of the MergeMode values above
00197                         bool                    m_bMerged; // NOTE(review): presumably records whether a merge has already run - confirm
00198                 };
00200                 //inline functions
00201                 inline bool CIndexWriter::addDocument(document::CDocument* pDoc)
00202                 {
00203                         // Analyze pDoc and pass the result to the index; false if any step is unavailable.
00204                         if(!m_pAnalyzer)
00205                                 return false;
00206                         // The analyzer runs before the index pointer is looked at.
00207                         document::CDocument* pAnalyzed = m_pAnalyzer->analyze(pDoc);
00208                         if(!pAnalyzed || !m_pIndexer)
00209                                 return false;
00210                         // Whatever the index's addDocument returns is not inspected.
00211                         m_pIndexer->addDocument(pAnalyzed);
00212                         return true;
00213                 }
00214         }
00215 }
00216 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex