FirteX - 高性能全文索引和检索平台 API Documentation
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn 00012 // Created : 2005/11/20 00013 // 00014 00015 #ifndef __INDEXWRITER_H 00016 #define __INDEXWRITER_H 00017 #include <string> 00018 #include <iostream> 00019 #include <fstream> 00020 00021 using namespace std; 00022 00023 #include "Index.h" 00024 #include "../analyzer/Analyzer.h" 00025 #include "../utility/Reader.h" 00026 #include "../document/Document.h" 00027 #include "../store/Directory.h" 00028 #include "IndexParameter.h" 00029 #include "../utility/Argument.h" 00030 //#include "../collection/Collection.h" 00031 00032 00033 using namespace std; 00034 using namespace firtex::analyzer; 00035 using namespace firtex::utility; 00036 using namespace firtex; 00037 using namespace firtex::store; 00038 00039 namespace firtex 00040 { 00041 namespace collection 00042 { 00043 class CCollection; 00044 } 00045 } 00046 00047 namespace firtex 00048 { 00049 namespace index 00050 { 00051 00052 class CIndexWriter 00053 { 00054 public: 00055 enum MergeMode 00056 { 00057 NO_MERGE = 0, //不合并 00058 MERGE = 1, //合并 00059 OPTIMIZE = 2, //优化,最终仅生成当个索引,这将增加索引合并时间, 00060 //同时增加增量索引的代价,但可以较快搜索 00061 DELDOCS_OPTIMIZE = 3, //同OPTIMIZE,但是将删除已标志删除的文档,比OPTIMIZE合并更慢 00062 }; 00063 public: 00064 CIndexWriter(const tchar* indexName,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); 00065 CIndexWriter(CDirectory* pDirectory,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); 00066 CIndexWriter(CAnalyzer* analyzer,CIndex* pIndex); 00067 virtual~CIndexWriter(); 00068 public: 00069 00078 void 
open(const tchar* indexName,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); 00079 00088 void open(CDirectory* pDirectory,CAnalyzer* analyzer,bool bCreate = true,MergeMode mm = MERGE); 00094 bool addDocument(const tchar* filename); 00095 00101 bool addDocument(CIndexParameter* pIndexParam); 00102 00108 bool addDocument(document::CDocument* pDoc); 00109 00114 bool addIndex(CDirectory* pDirectory); 00115 00119 void mergeIndex(); 00120 00125 void optimizeIndex(bool bDeleteDocs = false); 00126 00130 void close(); 00131 public: 00136 void attachAnalyzer(CAnalyzer* pAnalyzer); 00137 00142 CAnalyzer* detachAnalyzer(); 00143 00148 void attachIndex(CIndex* pIndex); 00149 00154 CIndex* detachIndex(); 00155 00161 CIndex* getIndex(){return m_pIndexer;} 00162 public: 00168 static void buildIndex(const tchar* argFile); 00169 00174 static void buildIndex(CArgument* pArgument); 00175 00183 static firtex::collection::CCollection* prepareBuildIndex(const tchar* argFile); 00184 00192 static firtex::collection::CCollection* prepareBuildIndex(CArgument* pArgument); 00193 protected: 00194 CAnalyzer* m_pAnalyzer; 00195 CIndex* m_pIndexer; 00196 MergeMode m_eMergeMode; 00197 bool m_bMerged; 00198 }; 00200 //inline functions 00201 inline bool CIndexWriter::addDocument(document::CDocument* pDoc) 00202 { 00203 document::CDocument* pAnalyzedDoc = NULL; 00204 if(m_pAnalyzer) 00205 pAnalyzedDoc = m_pAnalyzer->analyze(pDoc); 00206 else return false; 00207 if(pAnalyzedDoc == NULL) 00208 return false; 00209 if(m_pIndexer) 00210 m_pIndexer->addDocument(pAnalyzedDoc); 00211 else return false; 00212 return true; 00213 } 00214 } 00215 } 00216 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex