FirteX - 高性能全文索引和检索平台 API Documentation
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2005/11/20 00013 // 00014 #ifndef __ANALYZER_H 00015 #define __ANALYZER_H 00016 00017 #if _MSC_VER > 1000 00018 #pragma once 00019 #endif // _MSC_VER > 1000 00020 00021 #include "../utility/StdHeader.h" 00022 #include "../document/Document.h" 00023 #include "../parser/Parser.h" 00024 #include "Tokens.h" 00025 #include "TokenFilter.h" 00026 #include "../index/IndexParameter.h" 00027 #include "../document/doctypedef.h" 00028 00029 using namespace firtex; 00030 using namespace firtex::parser; 00031 using namespace firtex::index; 00032 00033 namespace firtex 00034 { 00035 namespace analyzer 00036 { 00037 class CAnalyzer 00038 { 00039 public: 00040 static const tstring category; 00041 static const tstring identifier; 00042 public: 00043 CAnalyzer(); 00044 CAnalyzer(CParser* pParser); 00045 virtual ~CAnalyzer(void); 00046 public: 00047 public: 00051 virtual tstring getCategory() { return category; } 00055 virtual tstring getIdentifier() { return m_identifier; } 00060 void attachParser(CParser* pParser){m_pParser = pParser;} 00061 00065 CParser* detachParser(); 00066 00071 void attachTokenFilter(CTokenFilter* pTokenFilter); 00072 00076 CTokenFilter* detachTokenFilter(); 00077 00083 document::CDocument* analyze(CIndexParameter* pArg); 00084 00090 document::CDocument* analyze(document::CDocument* pParsedDoc); 00091 00098 CTokens* nextTokens(CReader* reader,CTokens* pInput); 00099 00103 virtual void close(); 00104 public: 00109 virtual TokenType 
getTokenType() = 0; 00110 protected: 00117 virtual CTokens* nextTokensInternal(CReader* reader,CTokens* pInput) = 0; 00118 protected: 00119 CTokens* tokenFilter(CTokens* pTokensInput); 00120 protected: 00121 tstring m_identifier; 00122 CTokenFilter* m_pTokenFilter; 00123 CParser* m_pParser; 00124 00125 document::CDocument* m_pCachedDoc; 00126 CReader* m_reader; 00127 }; 00129 //inline functions 00130 inline document::CDocument* CAnalyzer::analyze(CIndexParameter* pArg) 00131 { 00132 document::CDocument* pParsedDoc = NULL; 00133 if(m_pParser) 00134 { 00135 pParsedDoc = m_pParser->parse(pArg); 00136 } 00137 if(pParsedDoc == NULL) 00138 return NULL; 00139 return analyze(pParsedDoc); 00140 } 00141 inline CTokens* CAnalyzer::tokenFilter(CTokens* pTokensInput) 00142 { 00143 if(m_pTokenFilter) 00144 return m_pTokenFilter->filter(pTokensInput); 00145 return pTokensInput; 00146 } 00147 inline CTokens* CAnalyzer::nextTokens(CReader* reader,CTokens* pInput) 00148 { 00149 return tokenFilter(nextTokensInternal(reader,pInput)); 00150 } 00151 } 00152 } 00153 00154 00155 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex