FirteX - 高性能全文索引和检索平台 API Documentation
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2005/11/24 00013 // 00014 #ifndef _FISTOPPERFILTER_H 00015 #define _FISTOPPERFILTER_H 00016 #include "../utility/StdHeader.h" 00017 00018 #include "TokenFilter.h" 00019 00020 using namespace std; 00021 00022 #define STOPWORD_ID -1 00023 00024 namespace firtex 00025 { 00026 namespace analyzer 00027 { 00028 class CStopFilter : public CTokenFilter 00029 { 00030 public: 00031 CStopFilter(const tchar* stopfile); 00032 CStopFilter(); 00033 virtual ~CStopFilter(void); 00034 00035 public: 00036 void load(const tchar* filename); 00037 void add(const termid_t tid); 00038 void clear(); 00039 00040 bool stopWord(const termid_t tid) const; 00041 00042 protected: 00043 CTokens* filterInternal(CTokens* pInputTokens); 00044 protected: 00045 tstring m_sStopwordFile; 00046 00047 bool* m_pStopList; //停用词数组 00048 termid_t m_minTerm; //最小停用词 00049 termid_t m_maxTerm; //最大停用词 00050 }; 00052 // 00053 inline bool CStopFilter::stopWord(const termid_t tid) const 00054 { 00055 00056 if( (tid >= m_minTerm) && (tid <= m_maxTerm) && (m_pStopList[tid-m_minTerm]) ) 00057 return true; 00058 else return false; 00059 } 00060 } 00061 } 00062 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex