FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

StopFilter.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2005/11/24
00013 //
00014 #ifndef _FISTOPPERFILTER_H
00015 #define _FISTOPPERFILTER_H
00016 #include "../utility/StdHeader.h"
00017 
00018 #include "TokenFilter.h"
00019 
00020 using namespace std;
00021 
00022 #define STOPWORD_ID     -1      // NOTE(review): presumably the sentinel term id assigned to stop words -- no use is visible in this header, confirm against callers
00023 
00024 namespace firtex
00025 {
00026         namespace analyzer
00027         {
00028                 class CStopFilter :     public CTokenFilter       // Token filter that keeps a table of stop-word term ids.
00029                 {
00030                 public:
00031                         CStopFilter(const tchar* stopfile);      // NOTE(review): presumably loads the stop words from `stopfile` -- body not visible here
00032                         CStopFilter();
00033                         virtual ~CStopFilter(void);
00034         
00035                 public:
00036                         void    load(const tchar* filename);    // NOTE(review): presumably reads the stop-word list from `filename` -- body not visible here
00037                         void    add(const termid_t tid);                // NOTE(review): presumably registers `tid` as a stop word -- body not visible here
00038                         void    clear();                        // NOTE(review): presumably empties the stop-word table -- body not visible here
00039                         
00040                         bool    stopWord(const termid_t tid) const;     // true when `tid` is flagged in m_pStopList; defined inline after the class
00041                 
00042                 protected:
00043                         CTokens*        filterInternal(CTokens* pInputTokens);  // NOTE(review): looks like the CTokenFilter hook that performs the filtering -- confirm against TokenFilter.h
00044                 protected:
00045                         tstring         m_sStopwordFile;        // NOTE(review): presumably the path of the stop-word file -- confirm
00046 
00047                         bool*           m_pStopList; // stop-word array; stopWord() indexes it with (tid - m_minTerm)
00048                         termid_t        m_minTerm;       // smallest stop-word term id (lower bound tested by stopWord())
00049                         termid_t        m_maxTerm;       // largest stop-word term id (upper bound tested by stopWord())
00050                 };
00052                 // Returns true when `tid` lies in [m_minTerm, m_maxTerm] and its entry in m_pStopList is set; false otherwise.
00053                 inline bool CStopFilter::stopWord(const termid_t tid) const
00054                 {
00055                         // The range test runs first (&& short-circuits), so m_pStopList is only read at offsets 0 .. m_maxTerm - m_minTerm.
00056                         if( (tid >= m_minTerm) && (tid <= m_maxTerm) && (m_pStopList[tid-m_minTerm]) )
00057                                 return true;
00058                         else return false;      
00059                 }
00060         }
00061 }
00062 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex