FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

Tokens.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2005/11/20
00013 //
00014 #ifndef _TOKENS_H
00015 #define _TOKENS_H
00016 
00017 #if _MSC_VER > 1000
00018 #pragma once
00019 #endif // _MSC_VER > 1000
00020 
00021 #include <vector>
00022 #include "Token.h"
00023 using namespace std;
00024 
00025 namespace firtex
00026 {
00027         namespace analyzer
00028         {
00029 #define  TOKENS_MAX     10000 // NOTE(review): not referenced in this header; presumably the default token limit (m_nMaxTokens) - confirm in Tokens.cpp
00030                 class CTokens // Token container: tokens of one TokenType kept in m_buffer, walked with startIterator()/hasNext()/next()
00031                 {                       
00032                 public:
00033                         CTokens(void);
00034                         CTokens(TokenType type);
00035                         CTokens(TokenType type,int nMaxTokens);
00036                         CTokens(TokenType type,char* buffer,size_t size); // NOTE(review): presumably wraps a caller-supplied buffer (see m_bOwn) - confirm
00037                         CTokens(const CTokens& clone); // NOTE(review): a copy constructor is declared but no copy assignment operator - confirm assignment is never used
00038                         virtual ~CTokens(void);
00039                 public:
00040                         void            startIterator(); // iteration interface; the definitions are not in this header
00041                         bool            hasNext();
00042                         CToken*         next();
00043                 public:
00044                         TokenType       getType(){return m_type;}
00045                         void            setType(TokenType type){m_type = type;}
00046 
00047                         size_t          getCapacity(){return m_nCapacity;}
00048                         int                     getTokenNum(){return m_nTokenNum;}
00049 
00050                         int                     getMaxTokens(){return m_nMaxTokens;}
00051                         void            setMaxTokens(int nMaxTokens);
00052 
00053                         void            clear();                        
00054                         char*           getBuffer(){return m_buffer;}   
00055                         
00056                         inline bool     appendWord(termid_t tid); // the append*() members are defined inline after the class
00057                         inline bool     appendDate(int64_t dt);
00058                         inline bool     appendName();
00059                         inline bool     appendCompany();
00060                         inline bool     appendNum();
00061                         inline bool     appendAlpha();
00062                         inline bool     appendAlphaNum();
00063                         inline bool     appendURL();
00064                 protected:
00069                         void            grow(size_t nNewCapacity=0);
00070                         void            readString(string& s);
00071                         void            writeString(const string& s);
00072                         int32_t         readVInt();
00073                         void            writeVInt(int32_t i);
00074                 protected:                                              
00075                         TokenType       m_type;                         //token type
00076                         char*           m_buffer;                       //token buffer
00077                         size_t          m_nCapacity;            //buffer capacity; if m_tokenSize>0, the actual buffer size is m_nCapacity*m_tokenSize
00078                                                                                         //otherwise m_nCapacity itself is the actual capacity                     
00079                         size_t          m_nTokenPointer;        //position of the last token in m_buffer
00080 
00081                         int                     m_nTokenNum;            //number of individual tokens contained                     
00082                         int                     m_tokenSize;            //length of a single token in bytes; -1 means variable length
00083                         int                     m_nMaxTokens;           //maximum number of tokens allowed
00084 
00085                         int                     m_nIterator;            //saves the iteration position while iterating
00086                         size_t          m_pCurTokenPointer;     //saves the position of the current token in m_buffer while iterating
00087                         bool            m_bOwn; // NOTE(review): presumably whether this object owns (and must free) m_buffer - confirm in Tokens.cpp
00088 
00089                         vector<CToken*> m_tokens;
00090                 };
00092                 //inline function definitions
00093                 inline bool CTokens::appendWord(termid_t tid) // stores tid at index m_nTokenNum of m_buffer; returns false when the token limit is reached
00094                 {
00095                         if(!(m_nTokenNum < m_nMaxTokens)) return false; // limit reached: nothing is written
00096                         termid_t* const pSlots = (termid_t*)m_buffer;   // m_buffer viewed as an array of termid_t
00097                         pSlots[m_nTokenNum] = tid;
00098                         ++m_nTokenNum;
00099                         return true;
00100                 }
00101                 inline bool CTokens::appendDate(int64_t dt) // stores dt at index m_nTokenNum of m_buffer; returns false when the token limit is reached
00102                 {
00103                         if(!(m_nTokenNum < m_nMaxTokens)) return false; // limit reached: nothing is written
00104                         int64_t* const pSlots = (int64_t*)m_buffer;     // m_buffer viewed as an array of int64_t
00105                         pSlots[m_nTokenNum] = dt;
00106                         ++m_nTokenNum;
00107                         return true;
00108                 }
00109                 inline bool     CTokens::appendName()
00110                 {
00111                         return true; // no-op in this version: nothing is appended, always returns true
00112                 }
00113                 inline bool     CTokens::appendCompany()
00114                 {
00115                         return true; // no-op in this version: nothing is appended, always returns true
00116                 }
00117                 inline bool     CTokens::appendNum()
00118                 {
00119                         return true; // no-op in this version: nothing is appended, always returns true
00120                 }
00121                 inline bool     CTokens::appendAlpha()
00122                 {
00123                         return true; // no-op in this version: nothing is appended, always returns true
00124                 }
00125                 inline bool     CTokens::appendAlphaNum()
00126                 {
00127                         return true; // no-op in this version: nothing is appended, always returns true
00128                 }
00129                 inline bool CTokens::appendURL()
00130                 {
00131                         return true; // no-op in this version: nothing is appended, always returns true
00132                 }
00133         }
00134 }
00135 
00136 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex