FirteX - 高性能全文索引和检索平台 API Documentation
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2005/11/20 00013 // 00014 #ifndef _TOKENS_H 00015 #define _TOKENS_H 00016 00017 #if _MSC_VER > 1000 00018 #pragma once 00019 #endif // _MSC_VER > 1000 00020 00021 #include <vector> 00022 #include "Token.h" 00023 using namespace std; 00024 00025 namespace firtex 00026 { 00027 namespace analyzer 00028 { 00029 #define TOKENS_MAX 10000 00030 class CTokens 00031 { 00032 public: 00033 CTokens(void); 00034 CTokens(TokenType type); 00035 CTokens(TokenType type,int nMaxTokens); 00036 CTokens(TokenType type,char* buffer,size_t size); 00037 CTokens(const CTokens& clone); 00038 virtual ~CTokens(void); 00039 public: 00040 void startIterator(); 00041 bool hasNext(); 00042 CToken* next(); 00043 public: 00044 TokenType getType(){return m_type;} 00045 void setType(TokenType type){m_type = type;} 00046 00047 size_t getCapacity(){return m_nCapacity;} 00048 int getTokenNum(){return m_nTokenNum;} 00049 00050 int getMaxTokens(){return m_nMaxTokens;} 00051 void setMaxTokens(int nMaxTokens); 00052 00053 void clear(); 00054 char* getBuffer(){return m_buffer;} 00055 00056 inline bool appendWord(termid_t tid); 00057 inline bool appendDate(int64_t dt); 00058 inline bool appendName(); 00059 inline bool appendCompany(); 00060 inline bool appendNum(); 00061 inline bool appendAlpha(); 00062 inline bool appendAlphaNum(); 00063 inline bool appendURL(); 00064 protected: 00069 void grow(size_t nNewCapacity=0); 00070 void readString(string& s); 00071 void writeString(const string& 
s); 00072 int32_t readVInt(); 00073 void writeVInt(int32_t i); 00074 protected: 00075 TokenType m_type; //类型 00076 char* m_buffer; //token缓冲 00077 size_t m_nCapacity; //缓冲容量,如果m_tokenSize>0,则m_nCapacity*m_tokenSize为缓冲实际大小 00078 //否则,m_nCapacity即为实际容量大小 00079 size_t m_nTokenPointer; //最后一个Token在m_buffer中的位置 00080 00081 int m_nTokenNum; //包含单个Token个数 00082 int m_tokenSize; //单个token所占的长度,以字节为单位,-1表示变长 00083 int m_nMaxTokens; //允许最大Token数目 00084 00085 int m_nIterator; //用于在迭代时保存迭代位置 00086 size_t m_pCurTokenPointer; //用于在迭代时保存当前Token在m_buffer中的位置 00087 bool m_bOwn; 00088 00089 vector<CToken*> m_tokens; 00090 }; 00092 //inline Function 00093 inline bool CTokens::appendWord(termid_t tid) 00094 { 00095 if(m_nTokenNum >= m_nMaxTokens) 00096 return false; 00097 *(((termid_t*)m_buffer) + m_nTokenNum) = tid; 00098 m_nTokenNum++; 00099 return true; 00100 } 00101 inline bool CTokens::appendDate(int64_t dt) 00102 { 00103 if(m_nTokenNum >= m_nMaxTokens) 00104 return false; 00105 *(((int64_t*)m_buffer) + m_nTokenNum) = dt; 00106 m_nTokenNum++; 00107 return true; 00108 } 00109 inline bool CTokens::appendName() 00110 { 00111 return true; 00112 } 00113 inline bool CTokens::appendCompany() 00114 { 00115 return true; 00116 } 00117 inline bool CTokens::appendNum() 00118 { 00119 return true; 00120 } 00121 inline bool CTokens::appendAlpha() 00122 { 00123 return true; 00124 } 00125 inline bool CTokens::appendAlphaNum() 00126 { 00127 return true; 00128 } 00129 inline bool CTokens::appendURL() 00130 { 00131 return true; 00132 } 00133 } 00134 } 00135 00136 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex