FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn 00012 // Created : 2005/12/04 00013 // 00014 00015 #ifndef __FIELDSINFO_H 00016 #define __FIELDSINFO_H 00017 00018 #include <string> 00019 #include "../utility/StdHeader.h" 00020 #include "../document/Field.h" 00021 #include "../store/IndexInput.h" 00022 #include "../store/IndexOutput.h" 00023 00024 #include <map> 00025 using namespace std; 00026 using namespace firtex::document; 00027 using namespace firtex::store; 00028 00029 00030 namespace firtex 00031 { 00032 namespace index 00033 { 00034 class CFieldInfo 00035 { 00036 public: 00037 00038 CFieldInfo(void) 00039 { 00040 m_flag = 0; 00041 m_id = -1; 00042 reset(); 00043 } 00044 CFieldInfo(const CFieldInfo& src) 00045 { 00046 m_flag = src.m_flag; 00047 m_id = src.m_id; 00048 m_indexOffset = src.m_indexOffset; 00049 m_name = src.m_name; 00050 00051 m_totalTerms = src.m_totalTerms; 00052 m_distTerms = src.m_distTerms; 00053 m_indexOffset = src.m_indexOffset; 00054 00055 m_tdiLength = src.m_tdiLength; 00056 m_dfiLength = src.m_dfiLength; 00057 m_ptiLength = src.m_ptiLength; 00058 } 00059 ~CFieldInfo(void) 00060 { 00061 } 00062 public: 00063 fieldid_t getID(){return m_id;} 00064 void setID(fieldid_t fid){ m_id = fid;} 00065 00066 const tstring& getName(){return m_name;} 00067 void setName(const tstring& name){m_name = name;} 00068 00069 fileoffset_t getIndexOffset(){return m_indexOffset;} 00070 void setIndexOffset(fileoffset_t offset){m_indexOffset = offset;} 00071 00072 uint64_t numTerms(){return m_totalTerms;} 00073 uint64_t distinctNumTerms(){return m_distTerms;} 00074 00075 void setNumTerms(uint64_t n){m_totalTerms = n;} 00076 void setDistinctNumTerms(uint64_t n){m_distTerms = n;} 00077 00078 void setLength(int64_t tdiLen,int64_t dfiLen,int64_t ptiLen){m_tdiLength = tdiLen;m_dfiLength=dfiLen;m_ptiLength=ptiLen;} 00079 void getLength(int64_t* tdiLen,int64_t* dfiLen,int64_t* ptiLen); 00080 00081 FieldFlag getFlag(){return m_flag;} 00082 void setFlag(FieldFlag flag){m_flag = flag;} 00083 00084 FieldType getType(){return FIELDTYPE(m_flag);} 00085 00086 fielddata_t getDataType(){return FIELDDATATYPE(m_flag);} 00087 00088 bool isIndexed(){return ((m_flag & BIT_INDEX_MASK) == BIT_INDEX_MASK);} 00089 bool isAnalyzed(){return ((m_flag & BIT_ANALYZE_MASK)==BIT_ANALYZE_MASK);} 00090 00091 bool isStored(){return ((m_flag & BIT_STORE_MASK) == BIT_STORE_MASK);} 00092 bool isCompressed(){return ((m_flag & BIT_COMPRESS_MASK)==BIT_COMPRESS_MASK);} 00093 00094 bool isStoreTermVector(){return (isSequenceVector() || isFreqVector() || isPositionVector());} 00095 bool isSequenceVector(){return ((m_flag & BIT_TERMVECTOR_SEQUENCE_MASK) == BIT_TERMVECTOR_SEQUENCE_MASK);} 00096 bool isFreqVector(){return ((m_flag & BIT_TERMVECTOR_FREQ_MASK)==BIT_TERMVECTOR_FREQ_MASK);} 00097 bool isPositionVector(){return ((m_flag & BIT_TERMVECTOR_POSITION_MASK)==BIT_TERMVECTOR_POSITION_MASK);} 00098 00099 void reset(){m_totalTerms = m_distTerms = 0;m_indexOffset = -1;m_tdiLength = m_dfiLength = m_ptiLength = 0;} 00100 protected: 00101 fieldid_t m_id; 00102 tstring m_name; //字段名 00103 FieldFlag m_flag; //字段标志 00104 00105 /*以下6项只有当该字段是索引字段时有效*/ 00106 uint64_t m_totalTerms; //总共出现词数 00107 uint64_t m_distTerms; //非重复词数 00108 fileoffset_t m_indexOffset; //这个字段的索引数据在.tdi文件中的偏移 00109 00110 int64_t m_tdiLength; //该字段词典数据(.tdi)的长度 00111 int64_t m_dfiLength; //该字段文档和频率数据(.dfi)的长度 00112 int64_t m_ptiLength; //该字段位置数据(.pti)的长度 00113 00114 friend class CFieldsInfo; 00115 }; 00117 // 00118 inline void CFieldInfo::getLength(int64_t* tdiLen,int64_t* dfiLen,int64_t* ptiLen) 00119 { 00120 if(tdiLen) 00121 *tdiLen = m_tdiLength; 00122 if(dfiLen) 00123 *dfiLen = m_dfiLength; 00124 if(ptiLen) 00125 *ptiLen = m_ptiLength; 00126 } 00127 00128 00129 class CFieldsInfo 00130 { 00131 public: 00132 CFieldsInfo(); 00133 virtual ~CFieldsInfo(); 00134 public: 00138 void addField(CField* pField); 00139 00143 void addField(CFieldInfo* pFieldInfo); 00147 void read(CIndexInput* pIndexInput); 00151 void write(CIndexOutput* pIndexOutput); 00152 00156 void clear(); 00157 00161 void reset(); 00162 00163 00169 fieldid_t getFieldID(const string& fname); 00170 00176 string getFieldName(fieldid_t fid); 00177 00183 CFieldInfo* getField(fieldid_t fid); 00184 00190 CFieldInfo* getField(const tchar* field); 00191 00197 void setFieldOffset(fieldid_t fid,fileoffset_t offset); 00203 fileoffset_t getFieldOffset(fieldid_t fid); 00204 00209 void setDistinctNumTerms(fieldid_t fid,uint64_t distterms); 00210 00215 uint64_t distinctNumTerms(fieldid_t fid); 00216 00221 uint64_t numTerms(fieldid_t fid); 00222 00226 int numFields(){return (int)m_fdInfosByNum.size();} 00227 00231 int numIndexFields(); 00232 public: 00233 CFieldInfo* operator[](int i){return m_fdInfosByNum[i];} 00234 void startIterator(); 00235 bool hasNext(); 00236 CFieldInfo* next(); 00237 protected: 00238 map<tstring,CFieldInfo*> m_fdInfosByName; 00239 map<fieldid_t,CFieldInfo*> m_fdInfosByNum; 00240 map<fieldid_t,CFieldInfo*>::iterator m_fdInfosIterator; 00241 00242 typedef map<fieldid_t,CFieldInfo*>::iterator FieldInfo_Iter; 00243 }; 00245 //Inline functions 00246 inline void CFieldsInfo::startIterator() 00247 { 00248 m_fdInfosIterator = m_fdInfosByNum.begin(); 00249 } 00250 inline bool CFieldsInfo::hasNext() 00251 { 00252 return (m_fdInfosIterator != m_fdInfosByNum.end()); 00253 } 00254 inline CFieldInfo* CFieldsInfo::next() 00255 { 00256 CFieldInfo* pInfo = m_fdInfosIterator->second; 00257 m_fdInfosIterator++; 00258 return pInfo; 00259 } 00260 inline int CFieldsInfo::numIndexFields() 00261 { 00262 map<fieldid_t,CFieldInfo*>::iterator iter = m_fdInfosByNum.begin(); 00263 int indFields = 0; 00264 while (iter != m_fdInfosByNum.end()) 00265 { 00266 if(FIELDBITS(iter->second->getFlag()) & BIT_INDEX_MASK) 00267 indFields++; 00268 iter++; 00269 } 00270 return indFields; 00271 } 00272 } 00273 } 00274 00275 00276 00277 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex