FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

FieldsInfo.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn
00012 // Created      : 2005/12/04
00013 //
00014 
00015 #ifndef __FIELDSINFO_H
00016 #define __FIELDSINFO_H
00017 
00018 #include <string>
00019 #include "../utility/StdHeader.h"
00020 #include "../document/Field.h"
00021 #include "../store/IndexInput.h"
00022 #include "../store/IndexOutput.h"
00023 
00024 #include <map>
00025 using namespace std;
00026 using namespace firtex::document;
00027 using namespace firtex::store;
00028 
00029 
00030 namespace firtex
00031 {
00032         namespace index
00033         {               
00034                 class CFieldInfo
00035                 {
00036                 public:
00037 
00038                         CFieldInfo(void)
00039                         {
00040                                 m_flag = 0;
00041                                 m_id = -1;
00042                                 reset();
00043                         }
00044                         CFieldInfo(const CFieldInfo& src)
00045                         {
00046                                 m_flag = src.m_flag;
00047                                 m_id = src.m_id;
00048                                 m_indexOffset = src.m_indexOffset;
00049                                 m_name = src.m_name;
00050 
00051                                 m_totalTerms = src.m_totalTerms;
00052                                 m_distTerms = src.m_distTerms;
00053                                 m_indexOffset = src.m_indexOffset;
00054 
00055                                 m_tdiLength = src.m_tdiLength;
00056                                 m_dfiLength = src.m_dfiLength;
00057                                 m_ptiLength = src.m_ptiLength;
00058                         }
00059                         ~CFieldInfo(void)
00060                         {                               
00061                         }
00062                 public:
00063                         fieldid_t               getID(){return m_id;}
00064                         void                    setID(fieldid_t fid){ m_id = fid;}
00065 
00066                         const tstring&  getName(){return m_name;}
00067                         void                    setName(const tstring& name){m_name = name;}            
00068 
00069                         fileoffset_t    getIndexOffset(){return m_indexOffset;}
00070                         void                    setIndexOffset(fileoffset_t offset){m_indexOffset = offset;}
00071 
00072                         uint64_t                numTerms(){return m_totalTerms;}
00073                         uint64_t                distinctNumTerms(){return m_distTerms;}
00074 
00075                         void                    setNumTerms(uint64_t n){m_totalTerms = n;}
00076                         void                    setDistinctNumTerms(uint64_t n){m_distTerms = n;}
00077 
00078                         void                    setLength(int64_t tdiLen,int64_t dfiLen,int64_t ptiLen){m_tdiLength = tdiLen;m_dfiLength=dfiLen;m_ptiLength=ptiLen;}
00079                         void                    getLength(int64_t* tdiLen,int64_t* dfiLen,int64_t* ptiLen);
00080 
00081                         FieldFlag               getFlag(){return m_flag;}
00082                         void                    setFlag(FieldFlag flag){m_flag = flag;}
00083 
00084                         FieldType               getType(){return FIELDTYPE(m_flag);}
00085 
00086                         fielddata_t             getDataType(){return FIELDDATATYPE(m_flag);}
00087 
00088                         bool                    isIndexed(){return ((m_flag & BIT_INDEX_MASK) == BIT_INDEX_MASK);}
00089                         bool                    isAnalyzed(){return ((m_flag & BIT_ANALYZE_MASK)==BIT_ANALYZE_MASK);}
00090 
00091                         bool                    isStored(){return ((m_flag & BIT_STORE_MASK) == BIT_STORE_MASK);}
00092                         bool                    isCompressed(){return ((m_flag & BIT_COMPRESS_MASK)==BIT_COMPRESS_MASK);}
00093                         
00094                         bool                    isStoreTermVector(){return (isSequenceVector() || isFreqVector() || isPositionVector());}
00095                         bool                    isSequenceVector(){return ((m_flag & BIT_TERMVECTOR_SEQUENCE_MASK) == BIT_TERMVECTOR_SEQUENCE_MASK);}
00096                         bool                    isFreqVector(){return ((m_flag & BIT_TERMVECTOR_FREQ_MASK)==BIT_TERMVECTOR_FREQ_MASK);}
00097                         bool                    isPositionVector(){return ((m_flag & BIT_TERMVECTOR_POSITION_MASK)==BIT_TERMVECTOR_POSITION_MASK);}
00098 
00099                         void                    reset(){m_totalTerms = m_distTerms = 0;m_indexOffset = -1;m_tdiLength = m_dfiLength = m_ptiLength = 0;}
00100                 protected:
00101                         fieldid_t               m_id;
00102                         tstring                 m_name;                 //字段名
00103                         FieldFlag               m_flag;                 //字段标志
00104                         
00105                         /*以下6项只有当该字段是索引字段时有效*/
00106                         uint64_t                m_totalTerms;   //总共出现词数
00107                         uint64_t                m_distTerms;    //非重复词数
00108                         fileoffset_t    m_indexOffset;  //这个字段的索引数据在.tdi文件中的偏移          
00109 
00110                         int64_t                 m_tdiLength;    //该字段词典数据(.tdi)的长度
00111                         int64_t                 m_dfiLength;    //该字段文档和频率数据(.dfi)的长度
00112                         int64_t                 m_ptiLength;    //该字段位置数据(.pti)的长度
00113 
00114                         friend class CFieldsInfo;
00115                 };
00117                 //
00118                 inline void CFieldInfo::getLength(int64_t* tdiLen,int64_t* dfiLen,int64_t* ptiLen)
00119                 {
00120                         if(tdiLen)
00121                                 *tdiLen = m_tdiLength;
00122                         if(dfiLen)
00123                                 *dfiLen = m_dfiLength;
00124                         if(ptiLen)
00125                                 *ptiLen = m_ptiLength;
00126                 }
00127 
00128                 
00129                 class CFieldsInfo
00130                 {
00131                 public:
00132                         CFieldsInfo();
00133                         virtual ~CFieldsInfo();
00134                 public:
00138                         void                    addField(CField* pField);
00139 
00143                         void                    addField(CFieldInfo* pFieldInfo);
00147                         void                    read(CIndexInput* pIndexInput);
00151                         void                    write(CIndexOutput* pIndexOutput);
00152 
00156                         void                    clear();        
00157 
00161                         void                    reset();
00162                          
00163 
00169                         fieldid_t               getFieldID(const string& fname);
00170                         
00176                         string                  getFieldName(fieldid_t fid);
00177 
00183                         CFieldInfo*             getField(fieldid_t fid);
00184 
00190                         CFieldInfo*             getField(const tchar* field);
00191 
00197                         void                    setFieldOffset(fieldid_t fid,fileoffset_t offset);
00203                         fileoffset_t    getFieldOffset(fieldid_t fid);
00204 
00209                         void                    setDistinctNumTerms(fieldid_t fid,uint64_t distterms);
00210 
00215                         uint64_t                distinctNumTerms(fieldid_t fid);
00216 
00221                         uint64_t                numTerms(fieldid_t fid);
00222                         
00226                         int                             numFields(){return (int)m_fdInfosByNum.size();}
00227 
00231                         int                             numIndexFields();
00232                 public:
00233                         CFieldInfo*             operator[](int i){return m_fdInfosByNum[i];}
00234                         void                    startIterator();
00235                         bool                    hasNext();
00236                         CFieldInfo*             next();                 
00237                 protected:
00238                         map<tstring,CFieldInfo*>                m_fdInfosByName;
00239                         map<fieldid_t,CFieldInfo*>      m_fdInfosByNum;
00240                         map<fieldid_t,CFieldInfo*>::iterator m_fdInfosIterator;
00241 
00242                         typedef map<fieldid_t,CFieldInfo*>::iterator FieldInfo_Iter;
00243                 };
00245                 //Inline functions
00246                 inline void CFieldsInfo::startIterator()
00247                 {
00248                         m_fdInfosIterator = m_fdInfosByNum.begin();
00249                 }
00250                 inline bool CFieldsInfo::hasNext()
00251                 {
00252                         return (m_fdInfosIterator != m_fdInfosByNum.end());
00253                 }
00254                 inline CFieldInfo* CFieldsInfo::next()
00255                 {
00256                         CFieldInfo* pInfo = m_fdInfosIterator->second;
00257                         m_fdInfosIterator++;
00258                         return pInfo;
00259                 }
00260                 inline int CFieldsInfo::numIndexFields()
00261                 {
00262                         map<fieldid_t,CFieldInfo*>::iterator iter = m_fdInfosByNum.begin();
00263                         int indFields = 0;
00264                         while (iter != m_fdInfosByNum.end())
00265                         {
00266                                 if(FIELDBITS(iter->second->getFlag()) & BIT_INDEX_MASK)
00267                                         indFields++;
00268                                 iter++;
00269                         }
00270                         return indFields;
00271                 }
00272         }
00273 }
00274 
00275 
00276 
00277 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex