FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

WordFreqVector.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2006/7/15
00013 //
00014 #ifndef _WORDFREQVECTOR_H
00015 #define _WORDFREQVECTOR_H
00016 
00017 #if _MSC_VER > 1000
00018 #pragma once
00019 #endif // _MSC_VER > 1000
00020 
00021 #include "TermFreqVector.h"
00022 #include "../store/IndexInput.h"
00023 #include "../utility/DynamicArray.h"
00024 using namespace firtex::store;
00025 using namespace firtex::utility;
00026 
00027 
00028 namespace firtex
00029 {
00030         namespace index
00031         {
00032                 class CWordFreqVector : public CTermFreqVector
00033                 {
00034                         typedef struct _map_item
00035                         {
00036                                 termid_t        tid;
00037                                 freq_t          freq;
00038                         }map_item;
00039                 public:
00040                         CWordFreqVector();
00041                         CWordFreqVector(const tchar* field,CIndexInput* pTVVInput);
00042                         virtual ~CWordFreqVector(void);
00043                 public:                 
00049                         void    open(const tchar* field,CIndexInput* pInput);
00053             count_t                     numTerms();
00054 
00058             count_t                     numDistinctTerms();
00059 
00065                         const CTerm*    getTerms();
00066 
00072                         const termid_t* getTermIDs();
00073 
00079                         const freq_t*   getTermFrequencies();
00080 
00084                         void                    getTermFreqVector(termid_t*& tids,freq_t*& freqs,count_t& size);
00085 
00091                         int                             indexOf(termid_t tid);
00092                 protected:
00098                         void    addField(CIndexOutput*  pOutput,CField* pField);
00099 
00103                         inline void quickSort(map_item items[], int lo, int hi);
00104 
00105                         void    readTermVector();
00106                 private:
00107                         CIndexInput*    m_pTVVInput;
00108                         count_t                 m_numTerms;
00109                         count_t                 m_numDistinctTerms;
00110                         termid_t*               m_termIDs;
00111                         freq_t*                 m_termFreqs;
00112                         CDynamicArray<freq_t>*  m_pTermDictionary;
00113                 };
00114 
00116                 //
00117                 inline void CWordFreqVector::quickSort(map_item items[], int lo, int hi)
00118                 {
00119                         if (lo >= hi)
00120                                 return;
00121 
00122                         int mid = (lo + hi) / 2;
00123                         map_item tmp;
00124 
00125                         if (items[lo].tid > items[mid].tid) 
00126                         {
00127                                 tmp = items[lo];
00128                                 items[lo] = items[mid];
00129                                 items[mid] = tmp;
00130                         }
00131 
00132                         if (items[mid].tid > items[hi].tid)
00133                         {
00134                                 tmp = items[mid];
00135                                 items[mid] = items[hi];
00136                                 items[hi] = tmp;
00137 
00138                                 if (items[lo].tid > items[mid].tid)
00139                                 {
00140                                         tmp = items[lo];
00141                                         items[lo] = items[mid];
00142                                         items[mid] = tmp;
00143                                 }
00144                         }
00145 
00146                         int left = lo + 1;
00147                         int right = hi - 1;
00148 
00149                         if (left >= right)
00150                                 return;
00151 
00152                         termid_t partition = items[mid].tid;
00153 
00154                         for (; ;) 
00155                         {
00156                                 while (items[right].tid > partition)
00157                                         --right;
00158 
00159                                 while ( (left < right) && (items[left].tid <= partition))
00160                                         ++left;
00161 
00162                                 if (left < right) 
00163                                 {
00164                                         tmp = items[left];
00165                                         items[left] = items[right];
00166                                         items[right] = tmp;
00167                                         --right;
00168                                 }
00169                                 else 
00170                                 {
00171                                         break;
00172                                 }
00173                         }
00174 
00175                         quickSort(items, lo, left);
00176                         quickSort(items, left + 1, hi);
00177                 }               
00178         }
00179 }
00180 
00181 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex