FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2006/7/15 00013 // 00014 #ifndef _WORDFREQVECTOR_H 00015 #define _WORDFREQVECTOR_H 00016 00017 #if _MSC_VER > 1000 00018 #pragma once 00019 #endif // _MSC_VER > 1000 00020 00021 #include "TermFreqVector.h" 00022 #include "../store/IndexInput.h" 00023 #include "../utility/DynamicArray.h" 00024 using namespace firtex::store; 00025 using namespace firtex::utility; 00026 00027 00028 namespace firtex 00029 { 00030 namespace index 00031 { 00032 class CWordFreqVector : public CTermFreqVector 00033 { 00034 typedef struct _map_item 00035 { 00036 termid_t tid; 00037 freq_t freq; 00038 }map_item; 00039 public: 00040 CWordFreqVector(); 00041 CWordFreqVector(const tchar* field,CIndexInput* pTVVInput); 00042 virtual ~CWordFreqVector(void); 00043 public: 00049 void open(const tchar* field,CIndexInput* pInput); 00053 count_t numTerms(); 00054 00058 count_t numDistinctTerms(); 00059 00065 const CTerm* getTerms(); 00066 00072 const termid_t* getTermIDs(); 00073 00079 const freq_t* getTermFrequencies(); 00080 00084 void getTermFreqVector(termid_t*& tids,freq_t*& freqs,count_t& size); 00085 00091 int indexOf(termid_t tid); 00092 protected: 00098 void addField(CIndexOutput* pOutput,CField* pField); 00099 00103 inline void quickSort(map_item items[], int lo, int hi); 00104 00105 void readTermVector(); 00106 private: 00107 CIndexInput* m_pTVVInput; 00108 count_t m_numTerms; 00109 count_t m_numDistinctTerms; 00110 termid_t* m_termIDs; 00111 freq_t* m_termFreqs; 00112 CDynamicArray<freq_t>* m_pTermDictionary; 00113 }; 00114 00116 // 00117 inline void CWordFreqVector::quickSort(map_item items[], int lo, int hi) 00118 { 00119 if (lo >= hi) 00120 return; 00121 00122 int mid = (lo + hi) / 2; 00123 map_item tmp; 00124 00125 if (items[lo].tid > items[mid].tid) 00126 { 00127 tmp = items[lo]; 00128 items[lo] = items[mid]; 00129 items[mid] = tmp; 00130 } 00131 00132 if (items[mid].tid > items[hi].tid) 00133 { 00134 tmp = items[mid]; 00135 items[mid] = items[hi]; 00136 items[hi] = tmp; 00137 00138 if (items[lo].tid > items[mid].tid) 00139 { 00140 tmp = items[lo]; 00141 items[lo] = items[mid]; 00142 items[mid] = tmp; 00143 } 00144 } 00145 00146 int left = lo + 1; 00147 int right = hi - 1; 00148 00149 if (left >= right) 00150 return; 00151 00152 termid_t partition = items[mid].tid; 00153 00154 for (; ;) 00155 { 00156 while (items[right].tid > partition) 00157 --right; 00158 00159 while ( (left < right) && (items[left].tid <= partition)) 00160 ++left; 00161 00162 if (left < right) 00163 { 00164 tmp = items[left]; 00165 items[left] = items[right]; 00166 items[right] = tmp; 00167 --right; 00168 } 00169 else 00170 { 00171 break; 00172 } 00173 } 00174 00175 quickSort(items, lo, left); 00176 quickSort(items, left + 1, hi); 00177 } 00178 } 00179 } 00180 00181 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex