FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2006/3/17 00013 // 00014 #ifndef __PHRASESCORER_H 00015 #define __PHRASESCORER_H 00016 00017 #include "Scorer.h" 00018 #include "../index/TermPositions.h" 00019 #include "../index/NormBytes.h" 00020 using namespace firtex::index; 00021 00022 namespace firtex 00023 { 00024 namespace search 00025 { 00026 #define PHRASESCORE_CACHESIZE 1024 00027 #define PHRASE_PAGESIZE 1024 00028 class CWeight; 00029 class CSearcher; 00030 class CPhraseScorer : public CScorer 00031 { 00032 class CPhrasePositions 00033 { 00034 public: 00035 CPhrasePositions(CTermPositions* pos,int nPos) 00036 :m_pTermPositions(pos) 00037 ,m_pNext(NULL) 00038 ,m_nPhrasePosition(nPos) 00039 { 00040 } 00041 ~CPhrasePositions() 00042 { 00043 if(m_pNext) 00044 { 00045 00046 delete m_pNext; 00047 m_pNext = NULL; 00048 } 00049 if(m_pTermPositions) 00050 { 00051 delete m_pTermPositions; 00052 m_pTermPositions = NULL; 00053 } 00054 } 00055 public: 00056 bool skipTo(docid_t target,docid_t& nearTarget) 00057 { 00058 return m_pTermPositions->skipTo(target,nearTarget); 00059 } 00060 bool next(){return m_pTermPositions->next();} 00061 docid_t doc(){return m_pTermPositions->doc();} 00062 00063 bool skipToPosition(int startPhrase,loc_t target,loc_t& nearTarget) 00064 { 00065 return m_pTermPositions->skipToPosition(target + (m_nPhrasePosition - startPhrase),nearTarget); 00066 } 00067 loc_t nextPosition() 00068 { 00069 return m_pTermPositions->nextPosition(); 00070 } 00071 00072 bool operator < (const CPhrasePositions& p)const 00073 { 00074 return (m_pTermPositions->docFreq() < p.m_pTermPositions->docFreq()); 00075 } 00076 00077 int getPhrasePosition(){return m_nPhrasePosition;} 00078 protected: 00079 CTermPositions* m_pTermPositions; 00080 CPhrasePositions* m_pNext; 00081 int m_nPhrasePosition; 00082 00083 friend class CPhraseScorer; 00084 }; 00085 00086 struct PhrasePositionsGreater 00087 { 00088 bool operator () (CPhrasePositions*& left,CPhrasePositions*& right) const 00089 { 00090 return ( (*left) < (*right) ); 00091 } 00092 }; 00093 00094 typedef priority_queue<CPhrasePositions*,deque<CPhrasePositions*>,PhrasePositionsGreater> CPhraseQueue; 00095 00096 public: 00097 CPhraseScorer(CWeight* pWeight,CSimilarity* pSimilarity,int slop,CTermPositions** ppPos,int size,CNormBytes* norms); 00098 virtual ~CPhraseScorer(void); 00099 public: 00104 count_t nextDocs(); 00105 00112 count_t scores(docid_t*& docs,score_t*& scores); 00113 00121 bool skipTo(docid_t target,docid_t& nearTarget); 00122 00127 bool next(); 00128 00129 /* 00130 * 返回当前位置文档的打分结果,必须配合{@link skipTo(docid_t,DOC_ID)}或{@link next()}使用,只有返回true后才有效 00131 */ 00132 score_t score(); 00133 00134 /* 00135 * 返回当前位置文档ID,必须配合{@link skipTo(docid_t,DOC_ID)}或{@link next()}使用,只有返回true后才有效 00136 */ 00137 docid_t doc(); 00138 protected: 00139 void firstToLast() 00140 { 00141 m_pLast->m_pNext = m_pFirst; 00142 m_pLast = m_pFirst; 00143 m_pFirst = m_pFirst->m_pNext; 00144 m_pLast->m_pNext = NULL; 00145 } 00146 00147 freq_t phraseFreq(); 00148 virtual freq_t exactPhraseFreq(); 00149 virtual freq_t sloppyPhraseFreq(); 00150 protected: 00151 CWeight* m_pWeight; 00152 CTermDocs* m_pTermDocs; 00153 00154 score_t m_fWeights; 00155 docid_t m_docs[PHRASE_PAGESIZE]; //文档ID,内存由CTermDocs维护 00156 freq_t m_freqs[PHRASE_PAGESIZE]; //词频,内存由CTermDocs维护 00157 score_t m_scores[PHRASE_PAGESIZE]; //打分结果 00158 count_t m_numDocs; //文档数 00159 00160 score_t m_scoreCache[PHRASESCORE_CACHESIZE]; //Score Cache 00161 00162 int m_nSlop; 00163 CPhrasePositions* m_pFirst; 00164 CPhrasePositions* m_pLast; 00165 int m_nNumPositions; 00166 CNormBytes* m_norms; 00167 }; 00168 } 00169 } 00170 00171 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex