FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2006/5/9 00013 // 00014 #ifndef _DISJUNCTIONSCORER_H 00015 #define _DISJUNCTIONSCORER_H 00016 00017 #include <list> 00018 #include "../utility/PriorityQueue.h" 00019 #include "Scorer.h" 00020 using namespace std; 00021 using namespace firtex::utility; 00022 00023 namespace firtex 00024 { 00025 namespace search 00026 { 00027 class CDisjunctionScorer : public CScorer 00028 { 00029 #define DISJUNCTION_PAGESIZE 1024 00030 00031 class CScorerQueue : public CPriorityQueue<CScorer*> 00032 { 00033 public: 00034 CScorerQueue(size_t size) 00035 { 00036 initialize(size,false); 00037 } 00038 protected: 00039 bool lessThan(CScorer* o1, CScorer* o2) 00040 { 00041 return o1->doc() < o2->doc(); 00042 } 00043 }; 00044 00045 public: 00046 CDisjunctionScorer(); 00047 CDisjunctionScorer(list<CScorer*>* scorers,int minShouldMatch=1); 00048 virtual ~CDisjunctionScorer(void); 00049 public: 00054 count_t nextDocs(); 00055 00062 count_t scores(docid_t*& docs,score_t*& scores); 00063 00071 bool skipTo(docid_t target,docid_t& nearTarget); 00072 00077 bool next(); 00078 00079 /* 00080 * 返回当前位置文档的打分结果,必须配合{@link skipTo(docid_t,DOC_ID)}或{@link next()}使用,只有返回true后才有效 00081 */ 00082 score_t score(); 00083 00084 /* 00085 * 返回当前位置文档ID,必须配合{@link skipTo(docid_t,DOC_ID)}或{@link next()}使用,只有返回true后才有效 00086 */ 00087 docid_t doc(); 00088 00089 /* 00090 * add one scorer 00091 * @param pScorer The scorer witch will be added to the m_scoresList 00092 */ 00093 void add(CScorer* pScorer){m_scoresList.push_back(pScorer);} 00094 protected: 00095 void initScorerQueue(); 00096 bool doNext(docid_t& currentDoc,score_t& currentScore); 00097 protected: 00098 list<CScorer*> m_scoresList; 00099 CScorerQueue* m_pScorerQueue; 00100 00101 int m_minShouldMatch; //只要需要匹配的个数 00102 00103 docid_t m_docs[DISJUNCTION_PAGESIZE]; //文档ID,内存由CTermDocs维护 00104 score_t m_scores[DISJUNCTION_PAGESIZE]; //打分结果 00105 count_t m_numDocs; //文档数 00106 }; 00108 // 00109 inline bool CDisjunctionScorer::doNext(docid_t& currentDoc,score_t& currentScore) 00110 { 00111 CScorer* top; 00112 int32_t nrMatchers; 00113 do 00114 { // repeat until minimum nr of matchers 00115 top = m_pScorerQueue->top(); 00116 currentDoc = top->doc(); 00117 currentScore = top->score(); 00118 nrMatchers = 1; 00119 do 00120 { // Until all subscorers are after currentDoc 00121 if (top->next()) 00122 { 00123 m_pScorerQueue->adjustTop(); 00124 } 00125 else 00126 { 00127 m_pScorerQueue->pop(); 00128 if ((int32_t)m_pScorerQueue->size() < (m_minShouldMatch - nrMatchers)) 00129 { 00130 // Not enough subscorers left for a match on this document, 00131 // and also no more chance of any further match. 00132 return false; 00133 } 00134 if (m_pScorerQueue->size() == 0) 00135 { 00136 break; // nothing more to advance, check for last match. 00137 } 00138 } 00139 top = m_pScorerQueue->top(); 00140 if (top->doc() != currentDoc) 00141 { 00142 break; // All remaining subscorers are after currentDoc. 00143 } 00144 else 00145 { 00146 currentScore += top->score(); 00147 nrMatchers++; 00148 } 00149 } while (true); 00150 00151 if (nrMatchers >= m_minShouldMatch) 00152 { 00153 return true; 00154 } 00155 else if ((int32_t)m_pScorerQueue->size() < m_minShouldMatch) 00156 { 00157 return false; 00158 } 00159 } while (true); 00160 } 00161 } 00162 } 00163 00164 #endif 00165
http://www.firtex.org http://www.sourceforge.net/projects/firtex