FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

DisjunctionScorer.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2006/5/9
00013 //
00014 #ifndef _DISJUNCTIONSCORER_H
00015 #define _DISJUNCTIONSCORER_H
00016 
00017 #include <list>
00018 #include "../utility/PriorityQueue.h"
00019 #include "Scorer.h"
00020 using namespace std;
00021 using namespace firtex::utility;
00022 
00023 namespace firtex
00024 {
00025         namespace search
00026         {
00027                 class CDisjunctionScorer :      public CScorer
00028                 {       
                              // Scorer that combines several sub-scorers (CScorer*) as a disjunction.
                              // A document is reported by doNext() once at least m_minShouldMatch
                              // sub-scorers are positioned on it; its score is the sum of the
                              // score() values of those sub-scorers.

                              // Capacity of the m_docs / m_scores arrays declared below.
00029 #define DISJUNCTION_PAGESIZE            1024
00030 
                              // Priority queue of sub-scorers ordered by their current doc() value.
00031                         class CScorerQueue : public CPriorityQueue<CScorer*>
00032                         {
00033                         public:
                                      // @param size value forwarded to CPriorityQueue::initialize().
                                      // NOTE(review): the meaning of the `false` flag is defined by
                                      // CPriorityQueue::initialize(), which is not visible here -- confirm.
00034                                 CScorerQueue(size_t size) 
00035                                 {
00036                                         initialize(size,false);
00037                                 }
00038                         protected:
                                      // Ordering predicate: o1 precedes o2 when its current document ID
                                      // is smaller. Presumably this keeps the scorer with the smallest
                                      // doc() on top of the queue -- verify against CPriorityQueue.
00039                                 bool lessThan(CScorer* o1, CScorer* o2) 
00040                                 {
00041                                         return o1->doc() < o2->doc();
00042                                 }
00043                         };
00044 
00045                 public:
00046                         CDisjunctionScorer();
                              // @param scorers        list of sub-scorers to combine
                              // @param minShouldMatch minimum number of sub-scorers that must match
                              //                       a document (defaults to 1)
00047                         CDisjunctionScorer(list<CScorer*>* scorers,int minShouldMatch=1);
00048                         virtual ~CDisjunctionScorer(void);
00049                 public:
                              // NOTE(review): the original doc comments of nextDocs(), scores(),
                              // skipTo() and next() are missing from this listing (the listing
                              // numbers skip over them); consult the implementation file for
                              // their exact contracts.
00054                         count_t nextDocs();
00055 
00062                         count_t scores(docid_t*& docs,score_t*& scores);
00063 
00071                         bool    skipTo(docid_t target,docid_t& nearTarget);
00072 
00077                         bool    next();
00078 
00079                         /*
00080                          * Returns the score of the document at the current position. Must be used together with {@link skipTo(docid_t,DOC_ID)} or {@link next()}; the result is valid only after one of them has returned true.
00081                          */
00082                         score_t score();
00083 
00084                         /*
00085                          * Returns the ID of the document at the current position. Must be used together with {@link skipTo(docid_t,DOC_ID)} or {@link next()}; the result is valid only after one of them has returned true.
00086                          */
00087                         docid_t doc();
00088         
00089                         /*
00090                          * Add one sub-scorer.
00091                          * @param pScorer The scorer which will be appended to m_scoresList
00092                          */
00093                         void    add(CScorer* pScorer){m_scoresList.push_back(pScorer);}
00094                 protected:
00095                         void    initScorerQueue();
                              // Finds the next document matched by at least m_minShouldMatch
                              // sub-scorers; defined inline after the class.
00096                         bool    doNext(docid_t& currentDoc,score_t& currentScore);
00097                 protected:
00098                         list<CScorer*>  m_scoresList;                   // sub-scorers registered through add()
00099                         CScorerQueue*   m_pScorerQueue;                 // queue of sub-scorers consumed by doNext()
00100 
00101                         int                     m_minShouldMatch;                               // minimum number of sub-scorers that must match a document
00102 
00103                         docid_t         m_docs[DISJUNCTION_PAGESIZE];   // document IDs; the original note says the memory is maintained by CTermDocs -- NOTE(review): this is an in-class array, confirm whether that note is stale
00104                         score_t         m_scores[DISJUNCTION_PAGESIZE]; // scoring results
00105                         count_t         m_numDocs;                                              // number of documents
00106                 };
00108                 //
                      // Advances the sub-scorers held in m_pScorerQueue until a document is found
                      // on which at least m_minShouldMatch of them are positioned.
                      //
                      // @param currentDoc   [out] set to doc() of the queue's top scorer for each
                      //                     candidate; holds the matching document ID when true
                      //                     is returned
                      // @param currentScore [out] sum of score() over the sub-scorers positioned
                      //                     on currentDoc
                      // @return true when currentDoc is matched by at least m_minShouldMatch
                      //         sub-scorers; false when too few sub-scorers remain in the queue
                      //         for any match.
                      //
                      // The queue's top scorer is dereferenced without a check, so the queue is
                      // expected to be non-empty on entry (NOTE(review): confirm that callers
                      // guarantee this). Every sub-scorer that matched currentDoc has already
                      // been advanced with next() (or popped) by the time this function returns.
00109                 inline bool CDisjunctionScorer::doNext(docid_t& currentDoc,score_t& currentScore)
00110                 {                       
00111                         CScorer* top;                   
00112                         int32_t nrMatchers;     // number of sub-scorers positioned on currentDoc
00113                         do
00114                         {       // repeat until minimum nr of matchers
                                      // Start a new candidate from the scorer on top of the queue.
00115                                 top = m_pScorerQueue->top();
00116                                 currentDoc = top->doc();
00117                                 currentScore = top->score();
00118                                 nrMatchers = 1;
00119                                 do
00120                                 {       // Until all subscorers are after currentDoc
                                              // Move the scorer just counted past currentDoc.
00121                                         if (top->next()) 
00122                                         {
                                                      // Its doc() changed: let the queue re-position it.
00123                                                 m_pScorerQueue->adjustTop();
00124                                         }
00125                                         else 
00126                                         {
                                                      // Scorer exhausted: remove it from the queue.
00127                                                 m_pScorerQueue->pop();
                                                      // Cast to signed: the right-hand side is negative once
                                                      // nrMatchers exceeds m_minShouldMatch (size() is
                                                      // presumably unsigned -- verify in CPriorityQueue).
00128                                                 if ((int32_t)m_pScorerQueue->size() < (m_minShouldMatch - nrMatchers)) 
00129                                                 {
00130                                                         // Not enough subscorers left for a match on this document,
00131                                                         // and also no more chance of any further match.
00132                                                         return false;
00133                                                 }
00134                                                 if (m_pScorerQueue->size() == 0)
00135                                                 {
00136                                                         break; // nothing more to advance, check for last match.
00137                                                 }
00138                                         }
                                              // Inspect the new top: it either also sits on currentDoc
                                              // (one more matcher) or is on a different document.
00139                                         top = m_pScorerQueue->top();
00140                                         if (top->doc() != currentDoc) 
00141                                         {
00142                                                 break; // All remaining subscorers are after currentDoc.
00143                                         }
00144                                         else
00145                                         {
00146                                                 currentScore += top->score();
00147                                                 nrMatchers++;
00148                                         }
00149                                 } while (true);
00150 
                                      // Candidate fully counted: accept it, give up, or try the next one.
00151                                 if (nrMatchers >= m_minShouldMatch)
00152                                 {
00153                                         return true;
00154                                 }
00155                                 else if ((int32_t)m_pScorerQueue->size() < m_minShouldMatch) 
00156                                 {
                                              // Fewer scorers remain than are required for any match.
00157                                         return false;
00158                                 }
00159                         } while (true);                                         
00160                 }
00161         }
00162 }
00163 
00164 #endif
00165 

http://www.firtex.org http://www.sourceforge.net/projects/firtex