FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

PhraseScorer.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2006/3/17
00013 //
00014 #ifndef __PHRASESCORER_H
00015 #define __PHRASESCORER_H
00016 
00017 #include "Scorer.h"
00018 #include "../index/TermPositions.h"
00019 #include "../index/NormBytes.h"
00020 using namespace firtex::index;
00021 
00022 namespace firtex
00023 {
00024         namespace search
00025         {
00026 #define PHRASESCORE_CACHESIZE   1024 // number of entries in CPhraseScorer::m_scoreCache
00027 #define PHRASE_PAGESIZE 1024 // number of entries in CPhraseScorer's m_docs / m_freqs / m_scores arrays
00028                 class CWeight; // forward declaration; CPhraseScorer only holds a CWeight pointer
00029                 class CSearcher; // forward declaration; not referenced anywhere else in this header
00030                 class CPhraseScorer :   public CScorer // Scorer derived from CScorer; keeps CPhrasePositions nodes in a singly linked list (m_pFirst .. m_pLast). Most members are defined outside this header.
00031                 {
00032                         class CPhrasePositions // List node wrapping one CTermPositions together with an integer phrase position
00033                         {
00034                         public:
00035                                 CPhrasePositions(CTermPositions* pos,int nPos) // Takes ownership of pos (deleted in the destructor); the node starts unlinked (m_pNext = NULL)
00036                                         :m_pTermPositions(pos)
00037                                         ,m_pNext(NULL)
00038                                         ,m_nPhrasePosition(nPos)
00039                                 {                                       
00040                                 }
00041                                 ~CPhrasePositions() // Frees the following nodes and the wrapped CTermPositions. NOTE(review): no copy constructor/assignment is declared, so copying a node would lead to a double delete — confirm nodes are never copied
00042                                 {
00043                                         if(m_pNext)
00044                                         {
00045                                                 // deleting the next node runs its destructor, which in turn frees the remainder of the chain
00046                                                 delete m_pNext;
00047                                                 m_pNext = NULL;
00048                                         }
00049                                         if(m_pTermPositions)
00050                                         {
00051                                                 delete m_pTermPositions;
00052                                                 m_pTermPositions = NULL;
00053                                         }
00054                                 }
00055                         public:                         
00056                                 bool    skipTo(docid_t target,docid_t& nearTarget) // Forwards unchanged to CTermPositions::skipTo
00057                                 {
00058                                         return m_pTermPositions->skipTo(target,nearTarget);
00059                                 }
00060                                 bool    next(){return m_pTermPositions->next();} // Forwards to CTermPositions::next
00061                                 docid_t doc(){return m_pTermPositions->doc();} // Forwards to CTermPositions::doc
00062                                 // Forwards to CTermPositions::skipToPosition after shifting target by (m_nPhrasePosition - startPhrase)
00063                                 bool    skipToPosition(int startPhrase,loc_t target,loc_t& nearTarget)
00064                                 {
00065                                         return m_pTermPositions->skipToPosition(target + (m_nPhrasePosition - startPhrase),nearTarget);
00066                                 }
00067                                 loc_t   nextPosition() // Forwards to CTermPositions::nextPosition
00068                                 {
00069                                         return m_pTermPositions->nextPosition();
00070                                 }                               
00071                                 // Orders nodes by the docFreq() of their term positions: the node with the smaller docFreq compares less
00072                                 bool operator < (const CPhrasePositions& p)const
00073                                 {
00074                                         return (m_pTermPositions->docFreq() < p.m_pTermPositions->docFreq());
00075                                 }
00076                                 // Returns m_nPhrasePosition
00077                                 int     getPhrasePosition(){return m_nPhrasePosition;}
00078                         protected:
00079                                 CTermPositions*         m_pTermPositions; // owned; deleted in the destructor
00080                                 CPhrasePositions*       m_pNext; // next node in the list or NULL; also deleted in the destructor
00081                                 int                                     m_nPhrasePosition; // value of nPos passed to the constructor
00082                                 // CPhraseScorer manipulates m_pNext directly (see firstToLast())
00083                                 friend class CPhraseScorer;                             
00084                         };
00085                         // Comparator used by CPhraseQueue. NOTE(review): despite the name "Greater" it forwards to CPhrasePositions::operator< (a less-than on docFreq) — confirm the intended queue order
00086                         struct PhrasePositionsGreater
00087                         {
00088                                 bool operator () (CPhrasePositions*& left,CPhrasePositions*& right) const
00089                                 {
00090                                         return ( (*left) < (*right) );
00091                                 }
00092                         };
00093                         // Priority queue of node pointers stored in a deque and ordered by PhrasePositionsGreater
00094                         typedef priority_queue<CPhrasePositions*,deque<CPhrasePositions*>,PhrasePositionsGreater> CPhraseQueue;
00095                         // NOTE(review): priority_queue/deque are unqualified and no <queue>/<deque> include is visible here — presumably supplied by an included header; confirm
00096                 public:
00097                         CPhraseScorer(CWeight* pWeight,CSimilarity* pSimilarity,int slop,CTermPositions** ppPos,int size,CNormBytes* norms); // defined outside this header; presumably ppPos holds `size` entries — TODO confirm
00098                         virtual ~CPhraseScorer(void);
00099                 public:
00104                         count_t nextDocs();
00105 
00112                         count_t scores(docid_t*& docs,score_t*& scores);
00113 
00121                         bool    skipTo(docid_t target,docid_t& nearTarget);
00122 
00127                         bool    next();
00128 
00129                         /*
00130                          * Returns the score of the document at the current position. Must be used together with {@link skipTo(docid_t,DOC_ID)} or {@link next()}; the result is valid only after one of them has returned true.
00131                          */
00132                         score_t score();
00133 
00134                         /*
00135                          * Returns the ID of the document at the current position. Must be used together with {@link skipTo(docid_t,DOC_ID)} or {@link next()}; the result is valid only after one of them has returned true.
00136                          */
00137                         docid_t doc();
00138                 protected:
00139                         void    firstToLast() // Moves the head node of the list to the tail
00140                         {                               // no null checks: m_pFirst and m_pLast are dereferenced unconditionally
00141                                 m_pLast->m_pNext = m_pFirst; // link the current head after the current tail
00142                                 m_pLast = m_pFirst; // the old head is now the tail
00143                                 m_pFirst = m_pFirst->m_pNext; // the head advances to the old head's successor
00144                                 m_pLast->m_pNext = NULL; // terminate the list at the new tail
00145                         }
00146                         // Phrase-frequency routines, defined outside this header; the exact and sloppy variants are virtual
00147                         freq_t phraseFreq();
00148                         virtual freq_t exactPhraseFreq();
00149                         virtual freq_t sloppyPhraseFreq();
00150                 protected:
00151                         CWeight*                m_pWeight;
00152                         CTermDocs*              m_pTermDocs;
00153 
00154                         score_t         m_fWeights;
00155                         docid_t         m_docs[PHRASE_PAGESIZE];        // document IDs; memory is managed by CTermDocs. NOTE(review): declared as an in-object array, which seems at odds with that remark — confirm
00156                         freq_t          m_freqs[PHRASE_PAGESIZE];       // term frequencies; memory is managed by CTermDocs (same caveat as m_docs)
00157                         score_t         m_scores[PHRASE_PAGESIZE];      // scoring results
00158                         count_t         m_numDocs;              // number of documents
00159 
00160                         score_t         m_scoreCache[PHRASESCORE_CACHESIZE];    // score cache
00161 
00162                         int                     m_nSlop;
00163                         CPhrasePositions*       m_pFirst; // head of the CPhrasePositions list (see firstToLast())
00164                         CPhrasePositions*       m_pLast; // tail of the CPhrasePositions list (see firstToLast())
00165                         int                                     m_nNumPositions;
00166                         CNormBytes*                     m_norms;
00167                 };
00168         }
00169 }
00170 
00171 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex