FirteX-高性能全文索引和检索平台API Documentation |
00001 // 00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 00003 // All rights reserved. 00004 // This file is part of FirteX (www.firtex.org) 00005 // 00006 // Use of the FirteX is subject to the terms of the software license set forth in 00007 // the LICENSE file included with this software, and also available at 00008 // http://www.firtex.org/license.html 00009 // 00010 // Author : 郭瑞杰(GuoRuijie) 00011 // Email : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com 00012 // Created : 2006/8/5 00013 // 00014 #ifndef _FIRTEXCOLLECTION_H 00015 #define _FIRTEXCOLLECTION_H 00016 00017 #if _MSC_VER > 1000 00018 #pragma once 00019 #endif // _MSC_VER > 1000 00020 00021 00022 #include "Collection.h" 00023 00024 namespace firtex 00025 { 00026 namespace collection 00027 { 00028 class CFirteXCollection : public CCollection 00029 { 00030 public: 00031 static const tstring identifier; 00032 public: 00033 CFirteXCollection(const tchar* location,CIndexWriter* pWriter); 00034 virtual ~CFirteXCollection(void); 00035 protected: 00036 class CTagPair 00037 { 00038 public: 00039 CTagPair(schemaid_t sid,const tchar* tag) 00040 :schemaid(sid) 00041 { 00042 tagLen = (int32_t)_tcslen(tag); 00043 beginTag = new tchar[tagLen + 3]; 00044 beginTag[0] = _T('<'); 00045 _tcscpy(beginTag + 1,tag); 00046 beginTag[tagLen + 1] = _T('>'); 00047 beginTag[tagLen + 2] = 0; 00048 endTag = new tchar[tagLen + 4]; 00049 _tcscpy(endTag + 2,tag); 00050 endTag[0] = _T('<'); 00051 endTag[1] = _T('/'); 00052 endTag[tagLen + 2] = _T('>'); 00053 endTag[tagLen + 3] = 0; 00054 } 00055 ~CTagPair() 00056 { 00057 delete[] beginTag; 00058 delete[] endTag; 00059 } 00060 public: 00061 schemaid_t schemaid; 00062 tchar* beginTag; 00063 tchar* endTag; 00064 int32_t tagLen; 00065 }; 00066 protected: 00070 bool scanInternal(); 00071 protected: 00072 bool readIndexFile(const tstring& indexfile); 00073 bool processTag(char*& buff,CTagPair* p,char*& value,size_t& valueLen,bool skip = false); 00074 private: 00075 tstring m_sDirectory; 00076 00077 CTagPair** m_tags; 00078 int32_t m_numTags; 00079 }; 00081 inline bool CFirteXCollection::processTag(char*& buff,CTagPair* p,char*& value,size_t& valueLen,bool skip) 00082 { 00083 char* bTag = strstr(buff,p->beginTag); 00084 if(bTag == NULL) 00085 { 00086 buff += strlen(p->endTag); 00087 //FIRTEX_CLOG(level::warn) << "can't find begin tag: " << p->first<<endl; 00088 return false; 00089 } 00090 char* eTag = strstr(bTag,p->endTag); 00091 if(eTag == NULL) 00092 { 00093 FIRTEX_CLOG(level::warn) << "can't find end tag : " << p->endTag<<endl; 00094 return false; 00095 } 00096 bTag += (p->tagLen + 2); 00097 if(!skip) 00098 { 00099 value = bTag; 00100 valueLen = eTag - bTag; 00101 buff = eTag + p->tagLen + 3;// strlen(p->endTag); 00102 } 00103 else 00104 { 00105 buff = bTag; 00106 } 00107 return true; 00108 } 00109 } 00110 } 00111 00112 #endif
http://www.firtex.org http://www.sourceforge.net/projects/firtex