FirteX-高性能全文索引和检索平台

API Documentation


首页 | 名字空间列表 | 类继承关系 | 组合类型列表 | 目录 | 文件列表 | 名字空间成员 | 组合类型成员 | 文件成员

FirteXCOM.h

浏览该文件的文档。
00001 //
00002 // Copyright(C) 2005--2006 Institute of Computing Tech, Chinese Academy of Sciences. 
00003 // All rights reserved.
00004 // This file is part of FirteX (www.firtex.org)
00005 //
00006 // Use of the FirteX is subject to the terms of the software license set forth in 
00007 // the LICENSE file included with this software, and also available at
00008 // http://www.firtex.org/license.html
00009 //
00010 // Author       : 郭瑞杰(GuoRuijie)
00011 // Email        : ruijieguo@software.ict.ac.cn,ruijieguo@gmail.com
00012 // Created      : 2006/5/8
00013 //
00014 #ifndef _FIRTEXCOM_H
00015 #define _FIRTEXCOM_H
00016 
00017 #include "../com/Com.h"
00018 
00019 #ifdef __cplusplus
00020 
00021 namespace firtex
00022 {
00023         namespace plugin
00024         {
00025                 //ANSI string definition: a char pointer paired with a size_t length
00026         typedef struct _tagstr 
00027         {
00028                         char*   str;
00029                         size_t  length;
00030         }STR;
00031 
00032                 //Wide-character string definition: a wchar_t pointer paired with a size_t length
00033                 typedef struct _tagwstr 
00034                 {
00035                         wchar_t*        str;
00036                         size_t          length;
00037                 }WSTR;
00038 
00039                 typedef int32_t fieldid_t;      //type of the field id values passed as id / returned through pid by the interfaces in this header
00040 
00041                 typedef uint8_t FIELDTYPE;      //type of the ft parameters of IDocumentSchema; NOTE(review): presumably carries FIELDTYPEENUM values - confirm
00042 
00043                 enum FIELDTYPEENUM
00044                 {
00045                         _FIELD_NONE             = 0x00,         //anonymous
00046                         _FIELD_WORD             = 0x01,         //ordinary word, usually turned into a WordID by the Analyzer
00047                         _FIELD_DATE             = 0x02,         //date/time
00048                         _FIELD_NAME             = 0x03,         //person name, string
00049                         _FIELD_COMPANY  = 0x04,         //organization name, string
00050                         _FIELD_EMAIL            = 0x05,         //Email, string
00051                         _FIELD_NUM              = 0x06,         //number
00052                         _FIELD_ALPHA            = 0x07,         //letters, string
00053                         _FIELD_ALPNUM   = 0x08,         //alphanumeric combination, string
00054                         _FIELD_URL              = 0x09,         //URL, string
00055                 };
00056 
00057                 typedef unsigned short NUMBERTYPE;      //type of NUMBERVAR::nt
00058 
00059                 //Numeric type definition
00060                 enum NUMENUM
00061                 {
00062                         NT_EMPTY        = 0,
00063                         NT_NULL         = 1,
00064                         NT_BOOL         = 2,
00065                         NT_I1           = 3,
00066                         NT_UI1          = 4,
00067                         NT_I2           = 5,
00068                         NT_UI2          = 6,
00069                         NT_I4           = 7,
00070                         NT_UI4          = 8,
00071                         NT_I8           = 9,
00072                         NT_UI8          = 10,
00073 
00074                         NT_R4           = 11,
00075                         NT_R8           = 12,
00076                 };
00077 
00078                 //Number definition: a NUMBERTYPE tag followed by a union of the numeric representations
00079                 typedef struct _tagNUMBERVAR
00080                 {
00081                         NUMBERTYPE      nt;                             //Number Type
00082                         union                                           //NOTE(review): presumably nt (a NUMENUM value) tells which member is valid - confirm
00083                         {
00084                                 bool            bval;
00085                                 int8_t          i8val;
00086                                 uint8_t         ui8val;
00087                                 int16_t         i16val;
00088                                 uint16_t        ui16val;
00089                                 int32_t         i32val;
00090                                 uint32_t        ui32val;
00091                                 int64_t         i64val;
00092                                 uint64_t        ui64val;
00093 
00094                                 float           fval;
00095                                 double          dbval;                          
00096                         }value;
00097                 }NUMBERVAR;
00098 
00099                 //Binary value definition
00100                 typedef struct _tagDATARECORD
00101                 {
00102                         byte*   data;                   //data 
00103                         size_t  length;                 //length of data
00104                         bool    nocopy;                 //copy data or not
00105                 }DATARECORD;
00106 
00107                 typedef unsigned short INDEXTYPE;       //type of INDEXDATAVAR::it
00108 
00109                 //Index data type definition
00110                 enum INDEXENUM
00111                 {
00112                         IT_EMPTY                = 0,                    //Empty
00113                         IT_STR                  = 1,                    //ANSI
00114                         IT_WSTR                 = 2,                    //UNICODE
00115                         IT_NUMBER               = 3,                    //Number
00116                         IT_DATARECORD   = 4,                    //Binary data
00117                 };
00118 
00119                 //Index data definition: an INDEXTYPE tag followed by a union of text, number and binary payloads
00120                 typedef struct _tagINDEXDATAVAR
00121                 {
00122                         INDEXTYPE       it;                     //NOTE(review): presumably an INDEXENUM value telling which member of data is valid - confirm
00123                         union
00124                         {                       
00125                                 struct
00126                                 {
00127                                         union
00128                                         {
00129                                                 STR             strval;         //ANSI
00130                                                 WSTR    wstrval;        //Unicode
00131                                         };
00132                                         bool    nocopy;                 //copy text or not
00133                                 }text;
00134                                 NUMBERVAR       numval;                 //Number
00135                                 DATARECORD      drval;                  //Binary data record
00136                         }data;
00137                 }INDEXDATAVAR;
00138 
00139                 typedef INDEXTYPE METATYPE;             //metadata type tag is an alias of the index data type tag
00140                 typedef INDEXDATAVAR METADATAVAR;       //metadata values use the same struct as index data values
00141 
00142                 typedef unsigned int method_type;       //common underlying type of Store_, Index_ and TermVector_
00143                 typedef method_type Store_;
00144                 typedef method_type Index_;
00145                 typedef method_type TermVector_;
00146                 
00147                 enum Index                              //NOTE(review): presumably the values carried by Index_ - confirm; numbering starts at 1
00148                 {
00149                         INDEX_NO                        = 1,
00150                         INDEX_ANALYZER          = 2,
00151                         INDEX_UN_ANALYZER       = 3,
00152                 };
00153                 enum Store                              //NOTE(review): presumably the values carried by Store_ - confirm; numbering starts at 1
00154                 {
00155                         STORE_YES               = 1,
00156                         STORE_COMPRESS  = 2,
00157                         STORE_NO                = 3
00158                 };
00159                 enum TermVector
00160                 {
00161                         TERMVECTOR_NO                           = 1,
00162                         TERMVECTOR_SEQUENCE                     = 2,    //store the original sequence of the document after analysis
00163                         TERMVECTOR_WITHOUT_POSITION     = 3,    //do not store position information; store only terms and term frequencies
00164                         TERMVECTOR_WITH_POSITION        = 4,    //store terms, term frequencies and term positions
00165                 };
00166 
00167                 //{09B42270-8ACD-4e8a-B125-A79B46003F5D} - NOTE(review): presumably the GUID behind IDocumentSchema::iid - confirm. Interface of pure-virtual add*Item methods; each takes the item name and hands a field id back through the [out] pid parameter.
00168                 class IDocumentSchema : public firtex::com::IUnknown
00169                 {
00170                 public:         
00171                         static const firtex::com::FX_IID iid;
00172                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addKeywordItem(/* [in] */firtex::com::BSTR name,
00173                                                                                                                                                   /* [in] */firtex::plugin::FIELDTYPE ft,
00174                                                                                                                                                   /* [out] */firtex::plugin::fieldid_t* pid) = 0;
00175                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addUnIndexedItem(/* [in] */firtex::com::BSTR name,
00176                                                                                                                                                         /* [out] */firtex::plugin::fieldid_t* pid) = 0;
00177                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addTextItem(/* [in] */firtex::com::BSTR name,
00178                                                                                                                                            /* [in] */firtex::plugin::FIELDTYPE ft,
00179                                                                                                                                            /* [in] */firtex::plugin::Store_ store,
00180                                                                                                                                            /* [in] */firtex::plugin::TermVector_ termVector,
00181                                                                                                                                            /* [out] */firtex::plugin::fieldid_t* pid) = 0;
00182                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addUnStoredItem(/* [in] */firtex::com::BSTR name,
00183                                                                                                                                                    /* [in] */firtex::plugin::FIELDTYPE ft,
00184                                                                                                                                                    /* [in] */firtex::plugin::TermVector_ termVector,
00185                                                                                                                                                    /* [out] */firtex::plugin::fieldid_t* pid) = 0;
00186                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addBinaryItem(/* [in] */firtex::com::BSTR name,
00187                                                                                                                                                  /* [in] */firtex::com::bool_t bCompress,
00188                                                                                                                                                  /* [out] */firtex::plugin::fieldid_t* pid) = 0;
00189                 };
00190 
00191                 // {C92FA9F4-86FD-48c1-AB00-B16C78D2983F} - NOTE(review): presumably the GUID behind IDocument::iid - confirm. Interface that accepts an IDocumentSchema and (field id, INDEXDATAVAR) pairs.
00192                 class IDocument : public firtex::com::IUnknown
00193                 {
00194                 public:         
00195                         static const firtex::com::FX_IID iid;
00196                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE setSchema(/* [in] */IDocumentSchema* pSchema) = 0;
00197                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addField(/* [in] */firtex::plugin::fieldid_t id,
00198                                                                                                                                                   /* [in] */firtex::plugin::INDEXDATAVAR* value) = 0;
00199                 };
00200 
00201                 typedef unsigned short ARGTYPE;         //type of INDEXPARAMETER::at
00202                 enum ARGENUM                            //NOTE(review): presumably the values stored in INDEXPARAMETER::at, matching param.file / param.wfile / param.buffer - confirm
00203                 {
00204                         ARG_FILE        = 1,
00205                         ARG_FILEW       = 2,
00206                         ARG_BUFFER      = 3,
00207                 };
00208 
00209                 typedef struct _tagINDEXPARAMETER       //an ARGTYPE tag followed by a union of buffer / ANSI path / wide path
00210                 {
00211                         ARGTYPE at;
00212                         union
00213                         {
00214                                 STR             buffer;         //Buffer
00215                                 STR             file;           //file path, ANSI
00216                                 WSTR    wfile;          //file path, UNICODE
00217                         }param;
00218                 }INDEXPARAMETER;
00219 
00220                 // {31F417A2-5111-4e49-8D14-EA1D63F6CD5D} - NOTE(review): presumably the GUID behind IIndexParameter::iid - confirm. Interface exposing an INDEXPARAMETER through an [out] pointer and accepting (field id, METADATAVAR) pairs.
00221                 class IIndexParameter : public firtex::com::IUnknown
00222                 {
00223                 public:                         
00224                         static const firtex::com::FX_IID iid;
00225 
00226                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getParam(/* [out][retval] */ firtex::plugin::INDEXPARAMETER* pParam) = 0;                  
00227                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE addMetadata(/* [in] */firtex::plugin::fieldid_t id,
00228                                                                                                                                                          /* [in] */firtex::plugin::METADATAVAR* pMeta) = 0;                     
00229                 };
00230 
00231                 // {750C8BE7-1858-4c61-A97C-C864CF552EFD} - NOTE(review): presumably the GUID behind IReader::iid - confirm. Interface declaring read, position (getFilePointer/seek), end-of-data and length queries; results come back through pointer parameters.
00232                 class IReader : public firtex::com::IUnknown
00233                 {
00234                 public:         
00235                         static const firtex::com::FX_IID iid;
00236                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE read(/* [in] [retval] */byte* data,/* [in] */int32_t length) = 0; // NOTE(review): data is annotated [in] [retval] but is a non-const buffer; presumably it should read [out] [retval] - confirm
00237                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE readWithNoCopy(/* [out] [retval] */byte** data,/* [out] [retval] */int32_t* length) = 0;
00238                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE close() = 0;
00239                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getFilePointer(/* [out] [retval] */int64_t* position) = 0;
00240                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE seek(/* [in] */int64_t position) = 0;
00241                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE isEof(/* [out] [retval] */firtex::com::bool_t* bEof) = 0;
00242                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE length(/* [out] [retval] */int64_t* len) = 0;
00243                 };
00244 
00245                 enum TOKENENUM                          //every non-zero value below is a distinct single bit
00246                 {
00247                         TOKEN_NONE              = 0x0,
00248                         TOKEN_WORD              = 0x01, //word
00249                         TOKEN_DATE              = 0x02, //date/time
00250                         TOKEN_NAME              = 0x04, //person name
00251                         TOKEN_COMPANY   = 0x08, //organization name
00252                         TOKEN_EMAIL             = 0x10, //Email
00253                         TOKEN_NUM               = 0x20, //number
00254                         TOKEN_ALPHA             = 0x40, //letters
00255                         TOKEN_ALPNUM    = 0x80, //alphanumeric combination
00256                         TOKEN_URL               = 0x100,//URL
00257                 };
00258 
00259 
00260                 typedef uint32_t        TOKENTYPE;      //type used by ITokens::getType/setType; NOTE(review): presumably holds TOKENENUM values - confirm
00261                 typedef int32_t         TERMID;         //type of the tid argument of ITokens::appendWord
00262                 // {E6F0F10C-DF0B-411a-9A3E-EA2BCD4DF256} - NOTE(review): presumably the GUID behind ITokens::iid - confirm. Interface declaring token-type, capacity/count/max accessors, clear, raw buffer access and append methods.
00263                 class ITokens : public firtex::com::IUnknown
00264                 {
00265                 public:         
00266                         static const firtex::com::FX_IID iid;
00267 
00268                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getType(/* [out] [retval] */TOKENTYPE* type) = 0;
00269                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE setType(/* [in] */TOKENTYPE type) = 0;
00270                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getCapacity(/* [out] [retval]*/int32_t* cap) = 0;
00271                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getTokenNum(/* [out] [retval]*/int32_t* numTokens) = 0;
00272                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getMaxTokens(/* [out] [retval]*/int32_t* maxTokens) = 0;
00273                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE setMaxTokens(/* [in] */int32_t maxTokens) = 0;
00274                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE clear() = 0;
00275                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getBuffer(/* [out] [retval] */byte** buf) = 0;
00276                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE appendWord(/* [in] */TERMID tid,/* [out] [retval] */firtex::com::bool_t* bSuc) = 0;
00277                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE appendDate(/* [in] */int64_t dt,/* [out] [retval] */firtex::com::bool_t* bSuc) = 0;
00278                 };                      
00279         
00280                 //{D6CC19E5-3FEA-4c96-B50C-C176080E89C5} - NOTE(review): presumably the GUID behind IParserPlugin::iid - confirm. Parser plugin interface: string getters (category, identifier, file type), per-field TermVector_ get/set, defineSchema on an IDocumentSchema, and parseInternal taking an IDocument and an IIndexParameter.
00281                 class IParserPlugin : public firtex::com::IUnknown
00282                 {               
00283                 public:
00284                         static const firtex::com::FX_IID iid;
00285                         
00286                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getCategory(/* [out] [retval] */firtex::com::BSTR* retVal) = 0;
00287                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getIdentifier(/* [out] [retval] */firtex::com::BSTR* retVal) = 0;
00288                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getFileType(/* [out] [retval] */firtex::com::BSTR* retVal) = 0;
00289                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE setTermVector(/* [in] */firtex::com::BSTR field,/* [in] */firtex::plugin::TermVector_ tv) = 0;
00290                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getTermVector(/* [in] */firtex::com::BSTR field,/* [out] [retval] */firtex::plugin::TermVector_* ptv) = 0;
00291                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE defineSchema(/* [in] [out] [retval] */IDocumentSchema* pSchema) = 0;
00292                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE parseInternal(/* [in] */IDocument* pDoc,
00293                                                                                                                                                            /* [in] */IIndexParameter* param) = 0;
00294                 };
00295 
00296                 // {7D7D876B-CCE4-4fe0-8389-A20106A828A3} - NOTE(review): presumably the GUID behind IAnalyzerPlugin::iid - confirm. Analyzer plugin interface: category/identifier/token-type getters and nextTokensInternal, which takes an IReader and an ITokens.
00297                 class IAnalyzerPlugin : public firtex::com::IUnknown
00298                 {               
00299                 public:
00300                         static const firtex::com::FX_IID iid;
00301 
00302                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getCategory(/* [out] [retval] */firtex::com::BSTR* retVal) = 0;
00303                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getIdentifier(/* [out] [retval] */firtex::com::BSTR* retVal) = 0;
00304                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE getTokenType(/* [out] [retval] */TOKENTYPE* retVal) = 0;
00305                         virtual firtex::com::FX_HRESULT FX_STDMETHODCALLTYPE nextTokensInternal(/* [in] */IReader* reader,/* [in] [out] [retval]*/ITokens* tokens) = 0;
00306                 };
00307         }
00308 }
00309 #endif //__cplusplus
00310 
00311 #endif

http://www.firtex.org http://www.sourceforge.net/projects/firtex