| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | #include "fts3Int.h" |
| | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| | #ifdef SQLITE_ENABLE_ICU |
| |
|
| | #include <assert.h> |
| | #include <string.h> |
| | #include "fts3_tokenizer.h" |
| |
|
| | #include <unicode/ubrk.h> |
| | #include <unicode/ucol.h> |
| | #include <unicode/ustring.h> |
| | #include <unicode/utf16.h> |
| |
|
/*
** Short type names for the two structures defined below, so the rest of
** the file can refer to them without the "struct" keyword.
*/
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
| |
|
| | struct IcuTokenizer { |
| | sqlite3_tokenizer base; |
| | char *zLocale; |
| | }; |
| |
|
| | struct IcuCursor { |
| | sqlite3_tokenizer_cursor base; |
| |
|
| | UBreakIterator *pIter; |
| | int nChar; |
| | UChar *aChar; |
| | int *aOffset; |
| |
|
| | int nBuffer; |
| | char *zBuffer; |
| |
|
| | int iToken; |
| | }; |
| |
|
| | |
| | |
| | |
| | static int icuCreate( |
| | int argc, |
| | const char * const *argv, |
| | sqlite3_tokenizer **ppTokenizer |
| | ){ |
| | IcuTokenizer *p; |
| | int n = 0; |
| |
|
| | if( argc>0 ){ |
| | n = strlen(argv[0])+1; |
| | } |
| | p = (IcuTokenizer *)sqlite3_malloc64(sizeof(IcuTokenizer)+n); |
| | if( !p ){ |
| | return SQLITE_NOMEM; |
| | } |
| | memset(p, 0, sizeof(IcuTokenizer)); |
| |
|
| | if( n ){ |
| | p->zLocale = (char *)&p[1]; |
| | memcpy(p->zLocale, argv[0], n); |
| | } |
| |
|
| | *ppTokenizer = (sqlite3_tokenizer *)p; |
| |
|
| | return SQLITE_OK; |
| | } |
| |
|
| | |
| | |
| | |
| | static int icuDestroy(sqlite3_tokenizer *pTokenizer){ |
| | IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| | sqlite3_free(p); |
| | return SQLITE_OK; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | static int icuOpen( |
| | sqlite3_tokenizer *pTokenizer, |
| | const char *zInput, |
| | int nInput, |
| | sqlite3_tokenizer_cursor **ppCursor |
| | ){ |
| | IcuTokenizer *p = (IcuTokenizer *)pTokenizer; |
| | IcuCursor *pCsr; |
| |
|
| | const int32_t opt = U_FOLD_CASE_DEFAULT; |
| | UErrorCode status = U_ZERO_ERROR; |
| | int nChar; |
| |
|
| | UChar32 c; |
| | int iInput = 0; |
| | int iOut = 0; |
| |
|
| | *ppCursor = 0; |
| |
|
| | if( zInput==0 ){ |
| | nInput = 0; |
| | zInput = ""; |
| | }else if( nInput<0 ){ |
| | nInput = strlen(zInput); |
| | } |
| | nChar = nInput+1; |
| | pCsr = (IcuCursor *)sqlite3_malloc64( |
| | sizeof(IcuCursor) + |
| | ((nChar+3)&~3) * sizeof(UChar) + |
| | (nChar+1) * sizeof(int) |
| | ); |
| | if( !pCsr ){ |
| | return SQLITE_NOMEM; |
| | } |
| | memset(pCsr, 0, sizeof(IcuCursor)); |
| | pCsr->aChar = (UChar *)&pCsr[1]; |
| | pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; |
| |
|
| | pCsr->aOffset[iOut] = iInput; |
| | U8_NEXT(zInput, iInput, nInput, c); |
| | while( c>0 ){ |
| | int isError = 0; |
| | c = u_foldCase(c, opt); |
| | U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); |
| | if( isError ){ |
| | sqlite3_free(pCsr); |
| | return SQLITE_ERROR; |
| | } |
| | pCsr->aOffset[iOut] = iInput; |
| |
|
| | if( iInput<nInput ){ |
| | U8_NEXT(zInput, iInput, nInput, c); |
| | }else{ |
| | c = 0; |
| | } |
| | } |
| |
|
| | pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); |
| | if( !U_SUCCESS(status) ){ |
| | sqlite3_free(pCsr); |
| | return SQLITE_ERROR; |
| | } |
| | pCsr->nChar = iOut; |
| |
|
| | ubrk_first(pCsr->pIter); |
| | *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; |
| | return SQLITE_OK; |
| | } |
| |
|
| | |
| | |
| | |
| | static int icuClose(sqlite3_tokenizer_cursor *pCursor){ |
| | IcuCursor *pCsr = (IcuCursor *)pCursor; |
| | ubrk_close(pCsr->pIter); |
| | sqlite3_free(pCsr->zBuffer); |
| | sqlite3_free(pCsr); |
| | return SQLITE_OK; |
| | } |
| |
|
| | |
| | |
| | |
| | static int icuNext( |
| | sqlite3_tokenizer_cursor *pCursor, |
| | const char **ppToken, |
| | int *pnBytes, |
| | int *piStartOffset, |
| | int *piEndOffset, |
| | int *piPosition |
| | ){ |
| | IcuCursor *pCsr = (IcuCursor *)pCursor; |
| |
|
| | int iStart = 0; |
| | int iEnd = 0; |
| | int nByte = 0; |
| |
|
| | while( iStart==iEnd ){ |
| | UChar32 c; |
| |
|
| | iStart = ubrk_current(pCsr->pIter); |
| | iEnd = ubrk_next(pCsr->pIter); |
| | if( iEnd==UBRK_DONE ){ |
| | return SQLITE_DONE; |
| | } |
| |
|
| | while( iStart<iEnd ){ |
| | int iWhite = iStart; |
| | U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); |
| | if( u_isspace(c) ){ |
| | iStart = iWhite; |
| | }else{ |
| | break; |
| | } |
| | } |
| | assert(iStart<=iEnd); |
| | } |
| |
|
| | do { |
| | UErrorCode status = U_ZERO_ERROR; |
| | if( nByte ){ |
| | char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); |
| | if( !zNew ){ |
| | return SQLITE_NOMEM; |
| | } |
| | pCsr->zBuffer = zNew; |
| | pCsr->nBuffer = nByte; |
| | } |
| |
|
| | u_strToUTF8( |
| | pCsr->zBuffer, pCsr->nBuffer, &nByte, |
| | &pCsr->aChar[iStart], iEnd-iStart, |
| | &status |
| | ); |
| | } while( nByte>pCsr->nBuffer ); |
| |
|
| | *ppToken = pCsr->zBuffer; |
| | *pnBytes = nByte; |
| | *piStartOffset = pCsr->aOffset[iStart]; |
| | *piEndOffset = pCsr->aOffset[iEnd]; |
| | *piPosition = pCsr->iToken++; |
| |
|
| | return SQLITE_OK; |
| | } |
| |
|
| | |
| | |
| | |
| | static const sqlite3_tokenizer_module icuTokenizerModule = { |
| | 0, |
| | icuCreate, |
| | icuDestroy, |
| | icuOpen, |
| | icuClose, |
| | icuNext, |
| | 0, |
| | }; |
| |
|
| | |
| | |
| | |
| | void sqlite3Fts3IcuTokenizerModule( |
| | sqlite3_tokenizer_module const**ppModule |
| | ){ |
| | *ppModule = &icuTokenizerModule; |
| | } |
| |
|
| | #endif |
| | #endif |
| |
|