| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | if {![info exists testdir]} { |
| | set testdir [file join [file dirname [info script]] .. .. .. test] |
| | } |
| | source $testdir/tester.tcl |
| |
|
| | ifcapable !fts5 { |
| | proc return_if_no_fts5 {} { |
| | finish_test |
| | return -code return |
| | } |
| | return |
| | } else { |
| | proc return_if_no_fts5 {} {} |
| | } |
| |
|
| | catch { |
| | sqlite3_fts5_may_be_corrupt 0 |
| | reset_db |
| | } |
| |
|
| | proc fts5_test_poslist {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xInstCount]} {incr i} { |
| | lappend res [string map {{ } .} [$cmd xInst $i]] |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_test_poslist2 {cmd} { |
| | set res [list] |
| |
|
| | for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { |
| | $cmd xPhraseForeach $i c o { |
| | lappend res $i.$c.$o |
| | } |
| | } |
| |
|
| | |
| | sort_poslist $res |
| | } |
| |
|
| | proc fts5_test_insttoken {cmd iInst iToken} { |
| | $cmd xInstToken $iInst $iToken |
| | } |
| |
|
| | proc fts5_test_collist {cmd} { |
| | set res [list] |
| |
|
| | for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { |
| | $cmd xPhraseColumnForeach $i c { lappend res $i.$c } |
| | } |
| |
|
| | set res |
| | } |
| |
|
| | proc fts5_collist {cmd iPhrase} { |
| | set res [list] |
| | $cmd xPhraseColumnForeach $iPhrase c { lappend res $c } |
| | set res |
| | } |
| |
|
| | proc fts5_test_columnsize {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
| | lappend res [$cmd xColumnSize $i] |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_columntext {cmd iCol} { |
| | $cmd xColumnText $iCol |
| | } |
| | proc fts5_columnlocale {cmd iCol} { |
| | $cmd xColumnLocale $iCol |
| | } |
| |
|
| | proc fts5_test_columntext {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
| | lappend res [$cmd xColumnText $i] |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_test_columnlocale {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
| | lappend res [$cmd xColumnLocale $i] |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_test_columntotalsize {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
| | lappend res [$cmd xColumnTotalSize $i] |
| | } |
| | set res |
| | } |
| |
|
| | proc test_append_token {varname token iStart iEnd} { |
| | upvar $varname var |
| | lappend var $token |
| | return "SQLITE_OK" |
| | } |
| | proc fts5_test_tokenize {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
| | set tokens [list] |
| | $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens] |
| | lappend res $tokens |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_test_rowcount {cmd} { |
| | $cmd xRowCount |
| | } |
| |
|
| | proc fts5_test_rowid {cmd} { |
| | $cmd xRowid |
| | } |
| |
|
| | proc test_queryphrase_cb {cnt cmd} { |
| | upvar $cnt L |
| | for {set i 0} {$i < [$cmd xInstCount]} {incr i} { |
| | foreach {ip ic io} [$cmd xInst $i] break |
| | set A($ic) 1 |
| | } |
| | foreach ic [array names A] { |
| | lset L $ic [expr {[lindex $L $ic] + 1}] |
| | } |
| | } |
| | proc fts5_test_queryphrase {cmd} { |
| | set res [list] |
| | for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { |
| | set cnt [list] |
| | for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 } |
| | $cmd xQueryPhrase $i [list test_queryphrase_cb cnt] |
| | lappend res $cnt |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_queryphrase {cmd iPhrase} { |
| | set cnt [list] |
| | for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 } |
| | $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb cnt] |
| | set cnt |
| | } |
| |
|
| | proc fts5_test_phrasecount {cmd} { |
| | $cmd xPhraseCount |
| | } |
| |
|
| | proc fts5_test_all {cmd} { |
| | set res [list] |
| | lappend res columnsize [fts5_test_columnsize $cmd] |
| | lappend res columntext [fts5_test_columntext $cmd] |
| | lappend res columntotalsize [fts5_test_columntotalsize $cmd] |
| | lappend res poslist [fts5_test_poslist $cmd] |
| | lappend res tokenize [fts5_test_tokenize $cmd] |
| | lappend res rowcount [fts5_test_rowcount $cmd] |
| | set res |
| | } |
| |
|
| | proc fts5_aux_test_functions {db} { |
| | foreach f { |
| | fts5_test_columnsize |
| | fts5_test_columntext |
| | fts5_test_columnlocale |
| | fts5_test_columntotalsize |
| | fts5_test_poslist |
| | fts5_test_poslist2 |
| | fts5_test_collist |
| | fts5_test_insttoken |
| | fts5_test_tokenize |
| | fts5_test_rowcount |
| | fts5_test_rowid |
| | fts5_test_all |
| |
|
| | fts5_test_queryphrase |
| | fts5_test_phrasecount |
| | fts5_columntext |
| | fts5_columnlocale |
| | fts5_queryphrase |
| | fts5_collist |
| | } { |
| | sqlite3_fts5_create_function $db $f $f |
| | } |
| | } |
| |
|
| | proc fts5_segcount {tbl} { |
| | set N 0 |
| | foreach n [fts5_level_segs $tbl] { incr N $n } |
| | set N |
| | } |
| |
|
| | proc fts5_level_segs {tbl} { |
| | set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" |
| | set ret [list] |
| | foreach L [lrange [db one $sql] 1 end] { |
| | lappend ret [expr [llength $L] - 3] |
| | } |
| | set ret |
| | } |
| |
|
| | proc fts5_level_segids {tbl} { |
| | set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" |
| | set ret [list] |
| | foreach L [lrange [db one $sql] 1 end] { |
| | set lvl [list] |
| | foreach S [lrange $L 3 end] { |
| | regexp {id=([1234567890]*)} $S -> segid |
| | lappend lvl $segid |
| | } |
| | lappend ret $lvl |
| | } |
| | set ret |
| | } |
| |
|
| | proc fts5_rnddoc {n} { |
| | set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] |
| | set doc [list] |
| | for {set i 0} {$i < $n} {incr i} { |
| | lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]" |
| | } |
| | set doc |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | proc nearset {aCol args} { |
| |
|
| | |
| | |
| | set O(-near) 10 |
| | set O(-col) {} |
| | set O(-pc) "" |
| | set O(-dict) "" |
| |
|
| | set nOpt [lsearch -exact $args --] |
| | if {$nOpt<0} { error "no -- option" } |
| |
|
| | |
| | set lPhrase [lrange $args [expr $nOpt+1] end] |
| | set nPhrase [llength $lPhrase] |
| |
|
| | foreach {k v} [lrange $args 0 [expr $nOpt-1]] { |
| | if {[info exists O($k)]==0} { error "unrecognized option $k" } |
| | set O($k) $v |
| | } |
| |
|
| | if {$O(-pc) == ""} { |
| | set counter 0 |
| | } else { |
| | upvar $O(-pc) counter |
| | } |
| |
|
| | if {$O(-dict)!=""} { upvar $O(-dict) aDict } |
| |
|
| | for {set j 0} {$j < [llength $aCol]} {incr j} { |
| | for {set i 0} {$i < $nPhrase} {incr i} { |
| | set A($j,$i) [list] |
| | } |
| | } |
| |
|
| | |
| | for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { |
| |
|
| | |
| | |
| | |
| | if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue |
| | set zCol [lindex $aCol $iCol] |
| | set nToken [llength $zCol] |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] |
| | for { } {$iLast < $nToken} {incr iLast} { |
| |
|
| | catch { array unset B } |
| | |
| | for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { |
| | set p [lindex $lPhrase $iPhrase] |
| | set nPm1 [expr {[llength $p] - 1}] |
| | set iFirst [expr $iLast - $O(-near) - [llength $p]] |
| |
|
| | for {set i $iFirst} {$i <= $iLast} {incr i} { |
| | set lCand [lrange $zCol $i [expr $i+$nPm1]] |
| | set bMatch 1 |
| | foreach tok $p term $lCand { |
| | if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break } |
| | } |
| | if {$bMatch} { lappend B($iPhrase) $i } |
| | } |
| |
|
| | if {![info exists B($iPhrase)]} break |
| | } |
| |
|
| | if {$iPhrase==$nPhrase} { |
| | for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { |
| | set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] |
| | set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] |
| | } |
| | } |
| | } |
| | } |
| |
|
| | set res [list] |
| | |
| |
|
| | for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { |
| | for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { |
| | foreach a $A($iCol,$iPhrase) { |
| | lappend res "$counter.$iCol.$a" |
| | } |
| | } |
| | incr counter |
| | } |
| |
|
| | |
| | sort_poslist $res |
| | } |
| |
|
| | proc nearset_match {aDictVar tok term} { |
| | if {[string match $tok $term]} { return 1 } |
| |
|
| | upvar $aDictVar aDict |
| | if {[info exists aDict($tok)]} { |
| | foreach s $aDict($tok) { |
| | if {[string match $s $term]} { return 1 } |
| | } |
| | } |
| | return 0; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | proc sort_poslist {L} { |
| | lsort -command instcompare $L |
| | } |
| | proc instcompare {lhs rhs} { |
| | foreach {p1 c1 o1} [split $lhs .] {} |
| | foreach {p2 c2 o2} [split $rhs .] {} |
| |
|
| | set res [expr $c1 - $c2] |
| | if {$res==0} { set res [expr $o1 - $o2] } |
| | if {$res==0} { set res [expr $p1 - $p2] } |
| |
|
| | return $res |
| | } |
| |
|
| | |
| | |
| | |
| | proc AND {args} { |
| | foreach a $args { |
| | if {[llength $a]==0} { return [list] } |
| | } |
| | sort_poslist [concat {*}$args] |
| | } |
| | proc OR {args} { |
| | sort_poslist [concat {*}$args] |
| | } |
| | proc NOT {a b} { |
| | if {[llength $b]>0} { return [list] } |
| | return $a |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | proc gobble_whitespace {textvar} { |
| | upvar $textvar t |
| | regexp {([ ]*)(.*)} $t -> space t |
| | return [string length $space] |
| | } |
| |
|
| | proc gobble_text {textvar wordvar} { |
| | upvar $textvar t |
| | upvar $wordvar w |
| | regexp {([^ ]*)(.*)} $t -> w t |
| | return [string length $w] |
| | } |
| |
|
| | proc fts5_tokenize_split {text} { |
| | set token "" |
| | set ret [list] |
| | set iOff [gobble_whitespace text] |
| | while {[set nToken [gobble_text text word]]} { |
| | lappend ret $word $iOff [expr $iOff+$nToken] |
| | incr iOff $nToken |
| | incr iOff [gobble_whitespace text] |
| | } |
| |
|
| | set ret |
| | } |
| |
|
| | |
| | |
| | proc foreach_detail_mode {prefix script} { |
| | set saved $::testprefix |
| | foreach d [list full col none] { |
| | set s [string map [list %DETAIL% $d] $script] |
| | set ::detail $d |
| | set ::testprefix "$prefix-$d" |
| | reset_db |
| | uplevel $s |
| | unset ::detail |
| | } |
| | set ::testprefix $saved |
| | } |
| |
|
| | proc detail_check {} { |
| | if {$::detail != "none" && $::detail!="full" && $::detail!="col"} { |
| | error "not in foreach_detail_mode {...} block" |
| | } |
| | } |
| | proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } |
| | proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } |
| | proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } |
| |
|
| | proc foreach_tokenizer_mode {prefix script} { |
| | set saved $::testprefix |
| | foreach {d mapping} { |
| | "" {} |
| | "-origintext" {, tokenize="origintext unicode61", tokendata=1} |
| | } { |
| | set s [string map [list %TOKENIZER% $mapping] $script] |
| | set ::testprefix "$prefix$d" |
| | reset_db |
| | sqlite3_fts5_register_origintext db |
| | uplevel $s |
| | } |
| | set ::testprefix $saved |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | proc fts5_poslist2collist {poslist} { |
| | set res [list] |
| | foreach h $poslist { |
| | regexp {(.*)\.[1234567890]+} $h -> cand |
| | lappend res $cand |
| | } |
| | set res [lsort -command fts5_collist_elem_compare -unique $res] |
| | return $res |
| | } |
| |
|
| | |
| | proc fts5_collist_elem_compare {a b} { |
| | foreach {a1 a2} [split $a .] {} |
| | foreach {b1 b2} [split $b .] {} |
| |
|
| | if {$a1==$b1} { return [expr $a2 - $b2] } |
| | return [expr $a1 - $b1] |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} { |
| |
|
| | |
| | |
| | |
| | db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } |
| |
|
| | set d "" |
| | if {$aDictVar != ""} { |
| | upvar $aDictVar aDict |
| | set d aDict |
| | } |
| |
|
| | set cols "" |
| | foreach e $lCols { append cols ", '$e'" } |
| | set tclexpr [db one [subst -novar { |
| | SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] ) |
| | }]] |
| |
|
| | set res [list] |
| | db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { |
| | set cols [list] |
| | foreach col $lCols { lappend cols $x($col) } |
| | |
| | set ::pc 0 |
| | set rowdata [eval $tclexpr] |
| | if {$rowdata != ""} { |
| | lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata] |
| | } |
| | } |
| |
|
| | set res |
| | } |
| |
|
| | |
| | |
| | |
| | proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} { |
| | set res [list] |
| |
|
| | if {$aDictVar!=""} { |
| | upvar $aDictVar aDict |
| | set dict aDict |
| | } else { |
| | set dict "" |
| | } |
| |
|
| | foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { |
| | lappend res $rowid $poslist |
| | } |
| | set res |
| | } |
| |
|
| | proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} { |
| | set res [list] |
| |
|
| | if {$aDictVar!=""} { |
| | upvar $aDictVar aDict |
| | set dict aDict |
| | } else { |
| | set dict "" |
| | } |
| |
|
| | foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { |
| | lappend res $rowid $collist |
| | } |
| | set res |
| | } |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | proc fts5_expr_ok {expr tbl} { |
| |
|
| | if {![detail_is_full]} { |
| | set nearset "nearset_rc" |
| | if {[detail_is_col]} { set nearset "nearset_rf" } |
| |
|
| | set ::expr_not_ok 0 |
| | db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } |
| |
|
| | set cols "" |
| | foreach e $lCols { append cols ", '$e'" } |
| | set ::pc 0 |
| | set tclexpr [db one [subst -novar { |
| | SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] ) |
| | }]] |
| | eval $tclexpr |
| | if {$::expr_not_ok} { return 0 } |
| | } |
| |
|
| | return 1 |
| | } |
| |
|
| | |
| | proc nearset_rf {aCol args} { |
| | set idx [lsearch -exact $args --] |
| | if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { |
| | set ::expr_not_ok 1 |
| | } |
| | list |
| | } |
| |
|
| | |
| | proc nearset_rc {aCol args} { |
| | nearset_rf $aCol {*}$args |
| | if {[lsearch $args -col]>=0} { |
| | set ::expr_not_ok 1 |
| | } |
| | list |
| | } |
| |
|
| | proc dump {tname} { |
| | execsql_pp "SELECT * FROM ${tname}_idx" |
| | execsql_pp "SELECT id, quote(block), fts5_decode(id,block) FROM ${tname}_data" |
| | } |
| |
|
| | |
| | |
| | |
| | proc tclnum_tokenize {mode tflags text} { |
| | foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| | sqlite3_fts5_token $w $iStart $iEnd |
| | if {$tflags == $mode && [info exists ::tclnum_syn($w)]} { |
| | foreach s $::tclnum_syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } |
| | } |
| | } |
| | } |
| |
|
| | proc tclnum_create {args} { |
| | set mode query |
| | if {[llength $args]} { |
| | set mode [lindex $args 0] |
| | } |
| | if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" } |
| | return [list tclnum_tokenize $mode] |
| | } |
| |
|
| | proc fts5_tclnum_register {db} { |
| | foreach SYNDICT { |
| | {zero 0} |
| | {one 1 i} |
| | {two 2 ii} |
| | {three 3 iii} |
| | {four 4 iv} |
| | {five 5 v} |
| | {six 6 vi} |
| | {seven 7 vii} |
| | {eight 8 viii} |
| | {nine 9 ix} |
| |
|
| | {a1 a2 a3 a4 a5 a6 a7 a8 a9} |
| | {b1 b2 b3 b4 b5 b6 b7 b8 b9} |
| | {c1 c2 c3 c4 c5 c6 c7 c8 c9} |
| | } { |
| | foreach s $SYNDICT { |
| | set o [list] |
| | foreach x $SYNDICT {if {$x!=$s} {lappend o $x}} |
| | set ::tclnum_syn($s) $o |
| | } |
| | } |
| | sqlite3_fts5_create_tokenizer db tclnum tclnum_create |
| | } |
| | |
| | |
| | |
| |
|
| |
|