diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index c35441ab..5ea85c9e 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -682,20 +682,44 @@ proc string2printable s {
     return $res
 }
 
-# Check that probability of each element are between {min_prop} and {max_prop}.
-proc check_histogram_distribution {res min_prop max_prop} {
+# Compute the chi-square statistic of a random sample, assuming every
+# distinct element of the sample is expected to appear equally often.
+# By this value we can verify the random distribution sample confidence.
+# Based on the following wiki:
+# https://en.wikipedia.org/wiki/Chi-square_distribution
+#
+# param res Random sample list, must not be empty
+# return Chi-square statistic of the sample
+#
+# x2_value: return of chi_square_value function
+# df: Degrees of freedom, number of independent values minus 1
+#
+# By looking up x2_value and df in a chi-square critical value table,
+# we can know the confidence of the random sample.
+proc chi_square_value {res} {
     unset -nocomplain mydict
     foreach key $res {
         dict incr mydict $key 1
     }
 
+    # Report an empty sample explicitly instead of failing on the unset
+    # dict below.
+    if {[llength $res] == 0} {
+        error "chi_square_value: sample list is empty"
+    }
+
+    set x2_value 0
+    # Expected count of each element. Divide in floating point so the
+    # expectation is not truncated when the sample size is not a multiple
+    # of the number of distinct elements.
+    set p [expr {double([llength $res]) / [dict size $mydict]}]
     foreach key [dict keys $mydict] {
         set value [dict get $mydict $key]
-        set probability [expr {double($value) / [llength $res]}]
-        if {$probability < $min_prop || $probability > $max_prop} {
-            return false
-        }
+
+        # Aggregate the chi-square value of each element
+        set v [expr {pow($value - $p, 2) / $p}]
+        set x2_value [expr {$x2_value + $v}]
     }
 
-    return true
+    return $x2_value
 }
diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl
index 2eea9889..fcf97eed 100644
--- a/tests/unit/type/hash.tcl
+++ b/tests/unit/type/hash.tcl
@@ -105,8 +105,9 @@ start_server {tags {"hash"}} {
             assert_equal [llength $res] 2002
 
             # Test random uniform distribution
+            # df = 9, 40 means 0.00001 probability
             set res [r hrandfield myhash -1000]
-            assert_equal [check_histogram_distribution $res 0.05 0.15] true
+            assert_lessthan [chi_square_value $res] 40
 
             # 2) Check that all the elements actually belong to the original hash.
             foreach {key val} $res {
@@ -199,7 +200,8 @@ start_server {tags {"hash"}} {
                     }
                 }
                 assert_equal $all_ele_return true
-                assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
+                # df = 9, 40 means 0.00001 probability
+                assert_lessthan [chi_square_value $allkey] 40
             }
         }
         r config set hash-max-ziplist-value $original_max_value
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 4eb93a21..5548ca3a 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -533,8 +533,9 @@ start_server {
             }
 
             # Use negative count (PATH 1).
+            # df = 9, 40 means 0.00001 probability
             set res [r srandmember myset -1000]
-            assert_equal [check_histogram_distribution $res 0.05 0.15] true
+            assert_lessthan [chi_square_value $res] 40
 
             # Use positive count (both PATH 3 and PATH 4).
             foreach size {8 2} {
@@ -547,7 +548,8 @@ start_server {
                         lappend allkey $ele
                     }
                 }
-                assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
+                # df = 9, 40 means 0.00001 probability
+                assert_lessthan [chi_square_value $allkey] 40
             }
         }
     }
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index 2456815f..0170d2bf 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -1655,8 +1655,9 @@ start_server {tags {"zset"}} {
             assert_equal [llength $res] 2002
 
             # Test random uniform distribution
+            # df = 9, 40 means 0.00001 probability
             set res [r zrandmember myzset -1000]
-            assert_equal [check_histogram_distribution $res 0.05 0.15] true
+            assert_lessthan [chi_square_value $res] 40
 
             # 2) Check that all the elements actually belong to the original zset.
             foreach {key val} $res {
@@ -1749,7 +1750,8 @@ start_server {tags {"zset"}} {
                     }
                 }
                 assert_equal $all_ele_return true
-                assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
+                # df = 9, 40 means 0.00001 probability
+                assert_lessthan [chi_square_value $allkey] 40
             }
         }
         r config set zset-max-ziplist-value $original_max_value