Use chi-square test for random distribution verification in tests (#8709)
Problem: Currently, when verifying random distribution, we compute the probability of each element occurring in the sample and check that it falls within a fixed range, but that range is only an estimate. These tests had rare sporadic failures, and we could not quantify the probability of such a failure. Solution: Using a chi-square test instead of the original random distribution validation makes the test more statistically sound and makes real problems easier to find.
This commit is contained in:
parent
636aa8de76
commit
569a3f4548
@ -682,20 +682,34 @@ proc string2printable s {
|
|||||||
return $res
|
return $res
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check that probability of each element are between {min_prop} and {max_prop}.
|
# Calculate the chi-square statistic of a random sample. With this value
|
||||||
proc check_histogram_distribution {res min_prop max_prop} {
|
# we can verify the random distribution sample confidence.
|
||||||
|
# Based on the following wiki:
|
||||||
|
# https://en.wikipedia.org/wiki/Chi-square_distribution
|
||||||
|
#
|
||||||
|
# param res Random sample list
|
||||||
|
# return Value of Chi-Square Distribution
|
||||||
|
#
|
||||||
|
# x2_value: return of chi_square_value function
|
||||||
|
# df: Degrees of freedom, Number of independent values minus 1
|
||||||
|
#
|
||||||
|
# By looking up x2_value and df in a chi-square critical value table,
|
||||||
|
# we can know the confidence of the random sample.
|
||||||
|
proc chi_square_value {res} {
|
||||||
unset -nocomplain mydict
|
unset -nocomplain mydict
|
||||||
foreach key $res {
|
foreach key $res {
|
||||||
dict incr mydict $key 1
|
dict incr mydict $key 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set x2_value 0
|
||||||
|
set p [expr [llength $res] / [dict size $mydict]]
|
||||||
foreach key [dict keys $mydict] {
|
foreach key [dict keys $mydict] {
|
||||||
set value [dict get $mydict $key]
|
set value [dict get $mydict $key]
|
||||||
set probability [expr {double($value) / [llength $res]}]
|
|
||||||
if {$probability < $min_prop || $probability > $max_prop} {
|
# Aggregate the chi-square value of each element
|
||||||
return false
|
set v [expr {pow($value - $p, 2) / $p}]
|
||||||
}
|
set x2_value [expr {$x2_value + $v}]
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return $x2_value
|
||||||
}
|
}
|
||||||
|
@ -105,8 +105,9 @@ start_server {tags {"hash"}} {
|
|||||||
assert_equal [llength $res] 2002
|
assert_equal [llength $res] 2002
|
||||||
|
|
||||||
# Test random uniform distribution
|
# Test random uniform distribution
|
||||||
|
# df = 9, 40 means 0.00001 probability
|
||||||
set res [r hrandfield myhash -1000]
|
set res [r hrandfield myhash -1000]
|
||||||
assert_equal [check_histogram_distribution $res 0.05 0.15] true
|
assert_lessthan [chi_square_value $res] 40
|
||||||
|
|
||||||
# 2) Check that all the elements actually belong to the original hash.
|
# 2) Check that all the elements actually belong to the original hash.
|
||||||
foreach {key val} $res {
|
foreach {key val} $res {
|
||||||
@ -199,7 +200,8 @@ start_server {tags {"hash"}} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert_equal $all_ele_return true
|
assert_equal $all_ele_return true
|
||||||
assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
|
# df = 9, 40 means 0.00001 probability
|
||||||
|
assert_lessthan [chi_square_value $allkey] 40
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r config set hash-max-ziplist-value $original_max_value
|
r config set hash-max-ziplist-value $original_max_value
|
||||||
|
@ -533,8 +533,9 @@ start_server {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Use negative count (PATH 1).
|
# Use negative count (PATH 1).
|
||||||
|
# df = 9, 40 means 0.00001 probability
|
||||||
set res [r srandmember myset -1000]
|
set res [r srandmember myset -1000]
|
||||||
assert_equal [check_histogram_distribution $res 0.05 0.15] true
|
assert_lessthan [chi_square_value $res] 40
|
||||||
|
|
||||||
# Use positive count (both PATH 3 and PATH 4).
|
# Use positive count (both PATH 3 and PATH 4).
|
||||||
foreach size {8 2} {
|
foreach size {8 2} {
|
||||||
@ -547,7 +548,8 @@ start_server {
|
|||||||
lappend allkey $ele
|
lappend allkey $ele
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
|
# df = 9, 40 means 0.00001 probability
|
||||||
|
assert_lessthan [chi_square_value $allkey] 40
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1655,8 +1655,9 @@ start_server {tags {"zset"}} {
|
|||||||
assert_equal [llength $res] 2002
|
assert_equal [llength $res] 2002
|
||||||
|
|
||||||
# Test random uniform distribution
|
# Test random uniform distribution
|
||||||
|
# df = 9, 40 means 0.00001 probability
|
||||||
set res [r zrandmember myzset -1000]
|
set res [r zrandmember myzset -1000]
|
||||||
assert_equal [check_histogram_distribution $res 0.05 0.15] true
|
assert_lessthan [chi_square_value $res] 40
|
||||||
|
|
||||||
# 2) Check that all the elements actually belong to the original zset.
|
# 2) Check that all the elements actually belong to the original zset.
|
||||||
foreach {key val} $res {
|
foreach {key val} $res {
|
||||||
@ -1749,7 +1750,8 @@ start_server {tags {"zset"}} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert_equal $all_ele_return true
|
assert_equal $all_ele_return true
|
||||||
assert_equal [check_histogram_distribution $allkey 0.05 0.15] true
|
# df = 9, 40 means 0.00001 probability
|
||||||
|
assert_lessthan [chi_square_value $allkey] 40
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r config set zset-max-ziplist-value $original_max_value
|
r config set zset-max-ziplist-value $original_max_value
|
||||||
|
Loading…
Reference in New Issue
Block a user