"""
Utility functions for statistical testing
"""
[docs]def get_sig(pval):
"""Returns asterisk depending on the magnitude of significance"""
if pval < 0.001:
sig = "***"
elif pval < 0.01:
sig = "**"
elif pval < 0.05:
sig = "*"
else:
sig = "ns" # non-significant
return sig
[docs]def z_test(count1, nbos1, count2, nobs2):
"""Z-test for proportion"""
import numpy as np
from statsmodels.stats.proportion import proportions_ztest
count = np.array([count1, nbos1])
nobs = np.array([count2, nobs2])
stat, pval = proportions_ztest(count, nobs)
sig = get_sig(stat.pvalue) # print out asterisk
return stat, pval, sig
[docs]def paired_ttest(arr1, arr2):
"""Performs paired t-test between two arrays"""
from scipy.stats import ttest_rel
stat = ttest_rel(arr1, arr2, nan_policy="omit")
sig = get_sig(stat.pvalue) # print out asterisk
degree_of_freedom = len(arr1) - 1
msg1 = f"t({degree_of_freedom}) = {stat.statistic :.4f}"
if stat.pvalue < 0.001: # mark significance
msg2 = "p < 0.001"
else:
msg2 = f"p = {stat.pvalue :.3f}"
msg = msg1 + ", " + msg2
return stat.statistic, stat.pvalue, msg, sig
[docs]def two_sample_ttest(arr1, arr2):
"""Performs independent two-sample t-test between two arrays"""
import numpy as np
from scipy import stats
# Remove nan if any
arr1 = arr1[~np.isnan(arr1)]
arr2 = arr2[~np.isnan(arr2)]
tval, pval = stats.ttest_ind(arr1, arr2, nan_policy="omit")
sig = get_sig(pval) # print out asterisk
degree_of_freedom = len(arr1) + len(arr2) - 2
msg1 = f"t({degree_of_freedom}) = {tval : 3f}"
if pval < 0.001: # mark significance
msg2 = "p < 0.001"
else:
msg2 = f"p={pval :.3f}"
msg = msg1 + ", " + msg2
return tval, pval, msg, sig
[docs]def rank_sum_test(arr1, arr2):
"""Performs rank-sum test (non-parametric independent 2-sample test)"""
import numpy as np
from scipy.stats import ranksums
# Remove nan if any
arr1 = arr1[~np.isnan(arr1)]
arr2 = arr2[~np.isnan(arr2)]
z, pval = ranksums(arr1, arr2)
sig = get_sig(pval) # print out asterisk
msg1 = f"Z = {z : .3f}"
if pval < 0.001: # mark significance
msg2 = "p < 0.001"
else:
msg2 = f"p={pval :.4f}"
msg = msg1 + ", " + msg2
return z, pval, msg, sig
[docs]def signed_rank_test(arr1, arr2):
"""Wilcoxon signed-rank test (non-parametric paired test)"""
from scipy.stats import wilcoxon
## Todo: the output slightly different from statview. This needs to be checked.
z, pval = wilcoxon(arr1, arr2)
sig = get_sig(pval) # print out asterisk
msg1 = f"Z = {z : .3f}"
if pval < 0.001: # mark significance
msg2 = "p < 0.001"
else:
msg2 = f"p={pval :.4f}"
msg = msg1 + ", " + msg2
return z, pval, msg, sig
[docs]def two_sample_ks_test(arr1, arr2, alternative="two-sided"):
"""Performs Kolmogorov-Smirnov test to compare two distributions"""
import numpy as np
from scipy import stats
# Remove nan if any
arr1 = arr1[~np.isnan(arr1)]
arr2 = arr2[~np.isnan(arr2)]
stat, pval = stats.ks_2samp(arr1, arr2, alternative=alternative)
sig = get_sig(pval) # print out asterisk
if pval < 0.001: # mark significance
msg = "p < 0.001"
else:
msg = f"p={pval :.3f}"
return pval, msg, sig