-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathconfint_1group.py
More file actions
140 lines (105 loc) · 4.2 KB
/
confint_1group.py
File metadata and controls
140 lines (105 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""A range of functions to compute bootstraps for a single sample."""
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_1group.ipynb.
# %% auto #0
__all__ = ['create_bootstrap_indexes', 'compute_1group_jackknife', 'compute_1group_acceleration', 'compute_1group_bootstraps',
'compute_1group_bias_correction', 'summary_ci_1group']
# %% ../../nbs/API/confint_1group.ipynb #9181f236
import numpy as np
from numpy.random import PCG64, RandomState
from scipy.stats import norm
from numpy import sort as npsort
# %% ../../nbs/API/confint_1group.ipynb #bac09924
def create_bootstrap_indexes(array, resamples=5000, random_seed=12345):
"""Given an array-like, returns a generator of bootstrap indexes
to be used for resampling.
"""
rng = RandomState(PCG64(random_seed))
indexes = range(0, len(array))
out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples))
return out
def compute_1group_jackknife(x, func, *args, **kwargs):
"""
Returns the jackknife bootstraps for func(x).
"""
from . import confint_2group_diff as ci_2g
jackknives = [i for i in ci_2g.create_jackknife_indexes(x)]
out = [func(x[j], *args, **kwargs) for j in jackknives]
del jackknives # memory management.
return out
def compute_1group_acceleration(jack_dist):
"""
Returns the accaleration value based on the jackknife distribution.
"""
from . import confint_2group_diff as ci_2g
return ci_2g._calc_accel(jack_dist)
def compute_1group_bootstraps(
x, func, resamples=5000, random_seed=12345, *args, **kwargs
):
"""Bootstraps func(x), with the number of specified resamples."""
# Create bootstrap indexes.
boot_indexes = create_bootstrap_indexes(
x, resamples=resamples, random_seed=random_seed
)
out = [func(x[b], *args, **kwargs) for b in boot_indexes]
del boot_indexes
return out
def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs):
metric = func(x, *args, **kwargs)
prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps)
return norm.ppf(prop_boots_less_than_metric)
def summary_ci_1group(
x: np.array, # An numerical iterable.
func, # The function to be applied to x.
resamples: int = 5000, # The number of bootstrap resamples to be taken of func(x).
alpha: float = 0.05, # Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.
random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
sort_bootstraps: bool = True,
*args,
**kwargs
):
"""
Given an array-like x, returns func(x), and a bootstrap confidence
interval of func(x).
Returns
-------
A dictionary with the following five keys:
`summary`: float.
The outcome of func(x).
`func`: function.
The function applied to x.
`bca_ci_low`: float
`bca_ci_high`: float.
The bias-corrected and accelerated confidence interval, for the
given alpha.
`bootstraps`: array.
The bootstraps used to generate the confidence interval.
These will be sorted in ascending order if `sort_bootstraps`
was True.
"""
from . import confint_2group_diff as ci2g
boots = compute_1group_bootstraps(
x, func, resamples=resamples, random_seed=random_seed, *args, **kwargs
)
bias = compute_1group_bias_correction(x, boots, func)
jk = compute_1group_jackknife(x, func, *args, **kwargs)
accel = compute_1group_acceleration(jk)
del jk
ci_idx = ci2g.compute_interval_limits(bias, accel, resamples, alpha)
boots_sorted = npsort(boots)
low = boots_sorted[ci_idx[0]]
high = boots_sorted[ci_idx[1]]
if sort_bootstraps:
B = boots_sorted
else:
B = boots
del boots
del boots_sorted
out = {
"summary": func(x),
"func": func,
"bca_ci_low": low,
"bca_ci_high": high,
"bootstraps": B,
}
del B
return out