-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy path_api.py
More file actions
189 lines (168 loc) · 6.63 KB
/
_api.py
File metadata and controls
189 lines (168 loc) · 6.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"""Loading data and relevant groups"""
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/load.ipynb.
# %% auto #0
__all__ = ['load', 'prop_dataset']
# %% ../nbs/API/load.ipynb #218e4f14
def load(
data,
idx=None,
x=None,
y=None,
paired=None,
id_col=None,
ci=95,
resamples=5000,
random_seed=12345,
proportional=False,
delta2=False,
experiment=None,
experiment_label=None,
x1_level=None,
mini_meta=False,
ps_adjust=False,
):
"""
Loads data in preparation for estimation statistics.
This is designed to work with pandas DataFrames.
Parameters
----------
data : pandas DataFrame
idx : tuple
List of column names (if 'x' is not supplied) or of category names
(if 'x' is supplied). This can be expressed as a tuple of tuples,
with each individual tuple producing its own contrast plot
x : string or list, default None
Column name(s) of the independent variable. This can be expressed as
a list of 2 elements if and only if 'delta2' is True; otherwise it
can only be a string.
y : string, default None
Column names for data to be plotted on the x-axis and y-axis.
paired : string, default None
The type of the experiment under which the data are obtained. If 'paired'
is None then the data will not be treated as paired data in the subsequent
calculations. If 'paired' is 'baseline', then in each tuple of x, other
groups will be paired up with the first group (as control). If 'paired' is
'sequential', then in each tuple of x, each group will be paired up with
its previous group (as control).
id_col : default None.
Required if `paired` is True.
ci : integer, default 95
The confidence interval width. The default of 95 produces 95%
confidence intervals.
resamples : integer, default 5000.
The number of resamples taken to generate the bootstraps which are used
to generate the confidence intervals.
random_seed : int, default 12345
This integer is used to seed the random number generator during
bootstrap resampling, ensuring that the confidence intervals
reported are replicable.
proportional : boolean, default False.
An indicator of whether the data is binary or not. When set to True, it
specifies that the data consists of binary data, where the values are
limited to 0 and 1. The code is not suitable for analyzing proportion
data that contains non-numeric values, such as strings like 'yes' and 'no'.
When False or not provided, the algorithm assumes that
the data is continuous and uses a non-proportional representation.
delta2 : boolean, default False
Indicator of delta-delta experiment
experiment : String, default None
The name of the column of the dataframe which contains the label of
experiments
experiment_lab : list, default None
A list of String to specify the order of subplots for delta-delta plots.
This can be expressed as a list of 2 elements if and only if 'delta2'
is True; otherwise it can only be a string.
x1_level : list, default None
A list of String to specify the order of subplots for delta-delta plots.
This can be expressed as a list of 2 elements if and only if 'delta2'
is True; otherwise it can only be a string.
mini_meta : boolean, default False
Indicator of weighted delta calculation.
ps_adjust : boolean, default False
Indicator of whether to adjust calculated p-value according to Phipson & Smyth (2010)
# https://doi.org/10.2202/1544-6115.1585
Returns
-------
A `Dabest` object.
"""
from dabest import Dabest
return Dabest(
data,
idx,
x,
y,
paired,
id_col,
ci,
resamples,
random_seed,
proportional,
delta2,
experiment,
experiment_label,
x1_level,
mini_meta,
ps_adjust,
)
# %% ../nbs/API/load.ipynb #570ff65a
import numpy as np
from typing import Union, Optional
import pandas as pd
def prop_dataset(
group: Union[
list, tuple, np.ndarray, dict
], # Accepts lists, tuples, or numpy ndarrays of numeric types.
group_names: Optional[list] = None,
):
"""
Convenient function to generate a dataframe of binary data.
"""
if isinstance(group, dict):
# If group_names is not provided, use the keys of the dict as group_names
if group_names is None:
group_names = list(group.keys())
elif not set(group_names) == set(group.keys()):
# Check if the group_names provided is the same as the keys of the dict
raise ValueError("group_names must be the same as the keys of the dict.")
# Check if the values in the dict are numeric
if not all(
[isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]
):
raise ValueError(
"group must be a dict of lists, tuples, or numpy ndarrays of numeric types."
)
# Check if the values in the dict only have two elements under each parent key
if not all([len(group[name]) == 2 for name in group_names]):
raise ValueError("Each parent key should have only two elements.")
group_val = group
else:
if group_names is None:
raise ValueError("group_names must be provided if group is not a dict.")
# Check if the length of group is two times of the length of group_names
if not len(group) == 2 * len(group_names):
raise ValueError(
"The length of group must be two times of the length of group_names."
)
group_val = {
group_names[i]: [group[i * 2], group[i * 2 + 1]]
for i in range(len(group_names))
}
# Check if the sum of values in group_val under each key are the same
if not all(
[
sum(group_val[name]) == sum(group_val[group_names[0]])
for name in group_val.keys()
]
):
raise ValueError("The sum of values under each key must be the same.")
id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1))
final_df = pd.DataFrame()
for name in group_val.keys():
col = (
np.repeat(0, group_val[name][0]).tolist()
+ np.repeat(1, group_val[name][1]).tolist()
)
df = pd.DataFrame({name: col})
final_df = pd.concat([final_df, df], axis=1)
final_df["ID"] = id_col
return final_df