-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopgen-tracts-dating.qmd
More file actions
122 lines (85 loc) · 4.06 KB
/
popgen-tracts-dating.qmd
File metadata and controls
122 lines (85 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Admixture tracts and dating
**WIP:** Extracting ancestry tracts, chromosome painting, and using
admixture tracts and LD patterns for dating admixture events.
```{r}
library(slendr)
init_env()
library(cowplot)
# helper function for plotting tracts simulated later
plot_tracts <- function(tracts, ind) {
ind_tracts <- filter(tracts, name %in% ind) %>%
mutate(haplotype = paste0(name, "\nhap. ", haplotype))
ind_tracts$haplotype <- factor(ind_tracts$haplotype, levels = unique(ind_tracts$haplotype[order(ind_tracts$node_id)]))
ggplot(ind_tracts) +
geom_rect(aes(xmin = left, xmax = right, ymin = 1, ymax = 2, fill = name), linewidth = 1) +
labs(x = "coordinate along a chromosome [bp]") +
theme_bw() +
theme(
legend.position = "none",
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank()
) +
facet_grid(haplotype ~ .) +
expand_limits(x = 0) +
scale_x_continuous(labels = scales::comma)
}
popZ <- population("popZ", time = 3000, N = 5000)
popX <- population("popX", time = 1500, N = 5000, parent = popZ)
popY <- population("popY", time = 1500, N = 5000, parent = popZ)
gf <- gene_flow(from = popX, to = popY, rate = 0.2, start = 800, end = 799)
model <- compile_model(list(popZ, popX, popY), generation_time = 1, gene_flow = gf)
schedule <- rbind(
schedule_sampling(model, times = seq(1500, 0, by = -100), list(popY, 50)),
schedule_sampling(model, times = 0, list(popX, 10), list(popZ, 10))
)
# Use the function plot_model() to make sure that the model and the sampling schedule
# are defined correctly (there's no such thing as too many sanity checks when doing research)
plot_model(model, proportions = TRUE, samples = schedule)
ts <- msprime(model, samples = schedule, sequence_length = 100e6, recombination_rate = 1e-8)
library(dplyr)
library(ggplot2)
tracts <- ts_tracts(ts, census = 800)
tracts
sample_times <- ts_samples(ts) %>% select(name, time)
# adding haplotype ID and time (automatically done by the dev slendr)
tracts <- tracts %>% dplyr::group_by(name) %>% dplyr::mutate(haplotype = dplyr::dense_rank(node_id)) %>% dplyr::ungroup() %>% inner_join(sample_times)
# Select sampled individuals from different ages (remember, we recorded oldest individuals
# at 1000 generations ago, the youngest individuals at 0 generations ago). Use the function
# plot_tracts to visualize their ancestry tracts. What do you see in terms of the number of
# tracts and the length of tracts across these individuals? Can you eyeball some distinctive
# pattern?
subset_inds <- tracts %>% group_by(time) %>% distinct(name) %>% slice_sample(n = 1) %>% pull(name)
subset_inds
plot_tracts(tracts, subset_inds)
# It looks like the older individual is, the closer they lived to the start of the admixture event,
# and the longer the tracts they carry will be. Compute the average length of a ancestry tracts in each
# sample age group and visualize the length distribution of these tracts based on their age:
tracts %>%
group_by(time) %>%
summarise(mean(length))
ggplot(tracts, aes(length, color = factor(time))) +
geom_density() +
coord_cartesian(xlim = c(0, 3e6)) +
theme_bw()
# Now, try to work backwards. Assuming you have the following distribution of tract lengths...
tracts <- filter(tracts, time == 0, length <= 1e6)
bins <- hist(tracts$length, breaks = 50, plot = FALSE)
length <- bins$mids
density <- bins$density
plot(length, density)
lambda_mle <- 1 / mean(tracts$length)
lambda_mle / 1e-8
y_mle <- dexp(length, rate = lambda_mle)
lines(length, y_mle, lty = 2, col = "darkgreen", lwd = 2)
nls_res <- nls(density ~ SSasymp(length, Asym, R0, lrc))
nls_res
lambda_nls <- exp(unname(coef(nls_res)["lrc"]))
lambda_nls / 1e-8
y_nls <- predict(nls_res, newdata = data.frame(length = length))
lines(length, y_nls, lty = 2, col = "purple", lwd = 2)
legend("topright", fill = c("darkgreen", "purple"),
legend = c(paste("MLE, t =", round(lambda_mle / 1e-8, 1), "generations ago"),
paste("MLE, t =", round(lambda_nls / 1e-8, 1), "generations ago")))
```