Skip to content

Commit fefad41

Browse files
authored
Merge pull request #92: Re-organize Dockerfile
2 parents 3de02d1 + a7159ec commit fefad41

1 file changed

Lines changed: 105 additions & 91 deletions

File tree

Dockerfile

Lines changed: 105 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -28,36 +28,87 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
2828
RUN curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
2929
&& apt-get install -y nodejs
3030

31-
# Downloading dependencies, these should be pinned to specific versions
31+
# Add dependencies. All should be pinned to specific versions, except
32+
# Nextstrain-maintained software.
33+
# This includes pathogen-specific workflow dependencies. Since we only maintain a
34+
# single Docker image to support all pathogen workflows, some pathogen-specific
35+
# functionality must live in this Dockerfile. The following dependencies may be
36+
# used by multiple pathogen workflows, but they have been commented according to
37+
# the original pathogen that added these dependencies.
38+
39+
# Create directories to be copied in final stage.
40+
RUN mkdir -p /final/bin /final/share /final/libexec
3241

33-
# mafft
34-
WORKDIR /build/mafft
35-
RUN curl -fsSL https://mafft.cbrc.jp/alignment/software/mafft-7.475-linux.tgz \
36-
| tar xzvpf - --no-same-owner --strip-components=2 mafft-linux64/mafftdir/
3742

38-
# RAxML
43+
# 1. Build programs from source
44+
45+
# Build RAxML
46+
# Use the AVX makefile; AVX should be widely supported enough
3947
WORKDIR /build/RAxML
4048
RUN curl -fsSL https://api.github.com/repos/stamatak/standard-RAxML/tarball/v8.2.12 \
41-
| tar xzvpf - --no-same-owner --strip-components=1
42-
RUN make -f Makefile.AVX.PTHREADS.gcc # AVX should be widely-supported enough
49+
| tar xzvpf - --no-same-owner --strip-components=1 \
50+
&& make -f Makefile.AVX.PTHREADS.gcc \
51+
&& cp -p raxmlHPC-PTHREADS-AVX /final/bin
4352

44-
# FastTree
53+
# Build FastTree
4554
WORKDIR /build/FastTree
4655
RUN curl -fsSL https://api.github.com/repos/tsibley/FastTree/tarball/50c5b098ea085b46de30bfc29da5e3f113353e6f \
47-
| tar xzvpf - --no-same-owner --strip-components=1
48-
RUN make FastTreeDblMP
56+
| tar xzvpf - --no-same-owner --strip-components=1 \
57+
&& make FastTreeDblMP \
58+
&& cp -p FastTreeDblMP /final/bin
4959

50-
# IQ-TREE
51-
WORKDIR /build/IQ-TREE
52-
RUN curl -fsSL https://github.com/iqtree/iqtree2/releases/download/v2.1.2/iqtree-2.1.2-Linux.tar.gz \
53-
| tar xzvpf - --no-same-owner --strip-components=1
54-
RUN mv bin/iqtree2 bin/iqtree
55-
56-
# vcftools
60+
# Build vcftools
5761
WORKDIR /build/vcftools
5862
RUN curl -fsSL https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
59-
| tar xzvpf - --no-same-owner --strip-components=2
60-
RUN ./configure --prefix=$PWD/built && make && make install
63+
| tar xzvpf - --no-same-owner --strip-components=2 \
64+
&& ./configure --prefix=$PWD/built \
65+
&& make && make install \
66+
&& cp -rp built/bin/* /final/bin \
67+
&& cp -rp built/share/* /final/share
68+
69+
70+
# 2. Download pre-built programs
71+
72+
# Download MAFFT
73+
WORKDIR /download/mafft
74+
RUN curl -fsSL https://mafft.cbrc.jp/alignment/software/mafft-7.475-linux.tgz \
75+
| tar xzvpf - --no-same-owner --strip-components=2 mafft-linux64/mafftdir/ \
76+
&& cp -p bin/* /final/bin \
77+
&& cp -p libexec/* /final/libexec
78+
79+
# Download IQ-TREE
80+
WORKDIR /download/IQ-TREE
81+
RUN curl -fsSL https://github.com/iqtree/iqtree2/releases/download/v2.1.2/iqtree-2.1.2-Linux.tar.gz \
82+
| tar xzvpf - --no-same-owner --strip-components=1 \
83+
&& mv bin/iqtree2 /final/bin/iqtree
84+
85+
# Download Nextalign v1
86+
RUN curl -fsSL -o /final/bin/nextalign1 https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextalign-Linux-x86_64
87+
88+
# Download Nextclade v1
89+
RUN curl -fsSL -o /final/bin/nextclade1 https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextclade-Linux-x86_64
90+
91+
# Download tsv-utils
92+
RUN curl -L -o tsv-utils.tar.gz https://github.com/eBay/tsv-utils/releases/download/v2.2.0/tsv-utils-v2.2.0_linux-x86_64_ldc2.tar.gz \
93+
&& tar -x --no-same-owner -v -C /final/bin -z --strip-components 2 --wildcards -f tsv-utils.tar.gz "*/bin/*" \
94+
&& rm -f tsv-utils.tar.gz
95+
96+
# Download csvtk
97+
RUN curl -L https://github.com/shenwei356/csvtk/releases/download/v0.24.0/csvtk_linux_amd64.tar.gz | tar xz --no-same-owner -C /final/bin
98+
99+
# Download seqkit
100+
RUN curl -L https://github.com/shenwei356/seqkit/releases/download/v2.2.0/seqkit_linux_amd64.tar.gz | tar xz --no-same-owner -C /final/bin
101+
102+
# Download gofasta (for ncov/Pangolin)
103+
RUN curl -fsSL https://github.com/virus-evolution/gofasta/releases/download/v0.0.6/gofasta-linux-amd64 \
104+
-o /final/bin/gofasta
105+
106+
# Download minimap2 (for ncov/Pangolin)
107+
RUN curl -fsSL https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \
108+
| tar xjvpf - --no-same-owner --strip-components=1 -C /final/bin minimap2-2.24_x64-linux/minimap2
109+
110+
111+
# 3. Install programs via pip
61112

62113
# Install envdir, which is used by pathogen builds
63114
RUN pip3 install envdir==1.0.1
@@ -74,12 +125,36 @@ RUN pip3 install snakemake==5.10.0
74125
# from Google Storage URIs.
75126
RUN pip3 install google-cloud-storage==2.1.0
76127

77-
# Add Nextstrain components
128+
# Install epiweeks (for ncov)
129+
RUN pip3 install epiweeks==2.1.2
130+
131+
# Install Pangolin and PangoLEARN + deps (for ncov)
132+
RUN pip3 install git+https://github.com/cov-lineages/pangolin.git@v3.1.17
133+
RUN pip3 install git+https://github.com/cov-lineages/pangoLEARN.git@2021-12-06
134+
RUN pip3 install git+https://github.com/cov-lineages/scorpio.git@v0.3.16
135+
RUN pip3 install git+https://github.com/cov-lineages/constellations.git@v0.1.1
136+
RUN pip3 install git+https://github.com/cov-lineages/pango-designation.git@19d9a537b9
137+
138+
139+
# 4. Add Nextstrain components
78140

79141
# Allow caching to be avoided from here on out by calling
80142
# docker build --build-arg CACHE_DATE="$(date)"
81143
ARG CACHE_DATE
82144

145+
# Nextclade/Nextalign v2 are downloaded directly, but always at the latest version,
146+
# so they belong after CACHE_DATE (unlike Nextclade/Nextalign v1).
147+
148+
# Download Nextalign v2
149+
# Set default Nextalign version to 2
150+
RUN curl -fsSL -o /final/bin/nextalign2 https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-x86_64-unknown-linux-gnu \
151+
&& ln -sv nextalign2 /final/bin/nextalign
152+
153+
# Download Nextclade v2
154+
# Set default Nextclade version to 2
155+
RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-unknown-linux-gnu \
156+
&& ln -sv nextclade2 /final/bin/nextclade
157+
83158
# Add helpers for build
84159
COPY builder-scripts/download-repo builder-scripts/latest-augur-release-tag /builder-scripts/
85160

@@ -100,27 +175,8 @@ RUN pip3 install --requirement=/nextstrain/fauna/requirements.txt
100175
# accessible and importable.
101176
RUN pip3 install --editable "/nextstrain/augur"
102177

103-
# Install pathogen-specific workflow dependencies. Since we only maintain a
104-
# single Docker image to support all pathogen workflows, some pathogen-specific
105-
# functionality must live in this Dockerfile. The following dependencies may be
106-
# used by multiple pathogen workflows, but they have been commented according to
107-
# the original pathogen that added these dependencies.
108-
109-
# ncov
110-
RUN pip3 install epiweeks==2.1.2
111-
112-
# Add Pangolin and PangoLEARN + deps
113-
RUN curl -fsSL https://github.com/virus-evolution/gofasta/releases/download/v0.0.6/gofasta-linux-amd64 \
114-
-o /usr/local/bin/gofasta \
115-
&& chmod a+rx /usr/local/bin/gofasta
116-
RUN cd /usr/local/bin && curl -fsSL https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \
117-
| tar xjvpf - --no-same-owner --strip-components=1 minimap2-2.24_x64-linux/minimap2
118-
RUN pip install pysam
119-
RUN pip install git+https://github.com/cov-lineages/pangolin.git@v3.1.17
120-
RUN pip install git+https://github.com/cov-lineages/pangoLEARN.git@2021-12-06
121-
RUN pip install git+https://github.com/cov-lineages/scorpio.git@v0.3.16
122-
RUN pip install git+https://github.com/cov-lineages/constellations.git@v0.1.1
123-
RUN pip install git+https://github.com/cov-lineages/pango-designation.git@19d9a537b9
178+
# Install pysam (for ncov/Pangolin)
179+
RUN pip3 install pysam
124180

125181
# Install Node deps, build Auspice, and link it into the global search path. A
126182
# fresh install is only ~40 seconds, so we're not worrying about caching these
@@ -159,56 +215,16 @@ RUN curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
159215
# Configure bash for interactive usage
160216
COPY bashrc /etc/bash.bashrc
161217

162-
# Add custom built programs
163-
ENV MAFFT_BINARIES=/usr/local/libexec
164-
COPY --from=builder /build/mafft/bin/ /usr/local/bin/
165-
COPY --from=builder /build/mafft/libexec/ /usr/local/libexec/
166-
COPY --from=builder \
167-
/build/RAxML/raxmlHPC-PTHREADS-AVX \
168-
/build/FastTree/FastTreeDblMP \
169-
/build/IQ-TREE/bin/iqtree \
170-
/usr/local/bin/
171-
172-
COPY --from=builder /build/vcftools/built/bin/ /usr/local/bin/
173-
COPY --from=builder /build/vcftools/built/share/ /usr/local/share/
174-
175-
# Add Nextalign v2
176-
RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-x86_64-unknown-linux-gnu \
177-
--output /usr/local/bin/nextalign2 \
178-
&& chmod a+rx /usr/local/bin/nextalign2
179-
180-
# Add Nextclade v2
181-
RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-unknown-linux-gnu \
182-
--output /usr/local/bin/nextclade2 \
183-
&& chmod a+rx /usr/local/bin/nextclade2
184-
185-
# Add Nextalign v1
186-
RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextalign-Linux-x86_64 \
187-
--output /usr/local/bin/nextalign1 \
188-
&& chmod a+rx /usr/local/bin/nextalign1
218+
# Copy binaries
219+
COPY --from=builder /final/bin/ /usr/local/bin/
220+
COPY --from=builder /final/share/ /usr/local/share/
221+
COPY --from=builder /final/libexec/ /usr/local/libexec/
189222

190-
# Add Nextclade v1
191-
RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextclade-Linux-x86_64 \
192-
--output /usr/local/bin/nextclade1 \
193-
&& chmod a+rx /usr/local/bin/nextclade1
194-
195-
# Set default Nextclade and Nextalign version to 2
196-
RUN ln -sv nextclade2 /usr/local/bin/nextclade \
197-
&& ln -sv nextalign2 /usr/local/bin/nextalign
198-
199-
# Add tsv-utils
200-
RUN curl -L -o tsv-utils.tar.gz https://github.com/eBay/tsv-utils/releases/download/v2.2.0/tsv-utils-v2.2.0_linux-x86_64_ldc2.tar.gz \
201-
&& tar -x --no-same-owner -v -C /usr/local/bin -z --strip-components 2 --wildcards -f tsv-utils.tar.gz "*/bin/*" \
202-
&& rm -f tsv-utils.tar.gz
203-
204-
# Add csvtk
205-
RUN curl -L https://github.com/shenwei356/csvtk/releases/download/v0.24.0/csvtk_linux_amd64.tar.gz | tar xz --no-same-owner -C /usr/local/bin
206-
207-
# Add seqkit
208-
RUN curl -L https://github.com/shenwei356/seqkit/releases/download/v2.2.0/seqkit_linux_amd64.tar.gz | tar xz --no-same-owner -C /usr/local/bin
223+
# Set MAFFT_BINARIES explicitly for MAFFT
224+
ENV MAFFT_BINARIES=/usr/local/libexec
209225

210226
# Ensure all container users can execute these programs
211-
RUN chmod a+rX /usr/local/bin/* /usr/local/libexec/*
227+
RUN chmod a+rx /usr/local/bin/* /usr/local/libexec/*
212228

213229
# Add installed Python libs
214230
COPY --from=builder /usr/local/lib/python3.7/site-packages/ /usr/local/lib/python3.7/site-packages/
@@ -226,8 +242,6 @@ COPY --from=builder \
226242
/usr/local/bin/augur \
227243
/usr/local/bin/aws \
228244
/usr/local/bin/envdir \
229-
/usr/local/bin/gofasta \
230-
/usr/local/bin/minimap2 \
231245
/usr/local/bin/nextstrain \
232246
/usr/local/bin/pangolin \
233247
/usr/local/bin/pangolearn.smk \

0 commit comments

Comments
 (0)