@@ -28,36 +28,87 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
2828RUN curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
2929 && apt-get install -y nodejs
3030
31- # Downloading dependencies, these should be pinned to specific versions
31+ # Add dependencies. All should be pinned to specific versions, except
32+ # Nextstrain-maintained software.
33+ # This includes pathogen-specific workflow dependencies. Since we only maintain a
34+ # single Docker image to support all pathogen workflows, some pathogen-specific
35+ # functionality must live in this Dockerfile. The following dependencies may be
36+ # used by multiple pathogen workflows, but they have been commented according to
37+ # the original pathogen that added these dependencies.
38+
39+ # Create directories to be copied in final stage.
40+ RUN mkdir -p /final/bin /final/share /final/libexec
3241
33- # mafft
34- WORKDIR /build/mafft
35- RUN curl -fsSL https://mafft.cbrc.jp/alignment/software/mafft-7.475-linux.tgz \
36- | tar xzvpf - --no-same-owner --strip-components=2 mafft-linux64/mafftdir/
3742
38- # RAxML
43+ # 1. Build programs from source
44+
45+ # Build RAxML
46+ # AVX should be widely-supported enough
3947WORKDIR /build/RAxML
4048RUN curl -fsSL https://api.github.com/repos/stamatak/standard-RAxML/tarball/v8.2.12 \
41- | tar xzvpf - --no-same-owner --strip-components=1
42- RUN make -f Makefile.AVX.PTHREADS.gcc # AVX should be widely-supported enough
49+ | tar xzvpf - --no-same-owner --strip-components=1 \
50+ && make -f Makefile.AVX.PTHREADS.gcc \
51+ && cp -p raxmlHPC-PTHREADS-AVX /final/bin
4352
44- # FastTree
53+ # Build FastTree
4554WORKDIR /build/FastTree
4655RUN curl -fsSL https://api.github.com/repos/tsibley/FastTree/tarball/50c5b098ea085b46de30bfc29da5e3f113353e6f \
47- | tar xzvpf - --no-same-owner --strip-components=1
48- RUN make FastTreeDblMP
56+ | tar xzvpf - --no-same-owner --strip-components=1 \
57+ && make FastTreeDblMP \
58+ && cp -p FastTreeDblMP /final/bin
4959
50- # IQ-TREE
51- WORKDIR /build/IQ-TREE
52- RUN curl -fsSL https://github.com/iqtree/iqtree2/releases/download/v2.1.2/iqtree-2.1.2-Linux.tar.gz \
53- | tar xzvpf - --no-same-owner --strip-components=1
54- RUN mv bin/iqtree2 bin/iqtree
55-
56- # vcftools
60+ # Build vcftools
5761WORKDIR /build/vcftools
5862RUN curl -fsSL https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
59- | tar xzvpf - --no-same-owner --strip-components=2
60- RUN ./configure --prefix=$PWD/built && make && make install
63+ | tar xzvpf - --no-same-owner --strip-components=2 \
64+ && ./configure --prefix=$PWD/built \
65+ && make && make install \
66+ && cp -rp built/bin/* /final/bin \
67+ && cp -rp built/share/* /final/share
68+
69+
70+ # 2. Download pre-built programs
71+
72+ # Download MAFFT
73+ WORKDIR /download/mafft
74+ RUN curl -fsSL https://mafft.cbrc.jp/alignment/software/mafft-7.475-linux.tgz \
75+ | tar xzvpf - --no-same-owner --strip-components=2 mafft-linux64/mafftdir/ \
76+ && cp -p bin/* /final/bin \
77+ && cp -p libexec/* /final/libexec
78+
79+ # Download IQ-TREE
80+ WORKDIR /download/IQ-TREE
81+ RUN curl -fsSL https://github.com/iqtree/iqtree2/releases/download/v2.1.2/iqtree-2.1.2-Linux.tar.gz \
82+ | tar xzvpf - --no-same-owner --strip-components=1 \
83+ && mv bin/iqtree2 /final/bin/iqtree
84+
85+ # Download Nextalign v1
86+ RUN curl -fsSL -o /final/bin/nextalign1 https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextalign-Linux-x86_64
87+
88+ # Download Nextclade v1
89+ RUN curl -fsSL -o /final/bin/nextclade1 https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextclade-Linux-x86_64
90+
91+ # Download tsv-utils (-f: fail on HTTP errors rather than saving the error page as the tarball)
92+ RUN curl -fsSL -o tsv-utils.tar.gz https://github.com/eBay/tsv-utils/releases/download/v2.2.0/tsv-utils-v2.2.0_linux-x86_64_ldc2.tar.gz \
93+ && tar -x --no-same-owner -v -C /final/bin -z --strip-components 2 --wildcards -f tsv-utils.tar.gz "*/bin/*" \
94+ && rm -f tsv-utils.tar.gz
95+
96+ # Download csvtk (curl -fsSL like the other downloads: fail on HTTP errors, quiet except for errors)
97+ RUN curl -fsSL https://github.com/shenwei356/csvtk/releases/download/v0.24.0/csvtk_linux_amd64.tar.gz | tar xz --no-same-owner -C /final/bin
98+
99+ # Download seqkit (curl -fsSL like the other downloads: fail on HTTP errors, quiet except for errors)
100+ RUN curl -fsSL https://github.com/shenwei356/seqkit/releases/download/v2.2.0/seqkit_linux_amd64.tar.gz | tar xz --no-same-owner -C /final/bin
101+
102+ # Download gofasta (for ncov/Pangolin)
103+ RUN curl -fsSL https://github.com/virus-evolution/gofasta/releases/download/v0.0.6/gofasta-linux-amd64 \
104+ -o /final/bin/gofasta
105+
106+ # Download minimap2 (for ncov/Pangolin)
107+ RUN curl -fsSL https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \
108+ | tar xjvpf - --no-same-owner --strip-components=1 -C /final/bin minimap2-2.24_x64-linux/minimap2
109+
110+
111+ # 3. Install programs via pip
61112
62113# Install envdir, which is used by pathogen builds
63114RUN pip3 install envdir==1.0.1
@@ -74,12 +125,36 @@ RUN pip3 install snakemake==5.10.0
74125# from Google Storage URIs.
75126RUN pip3 install google-cloud-storage==2.1.0
76127
77- # Add Nextstrain components
128+ # Install epiweeks (for ncov)
129+ RUN pip3 install epiweeks==2.1.2
130+
131+ # Install Pangolin and PangoLEARN + deps (for ncov)
132+ RUN pip3 install git+https://github.com/cov-lineages/pangolin.git@v3.1.17
133+ RUN pip3 install git+https://github.com/cov-lineages/pangoLEARN.git@2021-12-06
134+ RUN pip3 install git+https://github.com/cov-lineages/scorpio.git@v0.3.16
135+ RUN pip3 install git+https://github.com/cov-lineages/constellations.git@v0.1.1
136+ RUN pip3 install git+https://github.com/cov-lineages/pango-designation.git@19d9a537b9
137+
138+
139+ # 4. Add Nextstrain components
78140
79141# Allow caching to be avoided from here on out by calling
80142# docker build --build-arg CACHE_DATE="$(date)"
81143ARG CACHE_DATE
82144
145+ # Nextclade/Nextalign v2 are downloaded from the unpinned "latest" release, so
146+ # they belong after CACHE_DATE (unlike Nextclade/Nextalign v1, pinned to 1.11.0).
147+
148+ # Download Nextalign v2
149+ # Set default Nextalign version to 2
150+ RUN curl -fsSL -o /final/bin/nextalign2 https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-x86_64-unknown-linux-gnu \
151+ && ln -sv nextalign2 /final/bin/nextalign
152+
153+ # Download Nextclade v2
154+ # Set default Nextclade version to 2
155+ RUN curl -fsSL -o /final/bin/nextclade2 https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-unknown-linux-gnu \
156+ && ln -sv nextclade2 /final/bin/nextclade
157+
83158# Add helpers for build
84159COPY builder-scripts/download-repo builder-scripts/latest-augur-release-tag /builder-scripts/
85160
@@ -100,27 +175,8 @@ RUN pip3 install --requirement=/nextstrain/fauna/requirements.txt
100175# accessible and importable.
101176RUN pip3 install --editable "/nextstrain/augur"
102177
103- # Install pathogen-specific workflow dependencies. Since we only maintain a
104- # single Docker image to support all pathogen workflows, some pathogen-specific
105- # functionality must live in this Dockerfile. The following dependencies may be
106- # used by multiple pathogen workflows, but they have been commented according to
107- # the original pathogen that added these dependencies.
108-
109- # ncov
110- RUN pip3 install epiweeks==2.1.2
111-
112- # Add Pangolin and PangoLEARN + deps
113- RUN curl -fsSL https://github.com/virus-evolution/gofasta/releases/download/v0.0.6/gofasta-linux-amd64 \
114- -o /usr/local/bin/gofasta \
115- && chmod a+rx /usr/local/bin/gofasta
116- RUN cd /usr/local/bin && curl -fsSL https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 \
117- | tar xjvpf - --no-same-owner --strip-components=1 minimap2-2.24_x64-linux/minimap2
118- RUN pip install pysam
119- RUN pip install git+https://github.com/cov-lineages/pangolin.git@v3.1.17
120- RUN pip install git+https://github.com/cov-lineages/pangoLEARN.git@2021-12-06
121- RUN pip install git+https://github.com/cov-lineages/scorpio.git@v0.3.16
122- RUN pip install git+https://github.com/cov-lineages/constellations.git@v0.1.1
123- RUN pip install git+https://github.com/cov-lineages/pango-designation.git@19d9a537b9
178+ # Install pysam (for ncov/Pangolin). NOTE(review): unpinned, unlike the other pip3 installs in this file — consider pinning a version.
179+ RUN pip3 install pysam
124180
125181# Install Node deps, build Auspice, and link it into the global search path. A
126182# fresh install is only ~40 seconds, so we're not worrying about caching these
@@ -159,56 +215,16 @@ RUN curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
159215# Configure bash for interactive usage
160216COPY bashrc /etc/bash.bashrc
161217
162- # Add custom built programs
163- ENV MAFFT_BINARIES=/usr/local/libexec
164- COPY --from=builder /build/mafft/bin/ /usr/local/bin/
165- COPY --from=builder /build/mafft/libexec/ /usr/local/libexec/
166- COPY --from=builder \
167- /build/RAxML/raxmlHPC-PTHREADS-AVX \
168- /build/FastTree/FastTreeDblMP \
169- /build/IQ-TREE/bin/iqtree \
170- /usr/local/bin/
171-
172- COPY --from=builder /build/vcftools/built/bin/ /usr/local/bin/
173- COPY --from=builder /build/vcftools/built/share/ /usr/local/share/
174-
175- # Add Nextalign v2
176- RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/latest/download/nextalign-x86_64-unknown-linux-gnu \
177- --output /usr/local/bin/nextalign2 \
178- && chmod a+rx /usr/local/bin/nextalign2
179-
180- # Add Nextclade v2
181- RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/latest/download/nextclade-x86_64-unknown-linux-gnu \
182- --output /usr/local/bin/nextclade2 \
183- && chmod a+rx /usr/local/bin/nextclade2
184-
185- # Add Nextalign v1
186- RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextalign-Linux-x86_64 \
187- --output /usr/local/bin/nextalign1 \
188- && chmod a+rx /usr/local/bin/nextalign1
218+ # Copy binaries
219+ COPY --from=builder /final/bin/ /usr/local/bin/
220+ COPY --from=builder /final/share/ /usr/local/share/
221+ COPY --from=builder /final/libexec/ /usr/local/libexec/
189222
190- # Add Nextclade v1
191- RUN curl -fsSL https://github.com/nextstrain/nextclade/releases/download/1.11.0/nextclade-Linux-x86_64 \
192- --output /usr/local/bin/nextclade1 \
193- && chmod a+rx /usr/local/bin/nextclade1
194-
195- # Set default Nextclade and Nextalign version to 2
196- RUN ln -sv nextclade2 /usr/local/bin/nextclade \
197- && ln -sv nextalign2 /usr/local/bin/nextalign
198-
199- # Add tsv-utils
200- RUN curl -L -o tsv-utils.tar.gz https://github.com/eBay/tsv-utils/releases/download/v2.2.0/tsv-utils-v2.2.0_linux-x86_64_ldc2.tar.gz \
201- && tar -x --no-same-owner -v -C /usr/local/bin -z --strip-components 2 --wildcards -f tsv-utils.tar.gz "*/bin/*" \
202- && rm -f tsv-utils.tar.gz
203-
204- # Add csvtk
205- RUN curl -L https://github.com/shenwei356/csvtk/releases/download/v0.24.0/csvtk_linux_amd64.tar.gz | tar xz --no-same-owner -C /usr/local/bin
206-
207- # Add seqkit
208- RUN curl -L https://github.com/shenwei356/seqkit/releases/download/v2.2.0/seqkit_linux_amd64.tar.gz | tar xz --no-same-owner -C /usr/local/bin
223+ # Set MAFFT_BINARIES explicitly for MAFFT
224+ ENV MAFFT_BINARIES=/usr/local/libexec
209225
210226# Ensure all container users can execute these programs
211- RUN chmod a+rX /usr/local/bin/* /usr/local/libexec/*
227+ RUN chmod a+rx /usr/local/bin/* /usr/local/libexec/*
212228
213229# Add installed Python libs
214230COPY --from=builder /usr/local/lib/python3.7/site-packages/ /usr/local/lib/python3.7/site-packages/
@@ -226,8 +242,6 @@ COPY --from=builder \
226242 /usr/local/bin/augur \
227243 /usr/local/bin/aws \
228244 /usr/local/bin/envdir \
229- /usr/local/bin/gofasta \
230- /usr/local/bin/minimap2 \
231245 /usr/local/bin/nextstrain \
232246 /usr/local/bin/pangolin \
233247 /usr/local/bin/pangolearn.smk \
0 commit comments