11use crate :: {
22 Config ,
33 backends:: StorageBackendMethods ,
4- blob:: { StreamUpload , StreamingBlob } ,
4+ blob:: { StreamUpload , StreamUploadSource , StreamingBlob } ,
5+ crc32_for_path,
56 errors:: PathNotFoundError ,
67 metrics:: StorageMetrics ,
78 types:: FileRange ,
@@ -13,20 +14,17 @@ use aws_sdk_s3::{
1314 Client ,
1415 config:: { Region , retry:: RetryConfig } ,
1516 error:: { ProvideErrorMetadata , SdkError } ,
16- primitives:: ByteStream ,
17- types:: { Delete , ObjectIdentifier } ,
17+ primitives:: { ByteStream , Length } ,
18+ types:: { ChecksumAlgorithm , Delete , ObjectIdentifier } ,
1819} ;
1920use aws_smithy_types_convert:: date_time:: DateTimeExt ;
21+ use base64:: { Engine as _, engine:: general_purpose:: STANDARD as b64} ;
2022use chrono:: Utc ;
2123use docs_rs_headers:: { ETag , compute_etag} ;
22- use futures_util:: {
23- TryStreamExt ,
24- stream:: { BoxStream , StreamExt } ,
25- } ;
26- use http_body:: Frame ;
27- use http_body_util:: StreamBody ;
24+ use docs_rs_utils:: spawn_blocking;
25+ use futures_util:: stream:: { BoxStream , StreamExt } ;
2826use opentelemetry:: KeyValue ;
29- use tokio_util :: io :: ReaderStream ;
27+ use tokio :: fs ;
3028use tracing:: { error, warn} ;
3129
3230// error codes to check for when trying to determine if an error is
@@ -46,6 +44,8 @@ static NOT_FOUND_ERROR_CODES: [&str; 5] = [
4644 "XMinioInvalidObjectName" ,
4745] ;
4846
47+ const S3_UPLOAD_BUFFER_SIZE : usize = 1024 * 1024 ; // 1 MiB
48+
4949trait S3ResultExt < T > {
5050 fn convert_errors ( self ) -> anyhow:: Result < T > ;
5151}
@@ -243,34 +243,74 @@ impl StorageBackendMethods for S3Backend {
243243 compression,
244244 } = upload;
245245
246- let content_length = source. content_length ( ) . await ?;
246+ let ( content_length, checksum_crc32) = match & source {
247+ StreamUploadSource :: Bytes ( bytes) => ( bytes. len ( ) as u64 , None ) ,
248+ StreamUploadSource :: File ( local_path) => {
249+ let local_path = local_path. clone ( ) ;
250+
251+ (
252+ fs:: metadata ( & local_path) . await ?. len ( ) ,
253+ Some (
254+ spawn_blocking ( move || Ok ( b64. encode ( crc32_for_path ( local_path) ?) ) ) . await ?,
255+ ) ,
256+ )
257+ }
258+ } ;
247259
248260 let mut last_err = None ;
249261
250262 for attempt in 1 ..=3 {
251- let reader = source. reader ( ) . await ?;
252- let stream = ReaderStream :: new ( reader) . map_ok ( Frame :: data) ;
263+ let body = match & source {
264+ StreamUploadSource :: Bytes ( bytes) => ByteStream :: from ( bytes. clone ( ) ) ,
265+ StreamUploadSource :: File ( path) => {
266+ // NOTE:
267+ // reading the upload-data from a local path is
268+ // "retryable" in the AWS SDK sense.
269+ // ".file" (file pointer) is not retryable.
270+ ByteStream :: read_from ( )
271+ . path ( path)
272+ . buffer_size ( S3_UPLOAD_BUFFER_SIZE )
273+ . length ( Length :: Exact ( content_length) )
274+ . build ( )
275+ . await ?
276+ }
277+ } ;
253278
254- match self
279+ let mut request = self
255280 . client
256281 . put_object ( )
257282 . bucket ( & self . bucket )
258283 . key ( & path)
259- . body ( ByteStream :: from_body_1_x ( StreamBody :: new ( stream ) ) )
284+ . body ( body )
260285 . content_length ( content_length as i64 )
261286 . content_type ( mime. to_string ( ) )
262- . set_content_encoding ( compression. map ( |alg| alg. to_string ( ) ) )
263- . send ( )
264- . await
265- {
287+ . set_content_encoding ( compression. map ( |alg| alg. to_string ( ) ) ) ;
288+
289+ // NOTE: when you try to stream-upload a local file, the AWS SDK by default
290+ // uses a "middleware" to calculate the checksum for the content, to compare it after
291+ // uploading.
292+ // This piece is broken right now, but only when using S3 directly. On minio, all is
293+ // fine.
294+ // I don't want to disable checksums, so we can be sure the files are uploaded correctly.
295+ // So the only alternative (outside of trying to fix the SDK) is to calculate the
296+ // checksum ourselves. This is a little annoying because it means reading the whole file
297+ // twice: once to compute the checksum and once again for the upload. But since I don't
298+ // want to load entire files into memory before uploading, this is the only option.
299+ if let Some ( checksum_crc32) = & checksum_crc32 {
300+ request = request
301+ . checksum_algorithm ( ChecksumAlgorithm :: Crc32 )
302+ . checksum_crc32 ( checksum_crc32) ;
303+ }
304+
305+ match request. send ( ) . await {
266306 Ok ( _) => {
267307 self . otel_metrics
268308 . uploaded_files
269309 . add ( 1 , & [ KeyValue :: new ( "attempt" , attempt. to_string ( ) ) ] ) ;
270310 return Ok ( ( ) ) ;
271311 }
272312 Err ( err) => {
273- warn ! ( ?err, attempt = attempt + 1 , %path, "failed to upload blob to S3" ) ;
313+ warn ! ( ?err, attempt, %path, "failed to upload blob to S3" ) ;
274314 last_err = Some ( err) ;
275315 }
276316 }
0 commit comments