
Commit 311a6f5

test: use _DummyListBuffer in test_reads.py to avoid GIL contention (googleapis#16860)
This PR replaces `io.BytesIO` with a custom `_DummyListBuffer` in `test_reads.py`. Profiling showed that `io.BytesIO.write()` holds the GIL while copying data, which introduces significant noise and bottlenecks in performance tests with high concurrency or large data transfers. Collecting chunks in a plain list avoids that GIL contention and reduces noise when profiling this code.
1 parent f1a4f9e commit 311a6f5
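
The GIL-contention rationale can be illustrated with a small standalone benchmark. The sketch below (illustrative only, not part of this commit; all names and sizes are made up) contrasts io.BytesIO with a list-backed buffer under thread concurrency: BytesIO.write() copies every payload while holding the GIL, whereas list.append() only stores a reference.

    # Illustrative sketch, not from the repository: compare io.BytesIO
    # against a list-backed buffer when several threads write concurrently.
    # BytesIO.write() copies the payload while holding the GIL; appending
    # a reference to a list performs no byte copy.
    import io
    import threading
    import time

    CHUNK = b"x" * (4 * 1024 * 1024)  # 4 MiB payload per write
    WRITES_PER_THREAD = 25
    NUM_THREADS = 4


    class ListBuffer:
        """Collects chunks by reference instead of copying them."""

        def __init__(self):
            self.chunks = []

        def write(self, data):
            self.chunks.append(data)  # stores a reference; no byte copy
            return len(data)


    def timed_run(make_buffer):
        buffers = [make_buffer() for _ in range(NUM_THREADS)]

        def worker(buf):
            for _ in range(WRITES_PER_THREAD):
                buf.write(CHUNK)

        threads = [threading.Thread(target=worker, args=(b,)) for b in buffers]
        start = time.perf_counter()
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        return time.perf_counter() - start


    if __name__ == "__main__":
        print(f"io.BytesIO : {timed_run(io.BytesIO):.3f}s")
        print(f"ListBuffer : {timed_run(ListBuffer):.3f}s")

On CPython the list-backed variant typically finishes much faster, since no bytes are copied under the lock; the exact numbers depend on chunk size and thread count.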

1 file changed

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py

Lines changed: 23 additions & 5 deletions
@@ -23,9 +23,7 @@
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
-from io import BytesIO
 from typing import List, NamedTuple, Optional
-
 import pytest
 
 import tests.perf.microbenchmarks.time_based.reads.config as config
@@ -226,6 +224,26 @@ def _download_time_based_json(client, filename, params):
     )
 
 
+# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention
+# during profiling. io.BytesIO.write() holds the GIL while copying data,
+# which introduces significant noise and bottlenecks in performance tests
+# with high concurrency or large data transfers.
+# This buffer simply collects chunks in a list and tracks the total size.
+class _DummyListBuffer:
+    def __init__(self):
+        self.chunks = []
+        self.size = 0
+
+    def write(self, data):
+        self.chunks.append(data)
+        nbytes = len(data)
+        self.size += nbytes
+        return nbytes
+
+    def getvalue(self):
+        return b"".join(self.chunks)
+
+
 async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()
@@ -253,17 +271,17 @@ async def _worker_coro():
                 offset = random.randint(
                     0, params.file_size_bytes - params.chunk_size_bytes
                 )
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
         else:  # seq
             for _ in range(params.num_ranges):
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
                 offset += params.chunk_size_bytes
                 if offset + params.chunk_size_bytes > params.file_size_bytes:
                     offset = 0  # Reset offset if end of file is reached
 
         await mrd.download_ranges(ranges)
 
-        bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
+        bytes_in_buffers = sum(r[2].size for r in ranges)
         assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges
 
         if not is_warming_up:
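
For clarity, a hypothetical usage snippet (not in the test file) showing that `_DummyListBuffer` covers the small write()/getvalue() subset of the file API the downloader exercises, and that `size` is tracked incrementally so the assertion no longer needs `getbuffer().nbytes`:

    buf = _DummyListBuffer()
    buf.write(b"abc")
    buf.write(b"def")
    assert buf.size == 6                # incremental total; replaces getbuffer().nbytes
    assert buf.getvalue() == b"abcdef"  # chunks are only joined on demand

Deferring the b"".join() to getvalue() keeps the hot write() path copy-free, which is the point of the swap.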
