Commit c3c609e

test: use _DummyListBuffer in test_reads.py to avoid GIL contention (#16860)
This PR replaces `io.BytesIO` with a custom `_DummyListBuffer` in `test_reads.py`. Profiling showed that `io.BytesIO.write()` holds the GIL while copying data, which introduces significant noise and bottlenecks in performance tests with high concurrency or large data transfers. Collecting chunks in a plain list instead avoids this GIL contention and reduces noise when profiling the read path.
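As an illustration of the cost the commit message describes, here is a minimal standalone sketch (not part of the commit); `_ListBuffer` and the `time_writes` harness are hypothetical stand-ins that mirror the committed `_DummyListBuffer`. Single-threaded wall time is used as a rough proxy for the time `write()` spends copying while holding the GIL:

import io
import time


class _ListBuffer:
    # Mirrors the committed _DummyListBuffer: append chunk references and
    # track a running byte count; never copy payload bytes on write().
    def __init__(self):
        self.chunks = []
        self.size = 0

    def write(self, data):
        self.chunks.append(data)
        self.size += len(data)
        return len(data)


def time_writes(buf, chunk, n):
    # Wall time for n writes; with io.BytesIO each write copies the chunk
    # into the internal buffer while the GIL is held.
    start = time.perf_counter()
    for _ in range(n):
        buf.write(chunk)
    return time.perf_counter() - start


chunk = b"x" * (1 << 20)  # 1 MiB per write
n = 256
print("io.BytesIO :", time_writes(io.BytesIO(), chunk, n))
print("_ListBuffer:", time_writes(_ListBuffer(), chunk, n))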
1 parent 4e535d2 commit c3c609e

1 file changed

Lines changed: 23 additions & 5 deletions

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py

@@ -19,9 +19,7 @@
 import os
 import random
 import time
-from io import BytesIO
 from typing import List, NamedTuple, Optional
-
 import pytest

 import tests.perf.microbenchmarks.time_based.reads.config as config
@@ -170,6 +168,26 @@ def _download_time_based_json(client, filename, params):
     )


+# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention
+# during profiling. io.BytesIO.write() holds the GIL while copying data,
+# which introduces significant noise and bottlenecks in performance tests
+# with high concurrency or large data transfers.
+# This buffer simply collects chunks in a list and tracks the total size.
+class _DummyListBuffer:
+    def __init__(self):
+        self.chunks = []
+        self.size = 0
+
+    def write(self, data):
+        self.chunks.append(data)
+        nbytes = len(data)
+        self.size += nbytes
+        return nbytes
+
+    def getvalue(self):
+        return b"".join(self.chunks)
+
+
 async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()
@@ -197,17 +215,17 @@ async def _worker_coro():
                 offset = random.randint(
                     0, params.file_size_bytes - params.chunk_size_bytes
                 )
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
         else:  # seq
             for _ in range(params.num_ranges):
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
                 offset += params.chunk_size_bytes
                 if offset + params.chunk_size_bytes > params.file_size_bytes:
                     offset = 0  # Reset offset if end of file is reached

         await mrd.download_ranges(ranges)

-        bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
+        bytes_in_buffers = sum(r[2].size for r in ranges)
         assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges

         if not is_warming_up:
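For reference, a hypothetical usage sketch (not from the commit, and assuming the `_DummyListBuffer` class from the diff above is in scope): the diff implies the downloader only ever calls `.write()` on each range's sink, so the list-backed buffer can stand in for `io.BytesIO`, with `.size` replacing `.getbuffer().nbytes` in the final assertion.

buf = _DummyListBuffer()
assert buf.write(b"abc") == 3        # write() returns the byte count, like BytesIO
buf.write(b"def")
assert buf.size == 6                 # replaces buf.getbuffer().nbytes in the assert
assert buf.getvalue() == b"abcdef"   # full payload still recoverable if needed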
