
Commit 311a6f5

test: use _DummyListBuffer in test_reads.py to avoid GIL contention (googleapis#16860)
This PR replaces `io.BytesIO` with a custom `_DummyListBuffer` in `test_reads.py`. Profiling showed that `io.BytesIO.write()` holds the GIL while copying data, which introduces significant noise and bottlenecks in performance tests with high concurrency or large data transfers. Collecting chunks in a plain list avoids that GIL contention and reduces noise when profiling this code.
1 parent f1a4f9e commit 311a6f5
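
The GIL-contention rationale can be illustrated with a small standalone benchmark. The sketch below (illustrative only, not part of this commit; all names and sizes are made up) contrasts io.BytesIO with a list-backed buffer under thread concurrency: BytesIO.write() copies every payload while holding the GIL, whereas list.append() only stores a reference.

    # Illustrative sketch, not from the repository: compare io.BytesIO
    # against a list-backed buffer when several threads write concurrently.
    # BytesIO.write() copies the payload while holding the GIL; appending
    # a reference to a list performs no byte copy.
    import io
    import threading
    import time

    CHUNK = b"x" * (4 * 1024 * 1024)  # 4 MiB payload per write
    WRITES_PER_THREAD = 25
    NUM_THREADS = 4


    class ListBuffer:
        """Collects chunks by reference instead of copying them."""

        def __init__(self):
            self.chunks = []

        def write(self, data):
            self.chunks.append(data)  # stores a reference; no byte copy
            return len(data)


    def timed_run(make_buffer):
        buffers = [make_buffer() for _ in range(NUM_THREADS)]

        def worker(buf):
            for _ in range(WRITES_PER_THREAD):
                buf.write(CHUNK)

        threads = [threading.Thread(target=worker, args=(b,)) for b in buffers]
        start = time.perf_counter()
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        return time.perf_counter() - start


    if __name__ == "__main__":
        print(f"io.BytesIO : {timed_run(io.BytesIO):.3f}s")
        print(f"ListBuffer : {timed_run(ListBuffer):.3f}s")

On CPython the list-backed variant typically finishes much faster, since no bytes are copied under the lock; the exact numbers depend on chunk size and thread count.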

1 file changed

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py

Lines changed: 23 additions & 5 deletions
@@ -23,9 +23,7 @@
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
-from io import BytesIO
 from typing import List, NamedTuple, Optional
-
 import pytest
 
 import tests.perf.microbenchmarks.time_based.reads.config as config
@@ -226,6 +224,26 @@ def _download_time_based_json(client, filename, params):
     )
 
 
+# _DummyListBuffer is used instead of io.BytesIO to avoid GIL contention
+# during profiling. io.BytesIO.write() holds the GIL while copying data,
+# which introduces significant noise and bottlenecks in performance tests
+# with high concurrency or large data transfers.
+# This buffer simply collects chunks in a list and tracks the total size.
+class _DummyListBuffer:
+    def __init__(self):
+        self.chunks = []
+        self.size = 0
+
+    def write(self, data):
+        self.chunks.append(data)
+        nbytes = len(data)
+        self.size += nbytes
+        return nbytes
+
+    def getvalue(self):
+        return b"".join(self.chunks)
+
+
 async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()
@@ -253,17 +271,17 @@ async def _worker_coro():
                 offset = random.randint(
                     0, params.file_size_bytes - params.chunk_size_bytes
                 )
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
         else:  # seq
             for _ in range(params.num_ranges):
-                ranges.append((offset, params.chunk_size_bytes, BytesIO()))
+                ranges.append((offset, params.chunk_size_bytes, _DummyListBuffer()))
                 offset += params.chunk_size_bytes
                 if offset + params.chunk_size_bytes > params.file_size_bytes:
                     offset = 0  # Reset offset if end of file is reached
 
         await mrd.download_ranges(ranges)
 
-        bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
+        bytes_in_buffers = sum(r[2].size for r in ranges)
         assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges
 
         if not is_warming_up:
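
For clarity, a hypothetical usage snippet (not in the test file) showing that `_DummyListBuffer` covers the small write()/getvalue() subset of the file API the downloader exercises, and that `size` is tracked incrementally so the assertion no longer needs `getbuffer().nbytes`:

    buf = _DummyListBuffer()
    buf.write(b"abc")
    buf.write(b"def")
    assert buf.size == 6                # incremental total; replaces getbuffer().nbytes
    assert buf.getvalue() == b"abcdef"  # chunks are only joined on demand

Deferring the b"".join() to getvalue() keeps the hot write() path copy-free, which is the point of the swap.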
