Skip to content

Commit 8d961f8

Browse files
committed
fix(gooddata-sdk): stream Arrow IPC response directly, no BytesIO
Switch read_result_arrow to explicitly request application/vnd.apache.arrow.stream via Accept header and pipe the HTTP response directly into ipc.open_stream(), eliminating the intermediate BytesIO buffer. Update tests accordingly.
1 parent 6a20f1c commit 8d961f8

2 files changed

Lines changed: 122 additions & 7 deletions

File tree

packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -393,19 +393,24 @@ def read_result_arrow(self) -> pyarrow.Table:
393393
raise ImportError(
394394
"pyarrow is required to use read_result_arrow(). Install it with: pip install gooddata-sdk[arrow]"
395395
)
396-
import io
397396

398-
response = self._actions_api.retrieve_result_binary(
399-
workspace_id=self._workspace_id,
400-
result_id=self.result_id,
397+
header_params: dict = {"Accept": "application/vnd.apache.arrow.stream"}
398+
if self.cancel_token:
399+
header_params["X-GDC-CANCEL-TOKEN"] = self.cancel_token
400+
401+
response = self._actions_api.api_client.call_api(
402+
resource_path="/api/v1/actions/workspaces/{workspaceId}/execution/afm/execute/result/{resultId}/binary",
403+
method="GET",
404+
path_params={"workspaceId": self._workspace_id, "resultId": self.result_id},
405+
header_params=header_params,
406+
response_type=None,
401407
_preload_content=False,
402-
**({"x_gdc_cancel_token": self.cancel_token} if self.cancel_token else {}),
408+
_return_http_data_only=True,
403409
)
404410
try:
405-
buf = io.BytesIO(response.read())
411+
return _ipc.open_stream(response).read_all()
406412
finally:
407413
response.release_conn()
408-
return _ipc.open_file(buf).read_all()
409414

410415
def cancel(self) -> None:
411416
"""
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# (C) 2026 GoodData Corporation
2+
from __future__ import annotations
3+
4+
import io
5+
from unittest.mock import MagicMock, patch
6+
7+
import pytest
8+
9+
pyarrow = pytest.importorskip("pyarrow")
10+
11+
12+
def _make_ipc_stream_bytes() -> bytes:
13+
"""Return minimal Arrow IPC stream bytes for a one-row table."""
14+
import pyarrow as pa
15+
from pyarrow import ipc
16+
17+
table = pa.table({"x": pa.array([1.0])})
18+
buf = io.BytesIO()
19+
with ipc.new_stream(buf, table.schema) as writer:
20+
writer.write_table(table)
21+
return buf.getvalue()
22+
23+
24+
class _FakeResponse(io.RawIOBase):
25+
"""Minimal file-like with release_conn to simulate a urllib3 HTTPResponse."""
26+
27+
def __init__(self, data: bytes) -> None:
28+
super().__init__()
29+
self._buf = io.BytesIO(data)
30+
self.release_conn = MagicMock()
31+
32+
def read(self, n: int = -1) -> bytes:
33+
return self._buf.read(n)
34+
35+
def readable(self) -> bool:
36+
return True
37+
38+
def readinto(self, b: bytearray) -> int:
39+
data = self._buf.read(len(b))
40+
n = len(data)
41+
b[:n] = data
42+
return n
43+
44+
45+
def _make_bare(ipc_bytes: bytes):
46+
"""Return a BareExecutionResponse backed by a mock API client."""
47+
from gooddata_sdk.compute.model.execution import BareExecutionResponse
48+
49+
mock_api_client = MagicMock()
50+
mock_response = _FakeResponse(ipc_bytes)
51+
mock_api_client.actions_api.api_client.call_api.return_value = mock_response
52+
53+
afm_exec_response = {
54+
"execution_response": {
55+
"links": {"executionResult": "result-id-123"},
56+
"dimensions": [],
57+
}
58+
}
59+
bare = BareExecutionResponse(
60+
api_client=mock_api_client,
61+
workspace_id="ws-id",
62+
execution_response=afm_exec_response,
63+
)
64+
return bare, mock_response
65+
66+
67+
def test_read_result_arrow_returns_table() -> None:
68+
"""read_result_arrow reads the stream from the binary endpoint and returns a pa.Table."""
69+
import pyarrow as pa
70+
71+
ipc_bytes = _make_ipc_stream_bytes()
72+
bare, mock_response = _make_bare(ipc_bytes)
73+
74+
result = bare.read_result_arrow()
75+
76+
assert isinstance(result, pa.Table)
77+
mock_response.release_conn.assert_called_once()
78+
79+
80+
def test_read_result_arrow_requests_stream_format() -> None:
81+
"""read_result_arrow sets Accept: application/vnd.apache.arrow.stream explicitly."""
82+
ipc_bytes = _make_ipc_stream_bytes()
83+
bare, _ = _make_bare(ipc_bytes)
84+
85+
bare.read_result_arrow()
86+
87+
call_kwargs = bare._actions_api.api_client.call_api.call_args.kwargs
88+
assert call_kwargs["header_params"]["Accept"] == "application/vnd.apache.arrow.stream"
89+
90+
91+
def test_read_result_arrow_without_cancel_token() -> None:
92+
"""Without a cancel_token the call omits the cancel header."""
93+
ipc_bytes = _make_ipc_stream_bytes()
94+
bare, _ = _make_bare(ipc_bytes)
95+
96+
bare.read_result_arrow()
97+
98+
call_kwargs = bare._actions_api.api_client.call_api.call_args.kwargs
99+
assert "X-GDC-CANCEL-TOKEN" not in call_kwargs["header_params"]
100+
101+
102+
def test_read_result_arrow_no_pyarrow_raises() -> None:
103+
"""When pyarrow is not installed, read_result_arrow raises ImportError."""
104+
from gooddata_sdk.compute.model import execution as _exec_mod
105+
106+
ipc_bytes = _make_ipc_stream_bytes()
107+
bare, _ = _make_bare(ipc_bytes)
108+
109+
with patch.object(_exec_mod, "_ipc", None), pytest.raises(ImportError, match="pyarrow is required"):
110+
bare.read_result_arrow()

0 commit comments

Comments
 (0)