Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 739a382

Browse files
committed
don't convert json in rowiterator._columns
1 parent df87a70 commit 739a382

4 files changed

Lines changed: 27 additions & 6 deletions

File tree

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,9 @@ def _row_iterator_page_to_arrow(page, column_names, arrow_types):
759759

760760
arrays = []
761761
for column_index, arrow_type in enumerate(arrow_types):
762+
# RowIterator parses JSON, but for arrow, we actually want to keep them
763+
# as strings.
764+
# TODO: Support STRUCT<JSON> and ARRAY<JSON>.
762765
arrays.append(pyarrow.array(page._columns[column_index], type=arrow_type))
763766

764767
if isinstance(column_names, pyarrow.Schema):

google/cloud/bigquery/_pyarrow_helpers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ def pyarrow_timestamp():
6666
# Prefer JSON type built-in to pyarrow (adding in 19.0.0), if available.
6767
# Otherwise, fallback to db-dtypes, where the JSONArrowType was added in 1.4.0,
6868
# but since they might have an older db-dtypes, have string as a fallback for that.
69-
if hasattr(pyarrow, "json_"):
70-
json_arrow_type = pyarrow.json_(pyarrow.string())
71-
elif hasattr(db_dtypes, "JSONArrowType"):
69+
# TODO(https://github.com/pandas-dev/pandas/issues/60958): switch to
70+
# pyarrow.json_(pyarrow.string()) if available and supported by pandas.
71+
if hasattr(db_dtypes, "JSONArrowType"):
7272
json_arrow_type = db_dtypes.JSONArrowType()
7373
else:
7474
json_arrow_type = pyarrow.string()

google/cloud/bigquery/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,11 @@ class BigQueryStorageNotFoundError(BigQueryError):
3333

3434
class LegacyPandasError(BigQueryError):
3535
"""Raised when too old a version of pandas package is detected at runtime."""
36+
37+
38+
class BigQueryWarning(UserWarning):
39+
"""Base class for all custom warnings defined by the BigQuery client."""
40+
41+
42+
class JSONDtypeWarning(BigQueryWarning):
43+
"""Raised when JSON is used in to_dataframe() API."""

google/cloud/bigquery/table.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3531,12 +3531,22 @@ def _row_iterator_page_columns(schema, response):
35313531
columns = []
35323532
rows = response.get("rows", [])
35333533

3534-
def get_column_data(field_index, field):
3534+
def get_column_data(field_index):
35353535
for row in rows:
3536-
yield _helpers._field_from_json(row["f"][field_index]["v"], field)
3536+
yield row["f"][field_index]["v"]
3537+
3538+
def parse_column_data(column, field):
3539+
# pyarrow.json_() type needs to keep the data as a string, not parsed.
3540+
# TODO: support STRUCT<JSON> and ARRAY<JSON>
3541+
if field.field_type.casefold() == "json":
3542+
for value in column:
3543+
yield value
3544+
3545+
for value in column:
3546+
yield _helpers._field_from_json(value, field)
35373547

35383548
for field_index, field in enumerate(schema):
3539-
columns.append(get_column_data(field_index, field))
3549+
columns.append(parse_column_data(get_column_data(field_index), field))
35403550

35413551
return columns
35423552

0 commit comments

Comments (0)