Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions sdks/python/apache_beam/dataframe/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def seekable(self):

@property
def closed(self):
  """Report whether the wrapped stream is closed.

  Returns:
    The ``closed`` attribute of ``self._underlying``, or ``False`` when the
    underlying object does not expose one.
  """
  # Delegate instead of hard-coding False, so flush() and close() can tell
  # when the underlying stream has already been closed.
  return getattr(self._underlying, 'closed', False)

def __iter__(self):
# For pandas is_file_like.
Expand Down Expand Up @@ -584,7 +584,18 @@ def _read(self, size=-1):
return res

def flush(self):
  """Flush the underlying stream, unless this handle is already closed.

  Flushing is best-effort: a ``ValueError`` raised by the underlying
  ``flush()`` is suppressed rather than propagated to the caller.
  """
  if self.closed:
    # Nothing to flush once the stream has been closed.
    return
  try:
    self._underlying.flush()
  except ValueError:
    # Deliberately ignored: file objects raise ValueError when flushed after
    # being closed, and a flush at that point has no work left to do.
    pass

def close(self):
  """Close the underlying stream if it is open and supports closing.

  Does nothing when this handle already reports closed or when the
  underlying object has no ``close`` attribute. ``OSError`` and
  ``ValueError`` raised by the underlying ``close()`` are suppressed.
  """
  if self.closed or not hasattr(self._underlying, 'close'):
    return
  try:
    self._underlying.close()
  except (OSError, ValueError):
    # Best-effort close: these failures are intentionally swallowed.
    pass
Comment thread
shunping marked this conversation as resolved.


class _ReadFromPandasDoFn(beam.DoFn, beam.RestrictionProvider):
Expand Down
45 changes: 45 additions & 0 deletions sdks/python/apache_beam/dataframe/io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,51 @@ def test_truncating_filehandle_iter(self):
self._run_truncating_file_handle_iter_test('aaa b cccccccccccccccccccc')
self._run_truncating_file_handle_iter_test('aaa b ccccccccccccccccc ')

def test_truncating_filehandle_flush_on_closed_stream(self):
  """flush() and close() on the handle must not raise after the underlying
  stream has been closed."""
  class ClosedFlushingStream(StringIO):
    """StringIO whose flush() explicitly raises once the stream is closed."""
    def flush(self):
      if self.closed:
        raise ValueError("I/O operation on closed file.")
      super().flush()

  s = 'a b c'
  tracker = restriction_trackers.OffsetRestrictionTracker(
      restriction_trackers.OffsetRange(0, len(s)))
  underlying = ClosedFlushingStream(s)
  handle = io._TruncatingFileHandle(
      underlying, tracker, splitter=io._DelimSplitter(' ', 10))

  # Verify that calling flush() when the underlying stream is closed
  # succeeds without raising ValueError.
  underlying.close()
  handle.flush()
  # Closing the handle afterwards must not raise either.
  handle.close()

def test_truncating_filehandle_exception_suppression(self):
  """Errors raised by the underlying flush()/close() must not escape the
  handle's own flush()/close()."""
  class FaultyStream(StringIO):
    """Stream that always reports itself open and fails on flush and close."""
    @property
    def closed(self):
      # Always claim to be open so the handle actually attempts the calls.
      return False

    def flush(self):
      raise ValueError("Simulated flush error")

    def close(self):
      raise OSError("Simulated close error")

  contents = 'a b c'
  offsets = restriction_trackers.OffsetRange(0, len(contents))
  restriction = restriction_trackers.OffsetRestrictionTracker(offsets)
  faulty = FaultyStream(contents)
  wrapped = io._TruncatingFileHandle(
      faulty, restriction, splitter=io._DelimSplitter(' ', 10))

  # The ValueError from the underlying flush() should be swallowed.
  wrapped.flush()

  # The OSError from the underlying close() should be swallowed as well.
  wrapped.close()

@parameterized.expand([
('defaults', {}),
('header', dict(header=1)),
Expand Down
Loading