Skip to content

Commit 0367422

Browse files
authored
Optimize Serialization (#781)
This PR optimizes array serialization by preallocating the necessary space and reading all array items at once, instead of reading them one by one.
1 parent 15a9697 commit 0367422

2 files changed

Lines changed: 43 additions & 24 deletions

File tree

hazelcast/serialization/input.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,10 @@ def read_string(self):
147147
length = self.read_int()
148148
if length == NULL_ARRAY_LENGTH:
149149
return None
150-
result = bytearray(length)
151-
if length > 0:
152-
self.read_into(result, 0, length)
153-
return result.decode("utf-8")
150+
self._check_available(self._pos, length)
151+
result = self._buffer[self._pos : self._pos + length].decode("utf-8")
152+
self._pos += length
153+
return result
154154

155155
def read_byte_array(self):
156156
length = self.read_int()
@@ -163,28 +163,28 @@ def read_byte_array(self):
163163
return result
164164

165165
def read_i8_array(self) -> typing.List[int]:
166-
return self._read_array_fnc(self.read_byte)
166+
return self._bulk_read(BYTE_SIZE_IN_BYTES, "b")
167167

168168
def read_boolean_array(self):
169-
return self._read_array_fnc(self.read_boolean)
169+
return self._bulk_read(BOOLEAN_SIZE_IN_BYTES, "?")
170170

171171
def read_char_array(self):
172172
return self._read_array_fnc(self.read_char)
173173

174174
def read_int_array(self):
175-
return self._read_array_fnc(self.read_int)
175+
return self._bulk_read(INT_SIZE_IN_BYTES, "i")
176176

177177
def read_long_array(self):
178-
return self._read_array_fnc(self.read_long)
178+
return self._bulk_read(LONG_SIZE_IN_BYTES, "q")
179179

180180
def read_double_array(self):
181-
return self._read_array_fnc(self.read_double)
181+
return self._bulk_read(DOUBLE_SIZE_IN_BYTES, "d")
182182

183183
def read_float_array(self):
184-
return self._read_array_fnc(self.read_float)
184+
return self._bulk_read(FLOAT_SIZE_IN_BYTES, "f")
185185

186186
def read_short_array(self):
187-
return self._read_array_fnc(self.read_short)
187+
return self._bulk_read(SHORT_SIZE_IN_BYTES, "h")
188188

189189
def read_string_array(self):
190190
return self._read_array_fnc(self.read_string)
@@ -215,7 +215,18 @@ def read_utf(self):
215215
def read_utf_array(self):
216216
return self.read_string_array()
217217

218-
# HELPERS
218+
def _bulk_read(self, item_size, fmt_char):
219+
length = self.read_int()
220+
if length == NULL_ARRAY_LENGTH:
221+
return None
222+
223+
nbytes = length * item_size
224+
self._check_available(self._pos, nbytes)
225+
endian = ">" if self._is_big_endian else "<"
226+
result = list(struct.unpack_from(f"{endian}{length}{fmt_char}", self._buffer, self._pos))
227+
self._pos += nbytes
228+
return result
229+
219230
def _check_available(self, position, size):
220231
if position < 0:
221232
raise ValueError

hazelcast/serialization/output.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import typing
2-
31
from hazelcast.serialization.api import *
42
from hazelcast.serialization.bits import *
53

@@ -157,28 +155,28 @@ def write_byte_array(self, val):
157155
self._pos += _len
158156

159157
def write_signed_byte_array(self, val: typing.List[int]) -> None:
160-
self._write_array_fnc(val, self.write_signed_byte)
158+
self._bulk_write(val, BYTE_SIZE_IN_BYTES, "b")
161159

162160
def write_boolean_array(self, val):
163-
self._write_array_fnc(val, self.write_boolean)
161+
self._bulk_write(val, BOOLEAN_SIZE_IN_BYTES, "?")
164162

165163
def write_char_array(self, val):
166164
self._write_array_fnc(val, self.write_char)
167165

168166
def write_int_array(self, val):
169-
self._write_array_fnc(val, self.write_int)
167+
self._bulk_write(val, INT_SIZE_IN_BYTES, "i")
170168

171169
def write_long_array(self, val):
172-
self._write_array_fnc(val, self.write_long)
170+
self._bulk_write(val, LONG_SIZE_IN_BYTES, "q")
173171

174172
def write_double_array(self, val):
175-
self._write_array_fnc(val, self.write_double)
173+
self._bulk_write(val, DOUBLE_SIZE_IN_BYTES, "d")
176174

177175
def write_float_array(self, val):
178-
self._write_array_fnc(val, self.write_float)
176+
self._bulk_write(val, FLOAT_SIZE_IN_BYTES, "f")
179177

180178
def write_short_array(self, val):
181-
self._write_array_fnc(val, self.write_short)
179+
self._bulk_write(val, SHORT_SIZE_IN_BYTES, "h")
182180

183181
def write_string_array(self, val):
184182
self._write_array_fnc(val, self.write_string)
@@ -204,16 +202,26 @@ def set_position(self, position):
204202
self._pos = position
205203

206204
def write_zero_bytes(self, count):
207-
for _ in range(0, count):
208-
self._write(0)
205+
self._ensure_available(count)
206+
self._buffer[self._pos : self._pos + count] = bytes(count)
207+
self._pos += count
209208

210209
def write_utf(self, val):
211210
self.write_string(val)
212211

213212
def write_utf_array(self, val):
214213
self.write_string_array(val)
215214

216-
# HELPERS
215+
def _bulk_write(self, val, item_size, fmt_char):
216+
length = len(val) if val is not None else NULL_ARRAY_LENGTH
217+
self.write_int(length)
218+
if length > 0:
219+
nbytes = length * item_size
220+
self._ensure_available(nbytes)
221+
endian = ">" if self._is_big_endian else "<"
222+
struct.pack_into(f"{endian}{length}{fmt_char}", self._buffer, self._pos, *val)
223+
self._pos += nbytes
224+
217225
def _write_array_fnc(self, val, item_write_fnc):
218226
_len = len(val) if val is not None else NULL_ARRAY_LENGTH
219227
self.write_int(_len)

0 commit comments

Comments
 (0)