@@ -2246,10 +2246,23 @@ sendControlMessage(icpkthdr *pkt, int fd, struct sockaddr *addr, socklen_t peerL
22462246 if (gp_interconnect_full_crc )
22472247 addCRC (pkt );
22482248
2249- char errDetail [100 ];
2250- snprintf (errDetail , sizeof (errDetail ), "Send control message: got error with seq %u" , pkt -> seq );
2251- /* Retry for infinite times since we have no retransmit mechanism for control message */
2252- n = sendtoWithRetry (fd , (const char * ) pkt , pkt -> len , 0 , addr , peerLen , -1 , errDetail );
2249+ /* make at most 10 attempts to send the control message; only EINTR/EAGAIN/EWOULDBLOCK are retried */
2250+ int counter = 0 ;
2251+ while (counter < 10 )
2252+ {
2253+ counter ++ ;
2254+ n = sendto (fd , (const char * ) pkt , pkt -> len , 0 , addr , peerLen );
2255+ if (n < 0 )
2256+ {
2257+ if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK )
2258+ continue ;
2259+ else {
2260+ write_log ("sendcontrolmessage: got errno %d" , errno );
2261+ return ;
2262+ }
2263+ }
2264+ break ;
2265+ }
22532266 if (n < pkt -> len )
22542267 write_log ("sendcontrolmessage: got error %d errno %d seq %d" , n , errno , pkt -> seq );
22552268}
@@ -5392,6 +5405,19 @@ sendtoWithRetry(int socket, const void *message, size_t length,
53925405 return n ;
53935406 }
53945407
5408+ /*
5409+ * If the OS can detect an MTU issue on the host network interfaces, we
5410+ * would get EMSGSIZE here. So, bail with a HINT about checking MTU.
5411+ */
5412+ if (errno == EMSGSIZE )
5413+ {
5414+ ereport (ERROR , (errcode (ERRCODE_GP_INTERCONNECTION_ERROR ),
5415+ errmsg ("Interconnect error writing an outgoing packet: %m" ),
5416+ errdetail ("error during sendto() call (error:%d).\n"
5417+ "%s" , save_errno , errDetail ),
5418+ errhint ("check if interface MTU is equal across the cluster and lower than gp_max_packet_size" )));
5419+ }
5420+
53955421 ereport (ERROR , (errcode (ERRCODE_GP_INTERCONNECTION_ERROR ),
53965422 errmsg ("Interconnect error writing an outgoing packet: %m" ),
53975423 errdetail ("error during sendto() call (error:%d).\n"
0 commit comments