@@ -2247,10 +2247,23 @@ sendControlMessage(icpkthdr *pkt, int fd, struct sockaddr *addr, socklen_t peerL
22472247 if (gp_interconnect_full_crc )
22482248 addCRC (pkt );
22492249
2250- char errDetail [100 ];
2251- snprintf (errDetail , sizeof (errDetail ), "Send control message: got error with seq %u" , pkt -> seq );
2252- /* Retry for infinite times since we have no retransmit mechanism for control message */
2253- n = sendtoWithRetry (fd , (const char * ) pkt , pkt -> len , 0 , addr , peerLen , -1 , errDetail );
2250+ /* retry 10 times for sending control message */
2251+ int counter = 0 ;
2252+ while (counter < 10 )
2253+ {
2254+ counter ++ ;
2255+ n = sendto (fd , (const char * ) pkt , pkt -> len , 0 , addr , peerLen );
2256+ if (n < 0 )
2257+ {
2258+ if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK )
2259+ continue ;
2260+ else {
2261+ write_log ("sendcontrolmessage: got errno %d" , errno );
2262+ return ;
2263+ }
2264+ }
2265+ break ;
2266+ }
22542267 if (n < pkt -> len )
22552268 write_log ("sendcontrolmessage: got error %d errno %d seq %d" , n , errno , pkt -> seq );
22562269}
@@ -5399,6 +5412,19 @@ sendtoWithRetry(int socket, const void *message, size_t length,
53995412 return n ;
54005413 }
54015414
5415+ /*
5416+ * If the OS can detect an MTU issue on the host network interfaces, we
5417+ * would get EMSGSIZE here. So, bail with a HINT about checking MTU.
5418+ */
5419+ if (errno == EMSGSIZE )
5420+ {
5421+ ereport (ERROR , (errcode (ERRCODE_GP_INTERCONNECTION_ERROR ),
5422+ errmsg ("Interconnect error writing an outgoing packet: %m" ),
5423+ errdetail ("error during sendto() call (error:%d).\n"
5424+ "%s" , save_errno , errDetail ),
5425+ errhint ("check if interface MTU is equal across the cluster and lower than gp_max_packet_size" )));
5426+ }
5427+
54025428 ereport (ERROR , (errcode (ERRCODE_GP_INTERCONNECTION_ERROR ),
54035429 errmsg ("Interconnect error writing an outgoing packet: %m" ),
54045430 errdetail ("error during sendto() call (error:%d).\n"
0 commit comments