mirror of
https://git.freebsd.org/ports.git
synced 2025-07-17 09:19:15 -04:00
Provide a better patch to fix connection timeouts
Obtained from: haproxy ML
This commit is contained in:
parent
2b62b2a65b
commit
5eb9de224a
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=436194
2 changed files with 96 additions and 56 deletions
|
@ -3,7 +3,7 @@
|
|||
|
||||
PORTNAME= haproxy
|
||||
PORTVERSION= 1.7.3
|
||||
PORTREVISION= 1
|
||||
PORTREVISION= 2
|
||||
CATEGORIES= net www
|
||||
MASTER_SITES= http://www.haproxy.org/download/1.7/src/
|
||||
DISTFILES= ${PORTNAME}-${DISTVERSION}${EXTRACT_SUFX}
|
||||
|
|
|
@ -1,60 +1,100 @@
|
|||
--- src/proto_tcp.c.orig 2017-02-28 11:59:23.000000000 +0300
|
||||
+++ src/proto_tcp.c 2017-03-12 11:55:08.528932000 +0300
|
||||
@@ -474,16 +474,10 @@ int tcp_connect_server(struct connection
|
||||
if (global.tune.server_rcvbuf)
|
||||
setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
|
||||
From afbf56b951967e8fa4d509e423fdcb11c27d40e2 Mon Sep 17 00:00:00 2001
|
||||
From: Willy Tarreau <w@1wt.eu>
|
||||
Date: Tue, 14 Mar 2017 20:19:29 +0100
|
||||
Subject: BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the
|
||||
data layer
|
||||
|
||||
Matthias Fechner reported a regression in 1.7.3 brought by the backport
|
||||
of commit 819efbf ("BUG/MEDIUM: tcp: don't poll for write when connect()
|
||||
succeeds"), causing some connections to fail to establish once in a while.
|
||||
While this commit itself was a fix for a bad sequencing of connection
|
||||
events, it in fact unveiled a much deeper bug going back to the connection
|
||||
rework era in v1.5-dev12 : 8f8c92f ("MAJOR: connection: add a new
|
||||
CO_FL_CONNECTED flag").
|
||||
|
||||
It's worth noting that in a lab reproducing a similar environment as
|
||||
Matthias' about only 1 every 19000 connections exhibit this behaviour,
|
||||
making the issue not so easy to observe. A trick to make the problem
|
||||
more observable consists in disabling non-blocking mode on the socket
|
||||
before calling connect() and re-enabling it later, so that connect()
|
||||
always succeeds. Then it becomes 100% reproducible.
|
||||
|
||||
The problem is that this CO_FL_CONNECTED flag is tested after deciding to
|
||||
call the data layer (typically the stream interface but might be a health
|
||||
check as well), and that the decision to call the data layer relies on a
|
||||
change of one of the flags covered by the CO_FL_CONN_STATE set, which is
|
||||
made of CO_FL_CONNECTED among others.
|
||||
|
||||
Before the fix above, this bug couldn't appear with TCP but it could
|
||||
appear with Unix sockets. Indeed, connect() was always considered
|
||||
blocking so the CO_FL_WAIT_L4_CONN connection flag was always set, and
|
||||
polling for write events was always enabled. This used to guarantee that
|
||||
the conn_fd_handler() could detect a change among the CO_FL_CONN_STATE
|
||||
flags.
|
||||
|
||||
Now with the fix above, if a connect() immediately succeeds for non-ssl
|
||||
connection with send-proxy enabled, and no data in the buffer (thus TCP
|
||||
mode only), the CO_FL_WAIT_L4_CONN flag is not set, the lack of data in
|
||||
the buffer doesn't enable polling flags for the data layer, the
|
||||
CO_FL_CONNECTED flag is not set due to send-proxy still being pending,
|
||||
and once send-proxy is done, its completion doesn't cause the data layer
|
||||
to be woken up due to the fact that CO_FL_CONNECT is still not present
|
||||
and that the CO_FL_SEND_PROXY flag is not watched in CO_FL_CONN_STATE.
|
||||
|
||||
Then no progress is made when data are received from the client (and
|
||||
attempted to be forwarded), because a CF_WRITE_NULL (or CF_WRITE_PARTIAL)
|
||||
flag is needed for the stream-interface state to turn from SI_ST_CON to
|
||||
SI_ST_EST, allowing ->chk_snd() to be called when new data arrive. And
|
||||
the only way to set this flag is to call the data layer of course.
|
||||
|
||||
After the connect timeout, the connection gets killed and if in the mean
|
||||
time some data have accumulated in the buffer, the retry will succeed.
|
||||
|
||||
This patch fixes this situation by simply placing the update of
|
||||
CO_FL_CONNECTED where it should have been, before the check for a flag
|
||||
change needed to wake up the data layer and not after.
|
||||
|
||||
This fix must be backported to 1.7, 1.6 and 1.5. Versions not having
|
||||
the patch above are still affected for unix sockets.
|
||||
|
||||
Special thanks to Matthias Fechner who provided a very detailed bug
|
||||
report with a bisection designating the faulty patch, and to Olivier
|
||||
Houchard for providing full access to a pretty similar environment where
|
||||
the issue could first be reproduced.
|
||||
(cherry picked from commit 7bf3fa3c23f6a1b7ed1212783507ac50f7e27544)
|
||||
---
|
||||
src/connection.c | 11 +++++++----
|
||||
1 file changed, 7 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/connection.c b/src/connection.c
|
||||
index 26fc5f6..1e4c9aa 100644
|
||||
--- src/connection.c
|
||||
+++ src/connection.c
|
||||
@@ -131,6 +131,13 @@ void conn_fd_handler(int fd)
|
||||
}
|
||||
|
||||
- if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) {
|
||||
- if (errno == EINPROGRESS || errno == EALREADY) {
|
||||
- /* common case, let's wait for connect status */
|
||||
- conn->flags |= CO_FL_WAIT_L4_CONN;
|
||||
- }
|
||||
- else if (errno == EISCONN) {
|
||||
- /* should normally not happen but if so, indicates that it's OK */
|
||||
- conn->flags &= ~CO_FL_WAIT_L4_CONN;
|
||||
- }
|
||||
- else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
|
||||
+ if ((connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) &&
|
||||
+ (errno != EINPROGRESS) && (errno != EALREADY) && (errno != EISCONN)) {
|
||||
leave:
|
||||
+ /* Verify if the connection just established. The CO_FL_CONNECTED flag
|
||||
+ * being included in CO_FL_CONN_STATE, its change will be noticed by
|
||||
+ * the next block and be used to wake up the data layer.
|
||||
+ */
|
||||
+ if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED))))
|
||||
+ conn->flags |= CO_FL_CONNECTED;
|
||||
+
|
||||
+ if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
|
||||
char *msg;
|
||||
if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
|
||||
msg = "no free ports";
|
||||
@@ -520,10 +514,6 @@ int tcp_connect_server(struct connection
|
||||
return SF_ERR_SRVCL;
|
||||
}
|
||||
}
|
||||
- else {
|
||||
- /* connect() == 0, this is great! */
|
||||
- conn->flags &= ~CO_FL_WAIT_L4_CONN;
|
||||
- }
|
||||
/* The wake callback may be used to process a critical error and abort the
|
||||
* connection. If so, we don't want to go further as the connection will
|
||||
* have been released and the FD destroyed.
|
||||
@@ -140,10 +147,6 @@ void conn_fd_handler(int fd)
|
||||
conn->data->wake(conn) < 0)
|
||||
return;
|
||||
|
||||
conn->flags |= CO_FL_ADDR_TO_SET;
|
||||
|
||||
@@ -533,6 +523,7 @@ int tcp_connect_server(struct connection
|
||||
|
||||
conn_ctrl_init(conn); /* registers the FD */
|
||||
fdtab[fd].linger_risk = 1; /* close hard if needed */
|
||||
+ conn_sock_want_send(conn); /* for connect status */
|
||||
|
||||
if (conn_xprt_init(conn) < 0) {
|
||||
conn_force_close(conn);
|
||||
@@ -540,17 +531,6 @@ int tcp_connect_server(struct connection
|
||||
return SF_ERR_RESOURCE;
|
||||
}
|
||||
|
||||
- if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN)) {
|
||||
- conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */
|
||||
- }
|
||||
- else {
|
||||
- /* If there's no more handshake, we need to notify the data
|
||||
- * layer when the connection is already OK otherwise we'll have
|
||||
- * no other opportunity to do it later (eg: health checks).
|
||||
- */
|
||||
- data = 1;
|
||||
- }
|
||||
- /* Last check, verify if the connection just established */
|
||||
- if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED))))
|
||||
- conn->flags |= CO_FL_CONNECTED;
|
||||
-
|
||||
if (data)
|
||||
conn_data_want_send(conn); /* prepare to send data if any */
|
||||
/* remove the events before leaving */
|
||||
fdtab[fd].ev &= FD_POLL_STICKY;
|
||||
|
||||
--
|
||||
1.7.12.1
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue