From 646bf8bc26ca32206852b312b2053891c5f45792 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Fri, 6 Jul 2012 21:17:45 +0200
Subject: [PATCH] Recover from lost link when running in slave only mode.

Under Linux, when the link goes down, our multicast socket becomes stale.
We always poll(2) for events, but link loss does not trigger any event
to let us know that something is wrong. Once the port enters master mode
and starts announcing itself, the socket throws an error. This in turn
causes a fault, and we reopen the socket when clearing the fault.

However, in the case of slave only mode, if the port is listening then
it will never send, discover the link error, or repair the socket. This
patch fixes the issue by simply reopening the socket after an announce
timeout.

[ Another way would be to use a netlink socket, but that would add too
  much complexity as it poorly matches our port/interface model. ]

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
 port.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/port.c b/port.c
index 9d3a966..10e9a03 100644
--- a/port.c
+++ b/port.c
@@ -690,6 +690,20 @@ no_timers:
 	return -1;
 }
 
+static int port_renew_transport(struct port *p) /* close and reopen p->trp */
+{
+	if (!port_is_enabled(p)) {
+		return 0; /* port not enabled: nothing is reopened, report success */
+	}
+	clock_remove_fda(p->clock, p, p->fda); /* drop p->fda from the clock first */
+	transport_close(p->trp, &p->fda);
+	if (transport_open(p->trp, p->name, &p->fda, p->timestamping)) {
+		return -1; /* open reported an error; p->fda is not reinstalled */
+	}
+	clock_install_fda(p->clock, p, p->fda); /* hand the new p->fda to the clock */
+	return 0;
+}
+
 /*
  * Returns non-zero if the announce message is different than last.
  */
@@ -1238,6 +1252,9 @@ enum fsm_event port_event(struct port *p, int fd_index)
 		if (p->best)
 			fc_clear(p->best);
 		port_set_announce_tmo(p);
+		if (clock_slave_only(p->clock) && port_renew_transport(p)) {
+			return EV_FAULT_DETECTED;
+		}
 		return EV_ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES;
 
 	case FD_DELAY_TIMER: