From 646bf8bc26ca32206852b312b2053891c5f45792 Mon Sep 17 00:00:00 2001
From: Richard Cochran
Date: Fri, 6 Jul 2012 21:17:45 +0200
Subject: [PATCH] Recover from lost link when running in slave only mode.

Under Linux, when the link goes down our multicast socket becomes
stale. We always poll(2) for events, but the link down does not trigger
any event to let us know that something is wrong.

Once the port enters master mode and starts announcing itself, the
socket throws an error. This in turn causes a fault, and we reopen the
socket when clearing the fault.

However, in the case of slave only mode, if the port is listening then
it will never send, discover the link error, or repair the socket.

This patch fixes the issue by simply reopening the socket after an
announce timeout.

[ Another way would be to use a netlink socket, but that would add too
  much complexity as it poorly matches our port/interface model. ]

Signed-off-by: Richard Cochran
---
 port.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/port.c b/port.c
index 9d3a966..10e9a03 100644
--- a/port.c
+++ b/port.c
@@ -690,6 +690,20 @@ no_timers:
 	return -1;
 }
 
+static int port_renew_transport(struct port *p)
+{
+	if (!port_is_enabled(p)) {
+		return 0;
+	}
+	clock_remove_fda(p->clock, p, p->fda);
+	transport_close(p->trp, &p->fda);
+	if (transport_open(p->trp, p->name, &p->fda, p->timestamping)) {
+		return -1;
+	}
+	clock_install_fda(p->clock, p, p->fda);
+	return 0;
+}
+
 /*
  * Returns non-zero if the announce message is different than last.
  */
@@ -1238,6 +1252,9 @@ enum fsm_event port_event(struct port *p, int fd_index)
 		if (p->best)
 			fc_clear(p->best);
 		port_set_announce_tmo(p);
+		if (clock_slave_only(p->clock) && port_renew_transport(p)) {
+			return EV_FAULT_DETECTED;
+		}
 		return EV_ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES;
 
 	case FD_DELAY_TIMER: