Skip to content

Instantly share code, notes, and snippets.

@jirutka
Last active June 3, 2024 12:16
Show Gist options
  • Save jirutka/20fbe0531099b09c0627bb52f2959aa5 to your computer and use it in GitHub Desktop.
Save jirutka/20fbe0531099b09c0627bb52f2959aa5 to your computer and use it in GitHub Desktop.
Simple Keepalived notify script for Valkey/Redis master-replica failover

This is a simple solution for when you use keepalived with VRRP to fail over between two instances of an application (e.g. nginx) that uses Redis/Valkey.

It doesn’t provide a strong guarantee of data consistency, so it’s only suitable for caches, user sessions, and similar data!

keepalived.conf example:

vrrp_instance vi {
    ...
    notify "/etc/keepalived/scripts/notify_valkey.sh"
}
#!/bin/sh
# Keepalived notify handler that switches Valkey between the master and the
# replica role. It is written for a pair of nodes only: exactly two keepalived
# instances, each paired with one Valkey instance.
set -o errexit -o nounset

# Command used to run Valkey/Redis commands.
REDIS_CLI='valkey-cli'
# Port given to REPLICAOF when this node is pointed at the other node.
REDIS_PORT=6379
# Tag attached to every syslog message written by this script.
SYSLOG_TAG='keepalived-valkey'

# Value (in milliseconds) passed to FAILOVER TIMEOUT: the longest a master
# stays in the waiting-for-sync state before it aborts the failover attempt.
FAILOVER_TIMEOUT=2000
# Number of seconds to sleep while a failover is given time to complete. Used
# both by the replica waiting for a master-initiated failover and by the
# master after it has issued FAILOVER.
FAILOVER_WAIT=8
# Sends one message to syslog (facility local0, tag $SYSLOG_TAG) via logger.
#   $1 - syslog severity name appended to "local0.", e.g. info, warn, err
#   $2 - message text
log() {
  local level="$1"
  local msg="$2"

  # -s makes logger print the message to stderr as well. The priority is
  # quoted so an unexpected level value can never be word-split into extra
  # logger arguments.
  logger -s -t "$SYSLOG_TAG" -p "local0.$level" "$msg"
}
# Runs a command through $REDIS_CLI and throws away its standard output.
#   $@ - the command and its arguments, e.g. REPLICAOF NO ONE
# Returns 0 when the CLI exits successfully. Otherwise logs the text the CLI
# wrote to stderr (prefixed with the command name, $1) and returns 1.
redis_cmd() {
  local errout

  # Redirection order matters: stderr is first duplicated onto the captured
  # stream, then stdout is pointed at /dev/null, so only stderr is captured.
  # $REDIS_CLI is deliberately unquoted so it may hold a command plus options.
  errout="$($REDIS_CLI -e "$@" 2>&1 >/dev/null)" && return 0

  log err "Command $1 failed: $errout"
  return 1
}
# Prints the value of one field from the output of INFO REPLICATION.
#   $1 - field name, e.g. role, master_link_status, connected_slaves
# Prints nothing when no line of the form "<name>:<value>" is present.
repl_info() {
  local name="$1"

  # The capture stops at the first whitespace character, which also drops a
  # trailing carriage return if the CLI emits CRLF line endings. The POSIX
  # class [^[:space:]] is used instead of \S, which is a non-standard escape
  # that not every sed implementation understands.
  # $REDIS_CLI is deliberately unquoted so it may hold a command plus options.
  $REDIS_CLI -e INFO REPLICATION \
    | sed -En "s/^$name:([^[:space:]]*).*/\1/p"
}
# Succeeds (returns 0) when the "role" field reported by repl_info is exactly
# "master"; returns 1 for any other value, including an empty one.
is_master() {
  case "$(repl_info 'role')" in
    master) return 0 ;;
    *) return 1 ;;
  esac
}
# Makes this node the master.
#
# If the node is a replica whose link to the current master is up, it first
# sleeps $FAILOVER_WAIT seconds so that a failover started by that master can
# finish. If the node is still not master afterwards (or the link was not up
# in the first place), it is promoted with REPLICAOF NO ONE.
#
# Takes no arguments. Returns the status of the last command executed.
replica_to_master() {
  # Remembers whether a failover was actually awaited, so that the log does
  # not report a "failed failover" when the master was simply unreachable.
  local waited=no

  # NOTE: When keepalived on the primary host is started and the secondary is
  # MASTER, it enters the BACKUP mode first, so Valkey syncs data from the
  # secondary node, then keepalived enters the MASTER mode and Valkey becomes
  # the master.
  if is_master; then
    log info "I'm already master, nothing to be done"
    return
  fi

  if [ "$(repl_info master_link_status)" = 'up' ]; then
    log info "Current master is up, waiting $FAILOVER_WAIT seconds for failover"
    sleep "$FAILOVER_WAIT"
    waited=yes
  fi

  if is_master; then
    log info "Failover succeeded, I'm master now"
  elif [ "$waited" = yes ]; then
    log warn 'Failover failed, executing REPLICAOF NO ONE'
    redis_cmd REPLICAOF NO ONE
  else
    log warn 'Master link is not up, executing REPLICAOF NO ONE'
    redis_cmd REPLICAOF NO ONE
  fi
}
# Makes this node a replica of another host.
#   $1 - host name of the node that should be the master
#
# If this node is master and has at least one connected replica, it first
# tries a coordinated hand-over with FAILOVER TIMEOUT $FAILOVER_TIMEOUT and
# sleeps $FAILOVER_WAIT seconds. If the node is still master after that, or
# it has no connected replica, it falls back to REPLICAOF <host> $REDIS_PORT.
#
# Returns the status of the last command executed.
master_to_replica() {
  local master_host="$1"
  local replicas

  if ! is_master; then
    log info "I'm already replica, nothing to be done"
    return
  fi

  # Treat a missing or non-numeric count as "no replicas" so the numeric test
  # below never prints an "integer expression expected" error; the flow is the
  # same as before (straight to REPLICAOF), just without the noise.
  replicas="$(repl_info connected_slaves)" || replicas=''
  case "$replicas" in
    '' | *[!0-9]*) replicas=0 ;;
  esac

  if [ "$replicas" -gt 0 ]; then
    log info 'Starting failover'
    # Only wait when the FAILOVER command itself was accepted.
    if redis_cmd FAILOVER TIMEOUT "$FAILOVER_TIMEOUT"; then
      log info "Waiting $FAILOVER_WAIT seconds"
      sleep "$FAILOVER_WAIT"
    fi

    if is_master; then
      log warn 'Failover failed'
    else
      log info "Failover succeeded, I'm replica now"
      return
    fi
  fi

  log info "Executing REPLICAOF $master_host $REDIS_PORT"
  redis_cmd REPLICAOF "$master_host" "$REDIS_PORT"
}
# Entry point. The state to act on is read from the third argument; the first
# two arguments are not used by this script.
if [ $# -lt 3 ]; then
  log err "Expected 3 arguments, got $#"
  exit 1
fi

# Do nothing at all when Valkey cannot be reached.
redis_cmd PING || exit 1

# States other than MASTER, BACKUP and FAULT are deliberately ignored.
case "$3" in
  MASTER)
    replica_to_master
    ;;
  BACKUP | FAULT)
    hostname="$(hostname -s)"
    domain="$(hostname -d)"

    # The peer is derived from the naming convention <name>-01 / <name>-02.
    case "$hostname" in
      *-01) other_host="${hostname%-*}-02" ;;
      *-02) other_host="${hostname%-*}-01" ;;
      *)
        # Without this arm other_host would stay unset and the script would
        # die under "set -u" with an unlogged "unbound variable" error.
        log err "Cannot derive peer host from hostname '$hostname' (expected suffix -01 or -02)"
        exit 1
        ;;
    esac

    # Append the domain only when there is one, so an empty domain does not
    # produce a peer name with a trailing dot.
    if [ -n "$domain" ]; then
      other_host="$other_host.$domain"
    fi

    master_to_replica "$other_host"
    ;;
esac
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment