#!/bin/sh
#
# Local Checkmk (checkmk.com) script to raise the alarm about problems with
# continual-sync sections.
#
# Place this in your Checkmk local library directory and make it executable. 
# An example filename would be: /usr/lib/check_mk_agent/local/continual-sync
#
# Note that this won't handle status files whose filenames contain non-ASCII
# or non-printable characters.

# Optional settings file.  It is sourced after the defaults below have been
# assigned, so anything it sets replaces the corresponding default.
csConfigFile="/etc/checkmk-continual-sync.cf"

# Alert thresholds - adjust these by hand when installing.  Giving one of
# them an empty value turns off the check which uses it.
csMaxFullSyncFailures='2'	# full sync failures in a row before alerting
csMaxPartialSyncFailures='4'	# partial sync failures in a row before alerting
csFullOverdueMultiplier='2'	# unsynced full sync intervals before alerting
csPartialOverdueMultiplier='10'	# unsynced partial sync intervals before alerting

# Service name under which the result is reported to Checkmk.
service="continual-sync"

# Location of the section list file that continual-sync writes.
sectionListFile="/var/run/continual-sync/sections.json"

# Source the settings file only when it is both readable and non-empty.
if [ -r "${csConfigFile}" ] && [ -s "${csConfigFile}" ]; then
	. "${csConfigFile}"
fi

# Without a readable section list file there is nothing to inspect, so
# report status 3 and stop here.  The script still exits 0 after printing
# the result line.
test -r "${sectionListFile}" || {
	printf '%d %s - %s\n' 3 "${service}" 'cannot read the section list file'
	exit 0
}

# Print the string value of the JSON key named $1, reading one section's
# details from standard input.  Everything between the value's quotation
# marks is printed, so section names and status file paths may contain
# spaces.  Escaped quotation marks inside the value are not supported.
sectionString () {
	sed -n 's/^[[:space:]]*"'"${1}"'":[[:space:]]*"\([^"]*\)".*$/\1/p'
}

# Print the digits of the value of the JSON key named $1, reading one
# section's details from standard input.  Prints nothing if the key is
# absent.
sectionNumber () {
	awk -v key="\"${1}\":" '$1 == key { print $2 }' | tr -dc '0-9'
}

# Examine every section listed in ${sectionListFile} and leave the overall
# result in the global variables statusCode, statusMessage and extraInfo.
#
# Reads the globals sectionListFile, csMaxFullSyncFailures,
# csMaxPartialSyncFailures, csFullOverdueMultiplier and
# csPartialOverdueMultiplier; an empty threshold disables its check.
#
# Full sync problems are errors (2) and partial sync problems are warnings
# (1).  A warning never stops the scan, so that an error in a later section
# is still found and reported in preference to the warning; the scan only
# stops early once an error has been recorded, since nothing outranks it.
checkSections () {
	statusCode='0'		# 0=OK, 1=warning, 2=error, 3=problem with the check
	statusMessage='OK'
	extraInfo=''

	# Find the starting line numbers of each section's details.
	sectionStartLines="$(sed -n '/^[[:space:]]*{/=' "${sectionListFile}" 2>/dev/null)"

	now="$(date '+%s')"

	# Deliberately unquoted: the value is a whitespace-separated list of
	# line numbers, which must be split into words.
	for lineNo in ${sectionStartLines}; do
		# A section's details run from its opening brace line to the
		# next line starting with a closing brace.
		sectionDetails="$(sed -n "${lineNo},/^[[:space:]]*}/p" "${sectionListFile}" 2>/dev/null)"
		test -n "${sectionDetails}" || continue

		sectionName="$(printf '%s\n' "${sectionDetails}" | sectionString 'name')"
		test -n "${sectionName}" || continue

		statusFile="$(printf '%s\n' "${sectionDetails}" | sectionString 'statusFile')"
		test -n "${statusFile}" || continue

		# Error: too many full sync failures in a row.  A missing count
		# (including an unreadable status file) is treated as zero.
		if test -n "${csMaxFullSyncFailures}" && test "${statusCode}" -lt 2; then
			fullSyncFailures="$(sed -n 's/^full sync failures * : //p' "${statusFile}" 2>/dev/null | tr -dc '0-9')"
			test -n "${fullSyncFailures}" || fullSyncFailures=0
			if test "${fullSyncFailures}" -gt "${csMaxFullSyncFailures}"; then
				statusCode='2'
				statusMessage='Error: '
				extraInfo="Sync set ${sectionName} - full sync failing"
			fi
		fi

		# Warning: too many partial sync failures in a row.  Only
		# considered while nothing has been flagged yet.
		if test -n "${csMaxPartialSyncFailures}" && test "${statusCode}" -lt 1; then
			partialSyncFailures="$(sed -n 's/^partial sync failures * : //p' "${statusFile}" 2>/dev/null | tr -dc '0-9')"
			test -n "${partialSyncFailures}" || partialSyncFailures=0
			if test "${partialSyncFailures}" -gt "${csMaxPartialSyncFailures}"; then
				statusCode='1'
				statusMessage='Warning: '
				extraInfo="Sync set ${sectionName} - partial sync failing"
			fi
		fi

		# Error: the last full sync is older than the permitted number
		# of full sync intervals.  Skipped when the interval is zero or
		# no last-sync timestamp is recorded in the status file.
		if test -n "${csFullOverdueMultiplier}" && test "${statusCode}" -lt 2; then
			fullSyncInterval="$(printf '%s\n' "${sectionDetails}" | sectionNumber 'fullSyncInterval')"
			test -n "${fullSyncInterval}" || fullSyncInterval='0'
			lastFullSync="$(sed -n 's/^last full sync   * : .* .@//p' "${statusFile}" 2>/dev/null | tr -dc '0-9')"
			timeSince=''
			test -n "${lastFullSync}" && timeSince=$((now-lastFullSync))
			if test "${fullSyncInterval}" -gt 0 && test -n "${timeSince}" && test "${timeSince}" -gt $((fullSyncInterval*csFullOverdueMultiplier)); then
				statusCode='2'
				statusMessage='Error: '
				extraInfo="Sync set ${sectionName} - full sync overdue"
			fi
		fi

		# Warning: the last partial sync is older than the permitted
		# number of partial sync intervals.  This must not end the scan,
		# or an error in a later section would go unreported.
		if test -n "${csPartialOverdueMultiplier}" && test "${statusCode}" -lt 1; then
			partialSyncInterval="$(printf '%s\n' "${sectionDetails}" | sectionNumber 'partialSyncInterval')"
			test -n "${partialSyncInterval}" || partialSyncInterval='0'
			lastPartialSync="$(sed -n 's/^last partial sync   * : .* .@//p' "${statusFile}" 2>/dev/null | tr -dc '0-9')"
			timeSince=''
			test -n "${lastPartialSync}" && timeSince=$((now-lastPartialSync))
			if test "${partialSyncInterval}" -gt 0 && test -n "${timeSince}" && test "${timeSince}" -gt $((partialSyncInterval*csPartialOverdueMultiplier)); then
				statusCode='1'
				statusMessage='Warning: '
				extraInfo="Sync set ${sectionName} - partial sync overdue"
			fi
		fi

		# An error is the worst result this loop can produce, so the
		# remaining sections cannot change the outcome.
		if test "${statusCode}" -ge 2; then
			break
		fi
	done
}

checkSections

# Emit the single result line: status code, service name, a "-" placeholder
# field, then the status message with any extra detail appended.  The status
# code is quoted so that an empty value cannot shift the remaining arguments
# into the wrong format fields.
printf '%d %s - %s%s\n' "${statusCode}" "${service}" "${statusMessage}" "${extraInfo}"

# Always exit successfully; the outcome is conveyed by the line printed above.
exit 0
