Controlar la temperatura de la Raspberry con Nagios + Datos de rendimiento para PNP4Nagios

Fuente: https://github.com/larsen0815/check_rasp_temp

Aclaración: la necesidad de privilegios de administrador para ejecutar el script depende de la distribución utilizada; en Arch Linux no son necesarios.

Código fuente de check_rasp_temp (/usr/local/nagios/libexec/).

check_rasp_temp
#!/bin/bash
# Adapted from check_nagios_latency
 
# Path of the vcgencmd binary; the script runs it with the "measure_temp"
# argument to obtain the temperature reading.
VCGENCMD="/opt/vc/bin/vcgencmd"
 
# Show the command-line synopsis and terminate the script with status 3.
# Arguments: $1 - optional error text; when non-empty it is written to
#                 stderr as "Error: <text>" before the synopsis
# Outputs:   the synopsis on stdout
usage() {
	local problem=$1

	if [[ -n "$problem" ]]; then
		printf 'Error: %s\n' "$problem" 1>&2
	fi

	# One argument per output line; the empty strings are the blank lines
	# that frame the synopsis.
	printf '%s\n' \
		"" \
		"Usage: check_rasp_temp [-h?] -w warning -c critical" \
		"" \
		"   -w         warning threshold" \
		"   -c         critical threshold" \
		"   -h, -?     this help message" \
		""

	exit 3
}
 
 
# Verifies that a required program can be found; reports UNKNOWN otherwise.
# Arguments: $1 - name of the program to look up
# Outputs:   "TEMPERATURE UNKNOWN - cannot find <name>" on stdout when the
#            lookup fails
# Returns:   0 when the program is found; otherwise exits the script with
#            status 3
check_prog() {
	# "command -v" is a shell builtin, so the lookup does not depend on the
	# external "which" utility being installed. No global scratch variable
	# is involved, so a PROG value inherited from the environment cannot
	# make a lookup succeed by accident.
	if ! command -v -- "$1" >/dev/null 2>&1 ; then
		echo "TEMPERATURE UNKNOWN - cannot find $1"
		exit 3
	fi
}
 
 
# Main
# Make sure the external tools used further down are available; check_prog
# ends the script when one of them cannot be found.
for required_tool in awk bc ; do
	check_prog "$required_tool"
done
 
 
# Parse the command line: -w and -c carry the threshold values; -h, -? and
# anything getopts rejects print the usage text and stop with status 3.
while getopts "h?c:w:" flag
do
	case "$flag" in
		w)
			WARNING=$OPTARG
			;;
		c)
			CRITICAL=$OPTARG
			;;
		h|\?)
			usage
			exit 3
			;;
	esac
done
# Drop the options consumed above from the positional parameters.
shift "$((OPTIND - 1))"
 
 
# Both thresholds are mandatory; usage prints the reason and ends the script.
if [ -z "${WARNING}" ] ; then
	usage "No warning threshold specified"
fi
if [ -z "${CRITICAL}" ] ; then
	usage "No critical threshold specified"
fi

# Thresholds must be plain non-negative decimals such as 45 or 45.5.
# The match is done inside the shell with [[ =~ ]] so the value is never
# word-split or glob-expanded on its way to the check (an unquoted
# "echo $WARNING" would expand a value like "*" to file names).
number_re='^[0-9]+(\.[0-9]+)?$'
if [[ ! "$WARNING" =~ $number_re ]] ; then
	echo "TEMPERATURE UNKNOWN - Wrong number: $WARNING"
	exit 3
fi
if [[ ! "$CRITICAL" =~ $number_re ]] ; then
	echo "TEMPERATURE UNKNOWN - Wrong number: $CRITICAL"
	exit 3
fi
 
 
# Perform the checks
# Read the temperature: keep the text after the first "=" and before the
# following "'" of the vcgencmd output (presumably of the form temp=36.9'C).
# $VCGENCMD is intentionally left unquoted so that a value carrying a command
# prefix in front of the binary path keeps working.
# shellcheck disable=SC2086
TEMP=$($VCGENCMD measure_temp | awk -F"=" '{print $2}' | awk -F"'" '{print $1}')

# A missing or failing vcgencmd leaves TEMP empty or non-numeric. Without this
# guard the bc comparisons below would fail and the script would fall through
# to "TEMPERATURE OK" with exit status 0, so report UNKNOWN instead.
temp_re='^-?[0-9]+(\.[0-9]+)?$'
if [[ ! "$TEMP" =~ $temp_re ]] ; then
	echo "TEMPERATURE UNKNOWN - cannot read temperature from $VCGENCMD"
	exit 3
fi

# Performance data: label=value;warning;critical;min;max (min and max empty).
PERF="temp=${TEMP};${WARNING};${CRITICAL};;"

# bc does the comparison because the values may carry a decimal fraction,
# which the shell's integer tests cannot handle. It prints 1 when the
# threshold has been reached and 0 otherwise.
COMPARISON=$(echo "if($TEMP>=$CRITICAL) 1 else 0;" | bc)
if [ "$COMPARISON" = "1" ] ; then
	echo "TEMPERATURE CRITICAL: $TEMP | $PERF"
	exit 2
fi

COMPARISON=$(echo "if($TEMP>=$WARNING) 1 else 0;" | bc)
if [ "$COMPARISON" = "1" ] ; then
	echo "TEMPERATURE WARNING: $TEMP | $PERF"
	exit 1
fi

echo "TEMPERATURE OK: $TEMP | $PERF"
exit 0

Ejemplo de salida de check_rasp_temp.

/usr/local/nagios/libexec/check_rasp_temp -w 45 -c 50
TEMPERATURE OK: 36.9 | temp=36.9;45;50;;

NRPE Host (/usr/local/nagios/etc/nrpe.cfg)

# NRPE command "check_temp_rasp": runs check_rasp_temp with a warning
# threshold of 45 and a critical threshold of 50.
command[check_temp_rasp]=/usr/local/nagios/libexec/check_rasp_temp -w 45 -c 50

Nagios Server

  # Service "CPU Temp": its check command calls check_nrpe with the NRPE
  # command name check_temp_rasp.
  # NOTE(review): XXXXX looks like a placeholder for the monitored host's
  # name - replace it with the real host_name.
  define service{
   use                           local-service
   host_name                     XXXXX
   service_description           CPU Temp
   check_command                 check_nrpe!check_temp_rasp
   }