Nagios 检测 CPU 的插件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/bin/bash
# If some commands cannot be found, $PATH can be defined here.

##################################################################################################################
# Author          : Witee
# Last modified   : 2013-6-28
# Version         : 1.0
# Contact         : http://www.weibo.com/witee
# Purpose / usage : Run the script directly; it takes no arguments. It samples the CPU statistics
#                   with the iostat command and reports them to Nagios, with performance data
#                   (the part after '|') for the Nagios add-on pnp.
# Output          : "<STATE> : User=N% System=N% Iowait=N% Idle=N% | CPU_Used=N%"
# Exit status     : 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
##################################################################################################################

# Nagios plugin exit states.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
# shellcheck disable=SC2034  # kept for completeness; this script never returns it
STATE_DEPENDENT=4

# Alert thresholds on CPU usage, as integer percentages (do not append '%').
critical=90
warning=80

# Status labels printed at the start of the plugin output.
stat_ok="OK"
stat_warning="WARNING"
stat_critical="CRITICAL"

# Print an error line on stdout (where Nagios reads it) and exit UNKNOWN.
exit_unknown() {
    echo "$1"
    exit "$STATE_UNKNOWN"
}

# Tell "iostat is not installed" apart from "iostat ran but failed".
if ! command -v iostat >/dev/null 2>&1; then
    exit_unknown "Error : Can't find the command 'iostat' ."
fi

# Take 6 one-second CPU reports. The output is kept in memory rather than in a
# fixed-name file under /tmp, so concurrent checks cannot clobber each other
# and nothing is left behind on failure. LC_ALL=C pins '.' as decimal separator.
if ! iostat_output=$(LC_ALL=C iostat -c 1 6); then
    exit_unknown "Error : iostat failed to report CPU statistics ."
fi

# Average the reports in a single pass. Columns of a data row are:
#   %user %nice %system %iowait %steal %idle
# The first two remaining lines are dropped: the banner line and the first
# report, which covers the time since boot rather than the sampling interval.
# The sum is divided by the number of rows actually seen, not a fixed 5.
if ! averages=$(LC_ALL=C awk '
    /%/ || NF == 0 { next }            # column-header lines and blank lines
    ++row <= 2     { next }            # banner line, then the since-boot report
    NF < 6         { bad = 1; exit }   # truncated row: refuse to guess
    { user += $1; sys += $3; iowait += $4; idle += $6; samples++ }
    END {
        if (bad || samples == 0) exit 1
        printf "%.0f %.0f %.0f %.0f\n", user / samples, sys / samples, iowait / samples, idle / samples
    }' <<<"$iostat_output"); then
    exit_unknown "Error : Can't parse the output of 'iostat' ."
fi

read -r float_user float_system float_iowait float_idle <<<"$averages"

# CPU usage is everything that is not idle. Validate before doing arithmetic so
# that a malformed value yields UNKNOWN instead of a shell error.
if [[ ! "$float_idle" =~ ^[0-9]+$ ]]; then
    exit_unknown "Error : used_idle out of the correct range ."
fi
used_idle=$(( 100 - float_idle ))
if (( used_idle < 0 || used_idle > 100 )); then
    exit_unknown "Error : used_idle out of the correct range ."
fi

# Human-readable text, then '|' and the performance data.
stat_info="User=${float_user}% System=${float_system}% Iowait=${float_iowait}% Idle=${float_idle}% | CPU_Used=${used_idle}%"

# Map the usage onto a Nagios state; used_idle is a validated integer here, so
# the three branches are exhaustive.
if (( used_idle < warning )); then
    echo "${stat_ok} : ${stat_info}"
    exit "$STATE_OK"
elif (( used_idle < critical )); then
    echo "${stat_warning} : ${stat_info}"
    exit "$STATE_WARNING"
else
    echo "${stat_critical} : ${stat_info}"
    exit "$STATE_CRITICAL"
fi

编程技巧