A-A+
Nagios利用nrpe插件监控远程linux主机
一、安装编译环境和ssl(被监控机)
1.1、yum install make gcc gcc-c++ xinetd openssl openssl-devel -y
1.2、添加用户名和用户组
/usr/sbin/useradd nagios
/usr/sbin/groupadd nagcmd
/usr/sbin/usermod -G nagcmd nagios
二、安装nagios插件(被监控机)
tar -zxvf nagios-plugins-1.4.11.tar.gz -C /usr/local
cd /usr/local/nagios-plugins-1.4.11/
./configure
make
make install
三、安装nrpe插件(被监控机)
tar -zxvf nrpe-2.12.tar.gz -C /usr/local/
cd /usr/local/nrpe-2.12/
./configure
make all
make install
make install-plugin
make install-daemon # 安装守护进程
make install-daemon-config # 安装配置文件
make install-xinetd # 安装xinetd脚本
四、在监控服务器上添加需要监控的对象(监控主机)
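假设需要对IP为$otherhost、所属监控组为$nagiosgroup的主机进行监控(下文用到的check_nrpe命令需事先在监控主机的commands.cfg中定义,例如 command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$)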
vim /usr/local/nagios/etc/objects/"$nagiosgroup".cfg
######### HOST DEFINITION#####################
# Define a host for the local machine
define host{
use linux-server
host_name $otherhost-$nagiosgroup ; 被监控主机的名称
alias $otherhost-$nagiosgroup ; 别名
address $otherhost ; 被监控主机的IP地址
}
########### HOST GROUP DEFINITION#######################
# Define an optional hostgroup for Linux machines
define hostgroup{
hostgroup_name $nagiosgroup ; 主机组名称
alias $nagiosgroup ; 别名
members $otherhost-$nagiosgroup ; 组的成员主机,多个主机以逗号相隔,必须是上面定义的
}
# Define a service to "ping" the local machine
define service{
use local-service ; use表示引用,也就是将服务模板local-service的所有属性继承到本服务中来
host_name $otherhost-$nagiosgroup ; 被监控的主机,必须是上面定义过的
service_description PING ; 这个监控项目的描述,显示在web页面中
check_command check_ping!100.0,20%!500.0,60% ; 所用的命令,是commands.cfg中定义的
check_period 24x7 ; 监控的时间段,是timeperiods.cfg中定义的
max_check_attempts 3 ; 重试的次数
normal_check_interval 20 ; 循环检查的间隔时间,单位是分钟
retry_check_interval 1 ; 重试检查时间间隔,单位是分钟
notification_options w,u,c,r ; 在监控的结果是w、u、c、r时通知联系人
notification_interval 20 ; 在服务出现异常后,故障一直没有解决,nagios再次对联系人发出通知的时间间隔,单位是分钟
notification_period 24x7 ; 通知的时间段
contacts test_ztc ; 联系人,是contacts.cfg中定义的
}
################ssh################
define service{
use local-service
host_name $otherhost-$nagiosgroup
service_description SSH
check_command check_ssh
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 1
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
#*****http*******
define service{
use local-service ; Name of service template to use
host_name $otherhost-$nagiosgroup
service_description HTTP
check_command check_http
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 1
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
#********cpu load************
define service{
use local-service ; Name of service template to use
host_name $otherhost-$nagiosgroup
service_description Current Load
check_command check_nrpe!check_load
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 1
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
#********swap**********
define service{
use local-service
host_name $otherhost-$nagiosgroup
service_description swap
check_command check_nrpe!check_swap
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 2
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
#**********mem
define service{
use local-service ; Name of service template to use
host_name $otherhost-$nagiosgroup
service_description check mem
check_command check_nrpe!check_mem
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 2
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
#*************/***************
define service{
use local-service
host_name $otherhost-$nagiosgroup
service_description / Partition
check_command check_nrpe!check_root
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 2
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
###########users#####
define service{
use local-service
host_name $otherhost-$nagiosgroup
service_description check users
check_command check_nrpe!check_users
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 2
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
###########proc############
define service{
use local-service
host_name $otherhost-$nagiosgroup
service_description check process
check_command check_nrpe!check_total_procs
check_period 24x7
max_check_attempts 3
normal_check_interval 20
retry_check_interval 2
notification_options w,u,c,r
notification_interval 30
notification_period 24x7
contacts test_ztc
}
五、将"$nagiosgroup".cfg加入nagios主配置文件中(监控主机)
# pwd
/usr/local/nagios/etc
[root@localhost etc]# vi nagios.cfg
# Definitions for monitoring the local (Linux) host
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_file=/usr/local/nagios/etc/objects/"$nagiosgroup".cfg
六、被监控机上添加监控私有服务的command
vi /usr/local/nagios/etc/nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_mem]=/usr/local/nagios/libexec/check_mem.sh -w 6 -c 2
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_data]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /data
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 70% -c 60%
command[check_root]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /
七、配置nrpe (被监控机)
7.1添加nrpe对应端口
vi /etc/services
nrpe 5666/tcp # NRPE
7.2配置nrpe:编辑/etc/xinetd.d/nrpe
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 127.0.0.1 10.1.1.45
}
7.3启动nrpe
service xinetd restart
netstat -anptl | grep 5666
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 21403/xinetd
7.4测试nrpe(在被监控机的/usr/local/nagios/libexec目录下执行)
./check_nrpe -H localhost
NRPE v2.12
7.5监控主机端测试nrpe
./check_nrpe -H $otherhost # $otherhost为被监控机ip
NRPE v2.12
7.6监控主机检查配置
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
7.7监控主机重启服务
service nagios reload
对linux主机的监控部署完毕,如需添加新的监控对象可照此添加。