2024年2月7日发(作者:)
完整安装nagios+cacti+短信和邮件报警
一:安装nagios中用到的其他软件和库文件
yum -y install httpd gcc glibc glibc-common gd gd-devel php php-mysql mysql mysql-server mysql-devel openssl-devel
二:安装和配置nagios3.4.1监控主程序
2.1建立用户和组
groupadd nagcmd
useradd -m nagios
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache
2.2下载最新版nagios3.4.1
mkdir downloads
wget/sourceforge/nagios/
2.3 编译安装
cd downloads
tar -zxvf nagios-3.4.1.tar.gz
cd nagios
默认安装至/usr/local/nagios/
./configure --with-command-group=nagcmd --enable-event-broker
make all && make install && make install-init && make install-config && make install-commandmode && make install-webconf
WEB界面的用户名和密码
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
输入密码并确认
service httpd restart
chkconfig httpd on
三:编译安装nagios-plugins-1.4.16
3.1下载最新版本nagios-plugins-1.4.16
cd downloads
wget /sourceforge/nagiosplug/
tar -zxvf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --with-nagios-user=nagios --with-nagios-group=nagcmd --with-mysql
make && make install
chkconfig --add nagios
chkconfig nagios on
vim /etc/profile
加入PATH=$PATH:/usr/local/nagios/bin
检查nagios语法是否正确
nagios -v /usr/local/nagios/etc/nagios.cfg
service nagios start
3.2 关闭selinux 及修改iptables或关闭iptables
关闭selinux的方法
vi /etc/sysconfig/selinux,将其中的 SELINUX 一项修改为(重启后生效,可先执行 setenforce 0 临时关闭):
SELINUX=disabled
iptables建议开启,然后允许需要开启的端口,一般nagios需要的端口是80和5666(nrpe)
vi /etc/sysconfig/iptables
service httpd restart 此时已经能从浏览器访问****/nagios默认用户名nagiosadmin 密码是之前输入过的。
四:监控nagios本机
创建fund123目录,用于存放所有相关配置文件
cd /usr/local/nagios/etc/
mkdir fund123
cd /usr/local/nagios/etc/objects
cp -v -R * /usr/local/nagios/etc/fund123/
文件和目录修改下权限
chown -R nagios:nagios /usr/local/nagios/etc/fund123
修改主配置文件,修改之前最好先备份下
vi /usr/local/nagios/etc/nagios.cfg
修改成
cfg_file=/usr/local/nagios/etc/fund123/commands.cfg
cfg_file=/usr/local/nagios/etc/fund123/contacts.cfg
cfg_file=/usr/local/nagios/etc/fund123/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/fund123/templates.cfg
cfg_file=/usr/local/nagios/etc/fund123/hostgroups.cfg
cfg_file=/usr/local/nagios/etc/fund123/servicegroups.cfg
cfg_file=/usr/local/nagios/etc/fund123/contactgroups.cfg
cfg_dir=/usr/local/nagios/etc/fund123/servers/
cfg_dir=/usr/local/nagios/etc/fund123/services/
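注释掉#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
先创建上面 cfg_dir 指向的目录(目录不存在时配置检查会报错),再复制本机的监控配置:
mkdir -p /usr/local/nagios/etc/fund123/servers /usr/local/nagios/etc/fund123/services
cp /usr/local/nagios/etc/objects/localhost.cfg /usr/local/nagios/etc/fund123/servers/
vi /usr/local/nagios/etc/fund123/servers/localhost.cfg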
# Define a host for the local machine
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name local-nagios
alias nagios+cacti
address 127.0.0.1
}
注释掉组的定义,后续会在hostgroups里定义
#define hostgroup{
# hostgroup_name linux-servers ; The name of the hostgroup
# alias Linux Servers ; Long name of the group
# members localhost ; Comma separated list of hosts that belong to this group
# }
通过PING检测主机是否存活
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description Root Partition
check_command check_local_disk!20%!10%!/
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description Current Users
check_command check_local_users!20!50
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 processes.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description Total Processes
check_command check_local_procs!250!400!RSZDT
}
# Define a service to check the load on the local machine.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description Current Load
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description Swap Usage
check_command check_local_swap!20!10
}
# Define a service to check SSH on the local machine.
# Disable notifications for this service by default, as not all users may have SSH enabled.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description SSH
check_command check_ssh
notifications_enabled 0
}
# Define a service to check HTTP on the local machine.
# Disable notifications for this service by default, as not all users may have HTTP enabled.
define service{
use local-service ; Name of service template to use
host_name local-nagios
service_description HTTP
check_command check_http
notifications_enabled 0
}
wq保存退出
定义主机组文件/usr/local/nagios/etc/fund123/hostgroups.cfg
vi /usr/local/nagios/etc/fund123/hostgroups.cfg
# Nagios Servers Group
define hostgroup{
hostgroup_name NagiosMonitor ;定义的组名,我这里定义的是Nagios监控的所有主机成员,以后有新的主机监控进来都会在这个组里显示
alias Monitor all servers ;别名
members * ;主机组的成员主机,星号<*>代表所有主机,有多个组成员可以用逗号隔开
}
# Linux System Group
define hostgroup{
hostgroup_name Linux-System ;定义的主机组名称,我这里定义的是Linux系统组
alias linux system
members local-nagios ;这里的组成员是定义的“host_name”这个的值
}
# Windows System Group
#define hostgroup{
# hostgroup_name Windows-System ;这里我定义的是Windows系统组,现在还没有主机成员,所以先注释掉,等有成员机了再添加进来
# alias windows system
# members
# }
定义服务组文件/usr/local/nagios/etc/fund123/servicegroups.cfg
vi /usr/local/nagios/etc/fund123/servicegroups.cfg
# Define an servicegroup for "Ping"
define servicegroup{
servicegroup_name Network
alias network connectivity
members local-nagios,PING ; 组成员的设置,主机名,服务名的方式表示,中间用逗号分隔
}
定义联系人组文件/usr/local/nagios/etc/fund123/contactgroups.cfg
vi /usr/local/nagios/etc/fund123/contactgroups.cfg
#contact group
define contactgroup{
contactgroup_name sagroup ;定义联系人组的名字
alias Nagios Administrators ;别名
members nagiosadmin ;联系人组成员
}
保险起见再赋予下fund123 文件权限
chown -R nagios:nagios fund123
检查配置是否有误
cd /usr/local/nagios/bin/
nagios -v ../etc/nagios.cfg
重启nagios 重启http
/etc/init.d/nagios reload
service httpd restart
service nagios restart
五.nagios监控windows客户端
1 下载 NSClient++ 最新版本,网址是
http://www.nsclient.org/nscp/downloads
2安装
选择第一个
这里第一项填nagios服务器的地址,第二项密码不用填,其余都勾上。
3,安装完毕后修改系统服务—登陆—允许服务与桌面交互。
4,配置nagios主控端,增加对192.168.20.12这台机器的监控。
cp /usr/local/nagios/etc/fund123/windows.cfg /usr/local/nagios/etc/fund123/servers/
vi /usr/local/nagios/etc/fund123/servers/windows.cfg
define host{
use windows-server ; Inherit default values from a template
host_name XS-20.12 ; The name we're giving to this host
alias XS-20.12 ; A longer name associated with the host
address 192.168.20.12 ; IP address of the host
}
#define hostgroup{
# hostgroup_name windows-servers ; The name of the hostgroup
# alias Windows Servers ; Long name of the group
# }
这里注释掉hostgroup的定义,因为我们都统一在hostgroup文件去定义。其余都采用默认,注意把host name这项的内容都改成XS-20.12。
注意:这里的继承模板“windows-server”中有定义hostgroups,所以要到 templates.cfg 中把这一行注释掉,不然验证配置文件时会报错。
cd /usr/local/nagios/etc/fund123
[root@nagios230 fund123]# vi templates.cfg
# hostgroups windows-servers ;
配置修改完成后都需要验证下配置是否正确,然后重启nagios和apache
cd /usr/local/nagios/bin/
nagios -v ../etc/nagios.cfg
/etc/init.d/nagios reload
service httpd restart
service nagios restart
至此windows监控已完毕。
六.配置sendemail 来发送nagios报警信息
下载sendemail程序
cd downloads/
wget /menu/Software/SendEmail/
tar -zxvf sendEmail-v1.56.tar.gz && cd sendEmail-v1.56
cp sendEmail /usr/local/bin
chmod 0755 /usr/local/bin/sendEmail
2测试发送邮件
用nagios1的邮箱给weihj发一封邮件主题是nagios测试,内容是nagios test 测试
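(以下命令中的 example.com、smtp.example.com 和“邮箱密码”均为占位符,请替换为实际的邮箱域名、SMTP服务器和密码;整条命令需写在同一行)
sendEmail -f nagios1@example.com -t tanqingsong520@example.com -s smtp.example.com -xu nagios1@example.com -xp 邮箱密码 -u "nagios测试" -m "nagios test 测试"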
默认可能无法显示中文,可以做如下修改
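(同样,example.com、smtp.example.com 和“邮箱密码”为占位符,整条命令需写在同一行)
sendEmail -f nagios1@example.com -t tanqingsong520@example.com -s smtp.example.com -xu nagios1@example.com -xp 邮箱密码 -u "nagios测试" -m "nagios test 测试" -o message-content-type=html -o message-charset=utf8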
这样说明已经能发送邮件了,接下来就是配置到nagios里即可。
3 配置 commands.cfg 中的邮件通知
cd /usr/local/nagios/etc/fund123/
vi commands.cfg
# 注意:command_line 必须写在同一行;example.com / smtp.example.com 为占位符,请替换为实际的邮箱域名和SMTP服务器,####### 处填写邮箱密码
define command {
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/bin/sendEmail -f nagios@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$ is $HOSTSTATE$ **" -xu nagios@example.com -xp ####### -o message-charset=utf8
}
# 注意:command_line 必须写在同一行;example.com / smtp.example.com 为占位符,请替换为实际的邮箱域名和SMTP服务器,####### 处填写邮箱密码
define command {
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n$SERVICEOUTPUT$\n" | /usr/local/bin/sendEmail -f nagios1@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" -xu nagios1@example.com -xp ####### -o message-charset=utf8
}
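# 注意:下面这段与前面的 notify-host-by-email 定义重复,同名命令在配置中只能保留一份,否则配置检查会报错
# command_line 必须写在同一行;example.com / smtp.example.com 为占位符,####### 处填写邮箱密码
define command {
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/bin/sendEmail -f nagios@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$ is $HOSTSTATE$ **" -xu nagios@example.com -xp ####### -o message-charset=utf8
}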
监控linux客户端:
1、服务端安装nrpe 确认安装了apache,gcc,glibc,gd库
#tar zxvf nrpe-2.13.tar.gz
#cd nrpe-2.13
#./configure --enable-ssl --with-ssl-lib=/lib/ 前提是安装了openssl与openssl-devel
#rpm -qa | grep ssl 保证:能够抓到 前两个
5
5
5
#make all
#make install-plugin
2、编辑 commands.cfg,添加 check_nrpe 命令的定义
#vim /usr/local/nagios/etc/fund123/commands.cfg
//添加如下几行使支持check_nrpe
#check_nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3、新建设置要监控的内容
cp/usr/local/nagios/etc/fund123/servers/
/usr/local/nagios/etc/fund123/
#vim /usr/local/nagios/etc/fund123/servers/
define host{
use linux-server
host_name redhat ; 需要修改为工作站的主机名
alias mylinux
address 192.168.20.44 ; 工作站主机IP
}
define service{
use generic-service
host_name redhat
service_description check-swap ; 对下面操作的描述
check_command check_nrpe!check_swap ; 执行检测交换分区命令,监控swap
}
define service{
use generic-service
host_name redhat
service_description check-load
check_command check_nrpe!check_load ; 监控负载
}
define service{
use generic-service
host_name redhat
service_description check-disk
check_command check_nrpe!check_hda1 ; 监控硬盘(命令名须与工作站 nrpe.cfg 中的 command[check_hda1] 一致)
}
define service{
use generic-service
host_name redhat
service_description check-users
check_command check_nrpe!check_users ; 监控用户(使用)情况
}
define service{
use generic-service
host_name redhat
service_description total_procs
check_command check_nrpe!check_total_procs ; 监控进程
}
//对以上的文件编辑如果有错,将会导致nagios无法启动;
//修改工作站主机的主机名为redhat,保证能够#ping redhat 通信正常,可以选择
#echo "192.168.20.44 redhat " >>/etc/hosts
#echo "192.168.20.230 nagios " >>/etc/hosts
//在服务器端
#echo "192.168.20.230 nagios " >> /etc/hosts
#echo "192.168.20.44 redhat" >> /etc/hosts
检查nagios的配置文件的正确性
#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
//如果上面有错误,请检查系统服务时间是否同步,新建的配置文件是否有语法错误,
//以及 commands.cfg 中是否已添加对 check_nrpe 的定义等
#service nagios restart 重启nagios服务
3、配置工作站上的nagios-plugin/nrpe等
1、安装nagios-plugin
#useradd nagios
#passwd nagios
#tar -zxvf nagios-plugins-1.4.16.tar.gz
#cd nagios-plugins-1.4.16
#./configure --with-nagios-user=nagios --with-nagios-group=nagios
#make
#make install
2、改变目录权限使nagios用户能够访问
#chown -R nagios:nagios /usr/local/nagios
3、安装nrpe服务使之能够通信
#tar zxvf nrpe-2.13.tar.gz
#cd nrpe-2.13
#./configure --enable-ssl --with-ssl-lib=/usr/lib/ 前提是安装了openssl与openssl-devel
#make all
#make install-plugin
#make install-daemon
#make install-daemon-config
4、配置nrpe信息
#vim /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.20.230,127.0.0.1 //允许192.168.20.230服务器端对其监控
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
//以守护进程方式启动nrpe(配置文件有误时将无法启动)
#netstat -an | grep 5666 //是否监听5666用于nrpe通信的端口
//在服务端执行检测工作站nrpe信息
#/usr/local/nagios/libexec/check_nrpe -H 192.168.20.44
NRPE v2.13
//在工作站执行检测自己的nrpe信息
#/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.13
配置完成
5、配置工作站上的被检测项信息(修改后需重启nrpe使其生效)
#vim /usr/local/nagios/etc/nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%


发布评论