2024年2月7日发(作者:)

完整安装nagios+cacti+短信和邮件报警

一:安装nagios中用到的其他软件和库文件

yum -y install httpd gcc glibc glibc-common gd gd-devel php php-mysql mysql mysql-server mysql-devel openssl-devel

二:安装和配置nagios3.4.1监控主程序

2.1建立用户和组

groupadd nagcmd

useradd -m nagios

usermod -a -G nagcmd nagios

usermod -a -G nagcmd apache

2.2下载最新版nagios3.4.1

mkdir downloads

wget -P downloads http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.4.1.tar.gz

2.3 编译安装

cd downloads

tar -zxvf nagios-3.4.1.tar.gz

cd nagios

默认安装至/usr/local/nagios/

./configure --with-command-group=nagcmd --enable-event-broker

make all && make install && make install-init && make install-config && make install-commandmode && make install-webconf

WEB界面的用户名和密码

htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin

输入密码并确认

service httpd restart

chkconfig httpd on

三:编译安装nagios-plugins-1.4.16

3.1下载最新版本nagios-plugins-1.4.16

cd downloads

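wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz

tar -zxvf nagios-plugins-1.4.16.tar.gz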

cd nagios-plugins-1.4.16

./configure --with-nagios-user=nagios --with-nagios-group=nagcmd --with-mysql

make && make install

chkconfig --add nagios

chkconfig nagios on

vim /etc/profile

在文件末尾加入 export PATH=$PATH:/usr/local/nagios/bin ,保存后执行 source /etc/profile 使其生效

检查nagios语法是否正确

nagios -v /usr/local/nagios/etc/nagios.cfg

service nagios start

3.2 关闭selinux 及修改iptables或关闭iptables

关闭selinux的方法

vi /etc/sysconfig/selinux ,把其中的SELINUX一行修改为(没有则添加)如下内容,重启系统后生效:

SELINUX=disabled

iptables建议开启,然后允许需要开启的端口,一般nagios需要的端口是80和5666(nrpe)

vi /etc/sysconfig/iptables

service httpd restart

此时已经能从浏览器访问 http://服务器IP/nagios ,默认用户名是nagiosadmin,密码是之前输入过的。

四:监控nagios本机

创建fund123目录,用于存放所有相关配置文件

cd /usr/local/nagios/etc/

mkdir -p fund123/servers fund123/services

cd /usr/local/nagios/etc/objects

cp -v -R * /usr/local/nagios/etc/fund123/

文件和目录修改下权限

chown -R nagios:nagios /usr/local/nagios/etc/fund123

修改主配置文件,修改之前最好先备份下

vi /usr/local/nagios/etc/nagios.cfg

修改成

cfg_file=/usr/local/nagios/etc/fund123/commands.cfg

cfg_file=/usr/local/nagios/etc/fund123/contacts.cfg

cfg_file=/usr/local/nagios/etc/fund123/timeperiods.cfg

cfg_file=/usr/local/nagios/etc/fund123/templates.cfg

cfg_file=/usr/local/nagios/etc/fund123/hostgroups.cfg

cfg_file=/usr/local/nagios/etc/fund123/servicegroups.cfg

cfg_file=/usr/local/nagios/etc/fund123/contactgroups.cfg

cfg_dir=/usr/local/nagios/etc/fund123/servers/

cfg_dir=/usr/local/nagios/etc/fund123/services/

注释掉 #cfg_file=/usr/local/nagios/etc/objects/localhost.cfg

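mkdir -p /usr/local/nagios/etc/fund123/servers

cp /usr/local/nagios/etc/objects/localhost.cfg /usr/local/nagios/etc/fund123/servers/

vi /usr/local/nagios/etc/fund123/servers/localhost.cfg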

# Define a host for the local machine

define host{

use linux-server ; Name of host template to use

; This host definition will inherit all variables that are defined

; in (or inherited by) the linux-server host template definition.

host_name local-nagios

alias nagios+cacti

address 127.0.0.1

}

注释掉组的定义,后续会在hostgroups里定义

#define hostgroup{

# hostgroup_name linux-servers ; The name of the hostgroup

# alias Linux Servers ; Long name of the group

# members localhost ; Comma separated list of hosts that belong to this group

# }

通过PING检测主机是否存活

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description PING

check_command check_ping!100.0,20%!500.0,60%

}

# Define a service to check the disk space of the root partition

# on the local machine. Warning if < 20% free, critical if

# < 10% free space on partition.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description Root Partition

check_command check_local_disk!20%!10%!/

}

# Define a service to check the number of currently logged in

# users on the local machine. Warning if > 20 users, critical

# if > 50 users.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description Current Users

check_command check_local_users!20!50

}

# Define a service to check the number of currently running procs

# on the local machine. Warning if > 250 processes, critical if

# > 400 processes.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description Total Processes

check_command check_local_procs!250!400!RSZDT

}

# Define a service to check the load on the local machine.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description Current Load

check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0

}

# Define a service to check the swap usage the local machine.

# Critical if less than 10% of swap is free, warning if less than 20% is free

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description Swap Usage

check_command check_local_swap!20!10

}

# Define a service to check SSH on the local machine.

# Disable notifications for this service by default, as not all users may have SSH enabled.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description SSH

check_command check_ssh

notifications_enabled 0

}

# Define a service to check HTTP on the local machine.

# Disable notifications for this service by default, as not all users may have HTTP enabled.

define service{

use local-service ; Name of service template to use

host_name local-nagios

service_description HTTP

check_command check_http

notifications_enabled 0

}

wq保存退出

定义主机组文件/usr/local/nagios/etc/fund123/hostgroups.cfg

vi /usr/local/nagios/etc/fund123/hostgroups.cfg

# Nagios Servers Group

define hostgroup{

hostgroup_name NagiosMonitor ;定义的组名,我这里定义的是Nagios监控的所有主机成员,以后有新的主机监控进来都会在这个组里显示

alias Monitor all servers ;别名

members * ;主机组的成员主机,星号<*>代表所有主机,有多个组成员可以用逗号隔开

}

# Linux System Group

define hostgroup{

hostgroup_name Linux-System ;定义的主机组名称,我这里定义的是Linux系统组

alias linux system

members local-nagios ;这里的组成员是定义的“host_name”这个的值

}

# Windows System Group

#define hostgroup{

# hostgroup_name Windows-System ;这里我定义的是Windows系统组,现在还没有主机成员,所以先注释掉,等有成员机了再添加进来

# alias windows system

# members

# }

定义服务组文件/usr/local/nagios/etc/fund123/servicegroups.cfg

vi /usr/local/nagios/etc/fund123/servicegroups.cfg

# Define an servicegroup for "Ping"

define servicegroup{

servicegroup_name Network

alias network connectivity

members local-nagios,PING ; 组成员的设置,主机名,服务名的方式表示,中间用逗号分隔

}

定义联系人组文件/usr/local/nagios/etc/fund123/contactgroups.cfg

vi /usr/local/nagios/etc/fund123/contactgroups.cfg

#contact group

define contactgroup{

contactgroup_name sagroup ;定义联系人组的名字

alias Nagios Administrators ;别名

members nagiosadmin ;联系人组成员

}

保险起见再赋予下fund123 文件权限

chown -R nagios:nagios /usr/local/nagios/etc/fund123

检查配置是否有误

cd /usr/local/nagios/bin/

nagios -v ../etc/nagios.cfg

重启nagios 重启http

/etc/init.d/nagios reload

service httpd restart

service nagios restart

五.nagios监控windows客户端

1下载NSClient++最新版本,网址是

http://www.nsclient.org/nscp/downloads

2安装

选择第一个

这里第一项填nagios服务器的地址,第二项密码不用填,其余都勾上。

3,安装完毕后,在系统服务中打开NSClient++服务的属性,在“登录”选项卡中勾选“允许服务与桌面交互”。

4,配置nagios主控端,增加对192.168.20.12这台机器的监控。

cp /usr/local/nagios/etc/fund123/windows.cfg /usr/local/nagios/etc/fund123/servers/

vi /usr/local/nagios/etc/fund123/servers/windows.cfg

define host{

use windows-server ; Inherit default values from a template

host_name XS-20.12 ; The name we're giving to this host

alias XS-20.12 ; A longer name associated with the host

address 192.168.20.12 ; IP address of the host

}

#define hostgroup{

# hostgroup_name windows-servers ; The name of the hostgroup

# alias Windows Servers ; Long name of the group

# }

这里注释掉hostgroup的定义,因为我们都统一在hostgroup文件去定义。其余都采用默认,注意把host name这项的内容都改成XS-20.12。

注意:这里继承的模板“windows-server”中定义了hostgroups,所以要到templates.cfg中把这一行注释掉,不然验证配置文件时会报错。

cd /usr/local/nagios/etc/fund123

[root@nagios230 fund123]# vi templates.cfg

# hostgroups windows-servers ;

配置修改完成后都需要验证下配置是否正确,然后重启nagios和apache

cd /usr/local/nagios/bin/

nagios -v ../etc/nagios.cfg

/etc/init.d/nagios reload

service httpd restart

service nagios restart

至此windows监控已完毕。

六.配置sendemail 来发送nagios报警信息

下载sendemail程序

cd downloads/

wget http://caspian.dotconf.net/menu/Software/SendEmail/sendEmail-v1.56.tar.gz

tar -zxvf sendEmail-v1.56.tar.gz && cd sendEmail-v1.56

cp sendEmail /usr/local/bin

chmod 0755 /usr/local/bin/sendEmail

2测试发送邮件

用nagios1的邮箱给weihj发一封邮件主题是nagios测试,内容是nagios test 测试

sendEmail -f nagios1@example.com -t tanqingsong520@example.com -s smtp.example.com -xu nagios1@example.com -xp 邮箱密码 -u "nagios测试" -m "nagios test 测试"

(其中的邮箱域名、SMTP服务器地址和密码请换成实际使用的值)

默认可能无法显示中文,可以做如下修改

sendEmail -f nagios1@example.com -t tanqingsong520@example.com -s smtp.example.com -xu nagios1@example.com -xp 邮箱密码 -u "nagios测试" -m "nagios test 测试" -o message-content-type=html -o message-charset=utf8

这样说明已经能发送邮件了,接下来就是配置到nagios里即可。

3配置commands.cfg中的邮件通知

cd /usr/local/nagios/etc/fund123/

vi commands.cfg

define command {

command_name notify-host-by-email

command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/bin/sendEmail -f nagios@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$ is $HOSTSTATE$ **" -xu nagios@example.com -xp ####### -o message-charset=utf8

}

define command {

command_name notify-service-by-email

command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n$SERVICEOUTPUT$\n" | /usr/local/bin/sendEmail -f nagios1@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" -xu nagios1@example.com -xp ####### -o message-charset=utf8

}

# 注意:下面这段与前面的notify-host-by-email是同一个命令的重复定义,nagios不允许同名命令重复定义,实际配置文件中只能保留其中一份。

define command {

command_name notify-host-by-email

command_line /usr/bin/printf "%b" "***** 详细信息如下 *****\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/bin/sendEmail -f nagios@example.com -t $CONTACTEMAIL$ -s smtp.example.com -u "** $HOSTALIAS$ is $HOSTSTATE$ **" -xu nagios@example.com -xp ####### -o message-charset=utf8

}

监控linux客户端:

1、服务端安装nrpe 确认安装了apache,gcc,glibc,gd库

#tar zxvf nrpe-2.13.tar.gz

#cd nrpe-2.13

#./configure --enable-ssl --with-ssl-lib=/lib/ 前提是安装了openssl与openssl-devel

#rpm -qa | grep ssl 保证:能够抓到 前两个

5

5

5

#make all

#make install-plugin

2、vim 编辑commands.cfg文件,定义对check_nrpe的使用

#vim /usr/local/nagios/etc/fund123/commands.cfg

//添加如下几行使其支持check_nrpe

#check_nrpe

define command{

command_name check_nrpe

command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

}

3、新建一个主机配置文件(文件名可自取,这里以redhat.cfg为例),设置要监控的内容

cp /usr/local/nagios/etc/fund123/servers/localhost.cfg /usr/local/nagios/etc/fund123/servers/redhat.cfg

#vim /usr/local/nagios/etc/fund123/servers/redhat.cfg

define host{

use linux-server

host_name redhat ; 需要修改工作站的主机名

alias mylinux

address 192.168.20.44 ; 工作站主机IP

}

define service{

use generic-service

host_name redhat

service_description check-swap ; 对下面操作的描述

check_command check_nrpe!check_swap ; 执行检测交换分区命令,监控swap

}

define service{

use generic-service

host_name redhat

service_description check-load

check_command check_nrpe!check_load ; 监控负载

}

define service{

use generic-service

host_name redhat

service_description check-disk

check_command check_nrpe!check_hda1 ; 监控硬盘

}

define service{

use generic-service

host_name redhat

service_description check-users

check_command check_nrpe!check_users ; 监控用户(使用)情况

}

define service{

use generic-service

host_name redhat

service_description total_procs

check_command check_nrpe!check_total_procs ; 监控进程

}

//对以上的文件编辑如果有错,将会导致nagios无法启动;

//修改工作站主机的主机名为redhat,保证能够#ping redhat 通信正常,可以选择

#echo "192.168.20.44 redhat " >>/etc/hosts

#echo "192.168.20.230 nagios " >>/etc/hosts

//在服务器端

#echo "192.168.20.230 nagios " >> /etc/hosts

#echo "192.168.20.44 redhat" >> /etc/hosts

检查nagios的配置文件的正确性

#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

//如果上面有错误,请检查系统时间是否同步,新建的配置文件是否有语法错误,

//以及commands.cfg中是否添加了对check_nrpe的定义等

#service nagios restart 重启nagios服务

3、配置工作站上的nagios-plugin/nrpe等

1、安装nagios-plugin

#useradd nagios

#passwd nagios

#tar -zxvf nagios-plugins-1.4.16.tar.gz

#cd nagios-plugins-1.4.16

#./configure --with-nagios-user=nagios --with-nagios-group=nagios

#make

#make install

2、改变目录权限使nagios用户能够访问

#chown -R nagios:nagios /usr/local/nagios

3、安装nrpe服务使之能够通信

#tar zxvf nrpe-2.13.tar.gz

#cd nrpe-2.13

#./configure --enable-ssl --with-ssl-lib=/usr/lib/ 前提是安装了openssl与openssl-devel

#make all

#make install-plugin

#make install-daemon

#make install-daemon-config

4、配置nrpe信息

#vim /usr/local/nagios/etc/nrpe.cfg

//允许192.168.20.230服务器端对其监控(说明文字不要写在配置行的后面)

allowed_hosts=127.0.0.1,192.168.20.230

#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

//以守护进程方式启动nrpe,同时可检验nrpe配置文件是否正确

#netstat -an | grep 5666 //是否监听5666用于nrpe通信的端口

//在服务端执行检测工作站nrpe信息

#/usr/local/nagios/libexec/check_nrpe -H 192.168.20.44

NRPE v2.13

//在工作站执行检测自己的nrpe信息

#/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1

NRPE v2.13

配置完成

5、配置工作站上的被检测上网信息

#vim /usr/local/nagios/etc/nrpe.cfg

command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10

command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20

command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 -p /dev/hda1

command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z

command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200

command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%