PLANNING
Plan your Nagios setup
1. What do you need to monitor?
2. What do you want to monitor?
3. Make a quick sketch to lay out your monitoring scheme (trust me, it helps with the initial Nagios setup; a logical network layout helps define the Nagios groups).
INSTALLATION
Install Ubuntu
Install Nagios following the link (below)
http://nagios.sourceforge.net/docs/3_0/quickstart-ubuntu.html
CONFIGURE
Monday, 25 July 2011
Thursday, 14 July 2011
Wednesday, 13 July 2011
Nagios - website checks
# Website hosts; every entry uses the "websites" host template.

# NOTE(review): alias and address carry an explicit :5555 port. Confirm that
# every command run against this host accepts host:port in $HOSTADDRESS$.
define host{
    use         websites
    host_name   www.a24-record.com
    alias       www.a24-record.com:5555
    address     www.a24-record.com:5555
}

define host{
    use         websites
    host_name   www.a24locum.co.uk
    alias       www.a24locum.co.uk
    address     www.a24locum.co.uk
}

define host{
    use         websites
    host_name   www.ambition24hours.co.uk
    alias       www.ambition24hours.co.uk
    address     www.ambition24hours.co.uk
}

define host{
    use         websites
    host_name   www.a24ahp.co.uk
    alias       www.a24ahp.co.uk
    address     www.a24ahp.co.uk
}
# Host entry for the group website, using the "websites" host template.
# The address is the host's own name; pointing it at
# www.ambition24hours.co.uk would make every check for this host probe a
# different site.
define host{
    use         websites
    host_name   www.ambition24hoursgroup.co.uk
    alias       www.ambition24hoursgroup.co.uk
    address     www.ambition24hoursgroup.co.uk
}
#define host{
# use websites
# host_name http://blog.a24group.co.uk
# alias http://blog.a24group.co.uk
# address http://blog.a24group.co.uk
# }
# Attach the check_website command to every monitored website.
# NOTE(review): several hosts in this list have no host definition in this
# section; presumably they are defined elsewhere - confirm.
define service{
    use                     local-service
    host_name               www.a24-record.com,www.a24locum.co.uk,www.nurses.co.za,www.ambition24hours.co.uk,www.a24ahp.co.uk,www.ambition24hoursgroup.co.uk,www.a24direct.co.uk,www.a24.co.za,www.a24group.co.za,www.nsofuk.com,www.locumservicesuk.com
    service_description     check_website
    check_command           check_website
}
Nagios - VMware ESXI Checks 4.1
# VMware management hosts; both use the "VMware" host template.
define host{
    use         VMware
    host_name   Vcenter
    alias       VM Center
    address     10.0.0.130
}

define host{
    use         VMware
    host_name   veam_backup
    alias       VM Backup
    address     10.0.0.131
}
# Services shared by Vcenter and veam_backup (template: local-service).

# Ping: warning at 100.0 ms / 20% loss, critical at 500.0 ms / 60% loss.
define service{
    use                     local-service
    host_name               Vcenter,veam_backup
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
}

# Root partition: thresholds 20% (warning) and 10% (critical) on "/".
define service{
    use                     local-service
    host_name               Vcenter,veam_backup
    service_description     Root Partition
    check_command           check_local_disk!20%!10%!/
}

# Logged-in users: thresholds 20 (warning) and 50 (critical).
define service{
    use                     local-service
    host_name               Vcenter,veam_backup
    service_description     Current Users
    check_command           check_local_users!20!50
}

# Process count: thresholds 250 (warning) and 400 (critical), states RSZDT.
define service{
    use                     local-service
    host_name               Vcenter,veam_backup
    service_description     Total Processes
    check_command           check_local_procs!250!400!RSZDT
}
Nagios - Printer Checks
# The HP LaserJet printer, built on the generic-printer template.
define host{
    use         generic-printer         ; template supplying the defaults
    host_name   hplj2605dn              ; short name used by the services below
    alias       HP LaserJet 2605dn      ; descriptive name
    address     192.168.1.30            ; printer IP address
    hostgroups  network-printers        ; group membership
}

# Hostgroup that collects the network printers.
define hostgroup{
    hostgroup_name  network-printers    ; group identifier
    alias           Network Printers    ; descriptive name
}

# Printer status via check_hpjd.
define service{
    use                     generic-service     ; template supplying the defaults
    host_name               hplj2605dn          ; host this service belongs to
    service_description     Printer Status      ; display name
    check_command           check_hpjd!-C public        ; monitoring command
    normal_check_interval   10                  ; every 10 minutes under normal conditions
    retry_check_interval    1                   ; every minute until the hard state is known
}

# Occasional ping of the printer; useful for tracking RTA, packet loss, etc.
define service{
    use                     generic-service
    host_name               hplj2605dn
    service_description     PING
    check_command           check_ping!3000.0,80%!5000.0,100%
    normal_check_interval   10
    retry_check_interval    1
}
Nagios - Zimbra Mail Server Checks
# Zimbra mail servers; all use the linux-server host template.
define host{
    use         linux-server
    host_name   Zmail01
    alias       Zmail01.a24group.com
    address     10.0.0.18
}

define host{
    use         linux-server
    host_name   a24mailer
    alias       a24mailer.com
    address     10.0.0.251
}

define host{
    use         linux-server
    host_name   Zmail03
    alias       Zmail03.arabellahealth.co.uk
    address     10.0.0.91
}

define host{
    use         linux-server
    host_name   Zimsa
    alias       Zimsa.a24.co.za
    address     10.0.0.252
}

define host{
    use         linux-server
    host_name   Zimsutton
    alias       Zmailsutton
    address     178.78.120.35
}
# Services for the Zimbra mail servers (template: local-service).

# Ping: warning at 100.0 ms / 20% loss, critical at 500.0 ms / 60% loss.
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
}

# Root partition: thresholds 20% (warning) and 10% (critical) on "/".
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
    service_description     Root Partition
    check_command           check_local_disk!20%!10%!/
}

# Logged-in users: thresholds 20 (warning) and 50 (critical).
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
    service_description     Current Users
    check_command           check_local_users!20!50
}

# Process count: thresholds 250 (warning) and 400 (critical), states RSZDT.
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
    service_description     Total Processes
    check_command           check_local_procs!250!400!RSZDT
}

# HTTP check with notifications switched off. Zimsutton is not in this list.
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,a24mailer
    service_description     HTTP
    check_command           check_http
    notifications_enabled   0
}

# IMAP check with notifications switched off.
define service{
    use                     local-service
    host_name               Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
    service_description     IMAP
    check_command           check_imap
    notifications_enabled   0
}
Nagios - Linux Checks
# LTSP server, built on the linux-server host template.
define host{
    use         linux-server
    host_name   Ltsp01
    alias       LTSP01
    address     10.0.0.17
}

# Ping: warning at 100.0 ms / 20% loss, critical at 500.0 ms / 60% loss.
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     PING
    check_command           check_ping!100.0,20%!500.0,60%
}

# Root partition: thresholds 20% (warning) and 10% (critical) on "/".
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     Root Partition
    check_command           check_local_disk!20%!10%!/
}

# Logged-in users: thresholds 20 (warning) and 50 (critical).
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     Current Users
    check_command           check_local_users!20!50
}

# Process count: thresholds 250 (warning) and 400 (critical), states RSZDT.
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     Total Processes
    check_command           check_local_procs!250!400!RSZDT
}

# Load average: warning 5.0,4.0,3.0 and critical 10.0,6.0,4.0.
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     Current Load
    check_command           check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}

# Swap: thresholds 20 (warning) and 10 (critical).
define service{
    use                     local-service
    host_name               Ltsp01
    service_description     Swap Usage
    check_command           check_local_swap!20!10
}
#define service{
# use local-service ; Name of service template to use
# host_name Ltsp01
# service_description HTTP
# check_command check_http
# notifications_enabled 0
# }
# check_nt based services for Ltsp01 (template: generic-service).
# NOTE(review): Ltsp01 is defined with the linux-server template, while the
# same check_nt commands are used for the Windows hosts in this file.
# Confirm that an agent answering check_nt is really running on Ltsp01.

# Uptime
define service{
    use                     generic-service
    host_name               Ltsp01
    service_description     Uptime
    check_command           check_nt!UPTIME
}

# CPU load; change host_name to match the host you defined above.
define service{
    use                     generic-service
    host_name               Ltsp01
    service_description     CPU Load
    check_command           check_nt!CPULOAD!-l 5,80,90
}

# Memory usage: warning at 80, critical at 90.
define service{
    use                     generic-service
    host_name               Ltsp01
    service_description     Memory Usage
    check_command           check_nt!MEMUSE!-w 80 -c 90
}
Nagios - Windows Checks (my examples)
# Secondary AD server, built on the windows-server host template.
define host{
    use         windows-server  ; template supplying the defaults
    host_name   Bell-AD-Sec     ; short name referenced by services
    alias       Bell-AD-Sec     ; descriptive name
    address     10.0.0.65       ; host IP address
}
# Uptime of the Windows servers via check_nt (template: generic-service).
# The host list must not end with a comma: a dangling separator leaves an
# empty host entry at the end of the list.
define service{
    use                     generic-service
    host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip,vpn-01
    service_description     Uptime
    check_command           check_nt!UPTIME
}
# CPU load of the Windows servers via check_nt (template: generic-service).
# The last host is "vpn-01", the name used by the other services for this
# server; "vpn-01c" is not referenced anywhere else and would fail host lookup.
define service{
    use                     generic-service
    host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip,vpn-01
    service_description     CPU Load
    check_command           check_nt!CPULOAD!-l 5,80,90
}
# Memory usage of the Windows servers via check_nt: warning at 80, critical at 90.
# The definition is closed with "}" so that the following object definition
# does not start inside this one (which Nagios rejects during config parsing).
define service{
    use                     generic-service
    host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip
    service_description     Memory Usage
    check_command           check_nt!MEMUSE!-w 80 -c 90
}
# Disk space via the check_all_disks command: thresholds 20% and 10%.
# The host list must not end with a comma: a dangling separator leaves an
# empty host entry at the end of the list.
define service{
    use                     generic-service
    host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo
    service_description     Disk Space
    check_command           check_all_disks!20%!10%
}
#define service{
# use generic-service
# host_name Server1
# service_description Backup Exec Agent Browser
# check_command check_nt!SERVICESTATE!-d SHOWALL -l BackupExecAgentBrowser
# }
#define service{
# use generic-service
# host_name Server1
# service_description Backup Exec Device Media Service
# check_command check_nt!SERVICESTATE!-d SHOWALL -l BackupExecDeviceMediaService
# }
#define service{
# use generic-service
# host_name Server1
# service_description Backup Exec Job Engine
# check_command check_nt!SERVICESTATE!-d SHOWALL -l BackupExecJobEngine
# }
#define service{
# use generic-service
# host_name Server1
# service_description Backup Exec Server
# check_command check_nt!SERVICESTATE!-d SHOWALL -l BackupExecRPCService
# }
# Windows service and process checks through check_nt.
# SERVICESTATE entries name a Windows service; PROCSTATE entries name a process.

# DNS Server service
define service{
    use                     generic-service
    host_name               Recruit,Bell-AD-Primary,Bell-AD-Sec
    service_description     DNS Server
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l DNS
}

# ESET NOD32 service (ekrn)
define service{
    use                     generic-service
    host_name               Recruit,Ambsrv4,Dbit1,Bcms,drserver1,mailsa,neo,voip,vpn-01
    service_description     ESET NOD Anti Virus Service
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l ekrn
}

# ESET on the AD servers, checked as a process.
# NOTE(review): the other PROCSTATE checks in this file name the executable
# with its .exe suffix; confirm that "ekrn" (no suffix) matches here.
define service{
    use                     generic-service
    host_name               Bell-AD-Primary,Bell-AD-Sec
    service_description     ESET Anti Virus Service
    check_command           check_nt!PROCSTATE!-d SHOWALL -l ekrn
}

# Backup Exec SQL instance
define service{
    use                     generic-service
    host_name               Ambsrv4
    service_description     BKUPEXEC MSSQL
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l MSSQL"$$"BKUPEXEC
}

# PRTG watch service
define service{
    use                     generic-service
    host_name               Ambsrv4
    service_description     PRTG Graph
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l prtgwatchservice
}

# Thunderbird process
define service{
    use                     generic-service
    host_name               Ambsrv4
    service_description     Thunderbird
    check_command           check_nt!PROCSTATE!-d SHOWALL -l thunderbird.exe
}

# Scan router driver service
define service{
    use                     generic-service
    host_name               Neo
    service_description     Scan Router
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l ScanRouterDriverV2
}

# D-BIT replication process
define service{
    use                     generic-service
    host_name               Dbit1
    service_description     D-BIT Replication
    check_command           check_nt!PROCSTATE!-d SHOWALL -l DBITReplication.exe
}

# Music on hold (Windows Media Player process)
define service{
    use                     generic-service
    host_name               TMS,Bcms
    service_description     Music on Hold
    check_command           check_nt!PROCSTATE!-d SHOWALL -l wmplayer.exe
}

# Stella Nova telephone management process
define service{
    use                     generic-service
    host_name               TMS
    service_description     Stella - Nova Telephone Management
    check_command           check_nt!PROCSTATE!-d SHOWALL -l Snova.exe
}
# Domain controller services checked with check_nt SERVICESTATE.
# The argument follows the "!" separator directly, like every other check_nt
# call in this file; a space after "!" becomes part of the argument itself.

# Active Directory Domain Services
define service{
    use                     generic-service
    host_name               Bell-AD-Primary,Bell-AD-Sec
    service_description     AD DS Domain Controller Services
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l NTDS
}

# DHCP Server service
define service{
    use                     generic-service
    host_name               Bell-AD-Primary
    service_description     DHCP Server
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l DHCPServer
}

# Server (LanmanServer) service
define service{
    use                     generic-service
    host_name               Bell-AD-Primary,Bell-AD-Sec
    service_description     Server
    check_command           check_nt!SERVICESTATE!-d SHOWALL -l LanmanServer
}
# Avaya BCMS Vu Server process on Bcms, checked with check_nt PROCSTATE.
define service{
    use                     generic-service
    host_name               Bcms
    service_description     Avaya BCMS Vu Server
    check_command           check_nt!PROCSTATE!-d SHOWALL -l BCMSVuServer.exe
}
Nagios - check_all_disks
Make sure you have check_disk in /usr/local/nagios/libexec/
Command.cfg
# 'check_all_disks' command definition
# Runs check_disk with the thresholds supplied by the service definition:
#   check_command  check_all_disks!<warning>!<critical>   e.g. check_all_disks!20%!10%
# $ARG1$ / $ARG2$ are used instead of a fixed "-w 20 -c 10" so the per-service
# thresholds are honoured; a bare "20" is also not a percentage for check_disk.
define command{
    command_name    check_all_disks
    command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$
}
Command.cfg
# 'check_all_disks' command definition
# Runs check_disk with the thresholds supplied by the service definition:
#   check_command  check_all_disks!<warning>!<critical>   e.g. check_all_disks!20%!10%
# $ARG1$ / $ARG2$ are used instead of a fixed "-w 20 -c 10" so the per-service
# thresholds are honoured; a bare "20" is also not a percentage for check_disk.
define command{
    command_name    check_all_disks
    command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$
}
Example:
# Example service using check_all_disks; replace PCname with a real host_name.
define service{
    use                     generic-service
    host_name               PCname
    service_description     Disk Space
    check_command           check_all_disks!20%!10%
}
Tuesday, 12 July 2011
Nagios - monitoring ESXI 4.1 (VMWARE)
http://exchange.nagios.org/directory/Plugins/Operating-Systems/*-Virtual-Environments/VMWare/Vmware-ESX-%26-VM-host/details
Monday, 11 July 2011
Nagios start & verify
Start Nagios
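/etc/rc.d/init.d/nagios start
Manually: You can start the Nagios daemon manually with the -d command line option like so:
/usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg
Verify: You can check the configuration for errors with the -v command line option like so:
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg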
Wednesday, 6 July 2011
Nagios installation
Install Ubuntu with LAMP & postfix & Build essentials
Now we will restart Apache to make sure all of the changes take effect:
Once you have saved your changes to the contacts.cfg we need to verify that there are no errors in the configuration of Nagios.
Now we are going to start nagios:
Now we are going to set the map that will change your outbound messages from nagios to your email server username/email address.
Once your system has come back up you will be able to login to the website and look at your first Nagios installation.
Nagios User Setup
# Create the nagios account with a home directory (-m) and bash as login shell (-s).
useradd -m -s /bin/bash nagios
# Set the account's password (prompts interactively).
passwd nagios
# Set the supplementary group list of nagios to the "nagios" group.
usermod -G nagios nagios
# Create the nagcmd group (passed to ./configure as the command group).
groupadd nagcmd
# Append (-a) nagcmd to the supplementary groups of nagios.
usermod -a -G nagcmd nagios
Download And Unzip Nagios And Nagios Plugins
# Make sure the working directory exists before changing into it; on a fresh
# install /downloads is not there and the cd would fail.
mkdir -p /downloads
cd /downloads
# Fetch the Nagios core and plugin source tarballs.
wget http://prdownloads.sourceforge.net/nagios/nagios-3.2.3.tar.gz
wget http://prdownloads.sourceforge.net/nagiosplug/nagios-plugins-1.4.15.tar.gz
# Unpack both archives into the current directory (/downloads).
tar -zxf /downloads/nagios-3.2.3.tar.gz
tar -zxf /downloads/nagios-plugins-1.4.15.tar.gz
Install Nagios
# Build and install Nagios from the unpacked source tree.
cd /downloads/nagios-3.2.3
# Configure the build with nagcmd as the external-command group.
./configure --with-command-group=nagcmd
# Compile everything.
make all
# Install the binaries, the init script, the sample configuration, the
# external-command directory setup and the web server configuration, in that order.
make install
make install-init
make install-config
make install-commandmode
make install-webconf
Nagios Password
This is the password you will need to look at the nagios pages. If you install Nagios to a different directory please change this command to where the Nagios etc. folder will be.
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
Enter your password when prompted.
Now we will restart Apache to make sure all of the changes take effect:
/etc/init.d/apache2 restart
Nagios Plugins
# Build and install the Nagios plugins from the unpacked source tree.
cd /downloads/nagios-plugins-1.4.15/
# The source tree has to be configured before make can run; without this step
# there is no Makefile to build from.
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install
Now we need to make Nagios start at bootup:
ln -s /etc/init.d/nagios /etc/rcS.d/S99nagios
Change Default Email Address For Nagios Admin
Open /usr/local/nagios/etc/objects/contacts.cfg in your favorite editor and change the email address of the Nagios admin contact.
Once you have saved your changes to contacts.cfg, we need to verify that there are no errors in the configuration of Nagios.
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios start
Postfix Configuration For A Smarthost Relay
# Send all outbound mail through the smarthost.
postconf -e 'relayhost=yourmailserver.com'
# Enable SASL authentication towards the smarthost and point postfix at the
# credentials map.
postconf -e 'smtp_sasl_auth_enabled = yes'
postconf -e 'smtp_sasl_password_maps = hash:/etc/postfix/sasl_passwd'
postconf -e 'smtp_sasl_security_options ='
# Write the credentials file. The redirect target has to be on the same line
# as the echo, otherwise the shell sees an incomplete redirection.
echo "yourmailserver.com emailusername:emailpassword" > /etc/postfix/sasl_passwd
Now we will need to change the password file attributes so only root has access to read it.
# Restrict the credentials file to root and build its lookup table.
chown root:root /etc/postfix/sasl_passwd
chmod 600 /etc/postfix/sasl_passwd
postmap /etc/postfix/sasl_passwd
# Map the local nagios sender to the mail account. The ">" is required:
# without it echo only prints the text and the canonical file is never written.
echo "nagios mailusername@yourmailserver.com" > /etc/postfix/canonical
# Append the canonical map setting to main.cf (redirect target on the same line).
echo "canonical_maps = hash:/etc/postfix/canonical" >> /etc/postfix/main.cf
postmap /etc/postfix/canonical
# Restart postfix so the new settings take effect.
/etc/init.d/postfix restart
Now that this has all been completed, you can restart your system.
Once your system has come back up you will be able to log in to the website and look at your first Nagios installation.
http://yourserver/nagios/
source: http://www.howtoforge.com/nagios-installation-on-ubuntu-10.04-lucid-lynx-p3
Nagios check_website plugin setup
Define check_website command in commands.cfg
add commands to /usr/local/nagios/etc/objects/commands.cfg
# Command definition: check_website
# Invokes the check_website plugin from $USER1$ against the host's address.
define command{
    command_name    check_website
    command_line    $USER1$/check_website -H $HOSTADDRESS$
}
Download Plugin and copy to libexec
Download check_website plugin
Copy it into /usr/local/nagios/libexec/ and change the file permissions: chmod 775 check_website
Test Plugin
cd /usr/local/nagios/libexec/ && ./check_website -H (hostname) -F (file)
add commands to /usr/local/nagios/etc/objects/commands.cfg
# Command definition: check_website
# Invokes the check_website plugin from $USER1$ against the host's address.
define command{
    command_name    check_website
    command_line    $USER1$/check_website -H $HOSTADDRESS$
}
Download Plugin and copy to libexec
Download check_website plugin
Copy it into /usr/local/nagios/libexec/ and change the file permissions: chmod 775 check_website
Test Plugin
cd /usr/local/nagios/libexec/ && ./check_website -H (hostname) -F (file)
Friday, 1 July 2011
Nagios - Check Backup Exec
****************************************************************
* check_be - Nagios plugin for Symantec BackupExec for Windows *
* by Toussaint OTTAVI (t.ottavi@medi.fr) *
****************************************************************
This is a windows executable, to be run on Windows servers where BackupExec
is installed. It will process all the job history files, find the most recent
occurrence of a specified job name, then it will return the current status of
this job. It can also return a 'warning' or 'critical' status if the last job
found is older than the specified amount of days.
1/ HOW TO USE THIS SOFTWARE
---------------------------
Run check_be.exe in a Windows console, with the following syntax :
check_be "path of XML files" "Name of the backup job"
- "Path of XML files" is the location where BackupExec puts its log files in
XML format. Default locations for these files are :
v10: c:\program files\veritas\backup exec\nt\data
v12: c:\Program Files\Symantec\Backup Exec\Data
- "Name of the backup job log" is the name of the job you want to check. It's
case-independent.
Possible switches are :
-h : Shows brief syntax help
-d : Writes detailed debug information. This can help determining what's wrong
in case of any problem.
-c<n> : Return 'critical' state if the last occurrence of the job is older
than <n> days. This can help to determine if a scheduled job is
disabled, locked, paused, or any other reason.
-w<n> : Same, but for 'warning' state
Example of use :
check_be "c:\Program Files\Symantec\Backup Exec\Data" "My tape backup" -w1 -c3
Return states are :
'ok' for BackupExec status 2 and 19
'critical' for BackupExec status 0,1,6,7 and 21
'warning' for any other BackupExec status
'unknown' if it can not determine the BackupExec job status, for any reason.
2/ HOW TO USE IT WITH NAGIOS
----------------------------
2.1/ ON THE MONITORED SERVER
You should have nsclient++ installed. In your 'nsc.ini' file, you must declare
an external script like this:
[NRPE Handlers]
;# COMMAND DEFINITIONS
check_be=check_be.exe "c:\program files\veritas\backup exec\nt\data\" "JobName" -w1 -c3
2.2. ON THE NAGIOS SERVER
Define a service template :
# Service template for BackupExec job checks (register 0: template only).
define service{
    name                    template-backupexec
    use                     generic-service
    service_description     BackupExec Job Check    ; default display name shown in Nagios
    check_command           check_nrpe!-c check_be  ; must match the command name in the nsclient++ nsc.ini
    normal_check_interval   60                      ; put your own check intervals here
    retry_check_interval    60
    register                0                       ; marks this object as a template
}
Then, in your object definition, add the following :
# Concrete service built on template-backupexec; replace MYHOST with your host.
define service {
    use                     template-backupexec
    service_description     BackupExec - Daily DAT backup   ; job-specific display name, if needed
    host_name               MYHOST
}
* check_be - Nagios plugin for Symantec BackupExec for Windows *
* by Toussaint OTTAVI (t.ottavi@medi.fr) *
****************************************************************
This is a windows executable, to be run on Windows servers where BackupExec
is installed. It will process all the job history files, find the most recent
occurrence of a specified job name, then it will return the current status of
this job. It can also return a 'warning' or 'critical' status if the last job
found is older than the specified amount of days.
1/ HOW TO USE THIS SOFTWARE
---------------------------
Run check_be.exe in a Windows console, with the following syntax :
check_be "path of XML files" "Name of the backup job"
- "Path of XML files" is the location where BackupExec puts its log files in
XML format. Default locations for these files are :
v10: c:\program files\veritas\backup exec\nt\data
v12: c:\Program Files\Symantec\Backup Exec\Data
- "Name of the backup job log" is the name of the job you want to check. It's
case-independent.
Possible switches are :
-h : Shows brief syntax help
-d : Writes detailed debug information. This can help determining what's wrong
in case of any problem.
-c<n> : Return 'critical' state if the last occurrence of the job is older
than <n> days. This can help to determine if a scheduled job is
disabled, locked, paused, or any other reason.
-w<n> : Same, but for 'warning' state
Example of use :
check_be "c:\Program Files\Symantec\Backup Exec\Data" "My tape backup" -w1 -c3
Return states are :
'ok' for BackupExec status 2 and 19
'critical' for BackupExec status 0,1,6,7 and 21
'warning' for any other BackupExec status
'unknown' if it can not determine the BackupExec job status, for any reason.
2/ HOW TO USE IT WITH NAGIOS
----------------------------
2.1/ ON THE MONITORED SERVER
You should have nsclient++ installed. In your 'nsc.ini' file, you must declare
an external script like this:
[NRPE Handlers]
;# COMMAND DEFINITIONS
check_be=check_be.exe "c:\program files\veritas\backup exec\nt\data\" "JobName" -w1 -c3
2.2. ON THE NAGIOS SERVER
Define a service template :
# Service template for BackupExec job checks (register 0: template only).
define service{
    name                    template-backupexec
    use                     generic-service
    service_description     BackupExec Job Check    ; default display name shown in Nagios
    check_command           check_nrpe!-c check_be  ; must match the command name in the nsclient++ nsc.ini
    normal_check_interval   60                      ; put your own check intervals here
    retry_check_interval    60
    register                0                       ; marks this object as a template
}
Then, in your object definition, add the following :
# Concrete service built on template-backupexec; replace MYHOST with your host.
define service {
    use                     template-backupexec
    service_description     BackupExec - Daily DAT backup   ; job-specific display name, if needed
    host_name               MYHOST
}
Nagios - Zimbra mail queue checks
Monitoring Zimbra mail queues with Nagios
edit
vi /usr/local/nagios/libexec/utils.pm
remove
$PATH_TO_MAILQ = "/usr/bin/mailq";
Add
$PATH_TO_MAILQ ="/opt/zimbra/postfix/sbin/mailq";
Test
/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150
Error
root@Nagi:/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150
ERROR: /opt/zimbra/postfix/sbin/mailq is not executable by (uid 0:gid(0 0))
Fix Error
edit
vi /etc/sudoers
nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_clamav.pl
nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_mailq
edit
vi /usr/local/nagios/libexec/utils.pm
remove
$PATH_TO_MAILQ = "/usr/bin/mailq";
Add
$PATH_TO_MAILQ ="/opt/zimbra/postfix/sbin/mailq";
Test
/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150
Error
root@Nagi:/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150
ERROR: /opt/zimbra/postfix/sbin/mailq is not executable by (uid 0:gid(0 0))
Fix Error
edit
vi /etc/sudoers
nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_clamav.pl
nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_mailq
Nagios, change web login password
SSH into Nagios
sudo -s
# No -c here: -c re-creates htpasswd.users from scratch and would drop every
# other web user. Without it, only nagiosadmin's password is updated.
htpasswd /usr/local/nagios/etc/htpasswd.users nagiosadmin (enter)
type new password
verify password
sudo -s
# No -c here: -c re-creates htpasswd.users from scratch and would drop every
# other web user. Without it, only nagiosadmin's password is updated.
htpasswd /usr/local/nagios/etc/htpasswd.users nagiosadmin (enter)
type new password
verify password
Subscribe to:
Posts (Atom)