Monday, 25 July 2011

New Nagios Installation - Steps

PLANNING

Plan your Nagios setup
1. What do you need to monitor?
2. What do you want to monitor?
3. Make a quick sketch to lay out your monitoring scheme (trust me, it helps with the initial Nagios setup; a logical network layout helps define the Nagios groups).


INSTALLATION

Install Ubuntu
Install Nagios following the link  (below)
 http://nagios.sourceforge.net/docs/3_0/quickstart-ubuntu.html

CONFIGURE




Wednesday, 13 July 2011

My second big Nagios site setup (July 2011)





Nagios - website checks


#define host
define host{
        use                     websites
        host_name               www.a24-record.com
        alias                   www.a24-record.com:5555
        address                 www.a24-record.com:5555
        }

#define host
define host{
        use                     websites
        host_name               www.a24locum.co.uk
        alias                   www.a24locum.co.uk
        address                 www.a24locum.co.uk
        }

define host{
        use                     websites
        host_name               www.ambition24hours.co.uk
        alias                   www.ambition24hours.co.uk
        address                 www.ambition24hours.co.uk
        }

#define host
define host{
        use                     websites
        host_name               www.a24ahp.co.uk
        alias                   www.a24ahp.co.uk
        address                 www.a24ahp.co.uk
        }

define  host{
        use                     websites
        host_name               www.ambition24hoursgroup.co.uk
        alias                   www.ambition24hoursgroup.co.uk
        address                 www.ambition24hoursgroup.co.uk
        }


#define host{
 #       use                     websites
  #      host_name               http://blog.a24group.co.uk
   #     alias                   http://blog.a24group.co.uk
    #    address                 http://blog.a24group.co.uk
     #   }


define service{
        use                     local-service
        host_name               www.a24-record.com,www.a24locum.co.uk,www.nurses.co.za,www.ambition24hours.co.uk,www.a24ahp.co.uk,www.ambition24hoursgroup.co.uk,www.a24direct.co.uk,www.a24.co.za,www.a24group.co.za,www.nsofuk.com,www.locumservicesuk.com
        service_description     check_website
        check_command           check_website
        }



Nagios - VMware ESXI Checks 4.1


define host{
        use                     VMware
        host_name               Vcenter
        alias                   VM Center
        address                 10.0.0.130
        }

define host{
        use                     VMware
        host_name               veam_backup
        alias                   VM Backup
        address                 10.0.0.131
        }


#monitor ping
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Vcenter,veam_backup
        service_description             PING
        check_command                   check_ping!100.0,20%!500.0,60%
        }

#Root Partition monitoring
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Vcenter,veam_backup
        service_description             Root Partition
        check_command                   check_local_disk!20%!10%!/
        }

#Current Users
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Vcenter,veam_backup
        service_description             Current Users
        check_command                   check_local_users!20!50
        }



#Monitor processes
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Vcenter,veam_backup
        service_description             Total Processes
        check_command                   check_local_procs!250!400!RSZDT
        }


Nagios - Printer Checks


define host{
        use             generic-printer         ; Inherit default values from a template
        host_name       hplj2605dn              ; The name we're giving to this printer
        alias           HP LaserJet 2605dn      ; A longer name associated with the printer
        address         192.168.1.30            ; IP address of the printer
        hostgroups      network-printers        ; Host groups this printer is associated with
        }

# A hostgroup for network printers

define hostgroup{
        hostgroup_name  network-printers        ; The name of the hostgroup
        alias           Network Printers        ; Long name of the group
        }


define service{
        use                     generic-service         ; Inherit values from a template
        host_name               hplj2605dn              ; The name of the host the service is associated with
        service_description     Printer Status          ; The service description
        check_command           check_hpjd!-C public    ; The command used to monitor the service
        normal_check_interval   10      ; Check the service every 10 minutes under normal conditions
        retry_check_interval    1       ; Re-check the service every minute until its final/hard state is determined
        }


# Create a service for "pinging" the printer occasionally.  Useful for monitoring RTA, packet loss, etc.

define service{
        use                     generic-service
        host_name               hplj2605dn
        service_description     PING
        check_command           check_ping!3000.0,80%!5000.0,100%
        normal_check_interval   10
        retry_check_interval    1
        }


Nagios - Zimbra Mail Server Checks


#Define Host
define host{
        use                     linux-server
        host_name               Zmail01
        alias                   Zmail01.a24group.com
        address                 10.0.0.18
        }

#Define Host
define host{
        use                     linux-server
        host_name               a24mailer
        alias                   a24mailer.com
        address                 10.0.0.251
        }

define host{
        use                     linux-server
        host_name               Zmail03
        alias                   Zmail03.arabellahealth.co.uk
        address                 10.0.0.91
        }


define host{
        use                     linux-server
        host_name               Zimsa
        alias                   Zimsa.a24.co.za
        address                 10.0.0.252
        }


define host{
        use                     linux-server
        host_name               Zimsutton
        alias                   Zmailsutton
        address                 178.78.120.35
        }

#monitor ping
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
        service_description             PING
        check_command                   check_ping!100.0,20%!500.0,60%
        }



#Root Partition monitoring
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
        service_description             Root Partition
        check_command                   check_local_disk!20%!10%!/
        }

#Current Users
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
        service_description             Current Users
        check_command                   check_local_users!20!50
        }

#Monitor processes
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
        service_description             Total Processes
        check_command                   check_local_procs!250!400!RSZDT
        }

#Monitor HTTP
define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,a24mailer
        service_description             HTTP
        check_command                   check_http
        notifications_enabled           0
        }


define service{
        use                             local-service         ; Name of service template to use
        host_name                       Zmail01,Zmail03,Zimsa,Zimsutton,a24mailer
        service_description             IMAP
        check_command                   check_imap
        notifications_enabled           0
        }


Nagios - Linux Checks


#define host
define host{
        use                     linux-server
        host_name               Ltsp01
       alias                   LTSP01
        address                 10.0.0.17
      }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             PING
        check_command                   check_ping!100.0,20%!500.0,60%
        }


define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             Root Partition
        check_command                   check_local_disk!20%!10%!/
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             Current Users
        check_command                   check_local_users!20!50
        }


define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             Total Processes
        check_command                   check_local_procs!250!400!RSZDT
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             Current Load
        check_command                   check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       Ltsp01
        service_description             Swap Usage
        check_command                   check_local_swap!20!10
        }
#define service{
 #       use                             local-service         ; Name of service template to use
  #      host_name                       Ltsp01
   #     service_description             HTTP
    #    check_command                   check_http
     #   notifications_enabled           0
      #  }

define service{
        use                     generic-service
        host_name               Ltsp01
        service_description     Uptime
        check_command           check_nt!UPTIME
        }


# Create a service for monitoring CPU load
# Change the host_name to match the name of the host you defined above


define service{
        use                     generic-service
        host_name               Ltsp01
        service_description     CPU Load
        check_command           check_nt!CPULOAD!-l 5,80,90
        }


define service{
        use                     generic-service
        host_name               Ltsp01
        service_description     Memory Usage
        check_command           check_nt!MEMUSE!-w 80 -c 90
        }

Nagios - Windows Checks (my examples)


define host{
        use             windows-server  ; Inherit default values from a template
        host_name       Bell-AD-Sec       ; The name we're giving to this host
        alias           Bell-AD-Sec     ; A longer name associated with the host
        address         10.0.0.65     ; IP address of the host
        }

define service{
        use                     generic-service
        host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip,vpn-01
        service_description     Uptime
        check_command           check_nt!UPTIME
        }


define service{
        use                     generic-service
        host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip,vpn-01
        service_description     CPU Load
        check_command           check_nt!CPULOAD!-l 5,80,90
        }


define service{
        use                     generic-service
        host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo,voip
        service_description     Memory Usage
        check_command           check_nt!MEMUSE!-w 80 -c 90
        }

 define service{
        use                     generic-service
        host_name               Recruit,Ambsrv4,Neo,Dbit1,TMS,Bcms,drserver1,mailsa,neo
        service_description     Disk Space
        check_command   check_all_disks!20%!10%
        }
#define service{
#        use                     generic-service
#        host_name               Server1
#        service_description     Backup Exec Agent Browser
#        check_command           check_nt!SERVICESTATE!-d SHOWALL -l BackupExecAgentBrowser
#        }

#define service{
#        use                     generic-service
#        host_name               Server1
#        service_description     Backup Exec Device Media Service
#        check_command           check_nt!SERVICESTATE!-d SHOWALL -l BackupExecDeviceMediaService
#       }


#define service{
#        use                     generic-service
 #       host_name               Server1
  #      service_description     Backup Exec Job Engine
   #     check_command           check_nt!SERVICESTATE!-d SHOWALL -l BackupExecJobEngine
    #    }

#define service{
#        use                     generic-service
 #       host_name               Server1
  #      service_description     Backup Exec Server
   #     check_command           check_nt!SERVICESTATE!-d SHOWALL -l BackupExecRPCService
    #    }

#Monitor DNS Server Services

define service{
        use                     generic-service
        host_name               Recruit,Bell-AD-Primary,Bell-AD-Sec
        service_description     DNS Server
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l DNS
        }

define service{
        use                     generic-service
        host_name               Recruit,Ambsrv4,Dbit1,Bcms,drserver1,mailsa,neo,voip,vpn-01
        service_description     ESET NOD Anti Virus Service
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l ekrn
        }

define service{
        use                     generic-service
        host_name               Bell-AD-Primary,Bell-AD-Sec
        service_description     ESET Anti Virus Service
        check_command           check_nt!PROCSTATE!-d SHOWALL -l ekrn
        }

define service{
        use                     generic-service
        host_name               Ambsrv4
        service_description     BKUPEXEC MSSQL
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l MSSQL"$$"BKUPEXEC
        }


define service{
        use                     generic-service
        host_name               Ambsrv4
        service_description     PRTG Graph
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l prtgwatchservice
        }

define service{
        use                     generic-service
        host_name               Ambsrv4
        service_description     Thunderbird
        check_command           check_nt!PROCSTATE!-d SHOWALL -l thunderbird.exe
        }


define service{
        use                     generic-service
        host_name               Neo
        service_description     Scan Router
        check_command           check_nt!SERVICESTATE!-d SHOWALL -l ScanRouterDriverV2
        }

define service{
         use                   generic-service
         host_name             Dbit1
         service_description   D-BIT Replication
         check_command         check_nt!PROCSTATE!-d SHOWALL -l DBITReplication.exe
        }


define service{
         use                   generic-service
         host_name             TMS,Bcms
         service_description   Music on Hold
         check_command         check_nt!PROCSTATE!-d SHOWALL -l wmplayer.exe
        }

define service{
         use                   generic-service
         host_name             TMS
         service_description   Stella - Nova Telephone Management
         check_command         check_nt!PROCSTATE!-d SHOWALL -l Snova.exe
        }

define service{
         use                   generic-service
         host_name             Bell-AD-Primary,Bell-AD-Sec
         service_description   AD DS Domain Controller Services
         check_command         check_nt!SERVICESTATE! -d SHOWALL -l NTDS
        }

define service{
         use                   generic-service
         host_name             Bell-AD-Primary
         service_description   DHCP Server
         check_command         check_nt!SERVICESTATE! -d SHOWALL -l DHCPServer
        }

define service{
         use                   generic-service
         host_name             Bell-AD-Primary,Bell-AD-Sec
         service_description   Server
         check_command         check_nt!SERVICESTATE! -d SHOWALL -l LanmanServer
        }

define service{
         use                   generic-service
         host_name             Bcms
         service_description   Avaya BCMS Vu Server
         check_command         check_nt!PROCSTATE!-d SHOWALL -l BCMSVuServer.exe
        }





Nagios - check_all_disks

Make sure you have check_disk in /usr/local/nagios/libexec/


Command.cfg


define command{
        command_name    check_all_disks
        command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$
        }

Example:
  define service{
        use                     generic-service
        host_name               PCname
        service_description     Disk Space
        check_command   check_all_disks!20%!10%
        }



Tuesday, 12 July 2011

Nagios - monitoring ESXI 4.1 (VMWARE)



http://exchange.nagios.org/directory/Plugins/Operating-Systems/*-Virtual-Environments/VMWare/Vmware-ESX-%26-VM-host/details

Monday, 11 July 2011

Nagios start & verify

Start Nagios 
 
/etc/rc.d/init.d/nagios start

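Manually: You can start the Nagios daemon manually with the -d command line option like so:

 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg

Verify: You can check the configuration for errors (without starting Nagios) with the -v option: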



 /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Wednesday, 6 July 2011

Nagios installation

Install Ubuntu with LAMP & postfix & Build essentials

Nagios User Setup

useradd -m -s /bin/bash nagios
passwd nagios
usermod -G nagios nagios
groupadd nagcmd
usermod -a -G nagcmd nagios

Download And Unzip Nagios And Nagios Plugins

cd /downloads
wget http://prdownloads.sourceforge.net/nagios/nagios-3.2.3.tar.gz
wget http://prdownloads.sourceforge.net/nagiosplug/nagios-plugins-1.4.15.tar.gz
tar -zxf /downloads/nagios-3.2.3.tar.gz
tar -zxf /downloads/nagios-plugins-1.4.15.tar.gz

Install Nagios

cd /downloads/nagios-3.2.3
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf

Nagios Password

This is the password you will need to look at the nagios pages. If you install Nagios to a different directory please change this command to where the Nagios etc. folder will be.
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
Enter your password when prompted.
Now we will restart Apache to make sure all of the changes take effect:
/etc/init.d/apache2 restart

Nagios Plugins
cd /downloads/nagios-plugins-1.4.15/
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install
Now we need to make Nagios start at bootup:
ln -s /etc/init.d/nagios /etc/rcS.d/S99nagios

Change Default Email Address For Nagios Admin

Open your favorite editor and open /usr/local/nagios/etc/objects/contacts.cfg

Once you have saved your changes to the contacts.cfg we need to verify that there are no errors in the configuration of Nagios.

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg


Now we are going to start nagios:
/etc/init.d/nagios start

Postfix Configuration For A Smarthost Relay

postconf -e 'relayhost=yourmailserver.com'
postconf -e 'smtp_sasl_auth_enabled = yes'
postconf -e 'smtp_sasl_password_maps = hash:/etc/postfix/sasl_passwd'
postconf -e 'smtp_sasl_security_options ='
echo "yourmailserver.com emailusername:emailpassword" > /etc/postfix/sasl_passwd
Now we will need to change the password file attributes so only root has access to read it.
chown root:root /etc/postfix/sasl_passwd
chmod 600 /etc/postfix/sasl_passwd
postmap /etc/postfix/sasl_passwd

Now we are going to set the map that will change your outbound messages from nagios to your email server username/email address.
echo "nagios mailusername@yourmailserver.com" > /etc/postfix/canonical
echo "canonical_maps = hash:/etc/postfix/canonical" >> /etc/postfix/main.cf
postmap /etc/postfix/canonical


/etc/init.d/postfix restart
Now that this all has been completed you can restart your system.
Once your system has come back up you will be able to login to the website and look at your first Nagios installation.
http://yourserver/nagios/

 source: http://www.howtoforge.com/nagios-installation-on-ubuntu-10.04-lucid-lynx-p3






Nagios check_website plugin setup

Define check_website command in commands.cfg


add commands to /usr/local/nagios/etc/objects/commands.cfg


# 'check_website' command definition

define command{

        command_name    check_website

        command_line    $USER1$/check_website -H $HOSTADDRESS$

        }


Download Plugin and copy to libexec


Download check_website plugin

Copy it into /usr/local/nagios/libexec/ and change the file permissions: chmod 775 check_website


Test Plugin


cd /usr/local/nagios/libexec/ && ./check_website -H (hostname) -F (file)

Nagios preflight checks


Friday, 1 July 2011

htmlview


#chown -R nagios:nagios /usr/local/nagios/var/*
#htmlview http://localhost/nagios

Nagios - Check Backup Exec

****************************************************************
* check_be - Nagios plugin for Symantec BackupExec for Windows *
* by Toussaint OTTAVI (t.ottavi@medi.fr)                       * 
****************************************************************

This is a windows executable, to be run on Windows servers where BackupExec
is installed. It will process all the job history files, find the most recent
occurrence of a specified job name, then it will return the current status of
this job. It can also return a 'warning' or 'critical' status if the last job
found is older than the specified amount of days.


1/ HOW TO USE THIS SOFTWARE
---------------------------

Run check_be.exe in a Windows console, with the following syntax :

  check_be "path of XML files" "Name of the backup job"
  
- "Path of XML files" is the location where BackupExec puts its log files in
  XML format. Default locations for these files are :
    v10:  c:\program files\veritas\backup exec\nt\data
    v12: c:\Program Files\Symantec\Backup Exec\Data
  
- "Name of the backup job log" is the name of the job you want to check. It's
  case independent.

Possible switches are :

-h : Shows brief syntax help

-d : Writes detailed debug information. This can help determining what's wrong
     in case of any problem.
     
-c<n> : Return 'critical' state if the last occurrence of the job is older 
        than <n> days. This can help to determine if a scheduled job is 
        disabled, locked, paused, or any other reason.

-w<n> : Same, but for 'warning' state

Example of use :

check_be  "c:\Program Files\Symantec\Backup Exec\Data" "My tape backup" -w1 -c3

Return states are :
  'ok' for BackupExec status 2 and 19
  'critical' for BackupExec status 0,1,6,7 and 21
  'warning' for any other BackupExec status 
  'unknown' if it can not determine the BackupExec job status, for any reason.
  

2/ HOW TO USE IT WITH NAGIOS
----------------------------

2.1/ ON THE MONITORED SERVER

You should have nsclient++ installed. In your 'nsc.ini' file, you must declare
an external script like this: 

[NRPE Handlers]
;# COMMAND DEFINITIONS
check_be=check_be.exe "c:\program files\veritas\backup exec\nt\data\" "JobName" -w1 -c3
 
 
2.2. ON THE NAGIOS SERVER 

Define a service template :

define service{
  name                  template-backupexec
  use                   generic-service
  service_description    BackupExec Job Check    ; default display name in Nagios
  check_command         check_nrpe!-c check_be  ; same name as in the nsclient++ nsc.ini command definition 
  normal_check_interval 60                      ; your check intervals here
  retry_check_interval  60
  register 0                                    ; this is a template
  }
  
Then, in your object definition, add the following :

define service {
  use                 template-backupexec
  service_description BackupExec - Daily DAT backup   ; specific display name, if you need
  host_name           MYHOST
  } 

Nagios - Zimbra mail queue checks

Monitoring Zimbra mail queues with Nagios

edit 
vi /usr/local/nagios/libexec/utils.pm

remove
$PATH_TO_MAILQ   = "/usr/bin/mailq";

Add
$PATH_TO_MAILQ  ="/opt/zimbra/postfix/sbin/mailq";

Test
/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150

Error
root@Nagi:/usr/local/nagios/libexec# /usr/local/nagios/libexec/check_mailq 10.0.0.251 -w 100 -c 150
ERROR: /opt/zimbra/postfix/sbin/mailq is not executable by (uid 0:gid(0 0))


Fix Error

edit 
vi /etc/sudoers

nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_clamav.pl

nagios ALL=(zimbra) NOPASSWD: /usr/local/nagios/libexec/check_mailq

Nagios, change web login password

SSH into Nagios
sudo -s
htpasswd /usr/local/nagios/etc/htpasswd.users nagiosadmin (enter)
(do not use -c here: it recreates the file and removes any other users)
type new password
verify password