check_linux_bonding 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. #!/usr/bin/perl
  2. #
  3. # DESCRIPTION: Nagios plugin for checking the status of bonded network
  4. # interfaces (masters and slaves) on Linux servers.
  5. #
  6. # AUTHOR: Trond H. Amundsen <t.h.amundsen@usit.uio.no>
  7. #
  8. # Copyright (C) 2009-2014 Trond H. Amundsen
  9. #
  10. # This program is free software: you can redistribute it and/or modify
  11. # it under the terms of the GNU General Public License as published by
  12. # the Free Software Foundation, either version 3 of the License, or
  13. # (at your option) any later version.
  14. #
  15. # This program is distributed in the hope that it will be useful, but
  16. # WITHOUT ANY WARRANTY; without even the implied warranty of
  17. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. # General Public License for more details.
  19. #
  20. # You should have received a copy of the GNU General Public License
  21. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  22. #
  23. use strict;
  24. use warnings;
  25. use POSIX qw(isatty);
  26. use Getopt::Long qw(:config no_ignore_case);
  27. # Global (package) variables used throughout the code
  28. use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
  29. $E_UNKNOWN $USAGE $HELP $LICENSE $linebreak $counter $exit_code
  30. %opt %reverse_exitcode %text2exit %bonding %nagios_level_count
  31. @perl_warnings @reports @blacklist @ok_reports
  32. );
  33. #---------------------------------------------------------------------
  34. # Initialization and global variables
  35. #---------------------------------------------------------------------
  # Collect perl warnings: every warn call is stored as an array ref of
  # its arguments so the messages can be reported at the end of the run.
  $SIG{__WARN__} = sub {
      my @warning = @_;
      push @perl_warnings, \@warning;
  };

  # Plugin identity
  ($NAME, $VERSION) = ('check_linux_bonding', '1.4');
  ($AUTHOR, $CONTACT) = ('Trond H. Amundsen', 't.h.amundsen@usit.uio.no');

  # Nagios exit codes
  ($E_OK, $E_WARNING, $E_CRITICAL, $E_UNKNOWN) = (0, 1, 2, 3);

  # Numeric exit code => Nagios level name
  %reverse_exitcode = (
      $E_OK       => 'OK',
      $E_WARNING  => 'WARNING',
      $E_CRITICAL => 'CRITICAL',
      $E_UNKNOWN  => 'UNKNOWN',
  );

  # One-line usage summary
  $USAGE = "Usage: $NAME [OPTION]...\n";

  # Option overview, printed after the usage line when help is requested
  $HELP = <<'HELP_TEXT';
OPTIONS:
-t, --timeout Plugin timeout in seconds [5]
-s, --state Prefix alerts with alert state
-S, --short-state Prefix alerts with alert state abbreviated
-n, --no-bonding Alert level if no bonding interfaces found [ok]
--slave-down Alert level if a slave is down [warning]
--disable-sysfs Don't use sysfs (default), use procfs
--ignore-num-ad (IEEE 802.3ad) Don't warn if num_ad_ports != num_slaves
-b, --blacklist Blacklist failed interfaces
-v, --verbose Debug/Verbose output, reports everything
-h, --help Display this help text
-V, --version Display version info
For more information and advanced options, see the manual page or URL:
http://folk.uio.no/trondham/software/check_linux_bonding.html
HELP_TEXT

  # Version and license banner, printed when version info is requested
  $LICENSE = <<"LICENSE_TEXT";
$NAME $VERSION
Copyright (C) 2009-2014 $AUTHOR
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Written by $AUTHOR <$CONTACT>
LICENSE_TEXT
  # Default value for every option; GetOptions overwrites these in place
  %opt = (
      'help'          => 0,
      'version'       => 0,
      'verbose'       => 0,
      'state'         => 0,
      'shortstate'    => 0,
      'disable_sysfs' => 0,
      'ignore_num_ad' => 0,
      'timeout'       => 5,           # seconds
      'no_bonding'    => 'ok',
      'slave_down'    => 'warning',
      'linebreak'     => undef,
      'blacklist'     => [],
  );

  # Parse the command line; on a bad option show the usage line and
  # exit with UNKNOWN
  if (!GetOptions(
          't|timeout=i'    => \$opt{timeout},
          'h|help'         => \$opt{help},
          'V|version'      => \$opt{version},
          'b|blacklist=s'  => $opt{blacklist},    # array ref, may repeat
          'n|no-bonding=s' => \$opt{no_bonding},
          's|state'        => \$opt{state},
          'S|short-state'  => \$opt{shortstate},
          'linebreak=s'    => \$opt{linebreak},
          'v|verbose'      => \$opt{verbose},
          'disable-sysfs'  => \$opt{disable_sysfs},
          'slave-down=s'   => \$opt{slave_down},
          'ignore-num-ad'  => \$opt{ignore_num_ad},
      ))
  {
      print $USAGE;
      exit $E_UNKNOWN;
  }

  # --help: usage line followed by the option overview
  if ($opt{help}) {
      print $USAGE, $HELP;
      exit $E_OK;
  }

  # --version: version and license banner
  if ($opt{version}) {
      print $LICENSE;
      exit $E_OK;
  }
  # Every message produced by the checks is collected in this list
  @reports = ();

  # Plugin timeout: when the alarm fires, say so and exit with UNKNOWN
  $SIG{ALRM} = sub {
      print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
      exit $E_UNKNOWN;
  };
  alarm $opt{timeout};

  # Default separator between messages: a newline when STDOUT is a
  # terminal, an HTML line break otherwise
  if (isatty(*STDOUT)) {
      $linebreak = "\n";
  }
  else {
      $linebreak = '<br/>';
  }

  # '--linebreak' overrides the default. REG and HTML are shorthands,
  # any other value is used verbatim.
  if (defined $opt{linebreak}) {
      $linebreak = $opt{linebreak} eq 'REG'  ? "\n"
                 : $opt{linebreak} eq 'HTML' ? '<br/>'
                 :                             $opt{linebreak};
  }

  # Interfaces that should be ignored
  @blacklist = ();
  if (defined $opt{blacklist}) {
      @blacklist = @{ get_blacklist() };
  }

  # Alert level keyword => numeric exit code
  %text2exit = (
      'ok'       => $E_OK,
      'warning'  => $E_WARNING,
      'critical' => $E_CRITICAL,
      'unknown'  => $E_UNKNOWN,
  );

  # '--no-bonding' must name one of the known alert levels
  exists $text2exit{$opt{no_bonding}}
    or unknown_error("Wrong usage of '--no-bonding' option: '"
                     . $opt{no_bonding}
                     . "' is not a recognized keyword");

  # '--slave-down' must name one of the known alert levels
  exists $text2exit{$opt{slave_down}}
    or unknown_error("Wrong usage of '--slave-down' option: '"
                     . $opt{slave_down}
                     . "' is not a recognized keyword");
  168. #---------------------------------------------------------------------
  169. # Functions
  170. #---------------------------------------------------------------------
  #
  # Queue a message together with its Nagios exit value.
  # Arguments: message text, exit value.
  # Returns what push returns (the new number of queued reports).
  #
  sub report {
      my ($msg, $exval) = @_;
      my $entry = [ $msg, $exval ];
      return push @reports, $entry;
  }
  #
  # Print an error message prefixed with "ERROR: " and terminate the
  # plugin with the UNKNOWN exit code. Does not return.
  #
  sub unknown_error {
      my ($msg) = @_;
      print 'ERROR: ' . $msg . "\n";
      exit $E_UNKNOWN;
  }
  #
  # Read the '--blacklist' option values and return a reference to an
  # array holding the blacklisted interface names.
  #
  # Each option value is either a comma-separated list of names, or the
  # path of an existing regular file whose first line holds such a list.
  # If a blacklist file exists but can't be opened, an UNKNOWN report is
  # queued and a reference to an empty array is returned. An empty
  # blacklist file contributes no names.
  #
  sub get_blacklist {
      my @names = ();

      foreach my $item (@{ $opt{blacklist} }) {
          my $list = $item;

          # An existing regular file: take the list from its first line
          if (-f $item) {
              open my $BL, '<', $item or do {
                  report("Couldn't open blacklist file $item: $!", $E_UNKNOWN);
                  return [];    # callers dereference the result as an array
              };
              $list = <$BL>;
              close $BL;
              next if !defined $list;    # empty file, nothing to blacklist
              chomp $list;
          }

          push @names, split m{,}xms, $list;
      }

      return \@names;
  }
  #
  # Return the first line of a file with its trailing newline removed,
  # or undef if the file is empty. Exits through unknown_error() if the
  # file can't be opened.
  #
  sub _read_first_line {
      my $file = shift;
      open my $FH, '<', $file
        or unknown_error("Couldn't open $file: $!");
      my $line = <$FH>;
      close $FH;
      chomp $line if defined $line;
      return $line;
  }

  #
  # Find bonding interfaces using sysfs.
  #
  # Returns a reference to a hash keyed by bond name. Each value is a
  # hash with the keys:
  #   mode    - "mode=<number> (<name>)" as read from bonding/mode
  #   ad_num  - content of bonding/ad_num_ports (only for mode 4, 802.3ad)
  #   active  - content of bonding/active_slave (undef if the file is empty)
  #   primary - content of bonding/primary (undef if the file is empty)
  #   slave   - hash of slave name => content of the slave's operstate
  #   status  - content of the bond's own operstate
  # An empty hash is returned when the bonding_masters file is missing
  # or lists no bonds. Exits through unknown_error() if any of the
  # per-bond files can't be opened.
  #
  sub find_bonding_sysfs {
      my $sysdir       = '/sys/class/net';
      my $masters_file = "$sysdir/bonding_masters";
      my %bonding      = ();

      return {} if !-f $masters_file;

      # get bonding masters (whitespace-separated names on one line)
      my $masters = _read_first_line($masters_file);
      return {} if !defined $masters;
      my @bonds = grep { length } split m{\s+}xms, $masters;

      foreach my $bond (@bonds) {
          my $bonddir = "$sysdir/$bond";

          # get bonding mode; the line is split into mode name and number
          my $mode_line = _read_first_line("$bonddir/bonding/mode");
          my ($mode, $nr);
          if (defined $mode_line) {
              ($mode, $nr) = split m{\s+}xms, $mode_line;
          }
          $bonding{$bond}{mode} = "mode=$nr ($mode)";

          # get 802.3ad number of ports
          if ($bonding{$bond}{mode} eq 'mode=4 (802.3ad)') {
              $bonding{$bond}{ad_num}
                = _read_first_line("$bonddir/bonding/ad_num_ports");
          }

          # get slaves (whitespace-separated names on one line)
          my @slaves     = ();
          my $slave_line = _read_first_line("$bonddir/bonding/slaves");
          if (defined $slave_line) {
              @slaves = grep { length } split m{\s+}xms, $slave_line;
          }

          # get active and primary slave
          $bonding{$bond}{active}
            = _read_first_line("$bonddir/bonding/active_slave");
          $bonding{$bond}{primary}
            = _read_first_line("$bonddir/bonding/primary");

          # get slave status; the slave link is looked up as
          # slave_<name> first, then as lower_<name>
          foreach my $slave (@slaves) {
              my $statefile = -e "$bonddir/slave_$slave/operstate"
                            ? "$bonddir/slave_$slave/operstate"
                            : "$bonddir/lower_$slave/operstate";
              $bonding{$bond}{slave}{$slave} = _read_first_line($statefile);
          }

          # get bond state
          $bonding{$bond}{status} = _read_first_line("$bonddir/operstate");
      }

      return \%bonding;
  }
  #
  # Find bonding interfaces using procfs (fallback, deprecated).
  #
  # Scans /proc/net/bonding for regular files named bond<N> and parses
  # each one line by line. Returns a reference to a hash keyed by bond
  # name; each value may hold the keys mode, ad_num, status, primary,
  # active and slave (hash of slave name => MII status), depending on
  # which lines the file contains. Returns an empty hash when the
  # directory can't be opened or holds no matching files. Exits through
  # unknown_error() if a bond file can't be opened.
  #
  sub find_bonding_procfs {
      my $procdir = '/proc/net/bonding';
      my %bonding = ();

      # An unreadable or missing directory means there is nothing to find
      opendir my $DIR, $procdir
        or return {};
      my @bonds = grep { m{\A bond\d+ \z}xms && -f "$procdir/$_" } readdir $DIR;
      closedir $DIR;

      return {} if !@bonds;

      foreach my $bond (@bonds) {
          # Name of the slave section being parsed; while it is undef,
          # an "MII Status" line describes the bond itself
          my $slave = undef;

          open my $BOND, '<', "$procdir/$bond"
            or unknown_error("Couldn't open $procdir/$bond: $!");

          while (my $line = <$BOND>) {
              chomp $line;

              # get bonding mode
              if ($line =~ m{\A Bonding \s Mode: \s (.*) \z}xms) {
                  $bonding{$bond}{mode} = $1;
              }
              # get 802.3ad number of ports
              elsif (defined $bonding{$bond}{mode}
                     and $bonding{$bond}{mode} =~ m{802\.3ad}xms
                     and $line =~ m{\A\s+ Number \s of \s ports: \s (\d+) .*\z}xms) {
                  $bonding{$bond}{ad_num} = $1;
              }
              # get slave
              elsif ($line =~ m{\A Slave \s Interface: \s (.*) \z}xms) {
                  $slave = $1;
              }
              # get slave and bonding status
              elsif ($line =~ m{\A MII \s Status: \s (.*) \z}xms) {
                  if (defined $slave) {
                      $bonding{$bond}{slave}{$slave} = $1;
                  }
                  else {
                      $bonding{$bond}{status} = $1;
                  }
              }
              # get primary slave
              elsif ($line =~ m{\A Primary \s Slave: \s (\S+) .* \z}xms) {
                  $bonding{$bond}{primary} = $1;
              }
              # get active slave
              elsif ($line =~ m{\A Currently \s Active \s Slave: \s (.*) \z}xms) {
                  $bonding{$bond}{active} = $1;
              }
          }

          close $BOND;
      }

      return \%bonding;
  }
  #
  # Find bonding interfaces.
  #
  # sysfs is consulted first unless '--disable-sysfs' was given; procfs
  # is used when sysfs is disabled or reported no bonds. Returns the
  # hash reference produced by the lookup. If no bond is found at all,
  # prints a message and exits with the level chosen by '--no-bonding'.
  #
  sub find_bonding {
      my $found = undef;

      if (!$opt{disable_sysfs}) {
          $found = find_bonding_sysfs();
      }

      # procfs: either sysfs is disabled or it came up empty
      if (!defined $found || scalar keys %{$found} == 0) {
          $found = find_bonding_procfs();
      }

      if (scalar keys %{$found} > 0) {
          return $found;
      }

      # nothing found, exit with the configured level
      my $level = $text2exit{$opt{no_bonding}};
      print $reverse_exitcode{$level}
        . ": No bonding interfaces found\n";
      exit $level;
  }
  #
  # Returns 1 if the given interface name is in the blacklist, 0
  # otherwise. An undefined name is never blacklisted.
  #
  sub blacklisted {
      my $if = shift;

      return 0 if !defined $opt{blacklist};
      return 0 if !defined $if;

      # Lexical loop variable; the package variable $b is reserved for sort
      foreach my $entry (@blacklist) {
          return 1 if $if eq $entry;
      }

      return 0;
  }
  376. #=====================================================================
  377. # Main program
  378. #=====================================================================
  %bonding = %{ find_bonding() };

  # Walk through every bond in name order and queue one or more reports
  # for each of them.
  MASTER:
  foreach my $bond (sort keys %bonding) {
      my $info   = $bonding{$bond};
      my $mode   = $info->{mode};
      my $status = $info->{status};

      # A blacklisted master is reported as OK whatever its state
      if (blacklisted($bond)) {
          report(sprintf('Bonding interface %s [%s] is %s, but IGNORED',
                         $bond, $mode, $status),
                 $E_OK);
          next MASTER;
      }

      # A master that isn't up is critical; its slaves are not examined
      if ($status ne 'up') {
          report(sprintf('Bonding interface %s [%s] is %s',
                         $bond, $mode, $status),
                 $E_CRITICAL);
          next MASTER;
      }

      # Check each slave; blacklisted slaves never count as down
      my $slaves_down = 0;
      SLAVE:
      foreach my $name (sort keys %{ $info->{slave} }) {
          my $state = $info->{slave}{$name};

          if (blacklisted($name)) {
              report(sprintf('Slave interface %s [member of %s] is %s, but IGNORED',
                             $name, $bond, $state),
                     $E_OK);
              next SLAVE;
          }

          next SLAVE if $state eq 'up';

          $slaves_down++;
          report(sprintf('Bonding interface %s [%s]: Slave %s is %s',
                         $bond, $mode, $name, $state),
                 $text2exit{$opt{slave_down}});
      }

      # The summary below is only produced when no slave was reported down
      next MASTER if $slaves_down > 0;

      # Marker per slave: '*' for the primary slave, '!' for the active one
      my %marker = map { $_ => q{} } keys %{ $info->{slave} };
      foreach my $name (keys %marker) {
          if (defined $info->{primary} and $info->{primary} eq $name) {
              $marker{$name} .= '*';
          }
          if (defined $info->{active} and $info->{active} eq $name) {
              $marker{$name} .= '!';
          }
      }

      my $slave_count = scalar keys %marker;

      if ($slave_count == 1) {
          my ($only) = keys %marker;
          report(sprintf('Bonding interface %s [%s] has only one slave (%s)',
                         $bond, $mode, $only),
                 $E_WARNING);
      }
      elsif ($slave_count == 0) {    # FIXME: does this ever happen?
          report(sprintf('Bonding interface %s [%s] has zero slaves!',
                         $bond, $mode),
                 $E_CRITICAL);
      }
      elsif (defined $info->{ad_num} and $info->{ad_num} != $slave_count
             and $opt{ignore_num_ad} == 0) {
          report(sprintf('Bonding interface %s [%s]: Number of AD ports (%d) does not equal the number of slaves (%d)',
                         $bond, $mode, $info->{ad_num}, $slave_count),
                 $E_WARNING);
      }
      else {
          my @labels = map { $_ . $marker{$_} } sort keys %marker;
          my $list   = join q{, }, @labels;
          report(sprintf('Interface %s is %s: %s, %d slaves: %s',
                         $bond, $status, $mode, scalar @labels, $list),
                 $E_OK);
      }
  }
  # Number of reports seen per Nagios level
  %nagios_level_count = (
      'OK'       => 0,
      'WARNING'  => 0,
      'CRITICAL' => 0,
      'UNKNOWN'  => 0,
  );

  # Holds only OK messages
  @ok_reports = ();

  # Reset the WARN signal
  $SIG{__WARN__} = 'DEFAULT';

  # Turn any perl warnings that have occurred into UNKNOWN reports
  foreach my $warning (@perl_warnings) {
      chomp @{$warning};
      report("INTERNAL ERROR: @{$warning}", $E_UNKNOWN);
  }

  # Print the alerts, highest exit value first. The comparator must be a
  # three-way numeric comparison; reports with equal levels keep the
  # order in which they were queued.
  $counter = 0;
  ALERT:
  foreach my $entry (sort { $b->[1] <=> $a->[1] } @reports) {
      my ($msg, $level) = @{$entry};
      $nagios_level_count{$reverse_exitcode{$level}}++;

      # OK messages are held back unless '--verbose' is given
      if ($level == $E_OK && !$opt{verbose}) {
          push @ok_reports, $msg;
          next ALERT;
      }

      # Prefix with nagios level if specified with option '--state'
      $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

      # Prefix with one-letter nagios level if specified with option '--short-state'
      $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

      print $linebreak if $counter++ > 0;
      print $msg;
  }

  # Determine our exit code. Later assignments win, so CRITICAL
  # overrides WARNING, which in turn overrides UNKNOWN.
  $exit_code = $E_OK;
  if ($nagios_level_count{UNKNOWN} > 0)  { $exit_code = $E_UNKNOWN; }
  if ($nagios_level_count{WARNING} > 0)  { $exit_code = $E_WARNING; }
  if ($nagios_level_count{CRITICAL} > 0) { $exit_code = $E_CRITICAL; }

  # Print the held-back OK messages, but only if everything is OK
  $counter = 0;
  if ($exit_code == $E_OK && !$opt{verbose}) {
      foreach my $msg (@ok_reports) {
          # Prefix with nagios level if specified with option '--state'
          $msg = "OK: $msg" if $opt{state};

          # Prefix with one-letter nagios level if specified with option '--short-state'
          $msg = "O: $msg" if $opt{shortstate};

          print $linebreak if $counter++ > 0;
          print $msg;
      }
  }
  print "\n";

  # Exit with proper exit code
  exit $exit_code;