Monitor type for wear level

This commit is contained in:
Jamie Cameron
2014-10-11 11:16:06 -07:00
parent 7e22ae2ad9
commit ee87e6e555
4 changed files with 101 additions and 49 deletions

View File

@@ -19,3 +19,5 @@ Fixed the collapsible section showing raw SMART status output.
Use disk IDs for SMART monitoring instead of device names, which may change.
---- Changes since 1.520 ----
Fix support for 3ware hardware RAID arrays with non-contiguous disks.
---- Changes since 1.710 ----
Added a new status monitor type for alerting on the SSD wear level.

View File

@@ -26,6 +26,7 @@ index_serial=Serial number
index_capacity=Capacity index_capacity=Capacity
monitor_type=SMART Drive Check monitor_type=SMART Drive Check
monitor_type2=SSD Wearout
monitor_drive=Drive to check monitor_drive=Drive to check
monitor_errors=Check error log too? monitor_errors=Check error log too?
monitor_errorsinc=Yes, but only alert if error count increases monitor_errorsinc=Yes, but only alert if error count increases
@@ -35,6 +36,10 @@ monitor_nosuch=No such drive
monitor_nosmart=No SMART support monitor_nosmart=No SMART support
monitor_errorsfound=Reported $1 errors monitor_errorsfound=Reported $1 errors
monitor_errorsinced=Errors increased from $2 to $1 monitor_errorsinced=Errors increased from $2 to $1
monitor_wearlevel=Fail if wear level falls below
monitor_ewearlevel=Invalid wear level percentage
monitor_nowearout=Drive does not report an SSD wearout indicator
monitor_wornout=Wear level has fallen to $1
short_title=Short Self Test short_title=Short Self Test
short_doing=Starting short self test of $1 .. short_doing=Starting short self test of $1 ..

View File

@@ -350,7 +350,7 @@ if ($config{'attribs'}) {
elsif (/^\s*(\d+)\s+(\S+)\s+(0x\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) { elsif (/^\s*(\d+)\s+(\S+)\s+(0x\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
# A new-style vendor attribute # A new-style vendor attribute
$doneknown = 1; $doneknown = 1;
push(@attribs, [ $2, $10 ]); push(@attribs, [ $2, $10, $4 ]);
$attribs[$#attribs]->[0] =~ s/_/ /g; $attribs[$#attribs]->[0] =~ s/_/ /g;
} }
elsif (/^(\S.*\S):\s+\(\s*(\S+)\)\s*(.*)/ && !$doneknown) { elsif (/^(\S.*\S):\s+\(\s*(\S+)\)\s*(.*)/ && !$doneknown) {

View File

@@ -6,7 +6,8 @@ do 'smart-status-lib.pl';
sub status_monitor_list sub status_monitor_list
{ {
if (&has_command($config{'smartctl'})) { if (&has_command($config{'smartctl'})) {
return ( [ "smart", $text{'monitor_type'} ] ); return ( [ "smart", $text{'monitor_type'} ],
[ "wearout", $text{'monitor_type2'} ] );
} }
else { else {
return ( ); return ( );
@@ -17,10 +18,12 @@ else {
# Check the drive status # Check the drive status
sub status_monitor_status sub status_monitor_status
{ {
local ($type, $mon, $ui) = @_;
local @drives = &list_smart_disks_partitions(); local @drives = &list_smart_disks_partitions();
local ($d) = grep { ($_->{'device'} eq $_[1]->{'drive'} || local ($d) = grep { ($_->{'device'} eq $mon->{'drive'} ||
$_->{'id'} eq $_[1]->{'drive'}) && $_->{'id'} eq $mon->{'drive'}) &&
$_->{'subdisk'} eq $_[1]->{'subdisk'} } @drives; $_->{'subdisk'} eq $mon->{'subdisk'} } @drives;
if (!$d) { if (!$d) {
# Not in list?! # Not in list?!
return { 'up' => -1, return { 'up' => -1,
@@ -28,41 +31,68 @@ if (!$d) {
} }
local $st = &get_drive_status($d->{'device'}, $d); local $st = &get_drive_status($d->{'device'}, $d);
# Record number of errors since last time
local %errors;
local $errors_file = "$module_config_directory/last-errors";
&read_file($errors_file, \%errors);
local %lasterrors = %errors;
$errors{$_[1]->{'drive'}} = $st->{'errors'};
&write_file($errors_file, \%errors);
if (!$st->{'support'} || !$st->{'enabled'}) { if (!$st->{'support'} || !$st->{'enabled'}) {
# SMART not enabled on device # SMART not enabled on device
return { 'up' => -1, return { 'up' => -1,
'desc' => $text{'monitor_nosmart'} }; 'desc' => $text{'monitor_nosmart'} };
} }
elsif (!$st->{'check'}) {
# Check failed if ($type eq "wearout") {
return { 'up' => 0 }; # Check SSD wear level
} local $wo;
elsif ($st->{'errors'} && $_[1]->{'errors'} == 1) { foreach my $a (@{$st->{'attribs'}}) {
# Errors found, and failing on any errors if ($a->[0] eq "Media Wearout Indicator") {
return { 'up' => 0, $wo = $a;
'value' => $st->{'errors'}, last;
'desc' => &text('monitor_errorsfound', $st->{'errors'}) }; }
} }
elsif ($st->{'errors'} && $_[1]->{'errors'} == 2 && if (!$wo) {
$st->{'errors'} > $lasterrors{$_[1]->{'drive'}}) { return { 'up' => -1,
# Errors found and have increased 'desc' => $text{'monitor_nowearout'} };
return { 'up' => 0, }
'value' => $st->{'errors'}, if ($wo->[2] < $mon->{'wearlevel'}) {
'desc' => &text('monitor_errorsinced', $st->{'errors'}, return { 'up' => 0,
$lasterrors{$_[1]->{'drive'}}) }; 'desc' => &text('monitor_wornout', $wo->[2]),
'value' => $wo->[2] };
}
else {
return { 'up' => 1,
'value' => $wo->[2] };
}
} }
else { else {
# All OK! # Record number of errors since last time
return { 'up' => 1, local %errors;
'value' => $st->{'errors'} }; local $errors_file = "$module_config_directory/last-errors";
&read_file($errors_file, \%errors);
local %lasterrors = %errors;
$errors{$mon->{'drive'}} = $st->{'errors'};
&write_file($errors_file, \%errors);
# Check for errors
if (!$st->{'check'}) {
# Check failed
return { 'up' => 0 };
}
elsif ($st->{'errors'} && $mon->{'errors'} == 1) {
# Errors found, and failing on any errors
return { 'up' => 0,
'value' => $st->{'errors'},
'desc' => &text('monitor_errorsfound', $st->{'errors'}) };
}
elsif ($st->{'errors'} && $mon->{'errors'} == 2 &&
$st->{'errors'} > $lasterrors{$mon->{'drive'}}) {
# Errors found and have increased
return { 'up' => 0,
'value' => $st->{'errors'},
'desc' => &text('monitor_errorsinced', $st->{'errors'},
$lasterrors{$mon->{'drive'}}) };
}
else {
# All OK!
return { 'up' => 1,
'value' => $st->{'errors'} };
}
} }
} }
@@ -70,15 +100,16 @@ else {
# Return form for selecting a drive # Return form for selecting a drive
sub status_monitor_dialog sub status_monitor_dialog
{ {
local ($type, $mon) = @_;
local $rv; local $rv;
local @drives = &list_smart_disks_partitions(); local @drives = &list_smart_disks_partitions();
local ($inlist) = grep { ($_->{'device'} eq $_[1]->{'drive'} || local ($inlist) = grep { ($_->{'device'} eq $mon->{'drive'} ||
$_->{'id'} eq $_[1]->{'drive'}) && $_->{'id'} eq $mon->{'drive'}) &&
$_->{'subdisk'} eq $_[1]->{'subdisk'} } @drives; $_->{'subdisk'} eq $mon->{'subdisk'} } @drives;
$inlist = 1 if (!$_[1]->{'drive'}); $inlist = 1 if (!$mon->{'drive'});
$rv .= &ui_table_row($text{'monitor_drive'}, $rv .= &ui_table_row($text{'monitor_drive'},
&ui_select("drive", &ui_select("drive",
!$_[1]->{'drive'} ? $drives[0]->{'device'} : !$mon->{'drive'} ? $drives[0]->{'device'} :
$inlist ? ($inlist->{'id'} || $inlist->{'device'}).':'. $inlist ? ($inlist->{'id'} || $inlist->{'device'}).':'.
$inlist->{'subdisk'} : $inlist->{'subdisk'} :
undef, undef,
@@ -86,12 +117,18 @@ $rv .= &ui_table_row($text{'monitor_drive'},
$_->{'desc'}.($_->{'model'} ? $_->{'desc'}.($_->{'model'} ?
" ($_->{'model'})" : "") ] } @drives), " ($_->{'model'})" : "") ] } @drives),
[ "", $text{'monitor_other'} ] ]). [ "", $text{'monitor_other'} ] ]).
&ui_textbox("other", $inlist ? "" : $_[1]->{'drive'}, 15), 3); &ui_textbox("other", $inlist ? "" : $mon->{'drive'}, 15), 3);
$rv .= &ui_table_row($text{'monitor_errors'}, if ($type eq "wearout") {
&ui_radio("errors", $_[1]->{'errors'} || 0, $rv .= &ui_table_row($text{'monitor_wearlevel'},
[ [ 1, $text{'yes'} ], [ 0, $text{'no'} ], &ui_textbox("wearlevel", $mon->{'wearlevel'} || 10, 5)."%");
[ 2, $text{'monitor_errorsinc'} ] ])); }
else {
$rv .= &ui_table_row($text{'monitor_errors'},
&ui_radio("errors", $mon->{'errors'} || 0,
[ [ 1, $text{'yes'} ], [ 0, $text{'no'} ],
[ 2, $text{'monitor_errorsinc'} ] ]));
}
return $rv; return $rv;
} }
@@ -99,15 +136,23 @@ return $rv;
# Parse form for selecting a rule # Parse form for selecting a rule
sub status_monitor_parse sub status_monitor_parse
{ {
if ($_[2]->{'drive'}) { local ($type, $mon, $in) = @_;
($_[1]->{'drive'}, $_[1]->{'subdisk'}) = split(/:/, $_[2]->{'drive'}); if ($in->{'drive'}) {
($mon->{'drive'}, $mon->{'subdisk'}) = split(/:/, $in->{'drive'});
} }
else { else {
$_[1]->{'drive'} = $_[2]->{'other'}; $mon->{'drive'} = $in->{'other'};
$_[1]->{'subdisk'} = undef; $mon->{'subdisk'} = undef;
$_[1]->{'drive'} =~ /^\S+$/ || &error($text{'monitor_edrive'}); $mon->{'drive'} =~ /^\S+$/ || &error($text{'monitor_edrive'});
}
if ($type eq "wearout") {
$in->{'wearlevel'} =~ /^\d+(\.\d+)?$/ ||
&error($text{'monitor_ewearlevel'});
$mon->{'wearlevel'} = $in->{'wearlevel'};
}
else {
$mon->{'errors'} = $in->{'errors'};
} }
$_[1]->{'errors'} = $_[2]->{'errors'};
} }
1; 1;