source: https://github.com/lf-edge/eve/tree/master/pkg/pillar/vendor/github.com/andrewd-zededa/go-libzfs
257 lines
6.4 KiB
Go
257 lines
6.4 KiB
Go
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"code.cloudfoundry.org/bytefmt"
|
|
zfs "code.thetadev.de/ThetaDev/zfsmon/go-libzfs"
|
|
"github.com/davidscholberg/go-durationfmt"
|
|
"gopkg.in/yaml.v2"
|
|
)
|
|
|
|
// Defaults and layouts shared by the rest of the program.
const (
	// LAST_SCAN_FILE is the default name of the YAML file that holds the
	// pool state recorded by the previous run (default of the -f flag).
	LAST_SCAN_FILE = "last_scan.yaml"

	// LOG_FILE is the default name of the log file (default of the -log flag).
	LOG_FILE = "zfsmon.log"

	// POOL_SPACE_WARN is the default fill level, in percent, above which a
	// disk space warning is issued (default of the -space flag).
	POOL_SPACE_WARN = 90

	// DATE_FORMAT is the day.month.year layout stamped into the header
	// comment of the last-scan file.
	DATE_FORMAT = "02.01.2006"

	// TIME_FORMAT is the layout used when reporting when a scan finished.
	TIME_FORMAT = "02.01.2006 15:04:05"
)
|
|
|
|
// ScanStats is the condensed record of a pool's most recent scan that gets
// persisted in the last-scan file and compared between runs.
type ScanStats struct {
	// Func names the kind of scan: "Scrub", "Resilver" or "None".
	Func string
	// State is the scan state value copied unchanged from libzfs.
	State uint64
	// Errors is the error count reported for the scan.
	Errors uint64
	// Duration is the scan's end time minus its start time; checkPool
	// interprets it as a number of seconds.
	Duration uint64
	// LastFinished is the scan's end time; checkPool interprets it as a
	// Unix timestamp in seconds.
	LastFinished uint64
}
|
|
|
|
// DeviceState captures the health of a single vdev as stored in the
// last-scan file.
type DeviceState struct {
	// State is the textual vdev state; numDeviceErrors counts every value
	// other than "ONLINE" as one error.
	State string

	// Error counters copied from the vdev statistics.
	ReadErrors     uint64 `yaml:"read_errors"`
	WriteErrors    uint64 `yaml:"write_errors"`
	ChecksumErrors uint64 `yaml:"checksum_errors"`
}
|
|
|
|
type PoolState struct {
|
|
Devices map[string]DeviceState
|
|
Scan *ScanStats
|
|
TotalSpace uint64 `yaml:"total_space"`
|
|
FreeSpace uint64 `yaml:"free_space"`
|
|
FillLevel float64 `yaml:"fill_level"`
|
|
}
|
|
|
|
func getStateOfDevices(tree zfs.VDevTree, data map[string]DeviceState) {
|
|
data[tree.Name] = DeviceState{
|
|
State: tree.Stat.State.String(),
|
|
ReadErrors: tree.Stat.ReadErrors,
|
|
WriteErrors: tree.Stat.WriteErrors,
|
|
ChecksumErrors: tree.Stat.ChecksumErrors,
|
|
}
|
|
|
|
if tree.Devices != nil {
|
|
for _, subtree := range tree.Devices {
|
|
getStateOfDevices(subtree, data)
|
|
}
|
|
}
|
|
}
|
|
|
|
func getScanFunc(scanFunc uint64) string {
|
|
switch scanFunc {
|
|
case zfs.PoolScanScrub:
|
|
return "Scrub"
|
|
case zfs.PoolScanResilver:
|
|
return "Resilver"
|
|
default:
|
|
return "None"
|
|
}
|
|
}
|
|
|
|
func getScanStats(scanStat zfs.PoolScanStat) *ScanStats {
|
|
return &ScanStats{
|
|
Func: getScanFunc(scanStat.Func),
|
|
State: scanStat.State,
|
|
Errors: scanStat.Errors,
|
|
Duration: scanStat.EndTime - scanStat.StartTime,
|
|
LastFinished: scanStat.EndTime,
|
|
}
|
|
}
|
|
|
|
func getPoolState(poolName string) (*PoolState, error) {
|
|
pool, err := zfs.PoolOpen(poolName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tree, err := pool.VDevTree()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
deviceData := map[string]DeviceState{}
|
|
getStateOfDevices(tree, deviceData)
|
|
|
|
scanStats := getScanStats(tree.ScanStat)
|
|
level := float64(tree.Stat.Alloc) / float64(tree.Stat.Space) * 100.0
|
|
|
|
return &PoolState{
|
|
Devices: deviceData,
|
|
Scan: scanStats,
|
|
TotalSpace: tree.Stat.Space,
|
|
FreeSpace: tree.Stat.Space - tree.Stat.Alloc,
|
|
FillLevel: level,
|
|
}, nil
|
|
}
|
|
|
|
func (s *PoolState) numDeviceErrors() uint64 {
|
|
var n uint64
|
|
|
|
for _, dev := range s.Devices {
|
|
n += (dev.ReadErrors + dev.WriteErrors + dev.ChecksumErrors)
|
|
|
|
if dev.State != "ONLINE" {
|
|
n++
|
|
}
|
|
}
|
|
return n
|
|
}
|
|
|
|
func readLastScanFile(lastScanFile string) *PoolState {
|
|
historyFile, err := os.Open(lastScanFile)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
defer historyFile.Close()
|
|
|
|
byteValue, _ := ioutil.ReadAll(historyFile)
|
|
var poolState *PoolState
|
|
_ = yaml.Unmarshal(byteValue, &poolState)
|
|
|
|
return poolState
|
|
}
|
|
|
|
func (s *PoolState) writeLastScanFile(lastScanFile string) {
|
|
data, _ := yaml.Marshal(s)
|
|
data = append([]byte("# Scan history from "+time.Now().Format(DATE_FORMAT)+"\n\n"), data...)
|
|
|
|
err := ioutil.WriteFile(lastScanFile, data, 0644)
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
}
|
|
|
|
func checkPool(poolName string, lastScanFile string, spaceWarn float64) []string {
|
|
poolState, err := getPoolState(poolName)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("Could not open %s. Error: %s", poolName, err.Error())
|
|
log.Println(msg)
|
|
return []string{msg}
|
|
}
|
|
|
|
lastState := readLastScanFile(lastScanFile)
|
|
msgs := []string{}
|
|
|
|
if lastState != nil {
|
|
// Compare scan data to last state
|
|
if poolState.numDeviceErrors() > lastState.numDeviceErrors() {
|
|
log.Println("Device errors occurred")
|
|
|
|
overviewBts, _ := yaml.Marshal(poolState.Devices)
|
|
msg := fmt.Sprintf(`Oh no! Looks like %s has encountered device errors.
|
|
Here is an overview of all devices and their statuses:
|
|
|
|
%s`, poolName, string(overviewBts))
|
|
|
|
msgs = append(msgs, msg)
|
|
}
|
|
|
|
// Check if new scan has completed
|
|
if poolState.Scan.LastFinished > lastState.Scan.LastFinished {
|
|
timeStr := time.Unix(int64(poolState.Scan.LastFinished), 0).Format(TIME_FORMAT)
|
|
durationStr, _ := durationfmt.Format(time.Second*time.Duration(poolState.Scan.Duration), "%0h:%0m:%0s")
|
|
|
|
log.Printf("%s on %s has finished at %s, took %s, %d errors",
|
|
poolState.Scan.Func, poolName, timeStr, durationStr, poolState.Scan.Errors)
|
|
|
|
msg := fmt.Sprintf("%s on %s has finished at %s\nDuration: %s\nErrors: %d",
|
|
poolState.Scan.Func, poolName, timeStr, durationStr, poolState.Scan.Errors)
|
|
msgs = append(msgs, msg)
|
|
}
|
|
|
|
// Check if fill level has reached the threshold
|
|
if lastState.FillLevel <= spaceWarn && poolState.FillLevel > spaceWarn {
|
|
byteStr := bytefmt.ByteSize(lastState.FreeSpace) + "B"
|
|
msg := fmt.Sprintf("Disk space warning for %s - only %s left", poolName, byteStr)
|
|
|
|
log.Println(msg)
|
|
msgs = append(msgs, msg)
|
|
}
|
|
} else {
|
|
log.Println("This is your first scan. Creating last_scan file.")
|
|
}
|
|
|
|
poolState.writeLastScanFile(lastScanFile)
|
|
return msgs
|
|
}
|
|
|
|
// sendMsgs mails every message in msgs to msgUri, one mail per message, by
// running the sendmail binary with msgUri as its only argument and piping the
// mail text to its standard input.
//
// The subject carries the local hostname ("unknown" if it cannot be
// determined). Failures are logged and do not stop the remaining messages
// from being sent.
func sendMsgs(sendmail string, msgUri string, msgs []string) {
	// An error here leaves hostname empty, which the fallback below covers.
	hostname, _ := os.Hostname()
	if hostname == "" {
		hostname = "unknown"
	}

	// The empty line at the end of the template is required: it separates
	// the header section from the body. Without it the first line of the
	// message directly follows the Content-Type header line.
	mailHeader := fmt.Sprintf(`Subject:[%s] zfsmon status update
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8

`, hostname)

	for _, msg := range msgs {
		cmd := exec.Command(sendmail, msgUri)
		cmd.Stdin = strings.NewReader(mailHeader + msg)

		if err := cmd.Run(); err != nil {
			log.Printf("could not send mail to %s: %v", msgUri, err)
			continue
		}
		log.Printf("sent mail to %s", msgUri)
	}
}
|
|
|
|
func main() {
|
|
workDir := ""
|
|
if os.Getuid() == 0 {
|
|
workDir = "/etc/zfsmon"
|
|
}
|
|
|
|
// Cmdline args
|
|
poolName := flag.String("pool", "tank", "ZFS pool name")
|
|
sendmail := flag.String("sendmail", "sendmail", "Sendmail path")
|
|
mailAddress := flag.String("m", "", "E-Mail address")
|
|
lastScanFile := flag.String("f", filepath.Join(workDir, LAST_SCAN_FILE), "File to store last scan results")
|
|
logFile := flag.String("log", filepath.Join(workDir, LOG_FILE), "Log file")
|
|
spaceWarn := flag.Int("space", POOL_SPACE_WARN, "Pool fill level in percent to warn at")
|
|
flag.Parse()
|
|
|
|
// Log file
|
|
if *logFile != "" {
|
|
f, err := os.OpenFile(*logFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
|
if err != nil {
|
|
log.Fatalf("error opening logfile: %v", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
log.SetOutput(f)
|
|
}
|
|
|
|
msgs := checkPool(*poolName, *lastScanFile, float64(*spaceWarn))
|
|
if *mailAddress != "" {
|
|
sendMsgs(*sendmail, *mailAddress, msgs)
|
|
} else {
|
|
log.Println("mail address not set, messaging disabled")
|
|
}
|
|
}
|