module Ganeti.HTools.Program.Hcheck
( main
, options
, arguments
) where
import Control.Monad
import qualified Data.IntMap as IntMap
import Data.List (transpose)
import System.Exit
import Text.Printf (printf)
import Ganeti.HTools.AlgorithmParams (fromCLIOptions)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Cluster.Metrics as Metrics
import qualified Ganeti.HTools.Cluster.Utils as ClusterUtils
import qualified Ganeti.HTools.GlobalN1 as GlobalN1
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Program.Hbal as Hbal
import Ganeti.HTools.RedundancyLevel (redundancy)
import Ganeti.Common
import Ganeti.HTools.CLI
import Ganeti.HTools.ExtLoader
import Ganeti.HTools.Loader
import Ganeti.HTools.Types
import Ganeti.Utils
options :: IO [OptType]
options = do
luxi <- oLuxiSocket
return
[ oDataFile
, oDiskMoves
, oAvoidDiskMoves
, oDynuFile
, oIgnoreDyn
, oEvacMode
, oExInst
, oExTags
, oIAllocSrc
, oInstMoves
, luxi
, oMachineReadable
, oMaxCpu
, oMaxSolLength
, oMinDisk
, oMinGain
, oMinGainLim
, oMinScore
, oIgnoreSoftErrors
, oNoSimulation
, oOfflineNode
, oQuiet
, oRapiMaster
, oSelInst
, oNoCapacityChecks
, oVerbose
]
arguments :: [ArgCompletion]
arguments = []
data Phase = Initial
| Rebalanced
data Level = GroupLvl String
| ClusterLvl
type GroupInfo = (Gdx, (Node.List, Instance.List))
type GroupStats = ((Group.Group, Double, Int), [Int])
htcPrefix :: String
htcPrefix = "HCHECK"
commonData :: Options -> [(String, String)]
commonData opts =
[ ("N1_FAIL", "Nodes not N+1 happy")
, ("CONFLICT_TAGS", "Nodes with conflicting instances")
, ("OFFLINE_PRI", "Instances having the primary node offline")
, ("OFFLINE_SEC", "Instances having a secondary node offline")
]
++ [ ("GN1_FAIL", "Nodes not directly evacuateable") | optCapacity opts ]
groupData :: Options -> [(String, String)]
groupData opts = commonData opts ++ [("SCORE", "Group score")]
++ [("REDUNDANCY", "Group redundancy level")]
clusterData :: Options -> [(String, String)]
clusterData opts = commonData opts ++
[ ("REDUNDANCY", "Cluster redundancy level") ] ++
[ ("NEED_REBALANCE", "Cluster is not healthy") ]
phasePrefix :: Phase -> String
phasePrefix Initial = "INIT"
phasePrefix Rebalanced = "FINAL"
levelPrefix :: Level -> String
levelPrefix GroupLvl {} = "GROUP"
levelPrefix ClusterLvl = "CLUSTER"
keysData :: Options -> Level -> [String]
keysData opts GroupLvl {} = map fst $ groupData opts
keysData opts ClusterLvl = map fst $ clusterData opts
phaseDescr :: Phase -> String
phaseDescr Initial = "initially"
phaseDescr Rebalanced = "after rebalancing"
descrData :: Options -> Level -> [String]
descrData opts GroupLvl {} = map snd $ groupData opts
descrData opts ClusterLvl = map snd $ clusterData opts
phaseLevelDescr :: Phase -> Level -> String
phaseLevelDescr phase (GroupLvl name) =
printf "Statistics for group %s %s\n" name $ phaseDescr phase
phaseLevelDescr phase ClusterLvl =
printf "Cluster statistics %s\n" $ phaseDescr phase
printKeysHTC :: [(String, String)] -> IO ()
printKeysHTC = printKeys htcPrefix
printBool :: Bool
-> Bool
-> String
printBool True True = "1"
printBool True False = "0"
printBool False b = show b
printGroupsMappings :: Group.List -> IO ()
printGroupsMappings gl = do
let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
Group.uuid g)
printpairs = map extract_vals (Container.elems gl)
printKeysHTC printpairs
prepareKey :: Level -> Phase -> String -> String
prepareKey level@ClusterLvl phase suffix =
printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
prepareKey level@(GroupLvl idx) phase suffix =
printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
printStats :: Options
-> Bool
-> Level
-> Phase
-> [String]
-> IO ()
printStats opts True level phase values = do
let keys = map (prepareKey level phase) (keysData opts level)
printKeysHTC $ zip keys values
printStats opts False level phase values = do
let prefix = phaseLevelDescr phase level
descr = descrData opts level
unless (optVerbose opts < 1) $ do
putStrLn ""
putStr prefix
mapM_ (uncurry (printf " %s: %s\n")) (zip descr values)
extractGroupData :: Bool -> Group.Group -> String
extractGroupData True grp = show $ Group.idx grp
extractGroupData False grp = Group.name grp
prepareGroupValues :: [Int] -> Double -> Int -> [String]
prepareGroupValues stats score redundancyLevel =
map show stats ++ [printf "%.8f" score] ++ [show redundancyLevel]
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
prepareClusterValues machineread stats bstats =
map show stats ++ map (printBool machineread) bstats
printGroupStats :: Options -> Bool -> Phase -> GroupStats -> IO ()
printGroupStats opts machineread phase
((grp, score, redundancyLevel), stats) = do
let values = prepareGroupValues stats score redundancyLevel
extradata = extractGroupData machineread grp
printStats opts machineread (GroupLvl extradata) phase values
printClusterStats :: Options -> Bool -> Phase -> [Int] -> Bool -> Int -> IO ()
printClusterStats opts machineread phase stats needhbal gRed = do
let values = prepareClusterValues machineread (stats ++ [gRed]) [needhbal]
printStats opts machineread ClusterLvl phase values
clusterNeedsRebalance :: [Int] -> Bool
clusterNeedsRebalance stats = sum stats > 0
perGroupChecks :: Options -> Group.List -> GroupInfo -> GroupStats
perGroupChecks opts gl (gidx, (nl, il)) =
let grp = Container.find gidx gl
offnl = filter Node.offline (Container.elems nl)
n1violated = length . fst $ Cluster.computeBadItems nl il
gn1fail = length . filter (not . GlobalN1.canEvacuateNode (nl, il))
$ IntMap.elems nl
conflicttags = length $ filter (>0)
(map Node.conflictingPrimaries (Container.elems nl))
offline_pri = sum . map length $ map Node.pList offnl
offline_sec = length $ map Node.sList offnl
score = Metrics.compCV nl
redundancyLvl = redundancy (fromCLIOptions opts) nl il
groupstats = [ n1violated
, conflicttags
, offline_pri
, offline_sec
]
++ [ gn1fail | optCapacity opts ]
in ((grp, score, redundancyLvl), groupstats)
executeSimulation :: Options -> Cluster.Table -> Double
-> Gdx -> Node.List -> Instance.List
-> IO GroupInfo
executeSimulation opts ini_tbl min_cv gidx nl il = do
let imlen = maximum . map (length . Instance.alias) $ Container.elems il
nmlen = maximum . map (length . Node.alias) $ Container.elems nl
(fin_tbl, _) <- Hbal.iterateDepth False (fromCLIOptions opts) ini_tbl
(optMaxLength opts)
nmlen imlen [] min_cv
let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
return (gidx, (fin_nl, fin_il))
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
let ini_cv = Metrics.compCV nl
ini_tbl = Cluster.Table nl il ini_cv []
min_cv = optMinScore opts + Metrics.optimalCVScore nl
if ini_cv < min_cv
then return (gidx, (nl, il))
else executeSimulation opts ini_tbl min_cv gidx nl il
maybeSimulateRebalance :: Bool
-> Options
-> [GroupInfo]
-> IO [GroupInfo]
maybeSimulateRebalance True opts cluster =
mapM (maybeSimulateGroupRebalance opts) cluster
maybeSimulateRebalance False _ cluster = return cluster
printFinalHTC :: Bool -> IO ()
printFinalHTC = printFinal htcPrefix
main :: Options -> [String] -> IO ()
main opts args = do
unless (null args) $ exitErr "This program doesn't take any arguments."
let verbose = optVerbose opts
machineread = optMachineReadable opts
nosimulation = optNoSimulation opts
(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
nlf <- setNodeStatus opts fixed_nl
let splitcluster = ClusterUtils.splitCluster nlf ilf
when machineread $ printGroupsMappings gl
let groupsstats = map (perGroupChecks opts gl) splitcluster
clusterstats = map sum . transpose . map snd $ groupsstats
globalRedundancy = minimum $ map (\((_, _, r), _) -> r) groupsstats
needrebalance = clusterNeedsRebalance clusterstats
unless (verbose < 1 || machineread) .
putStrLn $ if nosimulation
then "Running in no-simulation mode."
else if needrebalance
then "Cluster needs rebalancing."
else "No need to rebalance cluster, no problems found."
mapM_ (printGroupStats opts machineread Initial) groupsstats
printClusterStats opts machineread Initial clusterstats needrebalance
globalRedundancy
let exitOK = nosimulation || not needrebalance
simulate = not nosimulation && needrebalance
rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
when (simulate || machineread) $ do
let newgroupstats = map (perGroupChecks opts gl) rebalancedcluster
newclusterstats = map sum . transpose . map snd $ newgroupstats
newGlobalRedundancy = minimum $ map (\((_, _, r), _) -> r)
newgroupstats
newneedrebalance = clusterNeedsRebalance clusterstats
mapM_ (printGroupStats opts machineread Rebalanced) newgroupstats
printClusterStats opts machineread Rebalanced newclusterstats
newneedrebalance newGlobalRedundancy
printFinalHTC machineread
unless exitOK . exitWith $ ExitFailure 1