module Ganeti.HTools.Program.Hbal (main) where
import Control.Concurrent (threadDelay)
import Control.Exception (bracket)
import Control.Monad
import Data.List
import Data.Maybe (isJust, isNothing, fromJust)
import Data.IORef
import System (exitWith, ExitCode(..))
import System.IO
import System.Posix.Process
import System.Posix.Signals
import qualified System
import Text.Printf (printf, hPrintf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
import Ganeti.HTools.CLI
import Ganeti.HTools.ExtLoader
import Ganeti.HTools.Utils
import Ganeti.HTools.Types
import Ganeti.HTools.Loader
import qualified Ganeti.Luxi as L
import Ganeti.Jobs
options :: [OptType]
options =
[ oPrintNodes
, oPrintInsts
, oPrintCommands
, oOneline
, oDataFile
, oEvacMode
, oRapiMaster
, oLuxiSocket
, oExecJobs
, oGroup
, oMaxSolLength
, oVerbose
, oQuiet
, oOfflineNode
, oMinScore
, oMaxCpu
, oMinDisk
, oMinGain
, oMinGainLim
, oDiskMoves
, oSelInst
, oInstMoves
, oDynuFile
, oExTags
, oExInst
, oSaveCluster
, oShowVer
, oShowHelp
]
iterateDepth :: Cluster.Table
-> Int
-> Bool
-> Bool
-> Int
-> Int
-> [MoveJob]
-> Bool
-> Score
-> Score
-> Score
-> Bool
-> IO (Cluster.Table, [MoveJob])
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
cmd_strs oneline min_score mg_limit min_gain evac_mode =
let Cluster.Table ini_nl ini_il _ _ = ini_tbl
allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
m_fin_tbl = if allowed_next
then Cluster.tryBalance ini_tbl disk_moves inst_moves
evac_mode mg_limit min_gain
else Nothing
in
case m_fin_tbl of
Just fin_tbl ->
do
let
(Cluster.Table _ _ _ fin_plc) = fin_tbl
fin_plc_len = length fin_plc
cur_plc@(idx, _, _, move, _) = head fin_plc
(sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
nmlen imlen cur_plc fin_plc_len
afn = Cluster.involvedNodes ini_il cur_plc
upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
unless oneline $ do
putStrLn sol_line
hFlush stdout
iterateDepth fin_tbl max_rounds disk_moves inst_moves
nmlen imlen upd_cmd_strs oneline min_score
mg_limit min_gain evac_mode
Nothing -> return (ini_tbl, cmd_strs)
formatOneline :: Double -> Int -> Double -> String
formatOneline ini_cv plc_len fin_cv =
printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
(if fin_cv == 0 then 1 else ini_cv / fin_cv)
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
waitForJobs client jids = do
sts <- L.queryJobsStatus client jids
case sts of
Bad x -> return $ Bad x
Ok s -> if any (<= JOB_STATUS_RUNNING) s
then do
threadDelay (1000000 * 15)
waitForJobs client jids
else return $ Ok s
checkJobsStatus :: [JobStatus] -> Bool
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
execWrapper :: String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execWrapper _ _ _ _ [] = return True
execWrapper master nl il cref alljss = do
cancel <- readIORef cref
(if cancel > 0
then do
hPrintf stderr "Exiting early due to user request, %d\
\ jobset(s) remaining." (length alljss)::IO ()
return False
else execJobSet master nl il cref alljss)
execJobSet :: String -> Node.List
-> Instance.List -> IORef Int -> [JobSet] -> IO Bool
execJobSet _ _ _ _ [] = return True
execJobSet master nl il cref (js:jss) = do
let jobs = map (\(_, idx, move, _) ->
Cluster.iMoveToJob nl il idx move) js
let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
putStrLn $ "Executing jobset for instances " ++ commaJoin descr
jrs <- bracket (L.getClient master) L.closeClient
(\client -> do
jids <- L.submitManyJobs client jobs
case jids of
Bad x -> return $ Bad x
Ok x -> do
putStrLn $ "Got job IDs " ++ commaJoin x
waitForJobs client x
)
(case jrs of
Bad x -> do
hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
return False
Ok x -> if checkJobsStatus x
then execWrapper master nl il cref jss
else do
hPutStrLn stderr $ "Not all jobs completed successfully: " ++
show x
hPutStrLn stderr "Aborting."
return False)
hangleSigInt :: IORef Int -> IO ()
hangleSigInt cref = do
writeIORef cref 1
putStrLn ("Cancel request registered, will exit at" ++
" the end of the current job set...")
hangleSigTerm :: IORef Int -> IO ()
hangleSigTerm cref = do
writeIORef cref 2
putStrLn "Double cancel request, exiting now..."
exitImmediately $ ExitFailure 2
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
runJobSet master fin_nl il cmd_jobs = do
cref <- newIORef 0
mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
[(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
execWrapper master fin_nl il cref cmd_jobs
main :: IO ()
main = do
cmd_args <- System.getArgs
(opts, args) <- parseOpts cmd_args "hbal" options
unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments."
exitWith $ ExitFailure 1
let oneline = optOneline opts
verbose = optVerbose opts
shownodes = optShowNodes opts
showinsts = optShowInsts opts
ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
let offline_passed = optOffline opts
all_nodes = Container.elems fixed_nl
offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
offline_wrong = filter (not . goodLookupResult) offline_lkp
offline_names = map lrContent offline_lkp
offline_indices = map Node.idx $
filter (\n -> Node.name n `elem` offline_names)
all_nodes
m_cpu = optMcpu opts
m_dsk = optMdsk opts
csf = commonSuffix fixed_nl ilf
when (not (null offline_wrong)) $ do
hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
(commaJoin (map lrContent offline_wrong)) :: IO ()
exitWith $ ExitFailure 1
let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
then Node.setOffline n True
else n) fixed_nl
nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
nm
when (not oneline && verbose > 1) $
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
when (Container.size ilf == 0) $ do
(if oneline then putStrLn $ formatOneline 0 0 0
else printf "Cluster is empty, exiting.\n")
exitWith ExitSuccess
let split_insts = Cluster.findSplitInstances nlf ilf
unless (null split_insts) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
let ngroups = Cluster.splitCluster nlf ilf
when (length ngroups > 1 && isNothing (optGroup opts)) $ do
hPutStrLn stderr "Found multiple node groups:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
flip Container.find gl . fst) ngroups
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
unless oneline $ printf "Loaded %d nodes, %d instances\n"
(Container.size nlf)
(Container.size ilf)
(gname, (nl, il)) <- case optGroup opts of
Nothing -> do
let (gidx, cdata) = head ngroups
grp = Container.find gidx gl
return (Group.name grp, cdata)
Just g -> case Container.findByName gl g of
Nothing -> do
hPutStrLn stderr $ "Node group " ++ g ++
" not found. Node group list is:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
hPutStrLn stderr "Aborting."
exitWith $ ExitFailure 1
Just grp ->
case lookup (Group.idx grp) ngroups of
Nothing -> do
return (Group.name grp, (Container.empty, Container.empty))
Just cdata -> return (Group.name grp, cdata)
unless oneline $ printf "Group size %d nodes, %d instances\n"
(Container.size nl)
(Container.size il)
putStrLn $ "Selected node group: " ++ gname
when (length csf > 0 && not oneline && verbose > 1) $
printf "Note: Stripping common suffix of '%s' from names\n" csf
let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
unless (oneline || verbose == 0) $ printf
"Initial check done: %d bad nodes, %d bad instances.\n"
(length bad_nodes) (length bad_instances)
when (length bad_nodes > 0) $
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
\that the cluster will end N+1 happy."
maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
let ini_cv = Cluster.compCV nl
ini_tbl = Cluster.Table nl il ini_cv []
min_cv = optMinScore opts
when (ini_cv < min_cv) $ do
(if oneline then
putStrLn $ formatOneline ini_cv 0 ini_cv
else printf "Cluster is already well balanced (initial score %.6g,\n\
\minimum score %.6g).\nNothing to do, exiting\n"
ini_cv min_cv)
exitWith ExitSuccess
unless oneline (if verbose > 2 then
printf "Initial coefficients: overall %.8f, %s\n"
ini_cv (Cluster.printStats nl)
else
printf "Initial score: %.8f\n" ini_cv)
unless oneline $ putStrLn "Trying to minimize the CV..."
let imlen = maximum . map (length . Instance.alias) $ Container.elems il
nmlen = maximum . map (length . Node.alias) $ Container.elems nl
(fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
(optDiskMoves opts)
(optInstMoves opts)
nmlen imlen [] oneline min_cv
(optMinGainLim opts) (optMinGain opts)
(optEvacMode opts)
let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
ord_plc = reverse fin_plc
sol_msg = case () of
_ | null fin_plc -> printf "No solution found\n"
| verbose > 2 ->
printf "Final coefficients: overall %.8f, %s\n"
fin_cv (Cluster.printStats fin_nl)
| otherwise ->
printf "Cluster score improved from %.8f to %.8f\n"
ini_cv fin_cv ::String
unless oneline $ putStr sol_msg
unless (oneline || verbose == 0) $
printf "Solution length=%d\n" (length ord_plc)
let cmd_jobs = Cluster.splitJobs cmd_strs
cmd_data = Cluster.formatCmds cmd_jobs
when (isJust $ optShowCmds opts) $
do
let out_path = fromJust $ optShowCmds opts
putStrLn ""
(if out_path == "-" then
printf "Commands to run to reach the above solution:\n%s"
(unlines . map (" " ++) .
filter (/= " check") .
lines $ cmd_data)
else do
writeFile out_path (shTemplate ++ cmd_data)
printf "The commands have been written to file '%s'\n" out_path)
maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
(ClusterData gl fin_nl fin_il ctags)
maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
when (verbose > 3) $ do
let ini_cs = Cluster.totalResources nl
fin_cs = Cluster.totalResources fin_nl
printf "Original: mem=%d disk=%d\n"
(Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
printf "Final: mem=%d disk=%d\n"
(Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
when oneline $
putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
eval <-
if optExecJobs opts && not (null ord_plc)
then (case optLuxi opts of
Nothing -> do
hPutStrLn stderr "Execution of commands possible only on LUXI"
return False
Just master -> runJobSet master fin_nl il cmd_jobs)
else return True
unless eval (exitWith (ExitFailure 1))