{-| Node freeing scheduler

hsqueeze adapts the number of online nodes to the current load: nodes that
are not needed to host the cluster's instances are tagged as standby and
powered off, and previously squeezed-out standby nodes are powered on and
onlined again when more capacity is required.

-}

{-

Copyright (C) 2013 Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-}

module Ganeti.HTools.Program.Hsqueeze
  (main
  , options
  , arguments
  ) where

import Control.Applicative
import Control.Lens (over)
import Control.Monad
import Data.Function
import Data.List
import Data.Maybe
import qualified Data.IntMap as IntMap
import Text.Printf (printf)

import Ganeti.BasicTypes
import Ganeti.Common
import qualified Ganeti.HTools.AlgorithmParams as Alg
import Ganeti.HTools.CLI
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import Ganeti.HTools.ExtLoader
import qualified Ganeti.HTools.Instance as Instance
import Ganeti.HTools.Loader
import qualified Ganeti.HTools.Node as Node
import Ganeti.HTools.Tags (hasStandbyTag, standbyAuto)
import Ganeti.HTools.Types
import Ganeti.JQueue (currentTimestamp, reasonTrailTimestamp)
import Ganeti.JQueue.Objects (Timestamp)
import qualified Ganeti.Jobs as Jobs
import Ganeti.OpCodes
import Ganeti.OpCodes.Lens (metaParamsL, opReasonL)
import Ganeti.Utils

import Ganeti.Version (version)

-- | Options list and functions.
options :: IO [OptType]
options = do
  luxi <- oLuxiSocket
  return
    [ luxi
    , oDataFile
    , oExecJobs
    , oMinResources
    , oTargetResources
    , oSaveCluster
    , oPrintCommands
    , oVerbose
    , oNoHeaders
    ]

-- | The list of arguments supported by the program.
arguments :: [ArgCompletion]
arguments = []

-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment and
-- a reason trail entry about what generated the opcode.
annotateOpCode :: Timestamp -> String -> Jobs.Annotator
annotateOpCode ts comment =
  over (metaParamsL . opReasonL)
      (++ [("hsqueeze"
           , "hsqueeze " ++ version ++ " called"
           , reasonTrailTimestamp ts
           )])
  . setOpComment (comment ++ " " ++ version)
  . wrapOpCode
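
-- A usage sketch mirroring 'maybeExecJobs' below; @ts@ is a freshly obtained
-- timestamp, @opts@ the parsed options, @master@ the LUXI socket and @jobs@
-- the opcode job sets, and the comment string is only illustrative:
--
-- > let annotator = maybe id setOpPriority (optPriority opts)
-- >                 . annotateOpCode ts "expanding by 2 nodes"
-- > Jobs.execWithCancel annotator master jobs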

-- | Within a cluster configuration, decide if the node hosts only
-- externally-mirrored instances.
onlyExternal :: (Node.List, Instance.List) -> Node.Node -> Bool
onlyExternal (_, il) nd =
  not
  . any (Instance.usesLocalStorage . flip Container.find il)
  $ Node.pList nd

-- | Predicate whether a node is not a secondary node for any instance.
noSecondaries :: Node.Node -> Bool
noSecondaries = null . Node.sList

-- | Predicate whether, in a configuration, all running instances are on
-- online nodes.
allInstancesOnOnlineNodes :: (Node.List, Instance.List) -> Bool
allInstancesOnOnlineNodes (nl, il) =
 all (not . Node.offline . flip Container.find nl . Instance.pNode)
 . IntMap.elems
 $ il

-- | Predicate whether, in a configuration, each node has enough resources
-- to additionally host the given instance.
allNodesCapacityFor :: Instance.Instance -> (Node.List, Instance.List) -> Bool
allNodesCapacityFor inst (nl, _) =
  all (isOk . flip Node.addPri inst) . IntMap.elems $ nl
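
-- These predicates are combined pointwise in 'main' when selecting offline
-- candidates; a sketch of that pattern, assuming a loaded configuration
-- @(nl, il)@:
--
-- > filter (foldl (liftA2 (&&)) (const True)
-- >         [ not . Node.offline
-- >         , noSecondaries
-- >         , onlyExternal (nl, il)
-- >         ])
-- >   (IntMap.elems nl)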

-- | Balance a configuration, possibly for 0 steps, till no further improvement
-- is possible.
balance :: (Node.List, Instance.List)
           -> ((Node.List, Instance.List), [MoveJob])
balance (nl, il) =
  let ini_cv = Cluster.compCV nl
      ini_tbl = Cluster.Table nl il ini_cv []
      balanceStep = Cluster.tryBalance
                      (Alg.defaultOptions { Alg.algMinGain = 0.0
                                          , Alg.algMinGainLimit = 0.0})
      bTables = map fromJust . takeWhile isJust
                  $ iterate (>>= balanceStep) (Just ini_tbl)
      (Cluster.Table nl' il' _ _) = last bTables
      moves = zip bTables (drop 1 bTables) >>= Cluster.getMoves
  in ((nl', il'), reverse moves)
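
-- A minimal usage sketch, assuming a loaded configuration @(nl, il)@; the
-- returned moves can then be grouped into job sets as done in 'main':
--
-- > let ((nl', il'), moves) = balance (nl, il)
-- >     jobs = Cluster.splitJobs moves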

-- | In a configuration, mark a node as online or offline.
onlineOfflineNode :: Bool -> (Node.List, Instance.List) -> Ndx ->
                     (Node.List, Instance.List)
onlineOfflineNode offline (nl, il) ndx =
  let nd = Container.find ndx nl
      nd' = Node.setOffline nd offline
      nl' = Container.add ndx nd' nl
  in (nl', il)

-- | Offline or online a list of nodes, and return the state after a balancing
-- attempt together with the sequence of moves that lead there.
onlineOfflineNodes :: Bool -> [Ndx] -> (Node.List, Instance.List)
                      -> ((Node.List, Instance.List), [MoveJob])
onlineOfflineNodes offline ndxs conf =
  let conf' = foldl (onlineOfflineNode offline) conf ndxs
  in balance conf'

-- | Offline a list of nodes, and return the state after balancing with
-- the sequence of moves that lead there.
offlineNodes :: [Ndx] -> (Node.List, Instance.List)
                -> ((Node.List, Instance.List), [MoveJob])
offlineNodes = onlineOfflineNodes True

-- | Online a list of nodes, and return the state after balancing with
-- the sequence of moves that lead there.
onlineNodes :: [Ndx] -> (Node.List, Instance.List)
               -> ((Node.List, Instance.List), [MoveJob])
onlineNodes = onlineOfflineNodes False
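
-- 'main' applies these wrappers to the node indices of the selected
-- candidates, for example (sketch; @toOffline@ is a list of nodes and
-- @(nl, il)@ a loaded configuration):
--
-- > offlineNodes (map Node.idx toOffline) (nl, il)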

-- | Predicate on whether a list of nodes can be offlined or onlined
-- simultaneously in a given configuration, while still leaving enough
-- capacity on every node for the given instance.
canOnlineOffline :: Bool -> Instance.Instance -> (Node.List, Instance.List)
                    -> [Node.Node] -> Bool
canOnlineOffline offline inst conf nds =
  let conf' = fst $ onlineOfflineNodes offline (map Node.idx nds) conf
  in allInstancesOnOnlineNodes conf' && allNodesCapacityFor inst conf'

-- | Predicate on whether a list of nodes can be offlined simultaneously.
canOffline :: Instance.Instance -> (Node.List, Instance.List) ->
              [Node.Node] -> Bool
canOffline = canOnlineOffline True

-- | Predicate on whether onlining a list of nodes suffices to get enough
-- free resources for the given instance.
sufficesOnline :: Instance.Instance -> (Node.List, Instance.List)
                  -> [Node.Node] -> Bool
sufficesOnline = canOnlineOffline False

-- | Greedily offline the nodes, starting from the last element, and return
-- the list of nodes that could simultaneously be offlined while still
-- leaving enough capacity on every node for the given instance.
greedyOfflineNodes :: Instance.Instance -> (Node.List, Instance.List)
                      -> [Node.Node] -> [Node.Node]
greedyOfflineNodes _ _ [] = []
greedyOfflineNodes inst conf (nd:nds) =
  let nds' = greedyOfflineNodes inst conf nds
  in if canOffline inst conf (nd:nds') then nd:nds' else nds'
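
-- The recursion works from the end of the list: for candidates @[a, b, c]@
-- it first decides on the tail @[b, c]@ and then keeps @a@ only if offlining
-- it together with the already-kept nodes still passes 'canOffline'.  A
-- sketch of the call made in 'main':
--
-- > greedyOfflineNodes targetInstance (nlf, ilf) offlineCandidates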

-- | Try to provide enough resources by onlining an initial segment of
-- a list of nodes. Return 'Nothing' if even onlining all of them is not
-- enough.
tryOnline :: Instance.Instance -> (Node.List, Instance.List) -> [Node.Node]
             -> Maybe [Node.Node]
tryOnline inst conf = listToMaybe . filter (sufficesOnline inst conf) . inits
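
-- Since @inits [a, b, c] == [[], [a], [a, b], [a, b, c]]@, the result is the
-- shortest prefix (possibly the empty one) that already provides enough
-- capacity.  A sketch of the call made in 'main':
--
-- > tryOnline minInstance (nlf, ilf) onlineCandidates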

-- | From a name, factor, and specification, create an instance that uses
-- the given factor times the specification, rounded down.
instanceFromSpecAndFactor :: String -> Double -> ISpec -> Instance.Instance
instanceFromSpecAndFactor name f spec =
  Instance.create name
    (floor (f * fromIntegral (iSpecMemorySize spec)))
    0 []
    (floor (f * fromIntegral (iSpecCpuCount spec)))
    Running [] False Node.noSecondary Node.noSecondary DTExt
    (floor (f * fromIntegral (iSpecSpindleUse spec)))
    []
    False
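
-- For example, 'main' derives its capacity probes from the cluster's
-- standard instance specification (sketch; @opts@ holds the parsed options
-- and @ipol@ the loaded instance policy):
--
-- > instanceFromSpecAndFactor "targetInstance" (optTargetResources opts)
-- >                           (iPolicyStdSpec ipol)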

-- | Get opcodes for the given move jobs.
getMoveOpCodes :: Node.List
               -> Instance.List
               -> [JobSet]
               -> Result [([[OpCode]], String)]
getMoveOpCodes nl il js = return $ zip (map opcodes js) (map descr js)
  where opcodes = map (\(_, idx, move, _) ->
                      Cluster.iMoveToJob nl il idx move)
        descr job = "Moving instances " ++ commaJoin
                       (map (\(_, idx, _, _) -> Container.nameOf il idx) job)

-- | Get opcodes for tagging nodes with standby.
getTagOpCodes :: [Node.Node] -> Result [([[OpCode]], String)]
getTagOpCodes nl = return $ zip (map opCode nl) (map descr nl)
  where
    opCode node = [[Node.genAddTagsOpCode node [standbyAuto]]]
    descr node = "Tagging node " ++ Node.name node ++ " with standby"

-- | Get opcodes for powering off nodes.
getPowerOffOpCodes :: [Node.Node] -> Result [([[OpCode]], String)]
getPowerOffOpCodes nl = do
  opcodes <- Node.genPowerOffOpCodes nl
  return [([opcodes], "Powering off nodes")]

-- | Get opcodes for powering on nodes.
getPowerOnOpCodes :: [Node.Node] -> Result [([[OpCode]], String)]
getPowerOnOpCodes nl = do
  opcodes <- Node.genPowerOnOpCodes nl
  return [([opcodes], "Powering on nodes")]

-- | Execute the given opcodes as jobs over LUXI, if job execution was
-- requested; otherwise do nothing.
maybeExecJobs :: Options
              -> String
              -> Result [([[OpCode]], String)]
              -> IO (Result ())
maybeExecJobs opts comment opcodes =
  if optExecJobs opts
    then (case optLuxi opts of
            Nothing ->
              return $ Bad "Execution of commands possible only on LUXI"
            Just master -> do
              ts <- currentTimestamp
              let annotator = maybe id setOpPriority (optPriority opts) .
                              annotateOpCode ts comment
              case opcodes of
                Bad msg -> return $ Bad msg
                Ok codes -> Jobs.execWithCancel annotator master codes)
    else return $ Ok ()

-- | Main function.
main :: Options -> [String] -> IO ()
main opts args = do
  unless (null args) $ exitErr "This program doesn't take any arguments."

  let verbose = optVerbose opts
      targetf = optTargetResources opts
      minf = optMinResources opts

  ini_cdata@(ClusterData _ nlf ilf _ ipol) <- loadExternalData opts

  maybeSaveData (optSaveCluster opts) "original" "before hsqueeze run" ini_cdata

  let nodelist = IntMap.elems nlf
      offlineCandidates =
        sortBy (flip compare `on` length . Node.pList)
        . filter (foldl (liftA2 (&&)) (const True)
                  [ not . Node.offline
                  , not . Node.isMaster
                  , noSecondaries
                  , onlyExternal (nlf, ilf)
                  ])
        $ nodelist
      onlineCandidates =
        filter (liftA2 (&&) Node.offline hasStandbyTag) nodelist
      conf = (nlf, ilf)
      std = iPolicyStdSpec ipol
      targetInstance = instanceFromSpecAndFactor "targetInstance" targetf std
      minInstance = instanceFromSpecAndFactor "minInstance" minf std
      toOffline = greedyOfflineNodes targetInstance conf offlineCandidates
      ((fin_off_nl, fin_off_il), off_mvs) =
        offlineNodes (map Node.idx toOffline) conf
      final_off_cdata =
        ini_cdata { cdNodes = fin_off_nl, cdInstances = fin_off_il }
      off_jobs = Cluster.splitJobs off_mvs
      off_opcodes = liftM concat $ sequence
                    [ getMoveOpCodes nlf ilf off_jobs
                    , getTagOpCodes toOffline
                    , getPowerOffOpCodes toOffline
                    ]
      off_cmd =
        Cluster.formatCmds off_jobs
        ++ "\necho Tagging Commands\n"
        ++ (toOffline >>= (printf "  gnt-node add-tags %s %s\n"
                             `flip` standbyAuto)
                          . Node.alias)
        ++ "\necho Power Commands\n"
        ++ (toOffline >>= printf "  gnt-node power -f off %s\n" . Node.alias)
      toOnline = tryOnline minInstance conf onlineCandidates
      nodesToOnline = fromMaybe onlineCandidates toOnline
      ((fin_on_nl, fin_on_il), on_mvs) =
        onlineNodes (map Node.idx nodesToOnline) conf
      final_on_cdata =
        ini_cdata { cdNodes = fin_on_nl, cdInstances = fin_on_il }
      on_jobs = Cluster.splitJobs on_mvs
      on_opcodes = liftM2 (++) (getPowerOnOpCodes nodesToOnline)
                               (getMoveOpCodes nlf ilf on_jobs)
      on_cmd =
        "echo Power Commands\n"
        ++ (nodesToOnline >>= printf "  gnt-node power -f on %s\n" . Node.alias)
        ++ Cluster.formatCmds on_jobs

  when (verbose > 1) . putStrLn
    $ "Offline candidates: " ++ commaJoin (map Node.name offlineCandidates)

  when (verbose > 1) . putStrLn
    $ "Online candidates: " ++ commaJoin (map Node.name onlineCandidates)

  if not (allNodesCapacityFor minInstance conf)
    then do
      unless (optNoHeaders opts) $
        putStrLn "'Nodes to online'"
      mapM_ (putStrLn . Node.name) nodesToOnline
      when (verbose > 1 && isNothing toOnline) . putStrLn $
        "Onlining all nodes will not yield enough capacity"
      maybeSaveCommands "Commands to run:" opts on_cmd
      let comment = printf "expanding by %d nodes" (length nodesToOnline)
      exitIfBad "hsqueeze" =<< maybeExecJobs opts comment on_opcodes
      maybeSaveData (optSaveCluster opts)
         "squeezed" "after hsqueeze expansion" final_on_cdata
    else
      if null toOffline
        then do
          unless (optNoHeaders opts) $
            putStrLn "'No action'"
          maybeSaveCommands "Commands to run:" opts "echo Nothing to do"
          maybeSaveData (optSaveCluster opts)
            "squeezed" "after hsqueeze doing nothing" ini_cdata
        else do
          unless (optNoHeaders opts) $
            putStrLn "'Nodes to offline'"
          mapM_ (putStrLn . Node.name) toOffline
          maybeSaveCommands "Commands to run:" opts off_cmd
          let comment = printf "condensing by %d nodes" (length toOffline)
          exitIfBad "hsqueeze" =<< maybeExecJobs opts comment off_opcodes
          maybeSaveData (optSaveCluster opts)
            "squeezed" "after hsqueeze run" final_off_cdata