src/Ganeti/HTools/Backend/MonD.hs

{-# LANGUAGE BangPatterns #-}

{-| Monitoring daemon backend

This module holds implements the querying of the monitoring daemons
for dynamic utilisation data.

-}

{-

Copyright (C) 2015 Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-}


module Ganeti.HTools.Backend.MonD
  ( queryAllMonDDCs
  , pMonDData
  , Report(..)
  , DataCollector
  , dName
  , fromCurl
  , mkReport
  , totalCPUCollector
  , xenCPUCollector
  , kvmRSSCollector
  , scaleMemoryWeight
  , useInstanceRSSData
  ) where

import Control.Monad
import Control.Monad.Writer
import qualified Data.List as L
import qualified Data.IntMap as IntMap
import qualified Data.Map as Map
import Data.Maybe (catMaybes, mapMaybe)
import qualified Data.Set as Set
import Network.Curl
import qualified Text.JSON as J

import Ganeti.BasicTypes
import qualified Ganeti.Constants as C
import Ganeti.Cpu.Types
import qualified Ganeti.DataCollectors.CPUload as CPUload
import qualified Ganeti.DataCollectors.KvmRSS as KvmRSS
import qualified Ganeti.DataCollectors.XenCpuLoad as XenCpuLoad
import Ganeti.DataCollectors.Types ( DCReport, DCCategory
                                   , dcReportData, dcReportName
                                   , getCategoryName )
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
import Ganeti.HTools.Loader (ClusterData(..))
import Ganeti.HTools.Types
import Ganeti.HTools.CLI
import Ganeti.JSON
import Ganeti.Logging.Lifted (logWarning)
import Ganeti.Utils (exitIfBad)

-- * General definitions

-- | The actual data types for MonD's Data Collectors.
data Report = CPUavgloadReport CPUavgload
            | InstanceCpuReport (Map.Map String Double)
            | InstanceRSSReport (Map.Map String Double)

-- | Type describing a data collector basic information.
data DataCollector = DataCollector
  { dName     :: String           -- ^ Name of the data collector
  , dCategory :: Maybe DCCategory -- ^ The name of the category
  , dMkReport :: DCReport -> Maybe Report -- ^ How to parse a monitor report
  , dUse      :: [(Node.Node, Report)]
                 -> (Node.List, Instance.List)
                 -> Result (Node.List, Instance.List)
                 -- ^ How the collector reports are to be used to bring dynamic
                 -- data into a cluster
  }

-- * Node-total CPU load average data collector

-- | Parse a DCReport for the node-total CPU collector.
mkCpuReport :: DCReport -> Maybe Report
mkCpuReport dcr =
  case fromJVal (dcReportData dcr) :: Result CPUavgload of
    Ok cav -> Just $ CPUavgloadReport cav
    Bad _ -> Nothing

-- | Take reports of node CPU values and update a node accordingly.
updateNodeCpuFromReport :: (Node.Node, Report) -> Node.Node
updateNodeCpuFromReport (node, CPUavgloadReport cav) =
  let ct = cavCpuTotal cav
      du = Node.utilLoad node
      du' = du {cpuWeight = ct}
  in node { Node.utilLoad = du' }
updateNodeCpuFromReport (node, _) = node

-- | Update the instance CPU-utilization data, asuming that each virtual
-- CPU contributes equally to the node CPU load.
updateCpuUtilDataFromNode :: Instance.List -> Node.Node -> Instance.List
updateCpuUtilDataFromNode il node =
  let ct = cpuWeight (Node.utilLoad node)
      n_uCpu = Node.uCpu node
      upd inst =
        if Node.idx node == Instance.pNode inst
          then
            let i_vcpus = Instance.vcpus inst
                i_util = ct / fromIntegral n_uCpu * fromIntegral i_vcpus
                i_du = Instance.util inst
                i_du' = i_du {cpuWeight = i_util}
            in inst {Instance.util = i_du'}
          else inst
  in Container.map upd il

-- | Update cluster data from node CPU load reports.
useNodeTotalCPU :: [(Node.Node, Report)]
                -> (Node.List, Instance.List)
                -> Result (Node.List, Instance.List)
useNodeTotalCPU reports (nl, il) =
  let newnodes = map updateNodeCpuFromReport reports
      il' = foldl updateCpuUtilDataFromNode il newnodes
      nl' = zip (Container.keys nl) newnodes
  in return (Container.fromList nl', il')

-- | The node-total CPU collector.
totalCPUCollector :: DataCollector
totalCPUCollector = DataCollector { dName = CPUload.dcName
                                  , dCategory = CPUload.dcCategory
                                  , dMkReport = mkCpuReport
                                  , dUse = useNodeTotalCPU
                                  }

-- * Xen instance CPU-usage collector

-- | Parse results of the Xen-Cpu-load data collector.
mkXenCpuReport :: DCReport -> Maybe Report
mkXenCpuReport =
  liftM InstanceCpuReport . maybeParseMap . dcReportData

-- | Update cluster data based on the per-instance CPU usage
-- reports
useInstanceCpuData :: [(Node.Node, Report)]
                   -> (Node.List, Instance.List)
                   -> Result (Node.List, Instance.List)
useInstanceCpuData reports (nl, il) = do
  let toMap (InstanceCpuReport m) = Just m
      toMap _ = Nothing
  let usage = Map.unions $ mapMaybe (toMap . snd) reports
      missingData = (Set.fromList . map Instance.name $ IntMap.elems il)
                    Set.\\ Map.keysSet usage
  unless (Set.null missingData)
    . Bad . (++) "No CPU information available for "
    . show $ Set.elems missingData
  let updateInstance inst =
        let cpu = Map.lookup (Instance.name inst) usage
            dynU = Instance.util inst
            dynU' = maybe dynU (\c -> dynU { cpuWeight = c }) cpu
        in inst { Instance.util = dynU' }
  let il' = IntMap.map updateInstance il
  let updateNode node =
        let cpu = sum
                  . map (\ idx -> maybe 0 (cpuWeight . Instance.util)
                                  $ IntMap.lookup idx il')
                  $ Node.pList node
            dynU = Node.utilLoad node
            dynU' = dynU { cpuWeight = cpu }
        in node { Node.utilLoad = dynU' }
  let nl' = IntMap.map updateNode nl
  return (nl', il')

-- | Collector for per-instance CPU data as observed by Xen
xenCPUCollector :: DataCollector
xenCPUCollector = DataCollector { dName = XenCpuLoad.dcName
                                , dCategory = XenCpuLoad.dcCategory
                                , dMkReport = mkXenCpuReport
                                , dUse = useInstanceCpuData
                                }

-- * kvm instance RSS collector

-- | Parse results of the kvm instance RSS data Collector
mkKvmRSSReport :: DCReport -> Maybe Report
mkKvmRSSReport =
  liftM InstanceRSSReport . maybeParseMap . dcReportData

-- | Conversion constant from htools' internal memory unit,
-- which is MiB to RSS unit, which reported in pages (of 4kiB
-- each).
pagesPerMiB :: Double
pagesPerMiB = 256.0

-- | Update cluster data based on per-instance RSS data.
-- Also set the node's memoy util pool correctly. Our unit
-- of memory usage is pages; there are 256 pages per MiB
-- of node memory not used by the node itself.
useInstanceRSSData :: [(Node.Node, Report)]
                   -> (Node.List, Instance.List)
                   -> Result (Node.List, Instance.List)
useInstanceRSSData reports (nl, il) = do
  let toMap (InstanceRSSReport m) = Just m
      toMap _                     = Nothing
  let usage = Map.unions $ mapMaybe (toMap . snd) reports
      missingData = (Set.fromList . map Instance.name $ IntMap.elems il)
                    Set.\\ Map.keysSet usage
  unless (Set.null missingData)
    . Bad . (++) "No RSS information available for "
    . show $ Set.elems missingData
  let updateInstance inst =
        let mem = Map.lookup (Instance.name inst) usage
            dynU = Instance.util inst
            dynU' = maybe dynU (\m -> dynU { memWeight = m }) mem
        in inst { Instance.util = dynU' }
  let il' = IntMap.map updateInstance il
  let updateNode node =
        let mem = sum
                  . map (\ idx -> maybe 0 (memWeight . Instance.util)
                                  $ IntMap.lookup idx il')
                  $ Node.pList node
            dynU = Node.utilLoad node
            dynU' = dynU { memWeight = mem }
            pool = Node.utilPool node
            nodePages = (Node.tMem node - fromIntegral (Node.nMem node))
                        * pagesPerMiB
            pool' = pool { memWeight = nodePages }
        in node { Node.utilLoad = dynU', Node.utilPool = pool' }
  let nl' = IntMap.map updateNode nl
  return (nl', il')

-- | Update cluster data based on the per-instance CPU usage
kvmRSSCollector :: DataCollector
kvmRSSCollector = DataCollector { dName = KvmRSS.dcName
                                , dCategory = KvmRSS.dcCategory
                                , dMkReport = mkKvmRSSReport
                                , dUse = useInstanceRSSData
                                }

-- | Scale the importance of the memory weight in dynamic utilisation,
-- by multiplying the usage with the given factor. Note that the underlying
-- model for dynamic utilisation is that they are reported in arbitrary units.
scaleMemoryWeight :: Double
                  -> (Node.List, Instance.List)
                  -> (Node.List, Instance.List)
scaleMemoryWeight f (nl, il) =
  let updateInst inst =
        let dynU = Instance.util inst
            dynU' = dynU { memWeight = f * memWeight dynU}
        in inst { Instance.util = dynU' }
      updateNode node =
        let dynU = Node.utilLoad node
            dynU' = dynU { memWeight = f * memWeight dynU}
        in node { Node.utilLoad = dynU' }
  in (IntMap.map updateNode nl, IntMap.map updateInst il)

-- * Collector choice

-- | The list of Data Collectors used by hail and hbal.
collectors :: Options -> [DataCollector]
collectors opts
  | optIgnoreDynu opts = []
  | otherwise =
      (if optMonDXen opts then [ xenCPUCollector ] else [ totalCPUCollector ] )
      ++ [ kvmRSSCollector | optMonDKvmRSS opts ]

-- * Querying infrastructure

-- | Return the data from correct combination of a Data Collector
-- and a DCReport.
mkReport :: DataCollector -> Maybe DCReport -> Maybe Report
mkReport dc = (>>= dMkReport dc)

-- | MonDs Data parsed by a mock file. Representing (node name, list of reports
-- produced by MonDs Data Collectors).
type MonDData = (String, [DCReport])

-- | A map storing MonDs data.
type MapMonDData = Map.Map String [DCReport]

-- | Get data report for the specified Data Collector and Node from the map.
fromFile :: DataCollector -> Node.Node -> MapMonDData -> Maybe DCReport
fromFile dc node m =
  let matchDCName dcr = dName dc == dcReportName dcr
  in maybe Nothing (L.find matchDCName) $ Map.lookup (Node.name node) m

-- | Get Category Name.
getDCCName :: Maybe DCCategory -> String
getDCCName dcc =
  case dcc of
    Nothing -> "default"
    Just c -> getCategoryName c

-- | Prepare url to query a single collector.
prepareUrl :: DataCollector -> Node.Node -> URLString
prepareUrl dc node =
  Node.name node ++ ":" ++ show C.defaultMondPort ++ "/"
  ++ show C.mondLatestApiVersion ++ "/report/" ++
  getDCCName (dCategory dc) ++ "/" ++ dName dc

-- | Query a specified MonD for a Data Collector.
fromCurl :: DataCollector -> Node.Node -> IO (Maybe DCReport)
fromCurl dc node = do
  (code, !body) <-  curlGetString (prepareUrl dc node) []
  case code of
    CurlOK ->
      case J.decodeStrict body :: J.Result DCReport of
        J.Ok r -> return $ Just r
        J.Error _ -> return Nothing
    _ -> do
      logWarning $ "Failed to contact node's " ++ Node.name node
                   ++ " MonD for DC " ++ dName dc
      return Nothing

-- | Parse a node's JSON record.
pMonDN :: JSRecord -> Result MonDData
pMonDN a = do
  node <- tryFromObj "Parsing node's name" a "node"
  reports <- tryFromObj "Parsing node's reports" a "reports"
  return (node, reports)

-- | Parse MonD data file contents.
pMonDData :: String -> Result [MonDData]
pMonDData input =
  loadJSArray "Parsing MonD's answer" input >>=
  mapM (pMonDN . J.fromJSObject)

-- | Query a single MonD for a single Data Collector.
queryAMonD :: Maybe MapMonDData -> DataCollector -> Node.Node
              -> IO (Maybe Report)
queryAMonD m dc node =
  liftM (mkReport dc) $ case m of
      Nothing -> fromCurl dc node
      Just m' -> return $ fromFile dc node m'

-- | Query all MonDs for a single Data Collector. Return the updated
-- cluster, as well as a bit inidicating wether the collector succeeded.
queryAllMonDs :: Maybe MapMonDData -> (Node.List, Instance.List)
                 -> DataCollector -> WriterT All IO (Node.List, Instance.List)
queryAllMonDs m (nl, il) dc = do
  elems <- liftIO $ mapM (queryAMonD m dc) (Container.elems nl)
  let elems' = catMaybes elems
  if length elems == length elems'
    then
      let results = zip (Container.elems nl) elems'
      in case dUse dc results (nl, il) of
        Ok (nl', il') -> return (nl', il')
        Bad s -> do
          logWarning s
          tell $ All False
          return (nl, il)
    else do
      logWarning $ "Didn't receive an answer by all MonDs, " ++ dName dc
                   ++ "'s data will be ignored."
      tell $ All False
      return (nl,il)

-- | Query all MonDs for all Data Collector. Return the cluster enriched
-- by dynamic data, as well as a bit indicating wether all collectors
-- could be queried successfully.
queryAllMonDDCs :: ClusterData -> Options -> WriterT All IO ClusterData
queryAllMonDDCs cdata opts = do
  map_mDD <-
    case optMonDFile opts of
      Nothing -> return Nothing
      Just fp -> do
        monDData_contents <- liftIO $ readFile fp
        monDData <- liftIO . exitIfBad "can't parse MonD data"
                    . pMonDData $ monDData_contents
        return . Just $ Map.fromList monDData
  let (ClusterData _ nl il _ _) = cdata
  (nl', il') <- foldM (queryAllMonDs map_mDD) (nl, il) (collectors opts)
  return $ cdata {cdNodes = nl', cdInstances = il'}