Commit f1c71b3e authored by Hans-Peter Deifel's avatar Hans-Peter Deifel 🐢
Browse files

Import refinable partition data type

This is copy pasta from another project.
parent 60926945
......@@ -22,6 +22,8 @@ library
, Data.Functors.FixedProduct
, Data.Functors
, Data.Sort
, Data.RefinablePartition
, Data.Vector.Unboxed.Mutable.Utils
, Parser
, Algorithm
default-language: Haskell2010
......@@ -32,6 +34,11 @@ library
, yaml
, unordered-containers
, bytestring
, primitive
, vector-algorithms
, microlens
, microlens-th
, deepseq
executable ma
main-is: Main.hs
......
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
module Data.RefinablePartition
( -- * Types
RefinablePartition
-- * Construction
, make
-- * Accessors
, numBlocks
, blockSize
, blockOfState
, statesOfBlock
-- * Marking
, mark
, isMarked
, hasMarked
-- * Splitting
, splitMarked
, splitBy
, groupBy
) where
import Control.Monad (forM_, forM, when, foldM)
import Control.Monad.ST
import Data.Ord (comparing)
import Data.Primitive.MutVar
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Heap as VM
import Data.Vector.Mutable (MVector)
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed.Mutable as VU
import Lens.Micro
import Lens.Micro.TH
import Control.DeepSeq (NFData)
import qualified Data.Vector.Unboxed.Mutable.Utils as VU
-- | A state is represented by an integer.
--
-- States in an automaton should always start at zero.
type State = Int
-- | A block is represented by an integer.
--
-- Blocks in a partition always start at zero.
newtype Block = Block { fromBlock :: Int }
deriving (Eq,Ord,Num,Enum,NFData)
instance Show Block where
show (Block b) = show b
data StateRepr = StateRepr
{ _block :: {-# UNPACK #-} Block
, _location :: {-# UNPACK #-} Int
} deriving (Show)
makeLenses ''StateRepr
data BlockRepr = BlockRepr
{ _startOffset :: {-# UNPACK #-} Int
, _endOffset :: {-# UNPACK #-} Int -- exclusive
, _unmarkedOffset :: {-# UNPACK #-} Int
} deriving (Show)
makeLenses ''BlockRepr
-- | Refinable partition type.
--
-- Should be thought of as a partition {A₁,..} of a set of 'State' values from
-- zero to n. The `Ai`s are called Blocks.
--
-- This type is by nature mutable and can be mutated with the operations in this
-- module. The `s` type variable is there to support the ST monad.
data RefinablePartition s = Partition
{ _blockCount :: MutVar s Int
, _statesByBlock :: VU.MVector s State
, _states :: MVector s StateRepr
, _blocks :: MVector s BlockRepr
}
makeLenses ''RefinablePartition
-- | Create a mutable refinable partition.
make :: Int -- ^ Number of states n
-> Int -- ^ Number of initial blocks m
-> (State -> Block)
-- ^ Initial partition assigning a block to each state. Must be defined on
-- {0..n} and only have values in {0..m}.
-> ST s (RefinablePartition s)
make numStates numBlocks initPart
| numStates < numBlocks = error "RefinablePartition.make: More blocks than states"
| numBlocks == 0 = error "RefinablePartition.make: zero blocks"
| otherwise = do
statesByBlock <- VU.new numStates
states <- VM.new numStates
-- we need to reserve space for more blocks, to allow splitting to create them.
-- There can be at most as many blocks as there are states
blocks <- VM.new numStates
blockCount <- newMutVar numBlocks
-- contains a list of states for each block
blockStates <- VM.replicate numBlocks []
forM_ [0..numStates-1] $ \i -> do
let Block block = initPart i
VM.modify blockStates (i:) block
blockStates' <- V.unsafeFreeze blockStates
currentLocation <- newMutVar 0
iforM_ blockStates' $ \i stateList -> do
beginningOfBlock <- readMutVar currentLocation
forM_ stateList $ \s -> do
stateLocation <- readMutVar currentLocation
modifyMutVar currentLocation (+1)
VU.write statesByBlock stateLocation s
VM.write states s StateRepr { _block = Block i
, _location = stateLocation
}
endOfBlock <- readMutVar currentLocation
VM.write blocks i BlockRepr { _startOffset = beginningOfBlock
, _endOffset = endOfBlock
, _unmarkedOffset = beginningOfBlock
}
return Partition { _blockCount = blockCount
, _statesByBlock = statesByBlock
, _states = states
, _blocks = blocks
}
-- | Return number of blocks in this partition.
--
-- Runtime: O(1)
numBlocks :: RefinablePartition s -> ST s Int
numBlocks p = readMutVar (p ^. blockCount)
-- | Return number of states in a given block.
--
-- Runtime: O(1)
blockSize :: RefinablePartition s -> Block -> ST s Int
blockSize p b = getBlock p b >>= \block ->
return (block^.endOffset - block^.startOffset)
-- | Return the block that a given state belongs to.
--
-- Runtime: O(1)
blockOfState :: RefinablePartition s -> State -> ST s Block
blockOfState p s = getState p s >>= \state ->
return (state^.block)
-- | Record this state as "marked".
--
-- This is implemented in a way to efficiently split the block in marked and
-- unmarked states.
--
-- Runtime: O(1)
mark :: RefinablePartition s -> State -> ST s ()
mark partition s = do
!(StateRepr {..}) <- getState partition s
!(BlockRepr {..}) <- getBlock partition _block
when (_location > _unmarkedOffset) $
swap partition (_location) (_unmarkedOffset)
setBlock partition _block $
unmarkedOffset %~ (+1)
where
-- swap two indices in statesByBlock array
swap :: RefinablePartition s -> Int -> Int -> ST s ()
swap partition a b = do
setStateAt partition a $ location .~ b
setStateAt partition b $ location .~ a
VU.swap (partition^.statesByBlock) a b
-- | Decide if a block has marked states.
--
-- Runtime: O(1)
hasMarked :: RefinablePartition s -> Block -> ST s Bool
hasMarked p b = getBlock p b >>= \block ->
return (block^.startOffset /= block^.unmarkedOffset)
-- | Decide wether a state is marked.
--
-- Runtime: O(1)
isMarked :: RefinablePartition s -> State -> ST s Bool
isMarked partition s = do
state <- getState partition s
blk <- getBlock partition (state^.block)
return $ state^.location < blk^.unmarkedOffset
-- | Return a list of all states in a given block.
--
-- Runtime: O(n) for n == number of states in this block
statesOfBlock :: RefinablePartition s -> Block -> ST s [State]
statesOfBlock partition b = do
block <- getBlock partition b
forM (blockIndices block) $ \i ->
VU.read (partition^.statesByBlock) i
-- | Split a block into two new blocks for its marked and unmarked states.
--
-- Returns a tuple of (marked block, unmarked block). If there are no marked or
-- unmarked states, the respective block returned will be Nothing.
--
-- The largest new block will inherit the identity (i.e. block number) of the
-- old block (with bias towards the one with unmarked states).
--
-- Runtime: O(number of marked states in the old block)
splitMarked :: RefinablePartition s -> Block -> ST s (Maybe Block, Maybe Block)
splitMarked partition b = do
block <- getBlock partition b
if block^.startOffset == block^.unmarkedOffset then -- nothing marked
return (Nothing, Just b)
else if block^.unmarkedOffset == block^.endOffset then do -- nothing unmarked
setBlock partition b $ unmarkedOffset .~ (block^.startOffset)
return (Just b, Nothing)
else do
let numMarked = (block^.unmarkedOffset) - (block^.startOffset)
numUnmarked = (block^.endOffset) - (block^.unmarkedOffset)
if numMarked <= numUnmarked then do
-- new block for marked states
new <- newBlock partition (block^.startOffset) (block^.unmarkedOffset)
-- let old block begin at at the first unmarked state
setBlock partition b $ startOffset .~ (block^.unmarkedOffset)
return (Just new, Just b)
else do
-- new block for unmarked states
new <- newBlock partition (block^.unmarkedOffset) (block^.endOffset)
-- let old block end at at the last marked state
setBlock partition b $ endOffset .~ (block^.unmarkedOffset)
-- and reset marked status for all states
setBlock partition b $ unmarkedOffset .~ (block^.startOffset)
return (Just b, Just new)
-- | Split a block into two new blocks according to a given predicate.
--
-- Returns a tuple @(a, b)@ where @a@ is the block with all states for which the
-- predicate returns True and 'b' contains the rest of the states.
--
-- The largest block in @[a,b]@ shares the identity (i.e. the block number) with
-- the original block.
--
-- Runtime: O(number of states in the old block)
splitBy :: RefinablePartition s -> Block -> (State -> Bool) -> ST s (Maybe Block, Maybe Block)
splitBy !partition !b !predicate = do
!block@(BlockRepr {..}) <- getBlock partition b
splitPoint <- VU.partition (partition^.statesByBlock) predicate _startOffset _endOffset
if splitPoint == _startOffset then -- no matching states
return (Nothing, Just b)
else if splitPoint == _endOffset then -- all states match
return (Just b, Nothing)
else do
-- update location for all states, because split moves them
updateLocations partition block
let beforeSplitNum = splitPoint - _startOffset
afterSplitNum = _endOffset- splitPoint
if (beforeSplitNum <= afterSplitNum) then do
setBlock partition b $ startOffset .~ splitPoint
setBlock partition b $ unmarkedOffset .~ splitPoint
new <- newBlock partition _startOffset splitPoint
return (Just new, Just b)
else do
setBlock partition b $ endOffset .~ splitPoint
new <- newBlock partition splitPoint _endOffset
return (Just b, Just new)
-- | Split a block into new blocks according to some atttribute of its states.
--
-- The result is maximally coarse list of blocks, such all states in a new block
-- have the same value for the given attribute.
--
-- One of the blocks inherits the identity of the old block.
--
-- Runtime: O(n*log(n)) where n is the number of states in the old block.
groupBy :: Ord a => RefinablePartition s -> Block -> (State -> a) -> ST s [Block]
groupBy partition b predicate = do
block <- getBlock partition b
let start = block^.startOffset
end = block^.endOffset
VM.sortBy (comparing predicate) $
VU.slice start (end-start) (partition^.statesByBlock)
updateLocations partition block
indices <- VU.groupBy (partition^.statesByBlock) predicate start end
let splitAt (currentBlock,newBlocks) index = do
setBlock partition currentBlock $ unmarkedOffset .~ index
(Just previousBlock, Just nextBlock) <- splitMarked partition currentBlock
return (nextBlock, newBlocks++[previousBlock])
(last,blocks) <- foldM splitAt (b, []) indices
-- unless (null indices) $
-- setBlock partition b $ endOffset .~ head indices
return (blocks ++ [last])
-- helpers
getBlock :: RefinablePartition s -> Block -> ST s BlockRepr
getBlock !partition !(Block b) = VM.unsafeRead (_blocks partition) b
setBlock :: RefinablePartition s -> Block -> (BlockRepr -> BlockRepr) -> ST s ()
setBlock partition (Block b) setter = VM.unsafeModify (_blocks partition) setter b
getState :: RefinablePartition s -> State -> ST s StateRepr
getState partition s = VM.unsafeRead (partition^.states) s
setState :: RefinablePartition s -> State -> (StateRepr -> StateRepr) -> ST s ()
setState partition s setter = VM.modify (partition^.states) setter s
setStateAt :: RefinablePartition s -> Int -> (StateRepr -> StateRepr) -> ST s ()
setStateAt partition loc setter = VU.read (partition^.statesByBlock) loc >>= \state ->
setState partition state setter
newBlock :: RefinablePartition s -> Int -> Int -> ST s Block
newBlock partition beginning end = do
let repr = BlockRepr beginning end beginning -- no marked blocks by default
blk <- allocateBlock partition
setBlock partition blk (const repr)
-- update block of all contained states
forM_ (blockIndices repr) $ \pos ->
setStateAt partition pos (block .~ blk)
return blk
allocateBlock :: RefinablePartition s -> ST s Block
allocateBlock partition = do
current <- readMutVar (partition^.blockCount)
modifyMutVar (partition^.blockCount) (+1)
return $ Block current
-- Update location fields of all states in this block
updateLocations :: RefinablePartition s -> BlockRepr -> ST s ()
updateLocations partition block =
forM_ (blockIndices block) $ \i ->
setStateAt partition i $ location .~ i
blockIndices :: BlockRepr -> [Int]
blockIndices block = [block^.startOffset..block^.endOffset-1]
iforM_ :: Monad m => V.Vector a -> (Int -> a -> m b) -> m ()
iforM_ = flip V.imapM_
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Vector.Unboxed.Mutable.Utils
( partition
, groupBy
) where
import Control.Monad (foldM)
import Control.Monad.ST
import qualified Data.Vector.Unboxed.Mutable as VU
partition :: VU.Unbox a => VU.MVector s a -> (a -> Bool) -> Int -> Int -> ST s Int
partition vec predicate = go
where
go lower upper
| lower >= upper = return lower
| otherwise = do
l <- VU.read vec lower
r <- VU.read vec (upper-1)
if predicate l then partition vec predicate (lower+1) upper
else if not (predicate r) then partition vec predicate lower (upper-1)
else VU.swap vec lower (upper-1) >> partition vec predicate (lower+1) upper
{-# INLINE partition #-}
-- end is exclusive
groupBy :: forall s a b. (Eq b, VU.Unbox a) => VU.MVector s a -> (a -> b) -> Int -> Int -> ST s [Int]
groupBy vec predicate lower upper
| lower >= upper = return [] -- special case empty range
| otherwise = do
first <- VU.read vec lower
reverse . snd <$> foldM groupByImpl (first, []) [lower+1..upper-1]
where
groupByImpl :: (a,[Int]) -> Int -> ST s (a,[Int])
groupByImpl (current, accu) i
| i >= upper = return (current,accu)
| otherwise = do
x <- VU.read vec i
if predicate x == predicate current then
return (x,accu)
else
return (x,i:accu)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment