Commit 90078604 authored by Thorsten Wißmann's avatar Thorsten Wißmann 🐧

berkeleyparser: make state name compression optional

Add a new option --compress-names that makes the compression of the
state names in the berkeleyparser grammar files optional.
parent 509fcebf
......@@ -18,12 +18,13 @@ import qualified Text.Megaparsec.Error as ME
import qualified Data.List as L
import Control.Monad
import Control.Monad.State.Lazy (modify, gets, evalState, runState)
import Control.Monad.State.Lazy (modify, gets, evalStateT, runState)
import qualified Control.Monad.State.Lazy as State
import Text.Printf (printf)
data Options = Options
{ optInputFile :: !String
, optCompressNames :: !Bool
}
optionsParser :: OptParse.Parser Options
......@@ -33,7 +34,12 @@ optionsParser =
(OptParse.metavar "INPUT_FILE" <>
OptParse.help
"Input .grammer file"))
<*>
(OptParse.switch
(OptParse.long "compress-names" <>
OptParse.short 'c' <>
OptParse.help
"Compress the state names"))
optparseInfo :: OptParse.ParserInfo Options
optparseInfo = OptParse.info (optionsParser <**> OptParse.helper)
......@@ -176,8 +182,12 @@ data Q = Q Int deriving (Eq, Ord)
instance Show Q where
show (Q n) = "q" ++ show n
compressCoalgebra :: (Show ident, Ord ident) => CoalgebraWTA ident -> CoalgebraWTA Q
compressCoalgebra coalg = flip evalState M.empty $ do
compressCoalgebraBy :: (Show ident, Ord ident, Show identNew, Ord identNew, Monad m)
=> (ident -> m identNew)
-- ^ transform the identifier
-> CoalgebraWTA ident
-> m (CoalgebraWTA identNew)
compressCoalgebraBy allocator coalg = flip evalStateT M.empty $ do
newStructure <- forM (M.toList $ coalgStructure coalg) $ \(left, rhs) -> do
newLeft <- ident2idx left
newRhs <- forM rhs $ \(weight, idx, params) -> do
......@@ -188,7 +198,9 @@ compressCoalgebra coalg = flip evalState M.empty $ do
ident2idx <- gets M.toList
let oldIdentComments ident =
M.findWithDefault [] ident $ coalgIdent2Comment coalg
let identComments = [ (idx, [show idx ++ " = \"" ++ show ident ++ "\""]
let identComments = [ (idx, (if show idx /= show ident
then [show idx ++ " = \"" ++ show ident ++ "\""]
else [])
++ oldIdentComments ident)
| (ident, idx) <- ident2idx ]
return $ coalg
......@@ -197,9 +209,11 @@ compressCoalgebra coalg = flip evalState M.empty $ do
}
where
-- | lookup the index of an identifier or insert it if unseen before
ident2idx :: Ord ident => ident -> State.State (M.Map ident Q) Q
ident2idx ident = do
new_index <- Q <$> gets M.size
ident2idx = ident2idx' allocator
ident2idx' :: (Ord ident, Monad m) => (ident -> m identNew) -> ident -> State.StateT (M.Map ident identNew) m identNew
ident2idx' alloc ident = do
new_index <- State.lift (alloc ident)
maybe_existing <- gets $ M.lookup ident
case maybe_existing of
Just n -> return n
......@@ -207,6 +221,48 @@ compressCoalgebra coalg = flip evalState M.empty $ do
modify $ M.insert ident new_index
return new_index
compressCoalgebra :: (Show ident, Ord ident, Show newIdent, Ord newIdent)
=> (Int -> newIdent)
-> CoalgebraWTA ident
-> CoalgebraWTA newIdent
compressCoalgebra int2ident coalg = State.evalState (compressCoalgebraBy (allocIdent int2ident) coalg) 0
where
allocIdent :: (Int -> q) -> ident -> State.State Int q
allocIdent int2ident _ = do
n <- State.get
State.put (n + 1)
return (int2ident n)
compressCoalgebra2Q :: (Show ident, Ord ident) => CoalgebraWTA ident -> CoalgebraWTA Q
compressCoalgebra2Q = compressCoalgebra Q
-- | same as compressCoalgebra2Q but directly transform 'Q' to 'Identifier'
compressCoalgebra2Ident :: (Show ident, Ord ident) => CoalgebraWTA ident -> CoalgebraWTA Identifier
compressCoalgebra2Ident = compressCoalgebra (\n -> Identifier (T.pack "q") (fromIntegral n))
sanitizeNames :: CoalgebraWTA Identifier -> CoalgebraWTA Identifier
sanitizeNames coalg = State.evalState (compressCoalgebraBy sanitizeIdent coalg) 0
where
sanitizeIdent :: Identifier -> State.State Int Identifier
sanitizeIdent (Identifier prefix idx) =
return $ flip Identifier idx $ T.pack $ sanitizeString $ T.unpack prefix
sanitizeString :: String -> String
sanitizeString = concatMap (fixchar)
fixchar :: Char -> [Char]
fixchar '$' = "_dollar_"
fixchar '`' = "_backtick_"
fixchar '@' = "_at_"
fixchar '.' = "_dot_"
fixchar ',' = "_comma_"
fixchar '-' = "_"
fixchar '/' = "_"
fixchar '"' = "_dq_"
fixchar '\'' = "_sq_"
fixchar ':' = "_col_"
fixchar defaultcase = [defaultcase]
-- ================ Print the copar data ================
-- ===========================================================================
......@@ -275,7 +331,7 @@ main = do
$ coalgebraPrependComment ["this file has been generated from \"" ++ filename ++ "\""]
$ coalgebraPrependComment [""]
$ coalgebraPrependComment formatComment
$ compressCoalgebra
$ (if (optCompressNames opts) then compressCoalgebra2Ident else sanitizeNames)
$ berkeleyToCopar grammar
-- printGrammarForCopar filename putStrLn grammar
return ()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment