Commit 7ffafe17 authored by Hans-Peter Deifel's avatar Hans-Peter Deifel 🐢

Clean and speed up whitespace parsing

This cleans up newline handling in the whitespace parser:

Newlines are not treated the same as other whitespace any more and are
only allowed after the functor expression and between individual
morphism points.

By doing this, we get a less surprising grammar for parsing ("x: { y:"
would never try to parse the "y:" as the start of a new definition)
and can speed up the hot path (a single space character) considerably.
parent 555e22ff
......@@ -17,7 +17,7 @@ benchmarks = bgroup "MA.Parser.Lexer"
, benchDecimal
, benchFloat
, benchName
, benchSpace
, benchWhitespace
]
benchColon :: Benchmark
......@@ -49,12 +49,26 @@ benchName = bgroup "name"
, benchp "love letters" L.name "love"
]
benchWhitespace :: Benchmark
benchWhitespace = bgroup "space"
[ benchSpace
, benchNewlines1
]
benchSpace :: Benchmark
benchSpace = bgroup "space"
[ benchp "one space" L.spaceConsumer " "
, benchp "some space" L.spaceConsumer " "
, benchp "comment" L.spaceConsumer "# A comment"
, benchp "space, comment, space" L.spaceConsumer " # A comment\n "
[ benchp "one space" L.space " "
, benchp "some space" L.space " "
, benchp "failing" L.space "a"
]
benchNewlines1 :: Benchmark
benchNewlines1 = bgroup "newlines1"
[ benchp "single newline" L.newlines1 "\n"
, benchp "some newlines" L.newlines1 "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
, benchp "comment" L.newlines1 "# A comment\n"
, benchp "newline, comment, space" L.newlines1 "\n# A comment\n "
, benchp "failing" L.newlines1 "a"
]
benchp :: NFData a => String -> Parser a -> Text -> Benchmark
......
......@@ -31,7 +31,7 @@ import MA.Coalgebra.Parser.Internal
import MA.Coalgebra.RefinementTypes
import MA.FunctorExpression.Sorts (Sort)
import MA.FunctorExpression.Type
import MA.Parser.Lexer
import qualified MA.Parser.Lexer as L
import MA.Parser.Types
type MorphParser l h1 = StateT (ParserState l h1) Parser
......@@ -107,9 +107,10 @@ morphismsParser (Functor sort f) = finalizeState <$> (execStateT p initState)
checkUndefinedRefs
parsePoint = do
from <- name >>= defineSymbol
void $ symbol ":"
from <- L.name >>= defineSymbol
void $ L.symbol ":"
succs <- parseMorphismPoint (fmap wrapper f)
L.newlinesOrEof
graph %= M.insert from (sort, succs)
......@@ -126,7 +127,7 @@ wrapper ::
(Functor f, ParseMorphism f)
=> FunctorExpression f Sort
-> MorphParser (Label f) (H1 f) State
wrapper Variable = name >>= lookupSymbol
wrapper Variable = L.name >>= lookupSymbol
wrapper (Functor nextSort f) = do
from <- newState
succs <- parseMorphismPoint (fmap wrapper f)
......
......@@ -16,7 +16,7 @@ import Text.Megaparsec
import Data.Text (Text)
import MA.FunctorExpression.Type
import MA.Parser.Lexer
import qualified MA.Parser.Lexer as L
import MA.Parser.Types
data FunctorParser f where
......@@ -50,16 +50,16 @@ newtype Precedence = Precedence Int
deriving (Num, Bounded, Enum, Integral, Real, Ord, Eq, Show)
functorsParser :: forall f m. [FunctorParser f] -> ParserT m (FunctorExpression f Precedence)
functorsParser functors = try spaceConsumer *> parseLevel (zip (reverse functors) [1..]) >>= checkForFunctor
functorsParser functors = parseLevel (zip (reverse functors) [1..]) >>= checkForFunctor
where
parseLevel :: [(FunctorParser f, Precedence)] -> ParserT m (FunctorExpression f Precedence)
parseLevel ((FunctorParser f, i):functors) = f (parseLevel functors) >>= \case
Left a -> return a
Right fa -> return (Functor i fa)
parseLevel [] = variable <|> parens (functorsParser functors)
parseLevel [] = variable <|> L.parens (functorsParser functors)
variable :: ParserT m (FunctorExpression f Precedence)
variable = symbol "X" >> return Variable
variable = L.symbol "X" >> return Variable
<?> "variable (X)"
checkForFunctor ::
......@@ -74,4 +74,4 @@ parseFunctorExpression ::
-> Text
-> Either ParseErr (FunctorExpression f Precedence)
parseFunctorExpression functors =
parse (functorsParser (concat functors) <* eof)
parse ((L.space *> L.newlines) *> functorsParser (concat functors) <* eof)
......@@ -19,6 +19,7 @@ import MA.FunctorExpression.Parser
import MA.FunctorExpression.Sorts
import MA.Coalgebra.Parser
import MA.Parser.Types
import qualified MA.Parser.Lexer as L
import MA.Functors
import MA.Functors.SomeFunctor (SomeFunctor)
import MA.FunctorExpression.Desorting
......@@ -29,7 +30,9 @@ coalgebraParser ::
=> [[FunctorParser f]]
-> Parser (SymbolTable, Encoding (Label (Desorted f)) (H1 (Desorted f)))
coalgebraParser functors = do
L.space *> L.newlines
f <- (annotateSorts <$> functorsParser (concat functors))
L.newlines1
morphismsParser f <?> "morphism definition"
-- TODO: Needs better name
......
{-# LANGUAGE FlexibleContexts #-}
module MA.Parser.Lexer
( spaceConsumer
( newlines1
, newlines
, newlinesOrEof
, space
, space1
, symbol
, lexeme
, braces
......@@ -18,25 +22,56 @@ module MA.Parser.Lexer
) where
import Data.Char
import Control.Monad
import Data.Text (Text)
import qualified Data.Text as T
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Char hiding (space, space1, newline)
import qualified Text.Megaparsec.Char.Lexer as L
import MA.Parser.Types
spaceConsumer :: MonadParser m => m ()
spaceConsumer = L.space space1 (L.skipLineComment "#") empty
{-# INLINE spaceConsumer #-}
newline :: MonadParser m => m ()
newline = (void (takeWhile1P (Just "newline") (=='\n'))
<|> (skipLineComment <* char '\n')) *> space
{-# INLINE newline #-}
-- | Parses newlines or the end of input
newlinesOrEof :: MonadParser m => m ()
newlinesOrEof = newlines1 <|> eof
{-# INLINE newlinesOrEof #-}
-- | Parses one or more newlines or comments
newlines1 :: MonadParser m => m ()
newlines1 = skipSome newline
{-# INLINE newlines1 #-}
-- | Parses zero or more newlines or comments
newlines :: MonadParser m => m ()
newlines = skipMany newline
{-# INLINE newlines #-}
skipLineComment :: MonadParser m => m ()
skipLineComment = char '#' *> void (takeWhileP Nothing (/= '\n'))
{-# INLINE skipLineComment #-}
-- | Parsers zero or more whitespace characters (except newlines)
space :: MonadParser m => m ()
space = void (takeWhileP (Just "space") (\c -> isSpace c && c /= '\n'))
{-# INLINE space #-}
-- | Parsers one or more whitespace characters (except newlines)
space1 :: MonadParser m => m ()
space1 = void (takeWhile1P (Just "space") (\c -> isSpace c && c /= '\n'))
{-# INLINE space1 #-}
lexeme :: MonadParser m => m a -> m a
lexeme = L.lexeme spaceConsumer
lexeme = L.lexeme space
{-# INLINE lexeme #-}
symbol :: MonadParser m => Text -> m Text
symbol = L.symbol spaceConsumer
symbol = L.symbol space
{-# INLINE symbol #-}
braces :: MonadParser m => m a -> m a
......
......@@ -13,18 +13,52 @@ spec = do
spaceSpec :: Spec
spaceSpec = describe "whitespace" $ do
let p = parse (L.spaceConsumer <* eof) ""
describe "space" $ do
let p = parse (L.space <* eof) ""
it "skips spaces" $
p `shouldSucceedOn` " "
it "skips spaces" $
p `shouldSucceedOn` " "
it "skips tabs" $
p `shouldSucceedOn` "\t\t"
it "parses tabs" $
p `shouldSucceedOn` "\t\t"
it "doesn't parse letters" $
p `shouldFailOn` " a"
it "doesn't parse letters" $
p `shouldFailOn` " a"
it "doesn't parse newlines" $
p `shouldFailOn` " \n"
it "parses comments" $ do
p `shouldSucceedOn` "# foobar"
p `shouldSucceedOn` " # foobar\n #barfoo\n "
describe "newlines1" $ do
let p = parse (L.newlines1 <* eof) ""
it "skips many newlines" $
p `shouldSucceedOn` "\n\n\n"
it "skips newlines interleaved with spaces" $
p `shouldSucceedOn` "\n \n \n"
it "skips comments" $
p `shouldSucceedOn` "#foo\n#bar\n\n # lala\n"
it "fails on the empty input" $
p `shouldFailOn` ""
it "fails with leading whitespace" $
p `shouldFailOn` " \n"
describe "newlines" $ do
let p = parse (L.newlines <* eof) ""
it "skips many newlines" $
p `shouldSucceedOn` "\n\n\n"
it "skips newlines interleaved with spaces" $
p `shouldSucceedOn` "\n \n \n"
it "skips comments" $
p `shouldSucceedOn` "#foo\n#bar\n\n # lala\n"
it "parses the empty input" $
p `shouldSucceedOn` ""
it "fails with leading whitespace" $
p `shouldFailOn` " \n"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment