1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
-- | Parser for the untyped lambda calculus, whose AST is defined in "AST".
module Parse (parse, tryParse) where
import qualified AST as A
import Text.Megaparsec (
MonadParsec (..),
Parsec,
between,
errorBundlePretty,
noneOf,
runParser,
some,
(<?>),
(<|>),
)
import Text.Megaparsec.Char (space1)
import qualified Text.Megaparsec.Char.Lexer as L
import Control.Monad (void)
import Data.Bifunctor (Bifunctor (..))
import Data.Void (Void)
-- * Exposed functions
-- | Parse a Lambda expression; throw an exception over an error
parse :: String -> A.Expr
parse = either error id . tryParse
-- | Parse some code 'String' into an 'L.Expr' or an error message.
tryParse :: String -> Either String A.Expr
tryParse =
first errorBundlePretty
. runParser (pSpace >> pExpr <* eof) "<input>"
{- * Expression parser
Note that Megaparsec allows us to label tokens with 'label' or '(<?>)', which
helps it produce human-readable error messages.
-}
-- | Entry point for parser.
pExpr :: Parser A.Expr
pExpr = pBody <?> "expression"
-- | Parse expressions at the lowest level of precedence, i.e., lambdas.
pBody :: Parser A.Expr
pBody = pLam <|> pApp
where
pLam = do
pToken "\\"
bs <- some pIdent <?> "lambda binders"
pToken "."
body <- pBody <?> "lambda body"
return $ foldr A.Lam body bs
-- | Parse juxtaposition as application.
pApp :: Parser A.Expr
pApp = foldl1 A.App <$> some pAtom <?> "term application"
-- | Parse expressions at the highest precedence, including parenthesized terms
pAtom :: Parser A.Expr
pAtom = A.Var <$> pVar <|> pParens pExpr
where
pVar = pIdent <?> "variable"
-- * Megaparsec boilerplate and helpers
-- | Parsing monad.
type Parser = Parsec Void String
-- | Parse an identifier, possible surrounded by spaces
pIdent :: Parser String
pIdent = L.lexeme pSpace (some $ noneOf ['\\', '.', '(', ')', ' ', '\n', '\r', '\t', '-'])
-- | Consume a token defined by a string, possibly surrounded by spaces
pToken :: String -> Parser ()
pToken = void . L.symbol pSpace
-- | Parse some element surrounded by parentheses.
pParens :: Parser a -> Parser a
pParens = between (pToken "(") (pToken ")")
-- | Consumes whitespace and comments.
pSpace :: Parser ()
pSpace =
label "whitespace" $
L.space
space1
(L.skipLineComment "--")
(L.skipBlockCommentNested "{-" "-}")
|