about summary refs log tree commit diff
path: root/src/Parse.hs
blob: 748bd980c5f0d0c478bd8866e939e682de62f5aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}

-- | Parser for the untyped lambda calculus, whose AST is defined in "AST".
module Parse (parse, tryParse) where

import qualified AST as A

import Text.Megaparsec (
  MonadParsec (..),
  Parsec,
  errorBundlePretty,
  noneOf,
  runParser,
  some,
  (<?>),
  (<|>),
  between
 )
import Text.Megaparsec.Char( space1 )
import qualified Text.Megaparsec.Char.Lexer as L

import Control.Monad (void)
import Data.Bifunctor (Bifunctor (..))
import Data.Void (Void)

-- * Exposed functions

-- | Parse a Lambda expression; throw an exception over an error
parse :: String -> A.Expr
parse = either error id . tryParse

-- | Parse some code 'String' into an 'L.Expr' or an error message.
tryParse :: String -> Either String A.Expr
tryParse = first errorBundlePretty .
           runParser (pSpace >> pExpr <* eof) "<input>"

{- * Expression parser

Note that Megaparsec allows us to label tokens with 'label' or '(<?>)', which
helps it produce human-readable error messages.
-}

-- | Entry point for parser.
pExpr :: Parser A.Expr
pExpr = pBody <?> "expression"

-- | Parse expressions at the lowest level of precedence, i.e., lambdas.
pBody :: Parser A.Expr
pBody = pLam <|> pApp
 where
  pLam = do
    pToken "\\"
    bs <- some pIdent <?> "lambda binders"
    pToken "."
    body <- pBody <?> "lambda body"
    return $ foldr A.Lam body bs

-- | Parse juxtaposition as application.
pApp :: Parser A.Expr
pApp = foldl1 A.App <$> some pAtom <?> "term application"

-- | Parse expressions at the highest precedence, including parenthesized terms
pAtom :: Parser A.Expr
pAtom = A.Var <$> pVar <|> pParens pExpr
 where
  pVar = pIdent <?> "variable"

-- * Megaparsec boilerplate and helpers

-- | Parsing monad.
type Parser = Parsec Void String

-- | Parse an identifier, possible surrounded by spaces
pIdent :: Parser String
pIdent = L.lexeme pSpace (some $ noneOf ['\\','.','(',')',' ','\n','\r','\t','-'])

-- | Consume a token defined by a string, possibly surrounded by spaces
pToken :: String -> Parser ()
pToken = void . L.symbol pSpace

-- | Parse some element surrounded by parentheses.
pParens :: Parser a -> Parser a
pParens = between (pToken "(") (pToken ")")

-- | Consumes whitespace and comments.
pSpace :: Parser ()
pSpace = label "whitespace" $ L.space
    space1
    (L.skipLineComment "--")
    (L.skipBlockCommentNested "{-" "-}")