代码之家  ›  专栏  ›  技术社区  ›  ChaosPandion

这是解析器组合器库的合理基础吗?

  •  2
  • ChaosPandion  · 技术社区  · 15 年前

    我最近一直在与fparsec合作,我发现缺少通用的解析器是我的主要停止点。我对这个小库的目标是简单性以及对通用输入的支持。你能想到任何能改善这一点的补充吗,或者有什么特别糟糕的地方吗?

    open LazyList 
    
    type State<'a, 'b> (input:LazyList<'a>, data:'b) =
        member this.Input = input
        member this.Data = data
    
    type Result<'a, 'b, 'c> =
    | Success of 'c * State<'a, 'b>
    | Failure of string * State<'a, 'b>
    
    type Parser<'a,'b, 'c> = State<'a, 'b> -> Result<'a, 'b, 'c>
    
    let (>>=) left right state =
        match left state with
        | Success (result, state) -> (right result) state
        | Failure (message, _) -> Result<'a, 'b, 'd>.Failure (message, state)
    
    let (<|>) left right state =
        match left state with
        | Success (_, _) as result -> result
        | Failure (_, _) -> right state
    
    let (|>>) parser transform state =
        match parser state with
        | Success (result, state) -> Success (transform result, state)
        | Failure (message, _) -> Failure (message, state)
    
    let (<?>) parser errorMessage state =
        match parser state with
        | Success (_, _) as result -> result
        | Failure (_, _) -> Failure (errorMessage, state)                     
    
    type ParseMonad() =
        member this.Bind (f, g) = f >>= g
        member this.Return x s = Success(x, s)
        member this.Zero () s = Failure("", s)                           
        member this.Delay (f:unit -> Parser<_,_,_>) = f()
    
    let parse = ParseMonad()
    

    回溯

    令人惊讶的是,实现您描述的内容并不需要太多的代码。这有点草率,但似乎效果相当好。

    let (>>=) left right state =
        seq {
            for res in left state do
                match res with
                | Success(v, s) ->
                    let v  = 
                        right v s 
                        |> List.tryFind (
                            fun res -> 
                                match res with 
                                | Success (_, _) -> true 
                                | _ -> false
                        ) 
                    match v with
                    | Some v -> yield v
                    | None -> ()
        } |> Seq.toList
    
    let (<|>) left right state = 
        left state @ right state
    

    切换代码以使用懒惰的列表和尾部调用优化的递归。

    let (>>=) left right state =
        let rec readRight lst =
            match lst with
            | Cons (x, xs) ->
                match x with
                | Success (r, s) as q -> LazyList.ofList [q]                     
                | Failure (m, s) -> readRight xs
            | Nil -> LazyList.empty<Result<'a, 'b, 'd>>
        let rec readLeft lst =
            match lst with
            | Cons (x, xs) ->
                match x with
                | Success (r, s) -> 
                    match readRight (right r s) with 
                    | Cons (x, xs) ->
                        match x with
                        | Success (r, s) as q -> LazyList.ofList [q]                     
                        | Failure (m, s) -> readRight xs
                    | Nil -> readLeft xs   
                | Failure (m, s) -> readLeft xs
            | Nil -> LazyList.empty<Result<'a, 'b, 'd>>
        readLeft (left state)
    
    let (<|>) (left:Parser<'a, 'b, 'c>) (right:Parser<'a, 'b, 'c>) state = 
        LazyList.delayed (fun () -> left state) 
        |> LazyList.append 
        <| LazyList.delayed (fun () -> right state)
    
    1 回复  |  直到 15 年前
        1
  •  2
  •   Tomas Petricek    15 年前

    我认为您需要做的一个重要的设计决策是,您是否希望在解析器中支持回溯(我不太记得解析理论,但这可能指定了解析器可以处理的语言类型)。

    回溯。 在您的实现中,解析器可能会失败 Failure 或者只产生一个结果 Success 案例)。另一种选择是生成零个或多个结果(例如,将结果表示为 seq<'c> )抱歉,如果这是你已经考虑过的事情:—),但无论如何…

    不同的是,您的解析器总是遵循第一个可能的选项。例如,如果编写如下内容:

    let! s1 = (str "ab" <|> str "a")
    let! s2 = str "bcd"
    

    使用您的实现,输入“abcd”将失败。它将选择 <|> 然后,它将处理前两个字符,序列中的下一个分析器将失败。基于序列的实现将能够回溯并遵循 <和gt; 并分析输入。

    我想到的另一个想法是你也可以 Combine 解析器计算生成器的成员。这有点微妙(因为您需要了解计算表达式是如何转换的),但有时它可能很有用。如果添加:

    member x.Combine(a, b) = a <|> b
    member x.ReturnFrom(p) = p
    

    然后可以很好地编写递归分析器:

    let rec many p acc = 
      parser { let! r = p                  // Parse 'p' at least once
               return! many p (r::acc)     // Try parsing 'p' multiple times
               return r::acc |> List.rev } // If fails, return the result