我目前正在尝试学习Scheme(特别是chicken Scheme),并希望更好地了解该语言的性能陷阱。我已经写了一个CSV解析器,并在下面分享了它。我尝试使用的测试文件是130MB,解析大约需要7分钟。简单地读取所有行只需要几毫秒,所以问题在于解析器。我试图让我的代码尽可能地“Lispy”,并希望避免引入太多我知道在Chicken Scheme中可用的低级构造。我希望从改进这段代码中获得的主要东西是Scheme性能背后更好的直觉。
编辑:我从评论中得到了一些建议,现在有两个独立的解析器。第一个代码块使用基于列表的解决方案,第二个使用字符串引用。我原以为由于线性记忆,字符串refs会更快,但实验只让我感到困惑。但这是我试图解决的核心问题。我真的不明白为什么一个解决方案比另一个更好。任何见解都会很棒。
我还回去对性能进行了一些测量:
列表解决方案已编译:
37.658s CPU time, 1.2s GC time (major), 258575354/244106 mutations (total/tracked), 1160/317431 GCs (major/minor), maximum live heap: 306.33 KiB
已编译字符串引用解决方案:
644.939s CPU time, 1.308s GC time (major), 167854392/243430 mutations (total/tracked), 1281/1023116 GCs (major/minor), maximum live heap: 305.51 KiB
; List based solution
(import (chicken string))
(import utf8)
(import list-utils)
(define (lookahead-list ahead lst)
(cond [(null? ahead) #t]
[(and (not (null? lst)) (not (null? ahead)))
(if (eq? (car ahead) (car lst)) (lookahead-list (cdr ahead) (cdr lst)) #f)]
[else #f]
)
)
(define (null-blist? blist) (null? (car blist)))
(define (lookahead-blist ahead blst) (lookahead-list ahead (car blst)))
(define (read-blist blist)
(if (null? blist) #!eof
(let ([head (caar blist)]) (set-car! blist (cdar blist)) head))
)
; Csv parsing
(define (csv/read-atom blist)
(let loop ([res '()] [cmode #t])
(cond [(lookahead-blist '(#\" #\") blist) (read-blist blist) (loop (cons (read-blist blist) res) cmode)]
[(lookahead-blist '(#\") blist) (read-blist blist) (loop res (not cmode))]
[(and cmode (lookahead-blist '(#\,) blist)) (reverse-list->string res)]
[(null-blist? blist) (reverse-list->string res)]
[else (loop (cons (read-blist blist) res) cmode)]
))
)
(define (csv/parse-non-blank-line blist)
(reverse
(let loop ([res '()])
(let ([nres (cons (csv/read-atom blist) res)])
(if (lookahead-blist '(#\,) blist)
(begin (read-blist blist) (loop nres)) nres)
)
))
)
(define (csv/parse-line str)
(if (equal? str "") '() (csv/parse-non-blank-line (list (string->list str))))
)
(define (csv/parse func init port)
(let loop ([acc init] [line (read-line port)])
(if (eq? line #!eof) acc
(loop (func acc (csv/parse-line line)) (read-line port))
)
)
)
(define (csv/parse-table func init port)
(let ([format (csv/parse-line (read-line port))])
(define (shim acc elem)
(func acc (zip-alist format elem))
)
(csv/parse shim init port)
)
)
(define (csv/parse-table-file func init fname) (call-with-input-file (lambda (p) (csv/parse-table func init p))))
; String-ref based solution
(import (chicken io))
(import (chicken string))
(import utf8)
(import list-utils)
; Cursor string
(define (string->cstring str)
(cons 0 str)
)
(define (cstring/forward cstr)
(cons (+ 1 (car cstr)) (cdr cstr))
)
(define (cstring/peek cstr)
(if (cstring/null? cstr)
#!eof
(string-ref (cdr cstr) (car cstr))
)
)
(define (cstring/forward! cstr)
(let ([ret (cstring/peek cstr)])
(set-car! cstr (+ 1 (car cstr)))
ret
)
)
(define (cstring/null? cstr)
(>= (car cstr) (string-length (cdr cstr)))
)
(define (lookahead-cstring ahead cstr)
(cond [(null? ahead) #t]
[(and (not (cstring/null? cstr)) (not (null? ahead)))
(if (eq? (car ahead) (cstring/peek cstr)) (lookahead-cstring (cdr ahead) (cstring/forward cstr)) #f)]
[else #f]
)
)
; Csv parsing
(define (csv/read-atom cstr)
(let loop ([res '()] [cmode #t])
(cond [(lookahead-cstring '(#\" #\") cstr) (cstring/forward! cstr) (loop (cons (cstring/forward! cstr) res) cmode)]
[(lookahead-cstring '(#\") cstr) (cstring/forward! cstr) (loop res (not cmode))]
[(and cmode (lookahead-cstring '(#\,) cstr)) (reverse-list->string res)]
[(cstring/null? cstr) (reverse-list->string res)]
[else (loop (cons (cstring/forward! cstr) res) cmode)]
))
)
(define (csv/parse-non-blank-line cstr)
(reverse
(let loop ([res '()])
(let ([nres (cons (csv/read-atom cstr) res)])
(if (lookahead-cstring '(#\,) cstr)
(begin (cstring/forward! cstr) (loop nres)) nres)
)
))
)
(define (csv/parse-line str)
(if (equal? str "") '() (csv/parse-non-blank-line (string->cstring str)))
)
(define (csv/parse func init port)
(let loop ([acc init] [line (read-line port)])
(if (eq? line #!eof) acc
(loop (func acc (csv/parse-line line)) (read-line port))
)
)
)
(define (csv/parse-table func init port)
(let ([format (csv/parse-line (read-line port))])
(define (shim acc elem)
(func acc (zip-alist format elem))
)
(csv/parse shim init port)
)
)
(define (csv/parse-table-file func init fname) (call-with-input-file fname (lambda (p) (csv/parse-table func init p))))