代码之家  ›  专栏  ›  技术社区  ›  MWB

有人能加快这个Chez计划吗?

  •  1
  • MWB  · 技术社区  · 6 年前

    --optimize-level 3 -O3

    (import
      (rnrs)
      (rnrs r5rs))
    
    
    (let* ((n (* 1024 16))
           (a (make-vector n))
           (acc 0))
    
      (do ((i 0 (+ i 1)))
        ((= i n) #f)
        (vector-set! a i (cons (cos i) (sin i))))
    
      (do ((i 0 (+ i 1)))
        ((= i n) #f)
        (do ((j 0 (+ j 1)))
          ((= j n) #f)
          (let ((ai (vector-ref a i))
                (aj (vector-ref a j)))
            (set! acc (+ acc (+ (* (car ai) (cdr aj))
                                (* (cdr ai) (car aj))))))))
    
      (write acc)
      (newline))
    
    (exit)
    

    #include <iostream>
    #include <cmath>
    #include <vector>
    #include <algorithm>
    
    typedef std::pair<double, double> pr;
    
    typedef std::vector<pr> vec;
    
    double loop(const vec& a)
    {
        double acc = 0;
        const int n = a.size();
    
        for(int i = 0; i < n; ++i)
            for(int j = 0; j < n; ++j)
            {
                const pr& ai = a[i];
                const pr& aj = a[j];
                acc += ai .first * aj.second + 
                       ai.second * aj .first;
            }
    
        return acc;
    }
    
    int main()
    {
        const int n = 1024 * 16;
    
        vec v(n);
    
        for(int i = 0; i < n; ++i)
            v[i] = pr(std::cos(i), std::sin(i));
    
        std::cout << loop(v) << std::endl;
    }
    

    我意识到,在方案中内存的间接比在C++中要多,但性能上的差异还是令人惊讶的。

    有没有一个简单的方法来加速方案版本(而不会将内存布局更改为完全不规则的布局)

    1 回复  |  直到 6 年前
        1
  •  2
  •   Sylwester    6 年前

    所以,虽然这些程序看起来是一样的,但它们却不一样。在C版本中使用fixnum算法,而Scheme版本使用标准的数字塔。要使C版本更像Scheme,请尝试使用bignum库进行计算。

    作为测试,我用 (rnrs arithmetic flonums) (rnrs arithmetic fixnums) 而且它把DrRacket的执行时间减少了一半。我希望在任何实现中都会发生同样的情况。

    现在,我的初步测试表明,C代码的执行速度大约是预期的25倍,而不是50倍,通过改为浮点运算,我将C代码的执行速度降到了预期的15倍。

    希望这对切兹也有帮助:)

    编辑

    #!r6rs
    (import
     (rnrs)
     ;; import the * and + that only work on floats (which are faster, but they still check their arguments)
     (only (rnrs arithmetic flonums) fl+ fl*))
    
    
    (let* ((n (* 1024 16))
           (a (make-vector n))
           (acc 0.0)) ; We want float, lets tell Scheme about that!
    
      ;; using inexact f instead of integer i
      ;; makes every result of cos and sin inexact
      (do ((i 0 (+ i 1))
           (f 0.0 (+ f 1)))
        ((= i n) #f)
        (vector-set! a i (cons (cos f) (sin f))))
    
      (do ((i 0 (+ i 1)))
        ((= i n) #f)
        (do ((j 0 (+ j 1)))
          ((= j n) #f)
          (let ((ai (vector-ref a i))
                (aj (vector-ref a j)))
            ;; use float versions of + and *
            ;; since this is where most of the time is used
            (set! acc (fl+ acc
                           (fl+ (fl* (car ai) (cdr aj))
                                (fl* (cdr ai) (car aj))))))))
    
      (write acc)
      (newline))
    

    而具体实现(锁定)只是为了说明在运行时完成的类型检查确实有影响此代码比以前的优化运行速度快30%:

    #lang racket
    
    ;; this imports import the * and + for floats as unsafe-fl* etc. 
    (require racket/unsafe/ops)
    
    (let* ((n (* 1024 16))
           (a (make-vector n))
           (acc 0.0)) ; We want float, lets tell Scheme about that!
    
      (do ((i 0 (+ i 1))
           (f 0.0 (+ f 1)))
        ((= i n) #f)
        ;; using inexact f instead of integer i
        ;; makes every result of cos and sin inexact
        (vector-set! a i (cons (cos f) (sin f))))
    
      (do ((i 0 (+ i 1)))
        ((= i n) #f)
        (do ((j 0 (+ j 1)))
          ((= j n) #f)
          ;; We guarantee argument is a vector
          ;; and nothing wrong will happen using unsafe accessors
          (let ((ai (unsafe-vector-ref a i))
                (aj (unsafe-vector-ref a j)))
            ;; use unsafe float versions of + and *
            ;; since this is where most of the time is used
            ;; also use unsafe car/cdr as we guarantee the argument is
            ;; a pair.
            (set! acc (unsafe-fl+ acc
                                  (unsafe-fl+ (unsafe-fl* (unsafe-car ai) (unsafe-cdr aj))
                                              (unsafe-fl* (unsafe-cdr ai) (unsafe-car aj))))))))
    
      (write acc)
      (newline))
    

    我努力保持了原始代码的风格。这不是一个很地道的计划。我不会用的 set! 当然,但这并不影响速度。

    推荐文章