Сравнение blitz++, armadillo и boost::MultiArray

Question

Сравнение blitz++, armadillo и boost::MultiArray

Я сделал сравнение между blitz++, armadillo, boost::MultiArray со следующим кодом (заимствовано из старого поста)

#include <iostream>
using namespace std;
#include <windows.h>
#define _SCL_SECURE_NO_WARNINGS
#define BOOST_DISABLE_ASSERTS 
#include <boost/multi_array.hpp>
#include <blitz/array.h>
#include <armadillo>

// Micro-benchmark driver. For a X_SIZE x Y_SIZE array of double it times two
// workloads, each repeated ITERATIONS times:
//   * "Loop"        - assigning the constant 1.0001 to every element;
//   * "computation" - the in-place update a += a * 0.5 on every element;
// using boost::multi_array, blitz::Array, arma::mat and a raw heap buffer.
// One line per case is printed: the GetTickCount() difference divided by
// 1000.0, labelled as seconds.
//
// argc and argv are not used. Always returns 0.
//
// NOTE(review): none of the arrays is read after its timed loop, so an
// optimizing compiler may be able to remove part of the measured work -
// confirm by inspecting the generated code before trusting the numbers.
int main(int argc, char* argv[])
{
    const int X_SIZE = 1000;
    const int Y_SIZE = 1000;
    const int ITERATIONS = 100;
    // Tick values taken immediately before and after each timed section;
    // both variables are reused by every case below.
    unsigned int startTime = 0;
    unsigned int endTime = 0;

    // Create the boost array


    //------------------Measure boost Loop------------------------------------------
    // Element-wise assignment through boost::multi_array's operator[][];
    // the second index (y) varies fastest.
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Boost Loop] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }
    //------------------Measure blitz Loop-------------------------------------------
    // Same assignment through blitz::Array's operator(x, y); the second
    // index (y) varies fastest, as in the boost case.
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    blitzArray(x,y) = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Blitz Loop] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure armadillo loop----------------------------------------
    // Same assignment through arma::mat's operator(x, y). Unlike the boost
    // and blitz cases the loops are swapped: the first index (x) varies
    // fastest - presumably to match Armadillo's storage order; confirm
    // against the Armadillo documentation.
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    matArray(x,y) = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[arma  Loop]  Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure native loop----------------------------------------
    // Baseline: a raw heap buffer of X_SIZE * Y_SIZE doubles written with a
    // single flat loop (no 2-D indexing). The buffer is released after the
    // result is printed.
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] = 1.0001;
            }
        }
        endTime = ::GetTickCount();
        printf("[Native Loop]Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
        delete[] nativeMatrix;
    }

    //------------------Measure boost computation-----------------------------------
    // The array is first filled with 1.0001 (not timed); the timed section
    // then applies a += a * 0.5 element by element.
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                boostMatrix[x][y] = 1.0001;
            }
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] += boostMatrix[x][y] * 0.5;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Boost computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure blitz computation-----------------------------------
    // Uses Blitz++ whole-array expressions instead of explicit element loops:
    // the scalar assignment fills the array (not timed), and the timed
    // section applies the update with one array expression per iteration.
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        blitzArray = 1.0001;
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            blitzArray += blitzArray*0.5;
        }
        endTime = ::GetTickCount();
        printf("[Blitz computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure armadillo computation-------------------------------
    // Uses Armadillo whole-matrix operations: fill() initialises the matrix
    // (not timed), and the timed section applies the update with one matrix
    // expression per iteration.
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        matArray.fill(1.0001);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            // Disabled: re-filling here would reset the values on every iteration.
            //matArray.fill(1.0001);
            matArray += matArray*0.5;
        }
        endTime = ::GetTickCount();
        printf("[arma  computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure native computation------------------------------------------
    // Baseline for the update: raw heap buffer, filled once (not timed), then
    // updated in place with a single flat loop per iteration. The buffer is
    // released after the result is printed.
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
        {
            nativeMatrix[y] = 1.0001;
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] += nativeMatrix[y] * 0.5;
            }
        }
        endTime = ::GetTickCount();
        printf("[Native computation]Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
        delete[] nativeMatrix;
    }

    return 0;
}

В Windows с VS2010 результаты такие:

[Boost Loop] Elapsed time:  1.217 seconds
[Blitz Loop] Elapsed time:  0.046 seconds
[arma  Loop]  Elapsed time:  0.078 seconds
[Native Loop]Elapsed time:  0.172 seconds
[Boost computation] Elapsed time:  2.152 seconds
[Blitz computation] Elapsed time:  0.156 seconds
[arma  computation] Elapsed time:  0.078 seconds
[Native computation]Elapsed time:  0.078 seconds

В Windows с компилятором Intel C++ результаты такие:

[Boost Loop] Elapsed time:  0.468 seconds
[Blitz Loop] Elapsed time:  0.125 seconds
[arma  Loop]  Elapsed time:  0.046 seconds
[Native Loop]Elapsed time:  0.047 seconds
[Boost computation] Elapsed time:  0.796 seconds
[Blitz computation] Elapsed time:  0.109 seconds
[arma  computation] Elapsed time:  0.078 seconds
[Native computation]Elapsed time:  0.062 seconds

Что-то странное:

(1) with VS2010, native computation (including loop) is faster than native loop
(2) blitz loop behave so different under VS2010 and intel C++.

Чтобы скомпилировать blitz++ компилятором Intel C++, в папке blitz/intel/ требуется файл bzconfig.h, но его там нет. Я просто скопировал туда файл blitz/ms/bzconfig.h. Это может дать неоптимальную конфигурацию. Кто-нибудь может подсказать, как скомпилировать blitz++ компилятором Intel C++? В руководстве сказано запустить скрипт bzconfig, чтобы получить правильный файл bzconfig.h, но я не понимаю, что это значит.

Большое спасибо!

Добавлю некоторые из моих выводов:

1. Boost multi array is the slowest.
2. With intel c++ compiler, native pointers are very fast.
3. With intel c++ compiler,  armadillo can achieve the performance of native pointers.
4. Also test eigen, it is x0% slower than armadillo in my simple cases.
5. Curious about blitz++'s behavior in intel c++ compiler with proper configuration.
   Please see my question.

36

c++ multidimensional-array armadillo boost-multi-array blitz++

Источник

user1899020 19 янв '13 в 13:36

4 ответа

Другие вопросы по тегам c++ multidimensional-array armadillo boost-multi-array blitz++

user666154 22 окт '15 в 12:23 2015-10-22 12:23 · Answer 1 · 2015-10-22 12:23

Короткий ответ: ./configure CXX=icpc — это можно найти в Руководстве пользователя Blitz++.

Длинный ответ:

Чтобы скомпилировать blitz ++ с помощью компилятора intel C++, в папке blitz/intel/ требуется файл bzconfig.h. Но нет.

Да и да. Blitz++ должен генерировать сам файл. Согласно Руководству пользователя Blitz++ blitz.pdf включен в blitz-0.10.tar.gz, раздел "Установка",

Blitz++ использует GNU Autoconf, который обрабатывает перезапись Make-файлов для различных платформ и компиляторов.

Точнее, Blitz++ использует набор инструментов GNU autotools (automake, autoconf, configure), который умеет генерировать make-файлы, скрипты настройки, заголовочные файлы и многое другое. Файлы bzconfig.h должны генерироваться скриптом configure, который поставляется с Blitz++ в готовом к использованию виде.

Я просто копирую один в blitz / ms / bzconfig.h в. Это может дать неоптимальную конфигурацию.

Если "неоптимальный" для вас означает "неработающий", тогда да. :-) Вам нужен файл intel/bzconfig.h, который точно описывает ваш компилятор.

Кто-нибудь может сказать мне, как скомпилировать блиц ++ с компилятором Intel C++?

Прочитайте и следуйте инструкциям, в частности, приведенному выше разделу "Установка".

перейдите в каталог 'blitz-VERSION' и введите: ./configure CXX=[compiler] где [компилятор] является одним из xlC++, icpc, pathCC, xlC, cxx, aCC, CC, g++, KCC, pgCC или FCC. (Если вы не выберете компилятор C++, скрипт configure попытается найти подходящий компилятор для текущей платформы.)

Вы сделали это? Для компилятора Intel нужно использовать ./configure CXX=icpc.

В руководстве было сказано запустить скрипт bzconfig, чтобы получить правильный файл bzconfig.h. Но я не понимаю, что это значит.

Я предполагаю, что под "этим" вы подразумеваете "это". Что вы подразумеваете под "руководством"? Моя копия Руководства пользователя Blitz++ не упоминает bzconfig, Вы уверены, что используете руководство, соответствующее вашей версии Blitz++?

PS: Если посмотреть на "bzconfig" в содержимом blitz-0.10, похоже, что "bzconfig" больше не является частью Blitz++, но раньше был:

find . -name bzconfig -> Нет результатов

find . -print0 | xargs -0 grep -a -i -n -e bzconfig:

./blitz/compiler.h:44:    #error  In <blitz/config.h>: A working template implementation is required by Blitz++ (you may need to rerun the compiler/bzconfig script)

Это должно быть обновлено.

./blitz/gnu/bzconfig.h:4:/* blitz/gnu/bzconfig.h. Generated automatically at end of configure. */
./configure.ac:159:# autoconf replacement of bzconfig

Вот оно, эти bzconfig.h файлы должны быть сгенерированы configure,

./ChangeLog.1:1787: will now replace the old file that was generate with the bzconfig

Это может быть изменение, которое переключилось на autoconf.

./INSTALL:107:  2. Go into the compiler subdirectory and run the bzconfig

Это должно быть обновлено. Это то, что заставило тебя искать bzconfig?

./README:27:compiler      Compiler tests (used with obsolete bzconfig script)

Нуждается в обновлении: каталог compiler больше не входит в поставку.

user512923 22 дек '15 в 21:45 2015-12-22 21:45 · Answer 2 · 2015-12-22 21:45

Насколько я могу судить, вы оцениваете производительность каждой библиотеки матриц, измеряя скорость умножения одной матрицы на скаляр. Благодаря своей политике на основе шаблонов, Armadillo сделает очень хорошую работу, разбив каждое умножение на распараллеливаемый код для большинства компиляторов.

Но я предлагаю вам переосмыслить объем и методику тестирования. Например, вы пропустили каждую реализацию BLAS. Функция BLAS, которая вам понадобится, будет dscal. Предоставленная поставщиком реализация для вашего конкретного ЦП, вероятно, сработает хорошо.

Более уместно, есть еще много вещей, которые любая разумная библиотека векторов должна уметь делать: умножение матриц, точечные произведения, длины векторов, транспонирование и т. Д., Которые не рассматриваются вашим тестом. В вашем тесте рассматриваются ровно две вещи: назначение элементов, которое практически не является узким местом для векторных библиотек, и скалярное / векторное умножение, которое является функцией уровня 1 BLAS, предоставляемой каждым производителем ЦП.

Здесь обсуждается уровень 1 BLAS и код, сгенерированный компилятором.

TL;DR: используйте Armadillo, слинкованный с нативными для вашей платформы библиотеками BLAS и LAPACK.

user4374258 08 июн '16 в 09:51 2016-06-08 09:51 · Answer 3 · 2016-06-08 09:51

Мой тест показал, что буст-массивы имеют ту же производительность, что и нативный / жестко закодированный код C++.

Сравнивать их нужно с включённой оптимизацией компилятора, то есть с флагами: -O3 -DNDEBUG -DBOOST_UBLAS_NDEBUG -DBOOST_DISABLE_ASSERTS -DARMA_NO_DEBUG ... В моих тестах (em++) Boost работал как минимум в 10 раз быстрее, если отключить его проверки (assert), включить оптимизацию третьего уровня (-O3) и т. д. Любое честное сравнение должно использовать эти флаги.

user225186 31 авг '23 в 08:35 2023-08-31 08:35 · Answer 4 · 2023-08-31 08:35

Из любопытства я снова посещаю этот тест скорости.

Я добавил тесты для библиотеки Eigen и своей собственной библиотеки MULTI (https://gitlab.com/correaa/boost-multi) .

Полный код, адаптированный под Linux, находится ниже в конце поста.

Реализовав библиотеку многомерных массивов, важно учитывать, что большинство этих библиотек имеют проверку границ, которую можно при необходимости отключить.

Также важно отметить, что при измерении времени нужно как минимум не позволить компилятору выбросить код, не имеющий побочных эффектов. По этой причине я добавил функцию doNotOptimizeAway, которая это предотвращает. (См. её код в конце.) Также важно знать, откуда вызывать эту функцию (я полагаю, внутри цикла повторений).

Вот результаты:

Без оптимизации (это не очень хороший тест; он просто показывает, насколько плохо может быть выполнение этих измерений в режиме отладки)

      $ g++ a.cpp && ./a.out
[Boost Loop] Elapsed time:  7.216 seconds
[Blitz Loop] Elapsed time:  1.151 seconds
[arma  Loop]  Elapsed time:  0.747 seconds
[Native Loop] Elapsed time:  0.319 seconds
[EIGEN Loop] Elapsed time:  9.022 seconds
[MULTI Loop] Elapsed time:  7.769 seconds

[Boost computation] Elapsed time: 15.456 seconds
[Blitz computation] Elapsed time:  4.441 seconds
[arma  computation] Elapsed time:  0.662 seconds
[Native computation] Elapsed time:  0.340 seconds
[EIGEN computation] Elapsed time: 18.714 seconds
[MULTI computation] Elapsed time: 15.434 seconds

С оптимизацией:

      $ sudo cpupower frequency-set --governor performance
$ g++ -O3 -DNDEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.027 seconds
[arma  Loop]  Elapsed time:  0.052 seconds
[Native Loop] Elapsed time:  0.023 seconds
[EIGEN Loop] Elapsed time:  0.138 seconds
[MULTI Loop] Elapsed time:  0.031 seconds

[Boost computation] Elapsed time:  0.059 seconds
[Blitz computation] Elapsed time:  0.054 seconds
[arma  computation] Elapsed time:  0.061 seconds
[Native computation] Elapsed time:  0.058 seconds
[EIGEN computation] Elapsed time:  0.196 seconds
[MULTI computation] Elapsed time:  0.060 seconds

С быстрой математикой:

      $ g++ -Ofast -DNDEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.028 seconds
[arma  Loop]  Elapsed time:  0.051 seconds
[Native Loop] Elapsed time:  0.024 seconds
[EIGEN Loop] Elapsed time:  0.139 seconds
[MULTI Loop] Elapsed time:  0.027 seconds

[Boost computation] Elapsed time:  0.040 seconds
[Blitz computation] Elapsed time:  0.033 seconds
[arma  computation] Elapsed time:  0.043 seconds
[Native computation] Elapsed time:  0.039 seconds
[EIGEN computation] Elapsed time:  0.148 seconds
[MULTI computation] Elapsed time:  0.041 seconds

С макросами для отключения проверок границ, упомянутыми здесь:

      $ sudo cpupower frequency-set --governor performance
$ g++ -march=native -mtune=native -Ofast -DNDEBUG -DARMA_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DARMA_USE_LAPACK -DARMA_USE_BLAS -DBOOST_UBLAS_NDEBUG -DARMA_NO_DEBUG -DEIGEN_NO_DEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.024 seconds
[arma  Loop]  Elapsed time:  0.022 seconds
[Native Loop] Elapsed time:  0.022 seconds
[EIGEN Loop] Elapsed time:  0.131 seconds
[MULTI Loop] Elapsed time:  0.024 seconds

[Boost computation] Elapsed time:  0.025 seconds
[Blitz computation] Elapsed time:  0.023 seconds
[arma  computation] Elapsed time:  0.045 seconds
[Native computation] Elapsed time:  0.024 seconds
[EIGEN computation] Elapsed time:  0.132 seconds
[MULTI computation] Elapsed time:  0.024 seconds

Результаты с clang немного озадачивают (моя библиотека и Blitz показывают худшие результаты!)

      $ clang++ -std=c++17 -march=native -mtune=native -Ofast -DNDEBUG -DARMA_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DARMA_USE_LAPACK -DARMA_USE_BLAS -DBOOST_UBLAS_NDEBUG -DARMA_NO_DEBUG -DEIGEN_NO_DEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.025 seconds
[Blitz Loop] Elapsed time:  0.024 seconds
[arma  Loop]  Elapsed time:  0.023 seconds
[Native Loop] Elapsed time:  0.023 seconds
[EIGEN Loop] Elapsed time:  0.132 seconds
[MULTI Loop] Elapsed time:  0.069 seconds

[Boost computation] Elapsed time:  0.024 seconds
[Blitz computation] Elapsed time:  0.055 seconds
[arma  computation] Elapsed time:  0.023 seconds
[Native computation] Elapsed time:  0.023 seconds
[EIGEN computation] Elapsed time:  0.124 seconds
[MULTI computation] Elapsed time:  0.087 seconds

Итак, вот и все. Кажется, что в наши дни наиболее важным фактором является умение компилировать с оптимизацией, отключать отладку, выбирать компилятор и уметь писать тесты времени, а не выбранную библиотеку (по крайней мере, для этих простых шаблонов использования).

Эйген здесь все еще выделяется; пожалуйста, дайте мне знать, если кто-нибудь знает правильные варианты компиляции с Eigen.

Более подробная информация о моей машине с Ubuntu 23.04:

      g++ (Ubuntu 12.3.0-1ubuntu1~23.04) 12.3.0
clang version 15.0.7
libboost1.81-all-dev/lunar 1.81.0-4build2 amd64
libblitz0-dev/lunar,now 1:1.0.2+ds-4 amd64 [installed]
libarmadillo-dev/lunar,now 1:11.4.2+dfsg-1 amd64 [installed]
libeigen3-dev/lunar,now 3.4.0-4 all [installed]
MULTI v0.80.1 from https://gitlab.com/correaa/boost-multi

ПРОЦЕССОР:Intel® Core™ i7-9750H × 12

Полный код:

      #include <iostream>
using namespace std;
#define _SCL_SECURE_NO_WARNINGS
#define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>
#include <blitz/array.h>
#include <armadillo>
#include <eigen3/Eigen/Dense>
#include "/home/correaa/boost-multi/include/multi/array.hpp"

#include <chrono>
// Returns the current reading of std::chrono::steady_clock as a time_point.
// The benchmark calls this as ::GetTickCount() before and after each timed
// section and subtracts the two readings.
auto GetTickCount()
{
    using clock = std::chrono::steady_clock;
    return clock::now();
}

// Optimization barrier for benchmarks (GCC/Clang extended asm).
//
// The empty asm statement takes `t` as an input operand, so the compiler must
// treat the value of `t` as used. The "memory" clobber additionally tells the
// compiler that the statement may read or write arbitrary memory, which
// forces pending stores to be completed before the call.
//
// Without the clobber only the value of `t` itself was kept alive: when `t`
// is a pointer, or an array object that refers to a separately allocated
// buffer, the stores made through it could still be discarded as dead,
// leaving the benchmark to time an empty loop.
//
// @param t  Object (or pointer) whose contents must be considered observed.
template <class T>
void doNotOptimizeAway(T&& t) {
    __asm__ __volatile__ ("" :: "g" (t) : "memory");
}

// Micro-benchmark driver. For a X_SIZE x Y_SIZE array of double it times two
// workloads, each repeated ITERATIONS times:
//   * "Loop"        - assigning the constant 1.0001 to every element;
//   * "computation" - the in-place update a += a * 0.5 on every element;
// using boost::multi_array, blitz::Array, arma::mat, a raw heap buffer,
// Eigen::MatrixXd and boost::multi::array.
//
// After every repetition the array is passed to doNotOptimizeAway() so the
// compiler has to treat the freshly written data as observed. Each case
// prints the elapsed steady-clock time in seconds, divided by `factor` so the
// figures stay comparable with a 100-iteration run.
//
// argc and argv are not used. Always returns 0.
int main(int argc, char* argv[])
{
    const int X_SIZE = 1000;
    const int Y_SIZE = 1000;
    double factor = 10;  // had to do more iteration to get more steady results, the timings are normalized still
    const int ITERATIONS = 100*10;
    // Clock readings taken immediately before and after each timed section;
    // both variables are reused by every case below.
    auto startTime = ::GetTickCount();
    auto endTime = ::GetTickCount();

    //------------------Measure boost Loop------------------------------------------
    // Element-wise assignment through operator[][]; the second index (y)
    // varies fastest.
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] = 1.0001;
                }
            }
            doNotOptimizeAway(boostMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Boost Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure blitz Loop-------------------------------------------
    // Same assignment through blitz::Array's operator(x, y); y varies fastest.
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    blitzArray(x,y) = 1.0001;
                }
            }
            doNotOptimizeAway(blitzArray);
        }
        endTime = ::GetTickCount();
        printf("[Blitz Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure armadillo loop----------------------------------------
    // arma::mat is column-major, so the column index (y) is the outer loop
    // and the row index (x) varies fastest.
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    matArray(x,y) = 1.0001;
                }
            }
            doNotOptimizeAway(matArray);
        }
        endTime = ::GetTickCount();
        printf("[arma  Loop]  Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure native loop----------------------------------------
    // Baseline: raw heap buffer written with a single flat loop. The buffer
    // is released after the result is printed.
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] = 1.0001;
            }
            doNotOptimizeAway(nativeMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Native Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
        delete[] nativeMatrix;
    }
    //------------------Measure EIGEN Loop------------------------------------------
    // Eigen::MatrixXd stores its data column-major by default, so the column
    // index (y) is the outer loop and the row index (x) varies fastest - the
    // same traversal used for Armadillo above. Iterating rows in the outer
    // loop walks memory with a stride of X_SIZE doubles, which penalises
    // Eigen in a way none of the other libraries' cases are penalised.
    {
        typedef Eigen::MatrixXd ImageArrayType;
        ImageArrayType eigenMatrix(X_SIZE, Y_SIZE);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    eigenMatrix(x, y) = 1.0001;
                }
            }
            doNotOptimizeAway(eigenMatrix);
        }
        endTime = ::GetTickCount();
        printf("[EIGEN Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure Multi Loop------------------------------------------
    // Same assignment through boost::multi::array's operator[][]; y varies
    // fastest.
    {
        typedef boost::multi::array<double, 2> ImageArrayType;
        ImageArrayType multiMatrix({X_SIZE, Y_SIZE});
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    multiMatrix[x][y] = 1.0001;
                }
            }
            doNotOptimizeAway(multiMatrix);
        }
        endTime = ::GetTickCount();
        printf("[MULTI Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }


    //------------------Measure boost computation-----------------------------------
    // The array is first filled with 1.0001 (not timed); the timed section
    // then applies a += a * 0.5 element by element.
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                boostMatrix[x][y] = 1.0001;
            }
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] += boostMatrix[x][y] * 0.5;
                }
            }
            doNotOptimizeAway(boostMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Boost computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure blitz computation-----------------------------------
    // Uses Blitz++ whole-array expressions instead of explicit element loops.
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        blitzArray = 1.0001;
        doNotOptimizeAway(blitzArray);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            blitzArray += blitzArray*0.5;
            doNotOptimizeAway(blitzArray);
        }
        endTime = ::GetTickCount();
        printf("[Blitz computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure armadillo computation-------------------------------
    // Uses Armadillo whole-matrix operations instead of explicit element loops.
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        matArray.fill(1.0001);
        doNotOptimizeAway(matArray);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            // Disabled: re-filling here would reset the values on every iteration.
            //matArray.fill(1.0001);
            matArray += matArray*0.5;
            doNotOptimizeAway(matArray);
        }
        endTime = ::GetTickCount();
        printf("[arma  computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure native computation------------------------------------------
    // Baseline for the update: raw heap buffer, filled once (not timed), then
    // updated in place with a single flat loop per iteration.
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
        {
            nativeMatrix[y] = 1.0001;
        }
        doNotOptimizeAway(nativeMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] += nativeMatrix[y] * 0.5;
            }
            doNotOptimizeAway(nativeMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Native computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
        delete[] nativeMatrix;
    }
    //------------------Measure EIGEN computation-----------------------------------
    // Column-outer / row-inner traversal, matching Eigen's default
    // column-major storage (see the EIGEN Loop case above). The untimed fill
    // uses the same order for consistency.
    {
        typedef Eigen::MatrixXd ImageArrayType;
        ImageArrayType eigenMatrix(X_SIZE, Y_SIZE);
        for (int y = 0; y < Y_SIZE; ++y)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                eigenMatrix(x, y) = 1.0001;
            }
        }
        doNotOptimizeAway(eigenMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    eigenMatrix(x, y) += eigenMatrix(x, y) * 0.5;
                }
            }
            doNotOptimizeAway(eigenMatrix);
        }
        endTime = ::GetTickCount();
        printf("[EIGEN computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure MULTI computation-----------------------------------
    // Filled once (not timed), then updated element by element through
    // operator[][]; y varies fastest.
    {
        typedef boost::multi::array<double, 2> ImageArrayType;
        ImageArrayType multiMatrix({X_SIZE, Y_SIZE});
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                multiMatrix[x][y] = 1.0001;
            }
        }
        doNotOptimizeAway(multiMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    multiMatrix[x][y] += multiMatrix[x][y] * 0.5;
                }
            }
            doNotOptimizeAway(multiMatrix);
        }
        endTime = ::GetTickCount();
        printf("[MULTI computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    return 0;
}