CPU Cache探秘
分析如下代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 #include <iomanip> #include <iostream> #include <chrono> #include <memory> #include <random> #include <algorithm> int64_t SimpleSum (const std::unique_ptr<int32_t []>& data, const int32_t & size, const int32_t & flag) { int64_t sum = 0 ; for (int32_t idx = 0 ; idx < size; ++idx) { int32_t num = data[idx]; if (num < flag) { sum += num; } } return sum; } std::unique_ptr<int32_t[]> MakeData(const int32_t& size) { std::unique_ptr<int32_t[]> data(new int32_t[size]); std::random_device rd; std::mt19937 mt (rd()) ; std::uniform_real_distribution<double > dist (INT32_MIN, INT32_MAX) ; for (int32_t idx = 0 ; idx < size; ++idx) { data[idx] = dist (mt); } return data; }int main () { int32_t size = 128 * 1024 * 1024 ; int32_t loop = 1 ; std::unique_ptr<int32_t []> data = MakeData (size); int64_t result; { auto t1 = std::chrono::high_resolution_clock::now (); for (auto i = 0 ; i < loop; ++i) { result = SimpleSum (data, size, 123456789 ); } auto t2 = std::chrono::high_resolution_clock::now (); std::cout << "time1:" << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count () << std::endl << " result:" << result << std::endl; } std::sort (data.get (), data.get () + size); { auto t1 = std::chrono::high_resolution_clock::now (); for (auto i = 0 ; i < loop; ++i) { result = SimpleSum (data, size, 123456789 ); } auto t2 = std::chrono::high_resolution_clock::now (); std::cout << "time1:" << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count () << std::endl << " result:" << result << std::endl; } return 0 ; }
CPU: i7-10700
OS: windows10 x64
Toolset: MSVC 14.2
Config: Release X86
Compile Flags: /permissive- /ifcOutput "Release\" /GS /GL /analyze- /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /sdl /Fd"Release\vc142.pdb" /Zc:inline /fp:precise /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /errorReport:prompt /WX- /Zc:forScope /Gd /Oy- /Oi /MD /FC /Fa"Release\" /EHsc /nologo /Fo"Release\" /Fp"Release\CPU Cache探秘.pch" /diagnostics:column
分析time1和time2会有怎样的大小关系?
运行
time2显著比time1小,节省了84%的时间。它们的算法一致,参数相同,仅仅是计算用的数据进行了排序,为什么相差这么多时间呢?
分析