假设我有
std::vector<T1> vec1 {/* filled with T1's */}; std::vector<T2> vec2 {/* filled with T2's */};
和一些函数T1 f(T2)当然可以是λ.在向vec2中的每个T2应用f时,连接vec1和vec2的最佳方法是什么?
显而易见的解决方案是std :: transform,即
vec1.reserve(vec1.size() + vec2.size()); std::transform(vec2.begin(),vec2.end(),std::back_inserter(vec1),f);
但我说这不是最佳的,因为std :: back_inserter必须对每个插入的元素进行不必要的容量检查.什么是最优的是类似的东西
vec1.insert(vec1.end(),vec2.begin(),f);
这可以通过单一容量检查逃脱.可悲的是,这不是有效的C.基本上这与std :: vector :: insert最适合矢量连接的原因相同,请参阅this问题和this问题中的注释,以便进一步讨论这一点.
所以:
> std ::使用STL转换最佳方法吗?
>如果是这样,我们可以做得更好吗?
>上面描述的插入函数是否被排除在STL之外有充分的理由吗?
UPDATE
我已经开始验证多个容量检查是否确实有明显的成本.为此,我基本上只将id函数(f(x)= x)传递给答案中讨论的std :: transform和push_back方法.完整的代码是:
#include <iostream> #include <vector> #include <iterator> #include <algorithm> #include <cstdint> #include <chrono> #include <numeric> #include <random> using std::size_t; std::vector<int> generate_random_ints(size_t n) { std::default_random_engine generator; auto seed1 = std::chrono::system_clock::now().time_since_epoch().count(); generator.seed((unsigned) seed1); std::uniform_int_distribution<int> uniform {}; std::vector<int> v(n); std::generate_n(v.begin(),n,[&] () { return uniform(generator); }); return v; } template <typename D=std::chrono::nanoseconds,typename F> D benchmark(F f,unsigned num_tests) { D total {0}; for (unsigned i = 0; i < num_tests; ++i) { auto start = std::chrono::system_clock::now(); f(); auto end = std::chrono::system_clock::now(); total += std::chrono::duration_cast<D>(end - start); } return D {total / num_tests}; } template <typename T> void std_insert(std::vector<T> vec1,const std::vector<T> &vec2) { vec1.insert(vec1.end(),vec2.end()); } template <typename T1,typename T2,typename UnaryOperation> void push_back_concat(std::vector<T1> vec1,const std::vector<T2> &vec2,UnaryOperation op) { vec1.reserve(vec1.size() + vec2.size()); for (const auto& x : vec2) { vec1.push_back(op(x)); } } template <typename T1,typename UnaryOperation> void transform_concat(std::vector<T1> vec1,UnaryOperation op) { vec1.reserve(vec1.size() + vec2.size()); std::transform(vec2.begin(),op); } int main(int argc,char **argv) { unsigned num_tests {1000}; size_t vec1_size {10000000}; size_t vec2_size {10000000}; auto vec1 = generate_random_ints(vec1_size); auto vec2 = generate_random_ints(vec1_size); auto f_std_insert = [&vec1,&vec2] () { std_insert(vec1,vec2); }; auto f_push_back_id = [&vec1,&vec2] () { push_back_concat(vec1,vec2,[] (int i) { return i; }); }; auto f_transform_id = [&vec1,&vec2] () { transform_concat(vec1,[] (int i) { return i; }); }; auto std_insert_time = benchmark<std::chrono::milliseconds>(f_std_insert,num_tests).count(); auto push_back_id_time = benchmark<std::chrono::milliseconds>(f_push_back_id,num_tests).count(); auto transform_id_time = benchmark<std::chrono::milliseconds>(f_transform_id,num_tests).count(); std::cout << "std_insert: " << std_insert_time << "ms" << std::endl; std::cout << "push_back_id: " << push_back_id_time << "ms" << std::endl; std::cout << "transform_id: " << transform_id_time << "ms" << std::endl; return 0; }
编译:
g++ vector_insert_demo.cpp -std=c++11 -O3 -o vector_insert_demo
输出:
std_insert: 44ms push_back_id: 61ms transform_id: 61ms
编译器将内联lambda,因此可以安全地打折成本.除非其他人对这些结果有可行的解释(或者愿意检查程序集),否则我认为可以合理地得出多个容量检查的明显成本.
解决方法
更新:性能差异是由于reserve()调用,至少在libstdc中,使得容量完全符合您的要求,而不是使用指数增长因子.
我做了一些时间测试,结果很有趣.使用std :: vector :: insert和boost :: transform_iterator是我发现的最快的方法:
版本1:
void appendTransformed1( std::vector<int> &vec1,const std::vector<float> &vec2 ) { auto v2begin = boost::make_transform_iterator(vec2.begin(),f); auto v2end = boost::make_transform_iterator(vec2.end(),f); vec1.insert(vec1.end(),v2begin,v2end); }
版本2:
void appendTransformed2( std::vector<int> &vec1,const std::vector<float> &vec2 ) { vec1.reserve(vec1.size()+vec2.size()); for (auto x : vec2) { vec1.push_back(f(x)); } }
版本3:
void appendTransformed3( std::vector<int> &vec1,const std::vector<float> &vec2 ) { vec1.reserve(vec1.size()+vec2.size()); std::transform(vec2.begin(),std::inserter(vec1,vec1.end()),f); }
定时:
Version 1: 0.59s Version 2: 8.22s Version 3: 8.42s
main.cpp中:
#include <algorithm> #include <cassert> #include <chrono> #include <iterator> #include <iostream> #include <random> #include <vector> #include "appendtransformed.hpp" using std::cerr; template <typename Engine> static std::vector<int> randomInts(Engine &engine,size_t n) { auto distribution = std::uniform_int_distribution<int>(0,999); auto generator = [&]{return distribution(engine);}; auto vec = std::vector<int>(); std::generate_n(std::inserter(vec,vec.end()),generator); return vec; } template <typename Engine> static std::vector<float> randomFloats(Engine &engine,size_t n) { auto distribution = std::uniform_real_distribution<float>(0,1000); auto generator = [&]{return distribution(engine);}; auto vec = std::vector<float>(); std::generate_n(std::inserter(vec,generator); return vec; } static auto appendTransformedFunction(int version) -> void(*)(std::vector<int>&,const std::vector<float> &) { switch (version) { case 1: return appendTransformed1; case 2: return appendTransformed2; case 3: return appendTransformed3; default: cerr << "Unknown version: " << version << "\n"; exit(EXIT_FAILURE); } return 0; } int main(int argc,char **argv) { if (argc!=2) { cerr << "Usage: appendtest (1|2|3)\n"; exit(EXIT_FAILURE); } auto version = atoi(argv[1]); auto engine = std::default_random_engine(); auto vec1_size = 1000000u; auto vec2_size = 1000000u; auto count = 100; auto vec1 = randomInts(engine,vec1_size); auto vec2 = randomFloats(engine,vec2_size); namespace chrono = std::chrono; using chrono::system_clock; auto appendTransformed = appendTransformedFunction(version); auto start_time = system_clock::now(); for (auto i=0; i!=count; ++i) { appendTransformed(vec1,vec2); } auto end_time = system_clock::now(); assert(vec1.size() == vec1_size+count*vec2_size); auto sum = std::accumulate(vec1.begin(),vec1.end(),0u); auto elapsed_seconds = chrono::duration<float>(end_time-start_time).count(); cerr << "Using version " << version << ":\n"; cerr << " sum=" << sum << "\n"; cerr << " elapsed: " << elapsed_seconds << "s\n"; }
编译器:g 4.9.1
选项:-std = c 11 -O2