std::allocate_shared 〜カスタムアロケータ

先日の日記の続き。std::allocate_shared 用のカスタムアロケータとして、簡易的なメモリプールを用意してみました。

namespace {

/// Fixed-size block pool.
///
/// All storage is obtained once in the constructor as an array of equally
/// sized blocks. Unused blocks are chained into a singly linked free list
/// that lives inside the blocks themselves, so allocate() and deallocate()
/// are a pointer pop / push.
///
/// @tparam unit  Minimum payload size of one block, in bytes (must be > 0).
///
/// The class performs no locking and does not check that a pointer passed to
/// deallocate() was obtained from this pool.
template <int unit>
class memory_manager {
 static_assert(unit > 0, "memory_manager: unit must be positive");

private:
 // One pool block. While the block is unused it stores the free-list link;
 // once handed out the same bytes belong to the caller.
 // Aligned like max_align_t so that every block is suitably aligned for any
 // ordinary object type placed into it.
 union alignas(std::max_align_t) trait {
  trait* next;
  unsigned char body[unit];
 };

public:
 /// Allocates `len` blocks and links every one of them into the free list.
 /// @throws std::bad_alloc if the backing array cannot be allocated.
 memory_manager(const std::size_t len):
  free(nullptr), traits(new trait[len]), length(len)
 {
  // Push blocks in address order; the list head ends up at the last block,
  // so blocks are handed out from the highest address downwards.
  for (trait* cur = traits; cur != traits + length; ++cur) {
   cur->next = free;
   free = cur;
  }
 }

 /// Releases the whole backing array; outstanding blocks become invalid.
 ~memory_manager()
 {
  delete [] traits;
 }

 // The pool owns raw storage, so copying is disabled.
 memory_manager(const memory_manager&) = delete;
 memory_manager& operator=(const memory_manager&) = delete;

 /// Hands out one block.
 /// @param len  Requested size in bytes; must fit into a single block.
 /// @return Pointer to a block of at least `len` bytes.
 /// @throws std::bad_alloc if the pool is exhausted or `len` exceeds the
 ///         block size.
 void* allocate(const std::size_t len)
 {
  if (free == nullptr || len > sizeof(trait)) {
   // std::bad_alloc has no message-taking constructor in the standard.
   throw std::bad_alloc();
  }
  trait* const cur = free;
  free = cur->next;
  return cur;
 }

 /// Returns a block to the pool. A null pointer is ignored.
 /// @param ptr  Pointer previously returned by allocate() on this pool.
 void deallocate(void* ptr)
 {
  if (ptr == nullptr) {
   return;
  }
  trait* const cur = static_cast<trait*>(ptr);
  cur->next = free;
  free = cur;
 }

private:
 trait* free;        // head of the free list, nullptr when exhausted
 trait* traits;      // backing array that owns every block
 std::size_t length; // number of blocks in `traits`
};

// The single pool instance that allocator<T> forwards to.
// Block size: 32 * sizeof(int) is the size of test::array; the extra 16 bytes
// is the per-object overhead the accompanying article reports for
// std::allocate_shared on its x86 build (presumably larger on x64 — verify
// before reusing this constant).
// Block count: 500 * 10000, the same value as `length`.
memory_manager<32 * sizeof(int) + 16> mem_mgr(500 * 10000);

/// Standard-library style allocator that serves every request from the
/// file-level pool `mem_mgr`.
///
/// The allocator itself holds no state, so all instances (for any T) are
/// interchangeable; this is what operator== / operator!= below report.
template <class T>
class allocator
{
public:
 typedef std::size_t size_type;
 typedef std::ptrdiff_t difference_type;
 typedef T* pointer;
 typedef const T* const_pointer;
 typedef T& reference;
 typedef const T& const_reference;
 typedef T value_type;

 /// Lets containers / std::allocate_shared obtain an allocator for their
 /// internal node type.
 template <class U>
 struct rebind
 {
  typedef allocator<U> other;
 };

 allocator() noexcept {}
 // The same-type copy constructor is the implicitly generated one.
 template <class U> allocator(const allocator<U>&) noexcept {}

 ~allocator() noexcept {}

 /// Obtains storage for `num` objects of type T from `mem_mgr`.
 /// The second (hint) argument is accepted for interface compatibility and
 /// ignored.
 /// @throws std::bad_alloc if `num * sizeof(T)` would overflow, or if
 ///         `mem_mgr.allocate` rejects the request.
 pointer allocate(size_type num, const void* /*hint*/ = nullptr)
 {
  // Guard the multiplication below against wrap-around.
  if (num > max_size()) {
   throw std::bad_alloc();
  }
  return static_cast<pointer>(mem_mgr.allocate(num * sizeof(T)));
 }

 /// Copy-constructs a T from `value` in the storage at `p`.
 void construct(pointer p, const T& value)
 {
  new (static_cast<void*>(p)) T(value);
 }

 /// Returns storage obtained from allocate() to `mem_mgr`.
 /// The element count is not needed by the pool and is ignored.
 void deallocate(pointer p, size_type /*num*/)
 {
  mem_mgr.deallocate(p);
 }

 /// Runs T's destructor on the object at `p` without releasing storage.
 void destroy(pointer p)
 {
  p->~T();
 }

 pointer address(reference value) const { return &value; }
 const_pointer address(const_reference value) const { return &value; }

 /// Largest `num` for which `num * sizeof(T)` does not overflow size_type.
 size_type max_size() const noexcept
 {
  return std::numeric_limits<std::size_t>::max() / sizeof(T);
 }
};

/// Every allocator instance uses the same pool, so any two compare equal.
template <class T, class U>
bool operator==(const allocator<T>&, const allocator<U>&) noexcept
{
 return true;
}

template <class T, class U>
bool operator!=(const allocator<T>&, const allocator<U>&) noexcept
{
 return false;
}

// Payload type used by the benchmark: a plain block of 32 ints.
struct test {
 int array[32];
 // User-provided empty constructor: `array` is left uninitialized when a
 // `test` is created.
 test(){}
};

// Number of objects the benchmark loop in main() creates; the same value as
// the block count passed to mem_mgr.
const std::size_t length = 500 * 10000;

};

上記アロケータを、下記*1のように使用しました。

int main()
{
 //
 // 先日のコードの続き
 //

 // std::allocate_shared を使用
 {
  const auto prev = std::clock();
  allocator<test> alloc; // カスタム アロケータ
  for (std::size_t ct = 0; ct < length; ct++) {
   array.push_back(std::allocate_shared<test>(alloc)); // アロケータ使用
  }
  array.clear();
  std::cout << "std::allocate_shared(2): " <<
   static_cast<double>(std::clock() - prev) / CLOCKS_PER_SEC <<
   "(s)" << std::endl;
 }
 return 0;
}

計測結果は下記のとおりでした。

ctor: 1.202(s)
std::make_shared: 0.717(s)
std::allocate_shared: 0.717(s)
std::allocate_shared(2): 0.281(s) ←プール版（カスタム アロケータ）
ptr: 0.577(s)

プール版の std::allocate_shared は速いですね！まぁ、メモリプールから O(1) で領域確保を完了できるのですから、当たり前なんですけど。
それはさておき、今回ちょっとした発見だったのは、std::allocate_shared で確保される領域サイズが「インスタンスサイズ + 16 バイト」だったということです。追加分の中にはポインタ値も含まれると思うので、多分 x64 *2ではもうちょっと増えるのでしょう。とはいえ、この追加分は領域確保の際に管理対象のインスタンスとひとまとめにして確保されるので、いまどきの PC においては、この程度のサイズ増加は誤差と見て良い気がします。
ま、今回の確認で何かが解決したわけでもないのですが、ちょっともやもやが晴れてすっきりした気がします。とりあえず、std::shared_ptr を使用する際は std::make_shared を使っていこうと思います。

*1:プール領域が広大になりすぎたのでサイズ減らしました。

*2:今回試したのは x86 環境です。

Chiharu の日記

絵描き C/C++ プログラマーの日記です。

std::allocate_shared 〜カスタムアロケータ