Chiharu の日記

絵描き C/C++ プログラマーの日記です。

PNG 並列圧縮ドラフト 〜libpng 並列処理

こちらに触発されて、libpng による並列出力にトライしています。下記のような I/F で画素データの並列出力ができると取り回しがいいかなぁ、と思っています。

/* Opaque handle for a parallel PNG writer; the struct body is defined in the implementation file. */
typedef struct ppng ppng;

/* Status codes returned by ppng_add and ppng_finalize. */
typedef enum ppng_result {
 ppng_result_succeeded,      /* the call completed */
 ppng_result_out_of_memory,  /* intended for allocation failures */
 ppng_result_io_error,       /* intended for output failures; not returned by any code shown here */
} ppng_result;

/* Creates a writer bound to the given libpng write/info structs and starts its worker threads. */
ppng* ppng_create(png_structp png, png_infop info);
/* Stops the workers if ppng_finalize has not already done so, then releases the writer. */
void ppng_destroy(ppng* obj);

/*
 * Copies `height` rows from `pixels` (source stride `rowbytes`) into an
 * internally allocated buffer and queues that buffer for the workers.
 */
ppng_result ppng_add(ppng* obj, const void* pixels, unsigned rowbytes, unsigned height);
/*
 * Signals end of input, waits for all workers, joins their output into one
 * IDAT chunk and calls png_write_end.
 */
ppng_result ppng_finalize(ppng* obj);

以下、書きかけです。とりあえずコンパイルが通るところまで。圧縮レベル（現状は deflateInit に 3 を固定で渡しています）は外部指定できた方がよさそうですね。

#include "ppng.h"

#include <windows.h>
#include <process.h>

#include <pngstruct.h>
#include <pngpriv.h>
#include <zlib.h>

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* A heap-allocated byte buffer together with its length. */
typedef struct ppng_data {
 unsigned char* data;  /* start of the buffer */
 size_t length;        /* number of bytes in `data` */
} ppng_data;

/* One entry of the input queue (ppng_list). */
typedef struct ppng_node {
 struct ppng_node* prev;  /* set to the previous tail when the node is queued */
 struct ppng_node* next;  /* followed by ppng_get_input to advance the head */
 ppng_data* data;         /* queued pixel buffer */
} ppng_node;

/* Input queue shared between the producer (ppng_set_input) and the workers (ppng_get_input). */
typedef struct ppng_list {
 CRITICAL_SECTION cs;    /* held while first/last/count are accessed */
 HANDLE* events;         /* one manual-reset event per worker; all are set whenever a node is queued */
 ppng_node* first;       /* head of the queue, NULL when empty */
 ppng_node* last;        /* tail of the queue, NULL when empty */
 ppng_node end_of_node;  /* sentinel queued by ppng_set_input(obj, NULL) to mark end of input */
 unsigned count;         /* incremented by every ppng_get_input call; the prior value is the chunk index */
} ppng_list;

/* Result of compressing one input chunk, as stored by ppng_set_output. */
typedef struct ppng_zdata {
 ppng_data data;    /* output buffer produced by the worker */
 unsigned adler32;  /* the worker's z_stream.adler value for this chunk */
 size_t input;      /* length of the uncompressed input chunk */
} ppng_zdata;

/* Growable array of per-chunk results, indexed by chunk index. */
typedef struct ppng_zdata_array {
 CRITICAL_SECTION cs;  /* held by ppng_set_output while the array is modified */
 ppng_zdata* data;     /* slot storage, grown with realloc */
 size_t length;        /* loop bound used by ppng_finalize and ppng_destroy; updated in ppng_set_output */
 size_t max;           /* number of slots currently allocated */
} ppng_zdata_array;

/* Per-worker record; its address is the argument passed to ppng_work. */
typedef struct ppng_context {
 void* obj;       /* the owning ppng (stored as void*) */
 unsigned index;  /* worker number; selects this worker's event in ppng_list.events */
 HANDLE thread;   /* handle returned by _beginthreadex */
} ppng_context;

/* State of one parallel PNG writer. */
struct ppng {
 png_structp png;          /* libpng write struct supplied to ppng_create */
 png_infop info;           /* libpng info struct supplied to ppng_create */
 unsigned threads, joined; /* worker count; joined becomes 1 once ppng_finalize has waited for the workers */
 ppng_list input;          /* queue of pixel chunks waiting to be compressed */
 ppng_zdata_array output;  /* compressed chunks, indexed by chunk index */
 
 /* Declared with one element; ppng_create allocates room for `threads` elements. */
 ppng_context context[1];
};

/* Worker thread entry point; `arg` is a ppng_context*. */
static unsigned CALLBACK ppng_work(void* arg);

/* Appends `data` to the input queue; NULL queues the end-of-input sentinel. */
static void ppng_set_input(ppng* obj, ppng_data* data);
/* Blocks until input is available; returns the next chunk and its index, or NULL at end of input. */
static ppng_data* ppng_get_input(ppng* obj, unsigned thread, unsigned* index);
/* Stores one worker result in the output array slot `index`. */
static void ppng_set_output(ppng* obj, unsigned index, ppng_data* data, unsigned adler32, size_t input);

/*
 * Creates a parallel PNG writer bound to an existing libpng write context.
 *
 * Allocates the ppng object with one ppng_context slot per worker, creates
 * one manual-reset event per worker and starts the worker threads
 * (ppng_work). The requested worker count is the processor count reported
 * by GetSystemInfo(); if only some threads can be started, the object runs
 * with the workers that did start.
 *
 * png, info: libpng write/info structs; stored in the object, not copied.
 *
 * Returns the new object (release it with ppng_destroy), or NULL when
 * memory, an event, or the very first worker thread could not be created.
 */
ppng* ppng_create(png_structp png, png_infop info)
{
 ppng* obj;
 unsigned ct, threads;
 
 SYSTEM_INFO system;
 
 assert(png != NULL);
 assert(info != NULL);
 
 GetSystemInfo(&system);
 threads = system.dwNumberOfProcessors;
 
 /* context[] is declared with one element; allocate room for all workers. */
 obj = malloc(offsetof(ppng, context) + threads * sizeof(ppng_context));
 if (obj == NULL) {
  return NULL;
 }
 obj->png = png;
 obj->info = info;
 obj->threads = threads;
 obj->joined = 0;
 
 obj->input.events = malloc(threads * sizeof(HANDLE));
 if (obj->input.events == NULL) {
  free(obj);
  return NULL;
 }
 for (ct = 0; ct < threads; ct++) {
  obj->input.events[ct] = CreateEvent(NULL, TRUE, FALSE, NULL);
  if (obj->input.events[ct] == NULL) {
   /* Undo the events created so far; no thread exists yet. */
   while (ct > 0) {
    CloseHandle(obj->input.events[--ct]);
   }
   free(obj->input.events);
   free(obj);
   return NULL;
  }
 }
 
 InitializeCriticalSection(&obj->input.cs);
 obj->input.first = NULL;
 obj->input.last = NULL;
 /* The sentinel lives inside the object; give its fields defined values. */
 obj->input.end_of_node.prev = NULL;
 obj->input.end_of_node.next = NULL;
 obj->input.end_of_node.data = NULL;
 obj->input.count = 0;
 
 InitializeCriticalSection(&obj->output.cs);
 obj->output.data = NULL;
 obj->output.length = 0;
 obj->output.max = 0;
 
 /* All shared state is initialised before the first worker starts. */
 for (ct = 0; ct < threads; ct++) {
  obj->context[ct].obj = obj;
  obj->context[ct].index = ct;
  obj->context[ct].thread = (HANDLE) _beginthreadex(NULL, 0, ppng_work, &obj->context[ct], 0, NULL);
  if (obj->context[ct].thread == NULL) {
   break;
  }
 }
 
 if (ct < threads) {
  unsigned started = ct;
  
  /* Drop the events of workers that never started so that every loop over
     obj->threads only sees valid handles. */
  for (; ct < threads; ct++) {
   CloseHandle(obj->input.events[ct]);
  }
  obj->threads = started;
  
  if (started == 0) {
   DeleteCriticalSection(&obj->output.cs);
   DeleteCriticalSection(&obj->input.cs);
   free(obj->input.events);
   free(obj);
   return NULL;
  }
 }
 
 return obj;
}

/*
 * Releases a writer created by ppng_create.
 *
 * If ppng_finalize has not run (joined == 0), the end-of-input sentinel is
 * queued and every worker thread is waited for first. Afterwards the stored
 * output buffers, the output array itself, the per-worker events, the
 * thread handles, both critical sections and the object are released.
 *
 * obj: the writer to destroy; must not be NULL and must not be used again.
 */
void ppng_destroy(ppng* obj)
{
 unsigned ct;
 size_t slot;
 
 assert(obj != NULL);
 
 if (obj->joined == 0) {
  ppng_set_input(obj, NULL);
  
  for (ct = 0; ct < obj->threads; ct++) {
   WaitForSingleObject(obj->context[ct].thread, INFINITE);
  }
 }
 
 for (slot = 0; slot < obj->output.length; slot++) {
  free(obj->output.data[slot].data.data);
 }
 /* The slot array itself is heap memory too (grown in ppng_set_output). */
 free(obj->output.data);
 
 for (ct = 0; ct < obj->threads; ct++) {
  CloseHandle(obj->input.events[ct]);
 }
 free(obj->input.events);
 
 for (ct = 0; ct < obj->threads; ct++) {
  CloseHandle(obj->context[ct].thread);
 }
 
 /* No thread is left that could enter these. */
 DeleteCriticalSection(&obj->input.cs);
 DeleteCriticalSection(&obj->output.cs);
 
 free(obj);
}

/*
 * Queues `height` rows of pixel data for compression.
 *
 * The rows are copied into a newly allocated buffer laid out the way PNG
 * image data is fed to the compressor: every row is one filter-type byte
 * (0, "None") followed by png_get_rowbytes() bytes of pixel data. If the
 * caller's stride is shorter than the PNG row size the remainder of the row
 * is zero-filled; if it is longer, the extra source bytes are skipped.
 *
 * obj:      the writer.
 * pixels:   first source row; not retained after the call.
 * rowbytes: distance in bytes between consecutive source rows.
 * height:   number of rows to copy.
 *
 * Returns ppng_result_succeeded once the chunk is queued, or
 * ppng_result_out_of_memory if the buffer could not be allocated (nothing is
 * queued in that case).
 */
ppng_result ppng_add(ppng* obj, const void* pixels, unsigned rowbytes, unsigned height)
{
 ppng_data* input;
 size_t prowbytes, stride, copy;
 unsigned ct;
 
 unsigned char* dst;
 const unsigned char* src;
 
 assert(obj != NULL);
 assert(pixels != NULL);
 assert(rowbytes > 0);
 assert(height > 0);
 
 prowbytes = png_get_rowbytes(obj->png, obj->info);
 
 /* Reject sizes whose (prowbytes + 1) * height does not fit in size_t. */
 if (height != 0 && prowbytes > ((size_t) -1) / height - 1) {
  return ppng_result_out_of_memory;
 }
 stride = prowbytes + 1;
 
 input = malloc(sizeof *input);
 if (input == NULL) {
  return ppng_result_out_of_memory;
 }
 
 input->length = stride * height;
 input->data = malloc(input->length);
 if (input->data == NULL) {
  free(input);
  return ppng_result_out_of_memory;
 }
 
 copy = (rowbytes < prowbytes) ? rowbytes : prowbytes;
 
 dst = input->data;
 src = pixels;
 
 for (ct = 0; ct < height; ct++) {
  dst[0] = 0; /* PNG filter type 0: row is stored unfiltered */
  memcpy(dst + 1, src, copy);
  if (copy < prowbytes) {
   /* Do not let uninitialised heap bytes reach the output file. */
   memset(dst + 1 + copy, 0, prowbytes - copy);
  }
  dst += stride;
  src += rowbytes;
 }
 
 /* The queue (and later the worker) now holds the buffer. */
 ppng_set_input(obj, input);
 
 return ppng_result_succeeded;
}

/*
 * Ends the input, waits for the workers and writes the image data.
 *
 * Steps:
 *  1. Queue the end-of-input sentinel and wait for every worker thread.
 *  2. Join the stored chunks into one zlib stream: the first chunk is copied
 *     whole (it supplies the 2-byte zlib header), later chunks are copied
 *     without their first 2 bytes, then an empty final deflate block and the
 *     combined Adler-32 (big-endian, as zlib requires) are appended.
 *  3. Hand the stream to libpng as an "IDAT" unknown chunk located after
 *     IDAT and call png_write_end.
 *
 * NOTE(review): step 2 yields a valid stream only if every stored chunk is
 * a zlib header followed by non-final deflate blocks that end on a byte
 * boundary, and if each stored adler32 is the Adler-32 of that chunk's
 * input alone - confirm against ppng_work.
 *
 * obj: the writer; must not have been finalized before.
 *
 * Returns ppng_result_succeeded, or ppng_result_out_of_memory if the joined
 * buffer could not be allocated. Errors raised inside libpng are not
 * translated here.
 */
ppng_result ppng_finalize(ppng* obj)
{
 unsigned ct;
 
 size_t slot, skip, length;
 unsigned char* data;
 const ppng_zdata* z;
 unsigned long adler;
 
 png_unknown_chunk chunk;
 
 assert(obj != NULL);
 assert(obj->joined == 0);
 
 ppng_set_input(obj, NULL);
 
 for (ct = 0; ct < obj->threads; ct++) {
  WaitForSingleObject(obj->context[ct].thread, INFINITE);
 }
 
 obj->joined = 1;
 
 /* Pass 1: total size and combined checksum. Start from the Adler-32 of an
    empty buffer (1), which is what adler32_combine expects as a seed. */
 length = 2 + 4; /* final empty block + Adler-32 trailer */
 adler = adler32(0L, Z_NULL, 0);
 for (slot = 0; slot < obj->output.length; slot++) {
  z = &obj->output.data[slot];
  skip = (slot == 0) ? 0 : 2;
  if (z->data.length > skip) { /* also guards the size_t subtraction */
   length += z->data.length - skip;
  }
  adler = adler32_combine(adler, z->adler32, z->input);
 }
 
 data = malloc(length);
 if (data == NULL) {
  return ppng_result_out_of_memory;
 }
 
 /* Pass 2: concatenate, dropping the zlib header of all but the first. */
 length = 0;
 for (slot = 0; slot < obj->output.length; slot++) {
  z = &obj->output.data[slot];
  skip = (slot == 0) ? 0 : 2;
  if (z->data.length > skip) {
   memcpy(data + length, z->data.data + skip, z->data.length - skip);
   length += z->data.length - skip;
  }
 }
 
 /* Empty fixed-Huffman block with BFINAL set: terminates the deflate data. */
 data[length++] = 0x03;
 data[length++] = 0x00;
 
 /* zlib trailer: Adler-32 of the uncompressed data, most significant byte first. */
 data[length++] = (unsigned char) (adler >> 24);
 data[length++] = (unsigned char) (adler >> 16);
 data[length++] = (unsigned char) (adler >> 8);
 data[length++] = (unsigned char) adler;
 
 /* png_write_end refuses to run unless it believes IDAT has been written. */
 obj->png->mode |= PNG_HAVE_IDAT;
 
 memcpy(chunk.name, "IDAT", 4);
 chunk.data = data;
 chunk.size = length;
 chunk.location = PNG_AFTER_IDAT;
 
 obj->png->flags |= 0x10000L; /* PNG_FLAG_KEEP_UNSAFE_CHUNKS */
 
 png_set_unknown_chunks(obj->png, obj->info, &chunk, 1);
 png_set_unknown_chunk_location(obj->png, obj->info, 0, PNG_AFTER_IDAT);
 
 png_write_end(obj->png, obj->info);
 
 /* libpng keeps its own copy of the chunk data, so ours can go. */
 free(data);
 
 return ppng_result_succeeded;
}

/*
 * Worker thread body: compresses queued chunks until end of input.
 *
 * For every chunk returned by ppng_get_input a fresh zlib stream (level 3)
 * is created and the whole chunk is deflated with Z_SYNC_FLUSH. The output
 * is therefore a 2-byte zlib header followed by deflate blocks that end on a
 * byte boundary, with no final block and no Adler-32 trailer; ppng_finalize
 * strips the header from all chunks but the first when it joins them. The
 * output buffer, the stream's Adler-32 and the input length are stored with
 * ppng_set_output, after which the input chunk is freed.
 *
 * arg: pointer to this worker's ppng_context.
 *
 * Returns 0 when ppng_get_input reports end of input.
 *
 * NOTE(review): failures of deflateInit, deflate and realloc are not
 * handled; this thread has no channel to report them to the caller.
 */
unsigned CALLBACK ppng_work(void* arg)
{
 ppng_context* context;
 ppng_data* input;
 
 z_stream stream;
 ppng_data data;
 size_t max, room;
 unsigned index;
 
 assert(arg != NULL);
 
 context = arg;
 
 while ((input = ppng_get_input(context->obj, context->index, &index)) != NULL) {
  stream.zalloc = Z_NULL;
  stream.zfree = Z_NULL;
  stream.opaque = Z_NULL;
  deflateInit(&stream, 3);
  
  stream.avail_in = (uInt) input->length;
  stream.next_in = input->data;
  
  data.data = NULL;
  data.length = 0, max = 0;
  
  do {
   /* Grow the output by one 4096-byte step per round. */
   max += 4096;
   data.data = realloc(data.data, max);
   
   room = max - data.length;
   stream.avail_out = (uInt) room;
   stream.next_out = data.data + data.length;
   
   /* Same flush value on every round, as zlib requires until the flush
      completes. A round with nothing left to do returns Z_BUF_ERROR and
      leaves avail_out untouched, which ends the loop. */
   deflate(&stream, Z_SYNC_FLUSH);
   
   data.length += room - stream.avail_out;
   
  } while (stream.avail_out == 0); /* a full buffer may mean more output is pending */
  
  /* The output array takes over data.data. */
  ppng_set_output(context->obj, index, &data, stream.adler, input->length);
  
  deflateEnd(&stream);
  
  free(input->data);
  free(input);
 }
 
 return 0;
}

/*
 * Appends one entry to the tail of the input queue and wakes the workers.
 *
 * obj:  the writer whose queue is modified.
 * data: chunk to queue, or NULL to queue the end-of-input sentinel
 *       (obj->input.end_of_node) instead of a newly allocated node.
 *
 * The queue is modified under obj->input.cs, and every per-worker event is
 * set before the critical section is left.
 *
 * NOTE(review): the node allocation is not checked; this helper returns
 * void and has no way to report the failure.
 */
void ppng_set_input(ppng* obj, ppng_data* data)
{
 ppng_node* node;
 unsigned ct;
 
 assert(obj != NULL);
 
 if (data != NULL) {
  node = malloc(sizeof *node);
 } else {
  node = &obj->input.end_of_node;
 }
 
 EnterCriticalSection(&obj->input.cs);
 
 node->data = data;
 node->next = NULL;
 node->prev = obj->input.last;
 
 if (obj->input.first == NULL) {
  obj->input.first = node;
 }
 if (obj->input.last != NULL) {
  /* Link the old tail forward; ppng_get_input walks the queue via next. */
  obj->input.last->next = node;
 }
 obj->input.last = node;
 
 for (ct = 0; ct < obj->threads; ct++) {
  SetEvent(obj->input.events[ct]);
 }
 
 LeaveCriticalSection(&obj->input.cs);
}

/*
 * Blocks until the input queue is non-empty, then takes its head.
 *
 * obj:    the writer whose queue is read.
 * thread: number of the calling worker; selects its entry in input.events.
 * index:  receives the value input.count had before this call incremented
 *         it. It is written on every return, including end of input.
 *
 * Returns the queued ppng_data (its list node is freed here), or NULL when
 * the head is the end-of-input sentinel. The sentinel is left at the head
 * so that every worker gets to see it.
 */
ppng_data* ppng_get_input(ppng* obj, unsigned thread, unsigned* index)
{
 ppng_data* data;
 unsigned count;
 
 ppng_node* node;
 HANDLE event;
 
 assert(obj != NULL);
 assert(index != NULL);
 
 EnterCriticalSection(&obj->input.cs);
 
 event = obj->input.events[thread];
 
 while ((node = obj->input.first) == NULL) {
  /* Reset while still inside the critical section: a producer can only
     set the event after we leave it, so the wake-up is not lost. */
  ResetEvent(event);
  LeaveCriticalSection(&obj->input.cs);
  
  WaitForSingleObject(event, INFINITE);
  
  EnterCriticalSection(&obj->input.cs);
 }
 
 if (node != &obj->input.end_of_node) {
  obj->input.first = node->next;
  if (node == obj->input.last) {
   obj->input.last = NULL;
  }
 } else {
  node = NULL;
 }
 count = obj->input.count++;
 
 LeaveCriticalSection(&obj->input.cs);
 
 data = NULL;
 if (node != NULL) {
  data = node->data;
  free(node);
 }
 
 *index = count;
 
 return data;
}

/*
 * Stores the result for chunk `index` in the output array.
 *
 * obj:     the writer whose output array is modified.
 * index:   chunk index handed out by ppng_get_input.
 * data:    output buffer descriptor; it is copied by value, so the array
 *          slot refers to the same heap buffer afterwards.
 * adler32: checksum value to store with the chunk.
 * input:   length of the uncompressed input to store with the chunk.
 *
 * The array is grown in steps of 1024 slots, new slots are zero-filled, and
 * output.length is raised to index + 1 when needed. All of this happens
 * under obj->output.cs.
 *
 * NOTE(review): the realloc result is not checked; this helper returns void
 * and has no way to report the failure.
 */
void ppng_set_output(ppng* obj, unsigned index, ppng_data* data, unsigned adler32, size_t input)
{
 size_t needed, grown;
 
 assert(obj != NULL);
 assert(data != NULL);
 
 needed = (size_t) index + 1;
 
 EnterCriticalSection(&obj->output.cs);
 
 if (index >= obj->output.max) {
  /* Round the required slot count up to a multiple of 1024. */
  grown = (needed + 1023) & ~(size_t) 1023;
  obj->output.data = realloc(obj->output.data, grown * sizeof(ppng_zdata));
  /* Chunks can finish in any order; keep slots not written yet well defined. */
  memset(obj->output.data + obj->output.max, 0, (grown - obj->output.max) * sizeof(ppng_zdata));
  obj->output.max = grown;
 }
 
 obj->output.data[index].data = *data;
 obj->output.data[index].adler32 = adler32;
 obj->output.data[index].input = input;
 
 /* length is a count, so it has to cover slot `index` itself. */
 if (obj->output.length < needed) {
  obj->output.length = needed;
 }
 
 LeaveCriticalSection(&obj->output.cs);
}