こちらに触発されて、libpng による並列出力にトライしています。下記のような I/F で画素データの並列出力ができると取り回しがいいかなぁ、と思っています。
/* Opaque handle for a parallel PNG image-data writer. */
typedef struct ppng ppng;

/* Status codes returned by the ppng_* functions. */
enum ppng_result {
    ppng_result_succeeded,
    ppng_result_out_of_memory,
    ppng_result_io_error
};
typedef enum ppng_result ppng_result;

/*
 * Creates a writer bound to the given libpng write struct and info struct.
 * Neither argument may be NULL.
 */
ppng* ppng_create(png_structp png, png_infop info);

/* Releases a writer obtained from ppng_create(). */
void ppng_destroy(ppng* obj);

/*
 * Queues a band of pixel rows for compression.
 *   pixels   - first byte of the first row (must not be NULL)
 *   rowbytes - distance in bytes between consecutive rows in `pixels`
 *   height   - number of rows in this band (must be > 0)
 * Returns a ppng_result status code.
 */
ppng_result ppng_add(ppng* obj, const void* pixels, unsigned rowbytes, unsigned height);

/*
 * Waits for all queued bands to be processed and writes the end of the PNG
 * stream. Returns a ppng_result status code.
 */
ppng_result ppng_finalize(ppng* obj);
以下、書きかけです。とりあえずコンパイルが通るところまで。圧縮レベルは外部から指定できた方がよさそうですね。
/*
 * ppng.c - parallel compression of PNG image data (Win32 threads + zlib).
 *
 * Each call to ppng_add() queues one band of rows. Worker threads compress
 * the bands independently with deflate + Z_SYNC_FLUSH, so every compressed
 * band ends on a byte boundary and carries no zlib trailer. ppng_finalize()
 * then stitches the bands into one zlib stream:
 *
 *   [2-byte zlib header] [band 0] [band 1] ... [empty final block] [Adler-32]
 *
 * and hands it to libpng as the IDAT chunk.
 */
#include "ppng.h"
#include <windows.h>
#include <process.h>
#include <pngstruct.h>
#include <pngpriv.h>
#include <zlib.h>
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

enum {
    PPNG_DEFLATE_LEVEL = 3,    /* zlib compression level used by the workers */
    PPNG_OUTPUT_GROW   = 4096, /* growth step of a worker's output buffer */
    PPNG_SLOT_GROW     = 1024, /* growth step of the result slot array */
    PPNG_ZLIB_HEADER   = 2,    /* size of the zlib stream header (CMF, FLG) */
    PPNG_ZLIB_TRAILER  = 4     /* size of the zlib stream trailer (Adler-32) */
};

/* A heap buffer together with its length in bytes. */
typedef struct ppng_data {
    unsigned char* data;
    size_t length;
} ppng_data;

/* One entry of the input queue. */
typedef struct ppng_node {
    struct ppng_node* prev;
    struct ppng_node* next;
    ppng_data* data;
} ppng_node;

/* FIFO of uncompressed bands shared by the producer and the workers. */
typedef struct ppng_list {
    CRITICAL_SECTION cs;   /* guards every field below */
    HANDLE* events;        /* one manual-reset event per worker */
    ppng_node* first;
    ppng_node* last;
    ppng_node end_of_node; /* sentinel queued to mark the end of input */
    unsigned count;        /* number of bands handed out so far */
} ppng_list;

/* Result of compressing one band. */
typedef struct ppng_zdata {
    ppng_data data;   /* compressed bytes, starting with the zlib header */
    unsigned adler32; /* Adler-32 of the uncompressed band */
    size_t input;     /* length of the uncompressed band */
} ppng_zdata;

/* Compressed bands, indexed by the order in which they were queued. */
typedef struct ppng_zdata_array {
    CRITICAL_SECTION cs; /* guards every field below */
    ppng_zdata* data;
    size_t length;       /* highest filled index + 1 */
    size_t capacity;     /* number of allocated slots */
    int failed;          /* non-zero once any worker could not produce output */
} ppng_zdata_array;

/* Per-worker start-up argument. */
typedef struct ppng_context {
    ppng* obj;
    unsigned index;
    HANDLE thread;
} ppng_context;

struct ppng {
    png_structp png;
    png_infop info;
    unsigned threads, joined;
    ppng_list input;
    ppng_zdata_array output;
    ppng_context context[1]; /* actually `threads` entries */
};

static unsigned CALLBACK ppng_work(void* arg);
static ppng_result ppng_set_input(ppng* obj, ppng_data* data);
static ppng_data* ppng_get_input(ppng* obj, unsigned thread, unsigned* index);
static void ppng_set_output(ppng* obj, unsigned index, ppng_data* data, unsigned checksum, size_t input);
static void ppng_set_failed(ppng* obj);
static int ppng_deflate_band(const ppng_data* input, ppng_data* output, unsigned* checksum);
static void ppng_join(ppng* obj, unsigned count);
static void ppng_teardown(ppng* obj, unsigned started);

/*
 * Creates a writer and starts one worker thread per logical processor.
 *
 * png/info must be non-NULL. Returns NULL when memory, an event or a thread
 * could not be created; nothing is leaked in that case.
 */
ppng* ppng_create(png_structp png, png_infop info)
{
    SYSTEM_INFO sysinfo;
    ppng* obj;
    unsigned threads, ct, started;

    assert(png != NULL);
    assert(info != NULL);

    GetSystemInfo(&sysinfo);
    threads = (unsigned) sysinfo.dwNumberOfProcessors;
    if (threads == 0) {
        threads = 1;
    }

    /* calloc leaves both queues empty, the sentinel unlinked and failed == 0. */
    obj = calloc(1, offsetof(ppng, context) + threads * sizeof(ppng_context));
    if (obj == NULL) {
        return NULL;
    }
    obj->png = png;
    obj->info = info;
    obj->threads = threads;
    obj->joined = 0;

    obj->input.events = calloc(threads, sizeof *obj->input.events);
    if (obj->input.events == NULL) {
        free(obj);
        return NULL;
    }
    for (ct = 0; ct < threads; ct++) {
        obj->input.events[ct] = CreateEvent(NULL, TRUE, FALSE, NULL);
        if (obj->input.events[ct] == NULL) {
            while (ct > 0) {
                CloseHandle(obj->input.events[--ct]);
            }
            free(obj->input.events);
            free(obj);
            return NULL;
        }
    }
    InitializeCriticalSection(&obj->input.cs);
    InitializeCriticalSection(&obj->output.cs);

    /* Start the workers only after all shared state is initialised. */
    for (started = 0; started < threads; started++) {
        ppng_context* context = &obj->context[started];
        context->obj = obj;
        context->index = started;
        context->thread = (HANDLE) _beginthreadex(NULL, 0, ppng_work, context, 0, NULL);
        if (context->thread == NULL) {
            break;
        }
    }
    if (started < threads) {
        /* Stop the workers that did start, then undo everything. */
        ppng_set_input(obj, NULL);
        ppng_join(obj, started);
        ppng_teardown(obj, started);
        return NULL;
    }
    return obj;
}

/*
 * Destroys a writer. If ppng_finalize() has not been called, the workers are
 * told to stop and are waited for first. Passing NULL is a no-op.
 */
void ppng_destroy(ppng* obj)
{
    if (obj == NULL) {
        return;
    }
    if (obj->joined == 0) {
        ppng_set_input(obj, NULL);
        ppng_join(obj, obj->threads);
    }
    ppng_teardown(obj, obj->threads);
}

/*
 * Copies `height` rows from `pixels` (row stride `rowbytes`) and queues them
 * as one band for compression.
 *
 * Every row is stored as one filter-type byte (always "none") followed by
 * png_get_rowbytes() bytes, which is the scanline layout the PNG format
 * requires inside IDAT. A source row shorter than the PNG row is zero padded;
 * a longer one is truncated.
 *
 * Returns ppng_result_out_of_memory when the band cannot be allocated or is
 * larger than zlib can take in a single call.
 */
ppng_result ppng_add(ppng* obj, const void* pixels, unsigned rowbytes, unsigned height)
{
    ppng_data* input;
    ppng_result result;
    size_t prowbytes, stride, copy;
    unsigned ct;
    unsigned char* dst;
    const unsigned char* src;

    assert(obj != NULL);
    assert(obj->joined == 0);
    assert(pixels != NULL);
    assert(rowbytes > 0);
    assert(height > 0);

    prowbytes = png_get_rowbytes(obj->png, obj->info);
    stride = prowbytes + 1; /* +1 for the filter-type byte */
    /* z_stream.avail_in is an unsigned int, so a band must fit in one. */
    if (height > UINT_MAX / stride) {
        return ppng_result_out_of_memory;
    }

    input = malloc(sizeof *input);
    if (input == NULL) {
        return ppng_result_out_of_memory;
    }
    input->length = stride * height;
    input->data = calloc(height, stride);
    if (input->data == NULL) {
        free(input);
        return ppng_result_out_of_memory;
    }

    copy = rowbytes < prowbytes ? rowbytes : prowbytes;
    dst = input->data;
    src = pixels;
    for (ct = 0; ct < height; ct++) {
        dst[0] = PNG_FILTER_VALUE_NONE;
        memcpy(dst + 1, src, copy);
        dst += stride;
        src += rowbytes;
    }

    /* On success the queue owns `input`; a worker frees it. */
    result = ppng_set_input(obj, input);
    if (result != ppng_result_succeeded) {
        free(input->data);
        free(input);
    }
    return result;
}

/*
 * Waits for all queued bands, concatenates them into a single zlib stream and
 * writes it as the IDAT chunk, followed by the end of the PNG stream.
 *
 * Must be called at most once. Returns ppng_result_out_of_memory when a worker
 * failed or the assembled stream cannot be allocated.
 *
 * libpng reports its own errors through png_error(); if the application's
 * error handler longjmps out of this function, the assembled buffer is leaked.
 */
ppng_result ppng_finalize(ppng* obj)
{
    /* Any valid zlib header will do when there is no band to take one from. */
    static const unsigned char fallback_header[PPNG_ZLIB_HEADER] = { 0x78, 0x01 };
    /* Fixed-Huffman block with BFINAL set that holds only end-of-block. */
    static const unsigned char final_block[2] = { 0x03, 0x00 };
    png_unknown_chunk chunk;
    const unsigned char* header;
    unsigned char* data;
    size_t length, ct;
    uLong checksum;

    assert(obj != NULL);
    assert(obj->joined == 0);

    ppng_set_input(obj, NULL);
    ppng_join(obj, obj->threads);
    obj->joined = 1;

    /* All workers have exited, so the output array can be read without locking. */
    if (obj->output.failed) {
        return ppng_result_out_of_memory;
    }

    header = fallback_header;
    length = PPNG_ZLIB_HEADER + sizeof final_block + PPNG_ZLIB_TRAILER;
    checksum = adler32(0L, Z_NULL, 0); /* Adler-32 of the empty string (1, not 0) */
    for (ct = 0; ct < obj->output.length; ct++) {
        const ppng_zdata* part = &obj->output.data[ct];
        if (part->data.data == NULL || part->data.length < PPNG_ZLIB_HEADER) {
            return ppng_result_out_of_memory; /* slot never filled */
        }
        if (ct == 0) {
            header = part->data.data;
        }
        length += part->data.length - PPNG_ZLIB_HEADER;
        checksum = adler32_combine(checksum, part->adler32, (z_off_t) part->input);
    }

    data = malloc(length);
    if (data == NULL) {
        return ppng_result_out_of_memory;
    }

    /* One header for the whole stream; each band's own header is skipped. */
    memcpy(data, header, PPNG_ZLIB_HEADER);
    length = PPNG_ZLIB_HEADER;
    for (ct = 0; ct < obj->output.length; ct++) {
        const ppng_zdata* part = &obj->output.data[ct];
        size_t body = part->data.length - PPNG_ZLIB_HEADER;
        memcpy(data + length, part->data.data + PPNG_ZLIB_HEADER, body);
        length += body;
    }
    memcpy(data + length, final_block, sizeof final_block);
    length += sizeof final_block;
    /* The zlib trailer is the Adler-32 of all input, most significant byte first. */
    data[length++] = (unsigned char) (checksum >> 24);
    data[length++] = (unsigned char) (checksum >> 16);
    data[length++] = (unsigned char) (checksum >> 8);
    data[length++] = (unsigned char) checksum;

    /*
     * NOTE(review): IDAT is smuggled through the unknown-chunk mechanism and
     * relies on private libpng state (mode, flags). This presumably depends on
     * the libpng version in use, and index 0 below assumes no other unknown
     * chunk is stored in `info` - confirm, or consider png_write_chunk().
     */
    memset(&chunk, 0, sizeof chunk);
    obj->png->mode |= PNG_HAVE_IDAT;
    memcpy(chunk.name, "IDAT", 5);
    chunk.data = data;
    chunk.size = length;
    chunk.location = PNG_AFTER_IDAT;
    obj->png->flags |= 0x10000L; /* PNG_FLAG_KEEP_UNSAFE_CHUNKS */
    png_set_unknown_chunks(obj->png, obj->info, &chunk, 1);
    png_set_unknown_chunk_location(obj->png, obj->info, 0, PNG_AFTER_IDAT);
    png_write_end(obj->png, obj->info);

    /* png_set_unknown_chunks() stored its own copy of the data. */
    free(data);
    return ppng_result_succeeded;
}

/*
 * Worker thread entry point: compresses queued bands until the end-of-input
 * sentinel is reached. `arg` is the worker's ppng_context.
 */
static unsigned CALLBACK ppng_work(void* arg)
{
    ppng_context* context;
    ppng* obj;
    ppng_data* input;
    ppng_data packed;
    unsigned index, checksum;

    assert(arg != NULL);
    context = arg;
    obj = context->obj;

    while ((input = ppng_get_input(obj, context->index, &index)) != NULL) {
        if (ppng_deflate_band(input, &packed, &checksum)) {
            /* Ownership of packed.data moves to the output array. */
            ppng_set_output(obj, index, &packed, checksum, input->length);
        } else {
            ppng_set_failed(obj);
        }
        free(input->data);
        free(input);
    }
    return 0;
}

/*
 * Compresses one band into a freshly allocated buffer.
 *
 * The result starts with the zlib header and ends with a sync-flush marker,
 * i.e. it is byte aligned, has no final block and no Adler-32 trailer.
 * `*checksum` receives the Adler-32 of the uncompressed band.
 * Returns non-zero on success; on failure nothing is allocated.
 */
static int ppng_deflate_band(const ppng_data* input, ppng_data* output, unsigned* checksum)
{
    z_stream stream;
    unsigned char* buffer = NULL;
    unsigned char* grown;
    size_t capacity = 0, used = 0;
    int rc;

    memset(&stream, 0, sizeof stream);
    stream.zalloc = Z_NULL;
    stream.zfree = Z_NULL;
    stream.opaque = Z_NULL;
    if (deflateInit(&stream, PPNG_DEFLATE_LEVEL) != Z_OK) {
        return 0;
    }
    stream.next_in = input->data;
    stream.avail_in = (uInt) input->length;

    /* A flush is complete once deflate() returns with output space left over. */
    do {
        grown = realloc(buffer, capacity + PPNG_OUTPUT_GROW);
        if (grown == NULL) {
            goto fail;
        }
        buffer = grown;
        capacity += PPNG_OUTPUT_GROW;

        stream.next_out = buffer + used;
        stream.avail_out = (uInt) (capacity - used);
        rc = deflate(&stream, Z_SYNC_FLUSH);
        /* Z_BUF_ERROR only means the flush had already completed. */
        if (rc != Z_OK && rc != Z_BUF_ERROR) {
            goto fail;
        }
        used = capacity - stream.avail_out;
    } while (stream.avail_out == 0);

    *checksum = (unsigned) stream.adler;
    output->data = buffer;
    output->length = used;
    /* Reports Z_DATA_ERROR because the stream was never finished; expected here. */
    deflateEnd(&stream);
    return 1;

fail:
    deflateEnd(&stream);
    free(buffer);
    return 0;
}

/*
 * Appends a band to the input queue and wakes every worker.
 * data == NULL queues the end-of-input sentinel instead, which cannot fail.
 * On failure the caller keeps ownership of `data`.
 */
static ppng_result ppng_set_input(ppng* obj, ppng_data* data)
{
    ppng_node* node;
    unsigned ct;

    assert(obj != NULL);

    if (data != NULL) {
        node = malloc(sizeof *node);
        if (node == NULL) {
            return ppng_result_out_of_memory;
        }
    } else {
        node = &obj->input.end_of_node;
    }

    EnterCriticalSection(&obj->input.cs);
    /* Queueing the sentinel twice would link it to itself. */
    if (obj->input.last != &obj->input.end_of_node || node != &obj->input.end_of_node) {
        node->data = data;
        node->next = NULL;
        node->prev = obj->input.last;
        if (obj->input.last != NULL) {
            obj->input.last->next = node;
        } else {
            obj->input.first = node;
        }
        obj->input.last = node;
    }
    for (ct = 0; ct < obj->threads; ct++) {
        SetEvent(obj->input.events[ct]);
    }
    LeaveCriticalSection(&obj->input.cs);
    return ppng_result_succeeded;
}

/*
 * Blocks until input is available and removes the oldest band from the queue.
 *
 * Returns the band (now owned by the caller) and stores its sequence number
 * in `*index`, or returns NULL once the end-of-input sentinel is at the head.
 * The sentinel is left in place so that every worker sees it.
 */
static ppng_data* ppng_get_input(ppng* obj, unsigned thread, unsigned* index)
{
    ppng_data* data = NULL;
    ppng_node* node;
    HANDLE event;

    assert(obj != NULL);
    assert(index != NULL);

    EnterCriticalSection(&obj->input.cs);
    event = obj->input.events[thread];
    while ((node = obj->input.first) == NULL) {
        /* Reset under the lock so a SetEvent from the producer cannot be lost. */
        ResetEvent(event);
        LeaveCriticalSection(&obj->input.cs);
        WaitForSingleObject(event, INFINITE);
        EnterCriticalSection(&obj->input.cs);
    }
    if (node != &obj->input.end_of_node) {
        obj->input.first = node->next;
        if (obj->input.first != NULL) {
            obj->input.first->prev = NULL;
        } else {
            obj->input.last = NULL;
        }
        *index = obj->input.count++;
    } else {
        *index = obj->input.count;
        node = NULL;
    }
    LeaveCriticalSection(&obj->input.cs);

    if (node != NULL) {
        data = node->data;
        free(node);
    }
    return data;
}

/*
 * Stores the compressed band `index`, taking ownership of data->data.
 * If the slot array cannot grow, the buffer is freed and the writer is marked
 * as failed.
 */
static void ppng_set_output(ppng* obj, unsigned index, ppng_data* data, unsigned checksum, size_t input)
{
    ppng_zdata* grown;
    size_t capacity;

    assert(obj != NULL);
    assert(data != NULL);

    EnterCriticalSection(&obj->output.cs);
    if (index >= obj->output.capacity) {
        /* Smallest multiple of PPNG_SLOT_GROW that is greater than index. */
        capacity = ((size_t) index / PPNG_SLOT_GROW + 1) * PPNG_SLOT_GROW;
        grown = realloc(obj->output.data, capacity * sizeof *grown);
        if (grown == NULL) {
            obj->output.failed = 1;
            LeaveCriticalSection(&obj->output.cs);
            free(data->data);
            return;
        }
        /* Bands finish out of order; keep the untouched slots well defined. */
        memset(grown + obj->output.capacity, 0,
               (capacity - obj->output.capacity) * sizeof *grown);
        obj->output.data = grown;
        obj->output.capacity = capacity;
    }
    obj->output.data[index].data = *data;
    obj->output.data[index].adler32 = checksum;
    obj->output.data[index].input = input;
    if (obj->output.length < (size_t) index + 1) {
        obj->output.length = (size_t) index + 1;
    }
    LeaveCriticalSection(&obj->output.cs);
}

/* Records that a worker could not produce its band. */
static void ppng_set_failed(ppng* obj)
{
    EnterCriticalSection(&obj->output.cs);
    obj->output.failed = 1;
    LeaveCriticalSection(&obj->output.cs);
}

/* Waits for the first `count` worker threads to exit. */
static void ppng_join(ppng* obj, unsigned count)
{
    unsigned ct;

    for (ct = 0; ct < count; ct++) {
        WaitForSingleObject(obj->context[ct].thread, INFINITE);
    }
}

/*
 * Frees everything owned by `obj`, including `obj` itself. `started` is the
 * number of thread handles to close; those threads must already have exited.
 */
static void ppng_teardown(ppng* obj, unsigned started)
{
    size_t slot;
    unsigned ct;

    for (slot = 0; slot < obj->output.length; slot++) {
        free(obj->output.data[slot].data.data);
    }
    free(obj->output.data);

    for (ct = 0; ct < started; ct++) {
        CloseHandle(obj->context[ct].thread);
    }
    for (ct = 0; ct < obj->threads; ct++) {
        CloseHandle(obj->input.events[ct]);
    }
    free(obj->input.events);

    DeleteCriticalSection(&obj->input.cs);
    DeleteCriticalSection(&obj->output.cs);
    free(obj);
}