/*
 *                           TERMS AND CONDITIONS
 *                                   FOR
 *                         OPEN SOURCE CODE LICENSE
 *                               Version 1.1
 * 
 * Japan Registry Services Co., Ltd. ("JPRS"), a Japanese corporation
 * having its head office at Chiyoda First Bldg. East 13F 3-8-1 Nishi-Kanda,
 * Chiyoda-ku, Tokyo 101-0065, Japan, grants you the license for open source
 * code specified in EXHIBIT A the "Code" subject to the following Terms and
 * Conditions ("OSCL").
 * 
 * 1. License Grant.
 *   JPRS hereby grants you a worldwide, royalty-free, non-exclusive
 *   license, subject to third party intellectual property claims:
 *   (a) under intellectual property rights (other than patent or
 *       trademark) licensable by JPRS to use, reproduce, modify, display,
 *       perform, sublicense and distribute the Code (or portions thereof)
 *       with or without modifications, and/or as part of a derivative work;
 *       or
 *   (b) under claims of the infringement through the making, using,
 *       offering to sell and/or otherwise disposing the JPRS Revised Code
 *       (or portions thereof);
 *   (c) the licenses granted in this Section 1(a) and (b) are effective on
 *       the date JPRS first distributes the Code to you under the terms of
 *       this OSCL;
 *   (d) Notwithstanding the above stated terms, no patent license is
 *       granted:
 *       1)  for a code that you delete from the Code;
 *       2)  separate from the Code; or
 *       3)  for infringements caused by:
 *            i) modification of the Code; or
 *           ii) combination of the Code with other software or devices.
 * 
 * 2. Consents.
 *   You agree that:
 *   (a) you must include a copy of this OSCL and the notice set forth in
 *       EXHIBIT A with every copy of the Code you distribute;
 *   (b) you must include a copy of this OSCL and the notice set forth in
 *       EXHIBIT A with every copy of binary form of the Code in the
 *       documentation and/or other materials provided with the distribution;
 *   (c) you may not offer or impose any terms on any source code version
 *       that alters or restricts the applicable version of this OSCL or
 *       the recipients' rights hereunder.
 *   (d) If the terms and conditions are set forth in EXHIBIT A, you must
 *       comply with those terms and conditions.
 * 
 * 3. Proprietary Information.
 *   All trademarks, service marks, patents, copyrights, trade secrets, and
 *   other proprietary rights in or related to the Code are and will remain
 *   the exclusive property of JPRS or its licensors, whether or not
 *   specifically recognized or perfected under local law except specified
 *   in this OSCL; provided however you agree and understand that the JPRS
 *   name may not be used to endorse or promote this Code without prior
 *   written approval of JPRS.
 * 
 * 4. WARRANTY DISCLAIMER.
 *   JPRS MAKES NO REPRESENTATIONS AND WARRANTIES REGARDING THE USE OF THE
 *   CODE, NOR DOES JPRS MAKE ANY REPRESENTATIONS THAT THE CODE WILL BECOME
 *   COMMERCIALLY AVAILABLE. JPRS, ITS AFFILIATES, AND ITS SUPPLIERS DO NOT
 *   WARRANT OR REPRESENT THAT THE CODE IS FREE OF ERRORS OR THAT THE CODE
 *   IS SUITABLE FOR TRANSLATION AND/OR LOCALIZATION. THE CODE IS PROVIDED
 *   ON AN "AS IS" BASIS AND JPRS AND ITS SUPPLIERS HAVE NO OBLIGATION TO
 *   CORRECT ERRORS OR TO SUPPORT THE CODE UNDER THIS OSCL FOR ANY REASON.
 *   TO THE FULL EXTENT PERMITTED BY LAW, ALL OBLIGATIONS ARE HEREBY
 *   EXCLUDED WHETHER EXPRESS, STATUTORY OR IMPLIED UNDER LAW, COURSE OF
 *   DEALING, CUSTOM, TRADE USAGE, ORAL OR WRITTEN STATEMENT OR OTHERWISE,
 *   INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY
 *   OR FITNESS FOR A PARTICULAR PURPOSE CONCERNING THE CODE.
 * 
 * 5. NO LIABILITY.
 *   UNDER NO CIRCUMSTANCES SHALL JPRS AND/OR ITS AFFILIATES, LICENSORS, OR
 *   REPRESENTATIVES BE LIABLE FOR ANY DAMAGES INCLUDING BUT NOT LIMITED TO
 *   CONSEQUENTIAL, INDIRECT, SPECIAL, PUNITIVE OR INCIDENTAL DAMAGES,
 *   WHETHER FORESEEABLE OR UNFORESEEABLE, BASED ON YOUR CLAIMS, INCLUDING,
 *   BUT NOT LIMITED TO, CLAIMS FOR LOSS OF DATA, GOODWILL, PROFITS, USE OF
 *   MONEY, INTERRUPTION IN USE OR AVAILABILITY OF DATA, STOPPAGE, IMPLIED
 *   WARRANTY, BREACH OF CONTRACT, MISREPRESENTATION, NEGLIGENCE, STRICT
 *   LIABILITY IN TORT, OR OTHERWISE.
 * 
 * 6. Indemnification.
 *   You hereby agree to indemnify, defend, and hold harmless JPRS for any
 *   liability incurred by JRPS due to your terms of warranty, support,
 *   indemnity, or liability offered by you to any third party.
 * 
 * 7. Termination.
 * 7.1 This OSCL shall be automatically terminated in the events that:
 *   (a) You fail to comply with the terms herein and fail to cure such
 *       breach within 30 days of becoming aware of the breach;
 *   (b) You initiate patent or copyright infringement litigation against
 *       any party (including a cross-claim or counterclaim in a lawsuit)
 *       alleging that the Code constitutes a direct or indirect patent or
 *       copyright infringement, in such case, this OSCL to you shall
 *       terminate as of the date such litigation is filed;
 * 7.2 In the event of termination under Sections 7.1(a) or 7.1(b) above,
 *     all end user license agreements (excluding distributors and
 *     resellers) which have been validly granted by You or any distributor
 *     hereunder prior to termination shall survive termination.
 *
 * 
 * 8. General.
 *   This OSCL shall be governed by, and construed and enforced in
 *   accordance with, the laws of Japan. Any litigation or arbitration
 *   between the parties shall be conducted exclusively in Tokyo, Japan
 *   except written consent of JPRS provides other venue.
 * 
 * 
 *                                EXHIBIT A
 * 
 * The original open source code of idnkit-2 is idnkit-1.0 developed and
 * conceived by Japan Network Information Center ("JPNIC"), a Japanese
 * association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
 * Chiyoda-ku, Tokyo 101-0047, Japan, and JPRS modifies above original code
 * under following Terms and Conditions set forth by JPNIC.
 * 
 *                                  JPNIC
 * 
 * Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved.
 * 
 * By using this file, you agree to the terms and conditions set forth bellow.
 * 
 *                       LICENSE TERMS AND CONDITIONS
 * 
 * The following License Terms and Conditions apply, unless a different
 * license is obtained from Japan Network Information Center ("JPNIC"),
 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
 * Chiyoda-ku, Tokyo 101-0047, Japan.
 * 
 * 1. Use, Modification and Redistribution (including distribution of any
 *    modified or derived work) in source and/or binary forms is permitted
 *    under this License Terms and Conditions.
 * 
 * 2. Redistribution of source code must retain the copyright notices as they
 *    appear in each source code file, this License Terms and Conditions.
 * 
 * 3. Redistribution in binary form must reproduce the Copyright Notice,
 *    this License Terms and Conditions, in the documentation and/or other
 *    materials provided with the distribution. For the purposes of binary
 *    distribution the "Copyright Notice" refers to the following language:
 *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
 * 
 * 4. The name of JPNIC may not be used to endorse or promote products
 *    derived from this Software without specific prior written approval of
 *    JPNIC.
 * 
 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 *    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 * 
 * 
 *                        JPRS Public License Notice
 *                                   For
 *                                idnkit-2.
 * 
 * The contents of this file are subject to the Terms and Conditions for
 * the Open Source Code License (the "OSCL"). You may not use this file
 * except in compliance with above terms and conditions. A copy of the OSCL
 * is available at <http://jprs.co.jp/idn/>.
 * The JPRS Revised Code is idnkit-2.
 * The Initial Developer of the JPRS Revised Code is Japan Network
 * Information Center ("JPNIC"), a Japanese association,
 * Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, Chiyoda-ku, Tokyo
 * 101-0047, Japan.
 * "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
 * "Copyright (c) 2010-2012 Japan Registry Services Co., Ltd.  All rights reserved."
 * Contributor(s): ______________________________________.
 * 
 * If you wish to allow use of your version of this file only under the
 * above License(s) and not to allow others to use your version of this
 * file, please indicate your decision by deleting the relevant provisions
 * above and replacing them with the notice and other provisions required
 * by the above License(s). If you do not delete the relevant provisions,
 * a recipient may use your version of this file under either the above
 * License(s).
 */

#include <config.h>

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <idn/assert.h>
#include <idn/debug.h>
#include <idn/logmacro.h>
#include <idn/result.h>
#include <idn/normalizer.h>
#include <idn/auto/combiningclass.h>
#include <idn/auto/composition.h>
#include <idn/auto/decomposition.h>

/*
 * WORKBUF_SIZE is the number of elements in the arrays embedded in
 * workbuf_t and the capacity set by workbuf_init().
 * WORKBUF_SIZE_MAX is the capacity above which workbuf_decompose()
 * gives up and returns idn_nomemory.
 */
#define WORKBUF_SIZE		128
#define WORKBUF_SIZE_MAX	10000

/*
 * Some constants for Hangul decomposition/composition.
 *
 * SBase is the first precomposed syllable and SLast is one past the
 * last one (it is used as an exclusive upper bound).  LBase and VBase
 * are the first leading-consonant and vowel jamo.  TBase is the value
 * to which a trailing offset is added; utf32_decompose() treats a
 * trailing offset of 0 as "no trailing consonant".
 */
#define UTF32_SBase	0xac00
#define UTF32_LBase	0x1100
#define UTF32_VBase	0x1161
#define UTF32_TBase	0x11a7
#define UTF32_LCount	19
#define UTF32_VCount	21
#define UTF32_TCount	28
#define UTF32_SLast	(UTF32_SBase + UTF32_LCount * UTF32_VCount * UTF32_TCount)
	
/*
 * Working buffer: a run of characters plus, in a parallel array, the
 * canonical combining class of each one.  'utf32' and 'class' are
 * indexed alike.  workbuf_init() points them at the embedded
 * 'utf32_buf' / 'class_buf' arrays; workbuf_extend() replaces them
 * with heap storage, which workbuf_free() releases.
 */
typedef struct {
	int cur;		/* index of the character being processed */
	int last;		/* index just after the last stored character */
	int size;		/* capacity of the UTF-32 and CLASS arrays */
	unsigned long *utf32;	/* UTF-32 characters */
	int *class;		/* their canonical classes; workbuf_compose()
				 * stores -1 to mark a slot as void */
	unsigned long utf32_buf[WORKBUF_SIZE];	/* embedded initial storage */
	int class_buf[WORKBUF_SIZE];		/* ditto */
} workbuf_t;

/*
 * Forward declarations of the file-local helpers.  All of them are
 * declared 'static' here, so the later definitions that omit the
 * storage-class specifier (the utf32_*() functions) still have
 * internal linkage.
 */
static idn_result_t	normalize(int do_composition, int compat,
				  const unsigned long *from,
				  unsigned long *to, size_t tolen);
static idn_result_t	workbuf_decompose(workbuf_t *wb, unsigned long c,
					  int compat);
static void		workbuf_getcombiningclass(workbuf_t *wb);
static void		workbuf_reorder(workbuf_t *wb);
static void		workbuf_compose(workbuf_t *wb);
static idn_result_t	workbuf_flushbeforecur(workbuf_t *wb,
					       unsigned long **top,
					       size_t *tolenp);
static void		workbuf_init(workbuf_t *wb);
static void		workbuf_free(workbuf_t *wb);
static idn_result_t	workbuf_extend(workbuf_t *wb);
static idn_result_t	workbuf_append(workbuf_t *wb, unsigned long c);
static void		workbuf_shift(workbuf_t *wb, int shift);
static void		workbuf_removevoid(workbuf_t *wb);
static idn_result_t	utf32_decompose(int compat, unsigned long *v,
				       size_t vlen, unsigned long c,
				       int *decomp_lenp);
static int		utf32_iscompositecandidate(unsigned long c);
static idn_result_t	utf32_compose(unsigned long c1, unsigned long c2,
				     unsigned long *compp);

/*
 * Perform 'nfd' mapping.
 * This function is an entry point to 'mapproc' of the maplist module.
 *
 *   privdata -- not used by this function.
 *   from     -- NUL-terminated UTF-32 input string.
 *   to       -- output buffer; receives the NUL-terminated result.
 *   tolen    -- capacity of 'to' in characters, terminator included.
 *
 * Returns the result of normalize() called with do_composition == 0
 * and compat == 0.
 */
idn_result_t
idn__normalizer_formd(void *privdata, const unsigned long *from,
		      unsigned long *to, size_t tolen) {
	idn_result_t r;

	/* 'tolen' is unsigned, so only the pointers can be checked. */
	assert(from != NULL && to != NULL);

	/* 'tolen' is a size_t; convert it to match the "%d" conversion. */
	TRACE(("idn__normalizer_formd(from=\"%s\", tolen=%d)\n",
	       idn__debug_utf32xstring(from), (int)tolen));

	r = normalize(0, 0, from, to, tolen);

	if (r == idn_success) {
		TRACE(("idn__normalizer_formd(): success (to=\"%s\")\n",
		      idn__debug_utf32xstring(to)));
	} else {
		TRACE(("idn__normalizer_formd(): %s\n",
		      idn_result_tostring(r)));
	}

	return (r);
}

/*
 * Perform 'nfkd' mapping.
 * This function is an entry point to 'mapproc' of the maplist module.
 *
 *   privdata -- not used by this function.
 *   from     -- NUL-terminated UTF-32 input string.
 *   to       -- output buffer; receives the NUL-terminated result.
 *   tolen    -- capacity of 'to' in characters, terminator included.
 *
 * Returns the result of normalize() called with do_composition == 0
 * and compat == 1.
 */
idn_result_t
idn__normalizer_formkd(void *privdata, const unsigned long *from,
		       unsigned long *to, size_t tolen) {
	idn_result_t r;

	/* 'tolen' is unsigned, so only the pointers can be checked. */
	assert(from != NULL && to != NULL);

	/* 'tolen' is a size_t; convert it to match the "%d" conversion. */
	TRACE(("idn__normalizer_formkd(from=\"%s\", tolen=%d)\n",
	       idn__debug_utf32xstring(from), (int)tolen));

	r = normalize(0, 1, from, to, tolen);

	if (r == idn_success) {
		TRACE(("idn__normalizer_formkd(): success (to=\"%s\")\n",
		      idn__debug_utf32xstring(to)));
	} else {
		TRACE(("idn__normalizer_formkd(): %s\n",
		      idn_result_tostring(r)));
	}

	return (r);
}

/*
 * Perform 'nfc' mapping.
 * This function is an entry point to 'mapproc' of the maplist module.
 *
 *   privdata -- not used by this function.
 *   from     -- NUL-terminated UTF-32 input string.
 *   to       -- output buffer; receives the NUL-terminated result.
 *   tolen    -- capacity of 'to' in characters, terminator included.
 *
 * Returns the result of normalize() called with do_composition == 1
 * and compat == 0.
 */
idn_result_t
idn__normalizer_formc(void *privdata, const unsigned long *from,
		      unsigned long *to, size_t tolen) {
	idn_result_t r;

	/* 'tolen' is unsigned, so only the pointers can be checked. */
	assert(from != NULL && to != NULL);

	/* 'tolen' is a size_t; convert it to match the "%d" conversion. */
	TRACE(("idn__normalizer_formc(from=\"%s\", tolen=%d)\n",
	       idn__debug_utf32xstring(from), (int)tolen));

	r = normalize(1, 0, from, to, tolen);

	if (r == idn_success) {
		TRACE(("idn__normalizer_formc(): success (to=\"%s\")\n",
		      idn__debug_utf32xstring(to)));
	} else {
		TRACE(("idn__normalizer_formc(): %s\n",
		      idn_result_tostring(r)));
	}

	return (r);
}

/*
 * Perform 'nfkc' mapping.
 * This function is an entry point to 'mapproc' of the maplist module.
 *
 *   privdata -- not used by this function.
 *   from     -- NUL-terminated UTF-32 input string.
 *   to       -- output buffer; receives the NUL-terminated result.
 *   tolen    -- capacity of 'to' in characters, terminator included.
 *
 * Returns the result of normalize() called with do_composition == 1
 * and compat == 1.
 */
idn_result_t
idn__normalizer_formkc(void *privdata, const unsigned long *from,
		       unsigned long *to, size_t tolen) {
	idn_result_t r;

	/* 'tolen' is unsigned, so only the pointers can be checked. */
	assert(from != NULL && to != NULL);

	/* 'tolen' is a size_t; convert it to match the "%d" conversion. */
	TRACE(("idn__normalizer_formkc(from=\"%s\", tolen=%d)\n",
	       idn__debug_utf32xstring(from), (int)tolen));

	r = normalize(1, 1, from, to, tolen);

	if (r == idn_success) {
		TRACE(("idn__normalizer_formkc(): success (to=\"%s\")\n",
		      idn__debug_utf32xstring(to)));
	} else {
		TRACE(("idn__normalizer_formkc(): %s\n",
		      idn_result_tostring(r)));
	}

	return (r);
}

/*
 * Perform NFD, NFKD, NFC or NFKC.
 *
 *   do_composition == 0 && compat == 0   --  NFD
 *   do_composition == 0 && compat == 1   --  NFKD
 *   do_composition == 1 && compat == 0   --  NFC
 *   do_composition == 1 && compat == 1   --  NFKC
 *
 * 'from' is read up to its terminating NUL.  The result is written to
 * 'to', which has room for 'tolen' characters, and is NUL-terminated.
 *
 * Returns idn_success, idn_buffer_overflow when 'to' is too small, or
 * any error reported by workbuf_decompose().  The working buffer is
 * released on every exit path.
 */
static idn_result_t
normalize(int do_composition, int compat, const unsigned long *from,
	  unsigned long *to, size_t tolen) {
	idn_result_t r = idn_success;
	workbuf_t wb;

	/*
	 * Initialize working buffer.
	 */
	workbuf_init(&wb);

	while (*from != '\0') {
		unsigned long c;

		/* Everything buffered so far has already been examined. */
		assert(wb.cur == wb.last);

		/*
		 * Get one character from 'from'.
		 */
		c = *from++;

		/*
		 * Decompose it.  The result is appended to WB, so
		 * WB.LAST advances while WB.CUR stays put.
		 */
		if ((r = workbuf_decompose(&wb, c, compat)) != idn_success)
			goto ret;

		/*
		 * Get canonical class of the characters just appended.
		 */
		workbuf_getcombiningclass(&wb);

		/*
		 * Reorder & compose.
		 */
		for (; wb.cur < wb.last; wb.cur++) {
			if (wb.cur == 0) {
				/* Nothing precedes the first character. */
				continue;
			} else if (wb.class[wb.cur] > 0) {
				/*
				 * This is not a starter. Try reordering.
				 * Note that characters up to it are
				 * already in canonical order.
				 */
				workbuf_reorder(&wb);
				continue;
			}

			/*
			 * This is a starter character, and there are
			 * some characters before it.  Those characters
			 * have been reordered properly, and
			 * ready for composition.
			 * workbuf_compose() may remove characters, in
			 * which case it moves WB.CUR back accordingly.
			 */
			if (do_composition && wb.class[0] == 0)
				workbuf_compose(&wb);

			/*
			 * If CUR points to a starter character,
			 * then process of characters before CUR are
			 * already finished, because any further
			 * reordering/composition for them are blocked
			 * by the starter CUR points.
			 */
			if (wb.cur > 0 && wb.class[wb.cur] == 0) {
				/* Flush everything before CUR. */
				r = workbuf_flushbeforecur(&wb, &to, &tolen);
				if (r != idn_success)
					goto ret;
			}
		}
	}

	/*
	 * Every failure above jumps to 'ret', so R is idn_success
	 * whenever control reaches this point.
	 */
	if (r == idn_success) {
		if (do_composition && wb.cur > 0 && wb.class[0] == 0) {
			/*
			 * There is some characters left in WB. They are
			 * ordered, but not composed yet. Now CUR points
			 * just after the last character in WB, and since
			 * workbuf_compose() tries to compose characters
			 * between top and CUR inclusive, we must make
			 * CUR one character back during workbuf_compose().
			 */
			wb.cur--;
			workbuf_compose(&wb);
			wb.cur++;
		}
		/*
		 * Flush the remaining characters.  This is called even
		 * when WB.CUR == 0; in that case nothing is copied.
		 */
		r = workbuf_flushbeforecur(&wb, &to, &tolen);
		if (r != idn_success)
			goto ret;
	}

	/* One more slot is needed for the terminating NUL. */
	if (tolen <= 0) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*to = '\0';

ret:
	workbuf_free(&wb);
	return (r);
}

/*
 * Decompose 'c' and store the result at the end of the working buffer.
 *
 * utf32_decompose() is asked to write at WB->LAST.  On success WB->LAST
 * advances by the number of characters written.  If 'c' has no
 * applicable decomposition it is appended unchanged.  If the buffer is
 * too small, workbuf_extend() is called and the attempt is repeated,
 * unless WB->SIZE then exceeds WORKBUF_SIZE_MAX, in which case
 * idn_nomemory is returned.  Any other result is passed to the caller.
 */
static idn_result_t
workbuf_decompose(workbuf_t *wb, unsigned long c, int compat) {
	for (;;) {
		idn_result_t status;
		int nwritten;

		status = utf32_decompose(compat, &wb->utf32[wb->last],
					 wb->size - wb->last, c, &nwritten);
		if (status == idn_success) {
			wb->last += nwritten;
			return (idn_success);
		}
		if (status == idn_notfound)
			return (workbuf_append(wb, c));
		if (status != idn_buffer_overflow)
			return (status);

		/* Not enough room: ask for more and try again. */
		status = workbuf_extend(wb);
		if (status != idn_success)
			return (status);
		if (wb->size > WORKBUF_SIZE_MAX) {
			WARNING(("idn__normalizer_form*: "
				"working buffer too large\n"));
			return (idn_nomemory);
		}
	}
	/* NOTREACHED */
}

/*
 * Fill in WB->CLASS for every character from WB->CUR up to (but not
 * including) WB->LAST, using idn__sparsemap_getcombiningclass().
 */
static void
workbuf_getcombiningclass(workbuf_t *wb) {
	int idx = wb->cur;

	while (idx < wb->last) {
		wb->class[idx] =
			idn__sparsemap_getcombiningclass(wb->utf32[idx]);
		idx++;
	}
}

/*
 * Move the character at WB->CUR towards the front of the buffer until
 * the character before it no longer has a greater combining class.
 * Characters that are passed over move one slot towards the end.
 */
static void
workbuf_reorder(workbuf_t *wb) {
	unsigned long moving_char;
	int moving_class;
	int slot;

	assert(wb != NULL);

	slot = wb->cur;
	moving_char = wb->utf32[slot];
	moving_class = wb->class[slot];

	/* Open a gap by sliding higher-class characters up by one. */
	for (; slot > 0 && wb->class[slot - 1] > moving_class; slot--) {
		wb->utf32[slot] = wb->utf32[slot - 1];
		wb->class[slot] = wb->class[slot - 1];
	}

	/* Drop the character into its final position. */
	wb->utf32[slot] = moving_char;
	wb->class[slot] = moving_class;
}

/*
 * Try to compose the first character of the buffer with each of the
 * characters at indices 1..WB->CUR (inclusive).
 *
 * A character is tried when its class is greater than the class of
 * the last character that was left in place, or when it has class 0
 * and every character before it has already been composed away.
 * When utf32_compose() succeeds, the first character is replaced by
 * the composite and the consumed slot is marked void (class -1).
 * Void slots are removed at the end by workbuf_removevoid().
 */
static void
workbuf_compose(workbuf_t *wb) {
	unsigned long *chars;
	int *classes;
	int end;
	int blocking_class = 0;
	int nconsumed = 0;
	int pos;

	assert(wb != NULL && wb->class[0] == 0);

	chars = wb->utf32;
	classes = wb->class;
	end = wb->cur;

	/*
	 * Nothing can be composed unless some composition begins with
	 * the first character.
	 */
	if (!utf32_iscompositecandidate(chars[0]))
		return;

	for (pos = 1; pos <= end; pos++) {
		unsigned long composite;
		int this_class = classes[pos];
		int adjacent_starter =
			(this_class == 0 && pos - nconsumed == 1);

		if ((blocking_class >= this_class && !adjacent_starter) ||
		    utf32_compose(chars[0], chars[pos],
				  &composite) != idn_success) {
			/* Left in place; remember its class. */
			blocking_class = this_class;
			continue;
		}

		/* Replace the first character with the composite. */
		chars[0] = composite;
		classes[0] = idn__sparsemap_getcombiningclass(composite);

		/* Mark the consumed character as void. */
		classes[pos] = -1;
		nconsumed++;
	}

	/* Squeeze out the void slots, if any. */
	if (nconsumed > 0)
		workbuf_removevoid(wb);
}

/*
 * Copy the characters before WB->CUR to the output and drop them from
 * the working buffer.
 *
 * '*top' is the current output position and '*tolenp' the number of
 * characters still free there; both are updated on success.  Returns
 * idn_buffer_overflow, without copying anything, when fewer than
 * WB->CUR characters are free.
 */
static idn_result_t
workbuf_flushbeforecur(workbuf_t *wb, unsigned long **top, size_t *tolenp) {
	const int nready = wb->cur;
	unsigned long *out = *top;

	if (*tolenp < nready)
		return (idn_buffer_overflow);

	memcpy(out, wb->utf32, nready * sizeof(*out));
	*top = out + nready;
	*tolenp -= nready;

	/* Discard what has just been written out. */
	workbuf_shift(wb, nready);

	return (idn_success);
}

/*
 * Set up an empty working buffer that uses its embedded arrays,
 * which hold WORKBUF_SIZE elements each, as storage.
 */
static void
workbuf_init(workbuf_t *wb) {
	/* Storage: start with the arrays inside the structure. */
	wb->utf32 = wb->utf32_buf;
	wb->class = wb->class_buf;
	wb->size = WORKBUF_SIZE;

	/* Contents: nothing stored, nothing processed. */
	wb->last = 0;
	wb->cur = 0;
}

/*
 * Release the storage of a working buffer.  Nothing is freed while the
 * buffer still uses its embedded arrays.
 */
static void
workbuf_free(workbuf_t *wb) {
	if (wb->utf32 == wb->utf32_buf)
		return;

	free(wb->utf32);
	free(wb->class);
}

/*
 * Triple the capacity of the working buffer.
 *
 * The first call moves the contents from the embedded arrays into
 * newly allocated ones; later calls resize the allocated arrays.
 * WB->SIZE is updated only after both arrays have been enlarged.
 *
 * Returns idn_success, or idn_nomemory if an allocation fails.  On
 * failure the buffer remains valid with its previous capacity and
 * contents, so the caller can still pass it to workbuf_free().
 */
static idn_result_t
workbuf_extend(workbuf_t *wb) {
	int newsize = wb->size * 3;
	unsigned long *new_utf32;
	int *new_class;

	if (wb->utf32 == wb->utf32_buf) {
		new_utf32 = malloc(sizeof(wb->utf32[0]) * newsize);
		new_class = malloc(sizeof(wb->class[0]) * newsize);
		if (new_utf32 == NULL || new_class == NULL) {
			/* free(NULL) is a no-op, so both can be passed. */
			free(new_utf32);
			free(new_class);
			return (idn_nomemory);
		}

		/*
		 * Carry over what is stored so far; malloc() does not
		 * do this for us the way realloc() does.
		 */
		memcpy(new_utf32, wb->utf32,
		       sizeof(wb->utf32[0]) * wb->last);
		memcpy(new_class, wb->class,
		       sizeof(wb->class[0]) * wb->last);

		wb->utf32 = new_utf32;
		wb->class = new_class;
	} else {
		/*
		 * Keep the old pointer until realloc() has succeeded;
		 * on failure the old block is still allocated.
		 */
		new_utf32 = realloc(wb->utf32,
				    sizeof(wb->utf32[0]) * newsize);
		if (new_utf32 == NULL)
			return (idn_nomemory);
		wb->utf32 = new_utf32;

		new_class = realloc(wb->class,
				    sizeof(wb->class[0]) * newsize);
		if (new_class == NULL)
			return (idn_nomemory);
		wb->class = new_class;
	}

	wb->size = newsize;
	return (idn_success);
}

/*
 * Store 'c' at WB->LAST and advance WB->LAST by one.
 *
 * workbuf_extend() is called first when WB->LAST has reached WB->SIZE;
 * if that fails, its result is returned and nothing is stored.
 */
static idn_result_t
workbuf_append(workbuf_t *wb, unsigned long c) {
	if (wb->last >= wb->size) {
		idn_result_t status = workbuf_extend(wb);

		if (status != idn_success)
			return (status);
	}

	wb->utf32[wb->last] = c;
	wb->last++;

	return (idn_success);
}

/*
 * Discard the first 'shift' characters of the working buffer, moving
 * the remaining characters and classes to the front and lowering
 * WB->CUR and WB->LAST by 'shift'.  'shift' must not exceed WB->CUR.
 */
static void
workbuf_shift(workbuf_t *wb, int shift) {
	int nkept;

	assert(wb != NULL && wb->cur >= shift);

	nkept = wb->last - shift;

	/* Source and destination may overlap, hence memmove(). */
	(void)memmove(wb->utf32, wb->utf32 + shift,
		      nkept * sizeof(*wb->utf32));
	(void)memmove(wb->class, wb->class + shift,
		      nkept * sizeof(*wb->class));

	wb->last = nkept;
	wb->cur -= shift;
}

/*
 * Remove every slot whose class is negative (void), keeping the other
 * characters in their original order.  WB->CUR and WB->LAST are both
 * lowered by the number of slots removed.
 */
static void
workbuf_removevoid(workbuf_t *wb) {
	const int old_last = wb->last;
	int src;
	int dst = 0;

	for (src = 0; src < old_last; src++) {
		if (wb->class[src] < 0)
			continue;	/* void slot: skip it */

		if (dst != src) {
			wb->utf32[dst] = wb->utf32[src];
			wb->class[dst] = wb->class[src];
		}
		dst++;
	}

	wb->cur -= old_last - dst;
	wb->last = dst;
}

/*
 * Fully decompose the character 'c' into the array 'v', which has
 * room for 'vlen' characters.
 *
 * With 'compat' zero only canonical decompositions are applied; with
 * 'compat' non-zero compatibility decompositions are applied as well.
 *
 * Returns:
 *   idn_success         -- the decomposition was written to 'v' and its
 *                          length stored in '*decomp_lenp'.
 *   idn_notfound        -- 'c' has no applicable decomposition; 'v' and
 *                          '*decomp_lenp' are left untouched.
 *   idn_buffer_overflow -- 'v' is too small.  Part of the sequence may
 *                          already have been written to 'v'.
 */
idn_result_t
utf32_decompose(int compat, unsigned long *v, size_t vlen, unsigned long c,
	       int *decomp_lenp) {
	unsigned long *vorg = v;
	int seqidx;
	const unsigned long *seq;

	assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);

	/*
	 * First, check for Hangul.  Syllables are decomposed
	 * arithmetically into two or three jamo.
	 */
	if (UTF32_SBase <= c && c < UTF32_SLast) {
		int idx, t_offset, v_offset, l_offset;

		idx = c - UTF32_SBase;
		t_offset = idx % UTF32_TCount;
		idx /= UTF32_TCount;
		v_offset = idx % UTF32_VCount;
		l_offset = idx / UTF32_VCount;
		/* Two slots are needed for L+V, three for L+V+T. */
		if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
			return (idn_buffer_overflow);
		*v++ = UTF32_LBase + l_offset;
		*v++ = UTF32_VBase + v_offset;
		/* A trailing offset of 0 means there is no trailing jamo. */
		if (t_offset > 0)
			*v++ = UTF32_TBase + t_offset;
		*decomp_lenp = v - vorg;
		return (idn_success);
	}

	/*
	 * Look up decomposition table.  If no decomposition is defined
	 * or if it is a compatibility decomposition when canonical
	 * decomposition requested, return 'idn_notfound'.
	 */
	seqidx = idn__sparsemap_getdecomposition(c);
	if (seqidx == 0 || (!compat && IS_COMPAT_DECOMPOSITION(seqidx)))
		return (idn_notfound);
	seq = idn__sparsemap_getdecompositionseq(seqidx);
	
	/*
	 * Copy the decomposed sequence.  The end of the sequence are
	 * marked with END_BIT.
	 */
	for (;;) {
		/* This 'c' is one element of the sequence; it hides the parameter. */
		unsigned long c;
		int dlen;
		idn_result_t r = idn_success;

		c = DECOMPOSITIONSEQ_DATA(*seq);

		/* Decompose recursively. */
		r = utf32_decompose(compat, v, vlen, c, &dlen);
		if (r == idn_success) {
			v += dlen;
			vlen -= dlen;
		} else if (r == idn_notfound) {
			/* No further decomposition: store the element itself. */
			if (vlen < 1)
				return (idn_buffer_overflow);
			*v++ = c;
			vlen--;
		} else {
			return (r);
		}

		if (IS_DECOMPOSITIONSEQ_DATA_END(*seq))
			break;
		seq++;
	}
	
	*decomp_lenp = v - vorg;

	return (idn_success);
}

/*
 * Return non-zero if 'c' may be the first character of a composition:
 * either it lies in the Hangul leading-consonant or syllable range,
 * or the composition table has an entry for it.
 */
int
utf32_iscompositecandidate(unsigned long c) {
	int is_hangul_l = (c >= UTF32_LBase &&
			   c < UTF32_LBase + UTF32_LCount);
	int is_hangul_s = (c >= UTF32_SBase && c < UTF32_SLast);

	/* Hangul is composed arithmetically, not through the table. */
	if (is_hangul_l || is_hangul_s)
		return (1);

	/*
	 * Otherwise consult the composition table; a result of 0 means
	 * that no composition begins with this character.
	 */
	return (idn__sparsemap_getcomposition(c) != 0);
}

/*
 * Compose the pair 'c1', 'c2' into a single character.
 *
 * Hangul L+V and LV+T pairs are composed arithmetically; every other
 * pair is looked up in the composition table.
 *
 * Returns idn_success and stores the composite in '*compp', or
 * idn_notfound (leaving '*compp' untouched) if the pair does not
 * compose.
 */
idn_result_t
utf32_compose(unsigned long c1, unsigned long c2, unsigned long *compp) {
	int seqidx;
	int lo, hi;
	const struct composition *cseq;

	assert(compp != NULL);

	/*
	 * Check for Hangul.
	 */
	if (UTF32_LBase <= c1 && c1 < UTF32_LBase + UTF32_LCount &&
	    UTF32_VBase <= c2 && c2 < UTF32_VBase + UTF32_VCount) {
		/*
		 * Hangul L and V.
		 */
		*compp = UTF32_SBase + ((c1 - UTF32_LBase) * UTF32_VCount +
			 (c2 - UTF32_VBase)) * UTF32_TCount;
		return (idn_success);
	} else if (UTF32_SBase <= c1 && c1 < UTF32_SLast &&
		   UTF32_TBase < c2 && c2 < UTF32_TBase + UTF32_TCount &&
		   (c1 - UTF32_SBase) % UTF32_TCount == 0) {
		/*
		 * Hangul LV and T.
		 *
		 * UTF32_TBase itself is excluded: it stands for "no
		 * trailing consonant" and is not a trailing jamo.
		 * Accepting it would yield 'c1' unchanged and make the
		 * caller drop 'c2' from the text.
		 */
		*compp = c1 + (c2 - UTF32_TBase);
		return (idn_success);
	}

	/*
	 * Look up composition table.  If the result is 0, no composition
	 * is defined for 'c1'.
	 */
	seqidx = idn__sparsemap_getcomposition(c1);
	if (seqidx == 0)
		return (idn_notfound);
	cseq = idn__sparsemap_getcompositionseq(seqidx);

	/*
	 * The composite sequences are sorted by the 2nd character 'c2'.
	 * So we can use binary search.
	 */
	lo = 0;
	hi = NUM_COMPOSITIONS(seqidx) - 1;
	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (cseq[mid].c2 < c2) {
			lo = mid + 1;
		} else if (cseq[mid].c2 > c2) {
			hi = mid - 1;
		} else {
			*compp = cseq[mid].comp;
			return (idn_success);
		}
	}
	return (idn_notfound);
}
