User:VasilievVV/Old Unicode

From mediawiki.org

Below is the old Unicode module code.

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <signal.h>
#include <time.h>
#include <lua.h>

#include <unicode/utf.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>

#include "php.h"
#include "php_luasandbox.h"
#include "luasandbox_unicode.h"

/*************************** Begin Lua function list ***************************/

/**
 * One row of the Lua function table: the field name under which a function
 * is exposed, paired with the C function implementing it.
 */
struct luasandbox_unicode_function_list_entry {
	const char *name;	// Field name; the table in this file only stores string literals here
	lua_CFunction function;	// C implementation registered under that name
};

// Number of entries in luasandbox_unicode_function_list. Derived from the
// table itself so the count cannot drift when functions are added or removed.
// Only usable after the definition of the table.
#define LUASANDBOX_UNICODE_NUM_FUNCTIONS \
	( (int)( sizeof( luasandbox_unicode_function_list ) / sizeof( luasandbox_unicode_function_list[0] ) ) )

// Forward declarations of the Lua-callable entry points defined below.

// Inspection
int luasandbox_utf8_len_lua( lua_State *L );

// Case conversion
int luasandbox_utf8_ucfirst_lua( lua_State *L );
int luasandbox_utf8_uc_lua( lua_State *L );
int luasandbox_utf8_lc_lua( lua_State *L );
int luasandbox_utf8_tc_lua( lua_State *L );

// Trimming and extraction
int luasandbox_utf8_trim_lua( lua_State *L );
int luasandbox_utf8_at_lua( lua_State *L );
int luasandbox_utf8_code_lua( lua_State *L );
int luasandbox_utf8_sub_lua( lua_State *L );

// Searching and splitting
int luasandbox_utf8_pos_lua( lua_State *L );
int luasandbox_utf8_replace_lua( lua_State *L );
int luasandbox_utf8_split_lua( lua_State *L );
int luasandbox_utf8_join_lua( lua_State *L );

// Maps each field of the Lua "unicode" table to the C function behind it.
static struct luasandbox_unicode_function_list_entry luasandbox_unicode_function_list[] = {
	{ .name = "len",     .function = luasandbox_utf8_len_lua },
	{ .name = "ucfirst", .function = luasandbox_utf8_ucfirst_lua },
	{ .name = "uc",      .function = luasandbox_utf8_uc_lua },
	{ .name = "lc",      .function = luasandbox_utf8_lc_lua },
	{ .name = "tc",      .function = luasandbox_utf8_tc_lua },
	{ .name = "trim",    .function = luasandbox_utf8_trim_lua },
	{ .name = "at",      .function = luasandbox_utf8_at_lua },
	{ .name = "code",    .function = luasandbox_utf8_code_lua },
	{ .name = "sub",     .function = luasandbox_utf8_sub_lua },
	{ .name = "pos",     .function = luasandbox_utf8_pos_lua },
	{ .name = "replace", .function = luasandbox_utf8_replace_lua },
	{ .name = "split",   .function = luasandbox_utf8_split_lua },
	{ .name = "join",    .function = luasandbox_utf8_join_lua },
};

/*************************** End Lua function list ***************************/

// Raises a Lua error reporting malformed UTF-8 input. Expects a lua_State
// pointer named L in scope. lua_error() does not return, so memory owned by
// the caller must be released before this macro is invoked.
// Wrapped in do/while(0) so that it behaves as a single statement.
#define LUASANDBOX_UNICODE_INVALID_FAIL()	do { \
			lua_pushstring( L, "Invalid UTF-8 supplied" ); \
			lua_error(L); \
		} while( 0 )
// Raises the "invalid UTF-8" Lua error when cur is negative, which is how the
// ICU U8_* decoding macros used in this file flag an ill-formed sequence.
// The argument is parenthesized so that any expression can be passed safely.
#define LUASANDBOX_UNICODE_INVALID_CHECK(cur)	do { \
			if( (cur) < 0 ) { \
				LUASANDBOX_UNICODE_INVALID_FAIL(); \
			} \
		} while( 0 )
// Checks an ICU status code. Expects a lua_State pointer named L in scope.
// On failure: pushes "Unicode handling error: <ICU error name>", executes
// cleanupCode (lua_error() does not return, so this is the last chance to
// free memory) and raises the Lua error.
// Otherwise: resets errorCode to U_ZERO_ERROR, discarding any ICU warning, so
// that the same variable can be passed to the next ICU call.
#define LUASANDBOX_CHECK_ICU_ERROR(errorCode, cleanupCode)	do { \
			if( U_FAILURE(errorCode) ) { \
				char _luasandbox_errmsg[1024]; \
				snprintf( _luasandbox_errmsg, sizeof( _luasandbox_errmsg ), "Unicode handling error: %s", u_errorName(errorCode) ); \
				lua_pushstring( L, _luasandbox_errmsg ); \
				cleanupCode; \
				lua_error(L); \
			} \
			(errorCode) = U_ZERO_ERROR; \
		} while( 0 )

/** {{{ luasandbox_install_unicode_functions
 *
 * Builds a table holding every function listed in
 * luasandbox_unicode_function_list and publishes it as the global "unicode".
 */
void luasandbox_install_unicode_functions(lua_State * L)
{
	struct luasandbox_unicode_function_list_entry *entry = luasandbox_unicode_function_list;
	int remaining = LUASANDBOX_UNICODE_NUM_FUNCTIONS;

	// No array part; one hash slot per exported function
	lua_createtable( L, 0, LUASANDBOX_UNICODE_NUM_FUNCTIONS );

	while( remaining-- > 0 ) {
		lua_pushcfunction( L, entry->function );
		lua_setfield( L, -2, entry->name );	// Stores the function and pops it
		entry++;
	}

	lua_setglobal( L, "unicode" );
}
/* }}} */

/** {{{ luasandbox_utf8_len_lua
 *
 * Lua function: returns the number of code points in its string argument.
 * Raises a Lua error if the argument is not valid UTF-8.
 */
int luasandbox_utf8_len_lua(lua_State * L)
{
	uint8_t *bytes;
	size_t byte_count, codepoints;
	int32_t pos;
	UChar32 cp;

	bytes = (uint8_t *)luaL_checklstring( L, 1, &byte_count );

	// Each pass decodes exactly one code point; U8_NEXT moves pos past it
	for( pos = 0, codepoints = 0; pos < byte_count; codepoints++ ) {
		U8_NEXT( bytes, pos, byte_count, cp );
		LUASANDBOX_UNICODE_INVALID_CHECK( cp );
	}

	lua_pushinteger( L, codepoints );
	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_ucfirst_lua
 *
 * Lua function which converts the first code point of the string to uppercase.
 *
 * Uses u_toupper(), which maps one code point to one code point, so a
 * character without a single-code-point uppercase form (the original author
 * names eszett as an example) is left unchanged. The encoded length of the
 * first character may still change, which is why the result is rebuilt
 * instead of being patched in place.
 *
 * Returns the string unchanged when it is empty or already starts with an
 * uppercase/caseless character. Raises a Lua error if the first character is
 * not valid UTF-8; the rest of the string is copied without validation.
 */
int luasandbox_utf8_ucfirst_lua(lua_State * L)
{
	const uint8_t *utf_string;
	uint8_t *result;
	size_t raw_len, new_len;
	int32_t old_first_len, new_first_len;
	UChar32 first, newfirst;
	int32_t offset = 0;	// Write cursor needed by U8_APPEND_UNSAFE

	utf_string = (const uint8_t *)luaL_checklstring( L, 1, &raw_len );

	if( !raw_len ) {
		lua_pushstring( L, "" );
		return 1;
	}

	U8_GET( utf_string, 0, 0, raw_len, first );
	LUASANDBOX_UNICODE_INVALID_CHECK( first );

	newfirst = u_toupper( first );
	if( newfirst == first ) {
		// Nothing to change: hand the input back without copying it around
		lua_pushlstring( L, (const char *)utf_string, raw_len );
		return 1;
	}

	// One code path covers both equal and differing encoded lengths:
	// write the new first character, then append the untouched remainder.
	old_first_len = U8_LENGTH( first );
	new_first_len = U8_LENGTH( newfirst );
	new_len = raw_len - old_first_len + new_first_len;

	result = emalloc( new_len );	// lua_pushlstring takes a length, no \0 needed
	U8_APPEND_UNSAFE( result, offset, newfirst );
	memcpy( result + new_first_len, utf_string + old_first_len, raw_len - old_first_len );

	lua_pushlstring( L, (const char *)result, new_len );
	efree( result );

	return 1;
}
/* }}} */

// Selects the case mapping applied by luasandbox_utf8_change_case().
enum {
	LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER = 1,
	LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER = 2,
	LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE = 3
};

/** {{{ luasandbox_utf8_change_case
 *
 * Backend function for uc(), lc() and tc(). Converts the string into UTF-16,
 * passes it to the matching ICU function and then converts the result back to
 * UTF-8. This is required since casing algorithms are rather non-trivial and
 * may even be locale-dependent (the root locale "" is used here).
 *
 * All buffers are sized in elements, not bytes, using worst-case estimates:
 *  - UTF-8 -> UTF-16 never yields more code units than there are input bytes;
 *  - the case-mapped string is given room for three code units per input
 *    code unit;
 *  - UTF-16 -> UTF-8 needs at most three bytes per code unit.
 * Every buffer gets one extra element so that ICU can NUL-terminate it.
 * Should an estimate ever prove too small, ICU reports
 * U_BUFFER_OVERFLOW_ERROR, which is turned into a Lua error.
 *
 * @param L      Lua state; argument 1 must be a string
 * @param action One of the LUASANDBOX_UTF8_CHANGE_CASE_* constants
 * @return 1 (the converted string is left on the Lua stack). Raises a Lua
 *         error when ICU reports a failure, e.g. for invalid UTF-8.
 */
static int luasandbox_utf8_change_case(lua_State * L, int action)
{
	const char *utf_string;
	char *result;
	UChar *utf16_string, *result16;
	size_t raw_len;
	int32_t utf16_capacity, result16_capacity, result_capacity;
	int32_t orig16_len = 0, result16_len = 0, result_len = 0;
	UErrorCode errorCode = U_ZERO_ERROR;

	utf_string = luaL_checklstring( L, 1, &raw_len );

	// ICU measures everything in int32_t. The largest buffer below may need
	// about nine elements per input byte, so refuse anything that would not fit.
	if( raw_len > (size_t)( ( INT32_MAX - 4 ) / 9 ) ) {
		lua_pushstring( L, "String is too long for case conversion" );
		lua_error( L );
	}

	// Yes, this is malloc, not emalloc.
	// NOTE(review): the original author reported that ICU misbehaves with
	// emalloc'ed memory here; this has not been re-verified.
	utf16_capacity = (int32_t)raw_len + 1;
	utf16_string = malloc( utf16_capacity * sizeof( UChar ) );
	if( !utf16_string ) {
		lua_pushstring( L, "Unicode handling error: out of memory" );
		lua_error( L );
	}
	u_strFromUTF8( utf16_string, utf16_capacity, &orig16_len, utf_string, (int32_t)raw_len, &errorCode );
	LUASANDBOX_CHECK_ICU_ERROR( errorCode, free(utf16_string) );

	result16_capacity = 3 * orig16_len + 1;
	result16 = emalloc( result16_capacity * sizeof( UChar ) );
	switch( action ) {
		case LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER:
			result16_len = u_strToUpper( result16, result16_capacity, utf16_string, orig16_len, "", &errorCode );
			break;
		case LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER:
			result16_len = u_strToLower( result16, result16_capacity, utf16_string, orig16_len, "", &errorCode );
			break;
		case LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE:
			result16_len = u_strToTitle( result16, result16_capacity, utf16_string, orig16_len, NULL, "", &errorCode );
			break;
		default:
			// Unknown action: report it through the regular ICU error path
			errorCode = U_ILLEGAL_ARGUMENT_ERROR;
			break;
	}
	free( utf16_string );
	LUASANDBOX_CHECK_ICU_ERROR( errorCode, efree(result16) );

	// Back to UTF-8!
	result_capacity = 3 * result16_len + 1;
	result = emalloc( result_capacity );
	u_strToUTF8( result, result_capacity, &result_len, result16, result16_len, &errorCode );
	efree( result16 );
	LUASANDBOX_CHECK_ICU_ERROR( errorCode, efree(result) );

	// Return and clean up
	lua_pushlstring( L, result, result_len );
	efree( result );

	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_uc_lua
 *
 * Lua function: converts the whole string to uppercase.
 */
int luasandbox_utf8_uc_lua(lua_State * L)
{
	// The backend's return value is the number of results handed to Lua;
	// it must be passed on, not dropped.
	return luasandbox_utf8_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER );
}
/* }}} */

/** {{{ luasandbox_utf8_lc_lua
 *
 * Lua function: converts the whole string to lowercase.
 */
int luasandbox_utf8_lc_lua(lua_State * L)
{
	// The backend's return value is the number of results handed to Lua;
	// it must be passed on, not dropped.
	return luasandbox_utf8_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER );
}
/* }}} */

/** {{{ luasandbox_utf8_tc_lua
 *
 * Lua function: converts the whole string to title case.
 */
int luasandbox_utf8_tc_lua(lua_State * L)
{
	// The backend's return value is the number of results handed to Lua;
	// it must be passed on, not dropped.
	return luasandbox_utf8_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE );
}
/* }}} */

/** {{{ luasandbox_utf8_trim_lua
 *
 * Lua function: removes all the whitespace (except NBSP) from the beginning
 * and end of the string. Whitespace is whatever ICU's u_isWhitespace()
 * accepts.
 *
 * The whole string is decoded, so invalid UTF-8 anywhere in it raises a Lua
 * error. A string consisting only of whitespace yields "".
 */
int luasandbox_utf8_trim_lua(lua_State * L)
{
	const uint8_t *utf_string;
	size_t raw_len;
	UChar32 cur;
	int32_t i = 0, len;
	int32_t content_start = -1;	// Byte offset of the first non-whitespace code point
	int32_t content_end = 0;	// Byte offset just past the last non-whitespace code point

	utf_string = (const uint8_t *)luaL_checklstring( L, 1, &raw_len );
	len = (int32_t)raw_len;

	// Single pass. Both boundaries are tracked as byte offsets, because
	// whitespace characters may occupy more than one byte.
	while( i < len ) {
		int32_t char_start = i;

		U8_NEXT( utf_string, i, len, cur );
		if( cur < 0 ) {
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}

		if( !u_isWhitespace( cur ) ) {
			if( content_start < 0 ) {
				content_start = char_start;
			}
			content_end = i;
		}
	}

	if( content_start < 0 ) {
		// Empty or whitespace-only input
		lua_pushstring( L, "" );
		return 1;
	}

	// Lua copies the bytes, so the slice can be pushed straight from the input
	lua_pushlstring( L, (const char *)utf_string + content_start, content_end - content_start );

	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_at_lua
 *
 * Lua function: returns the code point at the given zero-based code point
 * index as a one-character string, or false when the index lies outside the
 * string (negative indexes never match).
 *
 * Only the part of the string up to and including the requested character is
 * decoded; invalid UTF-8 within that part raises a Lua error.
 */
int luasandbox_utf8_at_lua(lua_State * L)
{
	const uint8_t *utf_string;
	size_t raw_len;
	int32_t i = 0, len, idx = 0, target;
	UChar32 cur;

	utf_string = (const uint8_t *)luaL_checklstring( L, 1, &raw_len );
	target = luaL_checkinteger( L, 2 );
	len = (int32_t)raw_len;

	while( i < len ) {
		int32_t char_start = i;

		U8_NEXT( utf_string, i, len, cur );
		if( cur < 0 ) {
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}
		if( idx == target ) {
			// The character's bytes are already in the input; push them
			// directly instead of re-encoding into a temporary buffer
			lua_pushlstring( L, (const char *)utf_string + char_start, i - char_start );
			return 1;
		}
		idx++;
	}

	lua_pushboolean( L, FALSE );
	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_code_lua
 *
 * Lua function: returns the numeric value of the first code point of the
 * string, or false for an empty string. Raises a Lua error if the string
 * does not start with valid UTF-8.
 */
int luasandbox_utf8_code_lua(lua_State * L)
{
	uint8_t *bytes;
	size_t byte_count;
	UChar32 cp;

	bytes = (uint8_t *)luaL_checklstring( L, 1, &byte_count );

	// Nothing to decode in an empty string
	if( byte_count == 0 ) {
		lua_pushboolean( L, FALSE );
		return 1;
	}

	U8_GET( bytes, 0, 0, byte_count, cp );
	if( cp < 0 ) {
		LUASANDBOX_UNICODE_INVALID_FAIL();
	}

	lua_pushinteger( L, cp );
	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_sub_lua
 *
 * Lua function: sub(string, start [, length]). Returns the substring that
 * begins at the zero-based code point index start and spans length code
 * points. When length is missing, not a number, negative, or larger than
 * what remains, the substring runs to the end of the string.
 *
 * Returns "" when start does not address a character of the string.
 * Raises a Lua error if invalid UTF-8 is met while scanning.
 */
int luasandbox_utf8_sub_lua(lua_State * L)
{
	const uint8_t *utf_string;
	size_t raw_len;
	int32_t i = 0, len, idx = 0, target, target_len;
	int32_t target_start, target_end;
	int found = 0;
	UChar32 cur;

	utf_string = (const uint8_t *)luaL_checklstring( L, 1, &raw_len );
	target = luaL_checkinteger( L, 2 );
	// -1 never equals a code point count, i.e. "no length limit"
	target_len = ( lua_type( L, 3 ) == LUA_TNUMBER ) ? lua_tointeger( L, 3 ) : -1;
	len = (int32_t)raw_len;

	// Find the start symbol
	while( i < len ) {
		if( idx == target ) {
			found = 1;
			break;
		}

		U8_NEXT( utf_string, i, len, cur );
		if( cur < 0 ) {
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}
		idx++;
	}

	// If the start index is not inside the string, return an empty string
	if( !found ) {
		lua_pushstring( L, "" );
		return 1;
	}

	target_start = i;
	target_end = len;	// Used unless the length limit is reached first
	idx = 0;

	// Find the end position
	while( i < len ) {
		if( idx == target_len ) {
			target_end = i;
			break;
		}

		U8_NEXT( utf_string, i, len, cur );
		if( cur < 0 ) {
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}
		idx++;
	}

	// Lua copies the bytes, so the slice can be pushed straight from the input
	lua_pushlstring( L, (const char *)utf_string + target_start, target_end - target_start );

	return 1;
}
/* }}} */

// A search needle as preprocessed by luasandbox_utf8_search_prepare().
typedef struct {
	UChar32 *string;	// The needle decoded into code points
	int32_t *table;	// Prefix table for the KMP search; NULL in single-character mode
	int32_t length;	// Needle length in code points
	int32_t raw_length;	// Needle length in UTF-8 bytes
	int singleCharMode;	// Non-zero when the needle is exactly one code point
} utf8_needle_string;

// Outcome of luasandbox_utf8_search(), stored in utf8_search_result.status.
enum {
	UTF8_SEARCH_STATUS_ERROR = -1,	// The haystack contained invalid UTF-8
	UTF8_SEARCH_STATUS_NOTFOUND = 0,
	UTF8_SEARCH_STATUS_FOUND = 1
};

// What luasandbox_utf8_search() reports back to its caller.
typedef struct {
	int32_t status;	// One of the UTF8_SEARCH_STATUS_* values
	int32_t raw_index;	// Position of the match, in bytes
	int32_t cp_index;	// Position of the match, in code points
} utf8_search_result;

static utf8_needle_string* luasandbox_utf8_search_prepare(uint8_t* utf_string, int32_t raw_len)
{
	utf8_needle_string* str;
	int32_t i, idx;
	UChar32 cur;
	UErrorCode errorCode = U_ZERO_ERROR;
	int32_t cnd = 0;

	// Here we use the worst-case allocation
	str = emalloc( sizeof( utf8_needle_string ) );
	memset( str, 0, sizeof( utf8_needle_string ) );
	str->string = emalloc( raw_len * 4 );
	str->raw_length = raw_len;

	// Convert UTF-8 to UTF-32 for search purposes
	for( i = idx = 0; i < raw_len; idx++ ) {
		U8_NEXT( utf_string, i, raw_len, cur );
		if( cur < 0 ) {
			efree( str->string );
			efree( str );
			return NULL;
		}

		str->string[idx] = cur;
	}
	str->length = idx;

	// KMP cannot handle single character search
	// Use special case handler
	str->singleCharMode = str->length == 1;
	if( str->singleCharMode )
		return str;

	// Fill the search prefix table
	str->table = emalloc( str->length * sizeof(int32_t) );
	str->table[0] = -1;	// Yes, UChar32 is a signed type. "U" is for "Unicode", not for "unsigned"
	str->table[1] = 0;
	for( i = 2; i < str->length; i++ ) {
		if( str->string[i - 1] == str->string[cnd] ) {
			cnd++;
			str->table[i] = cnd;
		} else if( cnd > 0 ) {
			cnd = str->table[cnd];
			i--;
		} else {
			str->table[i] = 0;
		}
	}
	
	return str;
}

/** {{{ luasandbox_utf8_search_free
 *
 * Releases a needle and the buffers it owns. The prefix table is only freed
 * when it is set.
 */
void luasandbox_utf8_search_free(utf8_needle_string *needle)
{
	int32_t *prefix_table = needle->table;

	if( prefix_table != NULL ) {
		efree( prefix_table );
	}
	efree( needle->string );
	efree( needle );
}
/* }}} */

// How luasandbox_utf8_search() interprets its offset argument.
enum {
	UTF8_SEARCH_OFFSET_NONE = 0,	// Offset is ignored
	UTF8_SEARCH_OFFSET_RAW = 1,	// Offset is a byte position
	UTF8_SEARCH_OFFSET_CP = 2	// Offset is a code point index
};

/** {{{ luasandbox_utf8_search
 *
 * Finds the first occurrence of a prepared needle in a UTF-8 haystack.
 *
 * With UTF8_SEARCH_OFFSET_RAW, decoding starts at byte position offset.
 * With UTF8_SEARCH_OFFSET_CP, decoding starts at the beginning and the first
 * offset code points are skipped. Any other offset type searches from the
 * start.
 *
 * The result's status is UTF8_SEARCH_STATUS_FOUND, _NOTFOUND or _ERROR (the
 * haystack contained invalid UTF-8). Both indexes are -1 unless a match was
 * found. raw_index is the byte position of the match; cp_index counts code
 * points from the position where decoding started.
 */
utf8_search_result luasandbox_utf8_search(uint8_t *haystack, int32_t haystack_len, int offset_type, int offset, utf8_needle_string* needle) {
	utf8_search_result outcome;
	UChar32 cp;
	int pos;	// Byte position in the haystack
	int cp_seen = 0;	// Code points decoded so far
	int matched = 0;	// Needle code points matched so far (KMP only)

	// Defaults
	outcome.raw_index = -1;
	outcome.cp_index  = -1;

	// Only a raw offset moves the starting byte
	pos = ( offset_type == UTF8_SEARCH_OFFSET_RAW ) ? offset : 0;

	if( needle->singleCharMode ) {
		// Special case: compare every code point with the only needle character
		while( pos < haystack_len ) {
			U8_NEXT( haystack, pos, haystack_len, cp );
			if( cp < 0 ) {
				outcome.status = UTF8_SEARCH_STATUS_ERROR;
				return outcome;
			}

			if( !( offset_type == UTF8_SEARCH_OFFSET_CP && cp_seen < offset )
				&& needle->string[0] == cp
			) {
				outcome.status = UTF8_SEARCH_STATUS_FOUND;
				outcome.cp_index = cp_seen;
				outcome.raw_index = pos - needle->raw_length;
				return outcome;
			}
			cp_seen++;
		}
	} else {
		// General case: Knuth-Morris-Pratt over code points
		while( pos < haystack_len ) {
			U8_NEXT( haystack, pos, haystack_len, cp );
			if( cp < 0 ) {
				outcome.status = UTF8_SEARCH_STATUS_ERROR;
				return outcome;
			}

			if( offset_type != UTF8_SEARCH_OFFSET_CP || cp_seen >= offset ) {
				// On a mismatch, fall back to the longest shorter prefix
				while( matched > 0 && needle->string[matched] != cp ) {
					matched = needle->table[matched];
				}
				if( needle->string[matched] == cp ) {
					matched++;
				}
				if( matched == needle->length ) {
					// pos is already past the last matched character
					outcome.status = UTF8_SEARCH_STATUS_FOUND;
					outcome.cp_index = ( cp_seen + 1 ) - needle->length;
					outcome.raw_index = pos - needle->raw_length;
					return outcome;
				}
			}
			cp_seen++;
		}
	}

	outcome.status = UTF8_SEARCH_STATUS_NOTFOUND;
	return outcome;
}
/* }}} */

/** {{{ luasandbox_utf8_pos_lua
 *
 * Lua function: pos(haystack, needle [, offset]). Returns the code point
 * index of the first occurrence of needle at or after the code point index
 * offset (0 when offset is missing or not a number), or -1 if there is none.
 *
 * Raises a Lua error if the needle is empty or if either string contains
 * invalid UTF-8.
 */
int luasandbox_utf8_pos_lua(lua_State * L)
{
	uint8_t *haystack, *needle_raw;
	size_t haystack_len, needle_len;
	utf8_needle_string *needle;
	int32_t offset;
	utf8_search_result result;

	// The search helpers take non-const pointers but never write through them
	haystack = (uint8_t *)luaL_checklstring( L, 1, &haystack_len );
	needle_raw = (uint8_t *)luaL_checklstring( L, 2, &needle_len );
	offset = ( lua_type( L, 3 ) == LUA_TNUMBER ) ? lua_tointeger( L, 3 ) : 0;

	if( !needle_len ) {
		lua_pushstring( L, "The needle parameter may not be empty" );
		lua_error( L );
	}

	needle = luasandbox_utf8_search_prepare( needle_raw, needle_len );
	if( !needle ) {
		LUASANDBOX_UNICODE_INVALID_FAIL();
	}

	result = luasandbox_utf8_search( haystack, haystack_len, UTF8_SEARCH_OFFSET_CP, offset, needle );
	// Freed before any error is raised, because lua_error() does not return
	luasandbox_utf8_search_free( needle );

	if( result.status == UTF8_SEARCH_STATUS_ERROR ) {
		LUASANDBOX_UNICODE_INVALID_FAIL();
	}

	// Every path returns explicitly: any status other than FOUND yields -1
	lua_pushinteger( L, result.status == UTF8_SEARCH_STATUS_FOUND ? result.cp_index : -1 );
	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_replace_lua
 *
 * Lua function: replace(haystack, needle, replacement [, offset [, limit]]).
 * Replaces non-overlapping occurrences of needle with replacement, scanning
 * from left to right.
 *
 *  - offset: code point index at which the search starts (default: start of
 *    the string);
 *  - limit: maximum number of replacements; missing, non-numeric, zero or
 *    negative means unlimited.
 *
 * Returns the haystack unchanged when nothing matches. Raises a Lua error if
 * the needle is empty or if the needle or the scanned part of the haystack
 * contains invalid UTF-8. The replacement is inserted as-is.
 */
int luasandbox_utf8_replace_lua(lua_State * L)
{
	uint8_t *haystack, *needle_raw;
	const char *replacement;
	char *result;
	size_t haystack_len, needle_len, replacement_len, result_len;
	size_t src_pos = 0, dest_pos = 0;
	utf8_needle_string *needle;
	utf8_search_result cur;
	int32_t i, offset, matches_num, limit;
	int32_t *matches;
	int offset_mode;

	// The search helpers take non-const pointers but never write through them
	haystack = (uint8_t *)luaL_checklstring( L, 1, &haystack_len );
	needle_raw = (uint8_t *)luaL_checklstring( L, 2, &needle_len );
	replacement = luaL_checklstring( L, 3, &replacement_len );
	if( lua_type( L, 4 ) == LUA_TNUMBER ) {
		offset = lua_tointeger( L, 4 );
		offset_mode = UTF8_SEARCH_OFFSET_CP;
	} else {
		offset = 0;
		offset_mode = UTF8_SEARCH_OFFSET_RAW;
	}
	// lua_type(), not lua_tointeger(): the argument's type is what is tested
	limit = ( lua_type( L, 5 ) == LUA_TNUMBER ) ? lua_tointeger( L, 5 ) : -1;

	if( !needle_len ) {
		lua_pushstring( L, "The needle parameter may not be empty" );
		lua_error( L );
	}

	needle = luasandbox_utf8_search_prepare( needle_raw, needle_len );
	if( !needle ) {
		LUASANDBOX_UNICODE_INVALID_FAIL();
	}

	// Worst case: matches are non-overlapping, so at most
	// haystack_len / needle_len of them fit
	matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof( *matches ) );

	// Find all substrings to replace
	matches_num = 0;
	for(;;) {
		if( limit > 0 && matches_num >= limit ) {
			break;
		}

		cur = luasandbox_utf8_search( haystack, haystack_len, offset_mode, offset, needle );
		if( cur.status == UTF8_SEARCH_STATUS_ERROR ) {
			// lua_error() does not return: release everything first
			luasandbox_utf8_search_free( needle );
			efree( matches );
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}
		if( cur.status != UTF8_SEARCH_STATUS_FOUND ) {
			break;
		}

		matches[matches_num++] = cur.raw_index;
		// Continue right after this match; from now on positions are in bytes
		offset = cur.raw_index + needle->raw_length;
		offset_mode = UTF8_SEARCH_OFFSET_RAW;
	}
	luasandbox_utf8_search_free( needle );

	if( !matches_num ) {
		efree( matches );
		lua_pushlstring( L, (const char *)haystack, haystack_len );
		return 1;
	}

	result_len = haystack_len - needle_len * (size_t)matches_num
		+ replacement_len * (size_t)matches_num;
	result = emalloc( result_len ? result_len : 1 );

	// Assemble the result: the text before each match, then the replacement
	for( i = 0; i < matches_num; i++ ) {
		size_t gap_len = (size_t)matches[i] - src_pos;

		memcpy( result + dest_pos, haystack + src_pos, gap_len );
		dest_pos += gap_len;
		memcpy( result + dest_pos, replacement, replacement_len );
		dest_pos += replacement_len;
		src_pos = (size_t)matches[i] + needle_len;
	}
	// ...and finally whatever follows the last match
	memcpy( result + dest_pos, haystack + src_pos, haystack_len - src_pos );
	efree( matches );

	lua_pushlstring( L, result, result_len );
	efree( result );

	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_split_lua
 *
 * Lua function: split(haystack, needle [, limit]). Splits haystack at every
 * non-overlapping occurrence of needle and returns the pieces as an array
 * (indexes starting at 1). Adjacent separators produce empty pieces.
 *
 *  - limit: maximum number of separators to honour; missing, non-numeric,
 *    zero or negative means unlimited. The text after the last honoured
 *    separator is returned as the final piece.
 *
 * Raises a Lua error if the needle is empty or if the needle or the scanned
 * part of the haystack contains invalid UTF-8.
 */
int luasandbox_utf8_split_lua(lua_State * L)
{
	uint8_t *haystack, *needle_raw;
	size_t haystack_len, needle_len;
	size_t piece_start = 0;
	utf8_needle_string *needle;
	utf8_search_result cur;
	int32_t i, offset, matches_num, limit;
	int32_t *matches;

	// The search helpers take non-const pointers but never write through them
	haystack = (uint8_t *)luaL_checklstring( L, 1, &haystack_len );
	needle_raw = (uint8_t *)luaL_checklstring( L, 2, &needle_len );
	// lua_type(), not lua_tointeger(): the argument's type is what is tested
	limit = ( lua_type( L, 3 ) == LUA_TNUMBER ) ? lua_tointeger( L, 3 ) : -1;

	if( !needle_len ) {
		lua_pushstring( L, "The needle parameter may not be empty" );
		lua_error( L );
	}

	needle = luasandbox_utf8_search_prepare( needle_raw, needle_len );
	if( !needle ) {
		LUASANDBOX_UNICODE_INVALID_FAIL();
	}

	// Worst case: matches are non-overlapping, so at most
	// haystack_len / needle_len of them fit
	matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof( *matches ) );

	// Find all separators
	matches_num = 0;
	offset = 0;
	for(;;) {
		if( limit > 0 && matches_num >= limit ) {
			break;
		}

		cur = luasandbox_utf8_search( haystack, haystack_len, UTF8_SEARCH_OFFSET_RAW, offset, needle );
		if( cur.status == UTF8_SEARCH_STATUS_ERROR ) {
			// lua_error() does not return: release everything first
			luasandbox_utf8_search_free( needle );
			efree( matches );
			LUASANDBOX_UNICODE_INVALID_FAIL();
		}
		if( cur.status != UTF8_SEARCH_STATUS_FOUND ) {
			break;
		}

		matches[matches_num++] = cur.raw_index;
		offset = cur.raw_index + needle->raw_length;
	}
	luasandbox_utf8_search_free( needle );

	lua_createtable( L, matches_num + 1, 0 );

	// Emit the piece in front of each separator...
	for( i = 0; i < matches_num; i++ ) {
		lua_pushlstring( L, (const char *)haystack + piece_start, (size_t)matches[i] - piece_start );
		lua_rawseti( L, -2, i + 1 );
		piece_start = (size_t)matches[i] + needle_len;
	}
	// ...and the remainder (the whole haystack when nothing matched)
	lua_pushlstring( L, (const char *)haystack + piece_start, haystack_len - piece_start );
	lua_rawseti( L, -2, matches_num + 1 );
	efree( matches );

	return 1;
}
/* }}} */

/** {{{ luasandbox_utf8_join_lua
 *
 * Lua function: join(pieces [, glue]). Concatenates the elements 1..#pieces
 * of the table, inserting glue between neighbouring elements. Without a
 * string (or number) glue the pieces are joined directly.
 *
 * Returns "" for an empty table. Raises a Lua error if the first argument is
 * not a table, if any element is not a string, or if the Lua stack cannot
 * hold all the pieces. No UTF-8 validation is performed.
 */
int luasandbox_utf8_join_lua(lua_State * L)
{
	const char *glue = NULL;
	char *result;
	size_t glue_len = 0, total_len = 0, offset = 0;
	int pieces_count, first_piece, i;

	luaL_checktype( L, 1, LUA_TTABLE );
	if( lua_isstring( L, 2 ) ) {
		glue = lua_tolstring( L, 2, &glue_len );
	}

	pieces_count = (int)lua_objlen( L, 1 );
	if( pieces_count <= 0 ) {
		lua_pushstring( L, "" );
		return 1;
	}

	// Every piece stays on the stack until the result is built, so make
	// sure the stack can actually grow that far
	if( !lua_checkstack( L, pieces_count + 2 ) ) {
		lua_pushstring( L, "Too many pieces to join" );
		lua_error( L );
	}

	// First pass: fetch the pieces and add up their lengths.
	// Piece number i ends up at absolute stack index first_piece + i - 1.
	first_piece = lua_gettop( L ) + 1;
	for( i = 1; i <= pieces_count; i++ ) {
		lua_pushinteger( L, i );	// Push the index
		lua_gettable( L, 1 );	// Replace it with the value for that index
		luaL_checktype( L, -1, LUA_TSTRING );
		total_len += lua_objlen( L, -1 );
	}
	total_len += glue_len * (size_t)( pieces_count - 1 );

	// Second pass: copy pieces and glue into one buffer
	result = emalloc( total_len ? total_len : 1 );
	for( i = 0; i < pieces_count; i++ ) {
		size_t cur_len;
		const char *cur = lua_tolstring( L, first_piece + i, &cur_len );

		memcpy( result + offset, cur, cur_len );
		offset += cur_len;

		// If this is not the last piece, add some glue
		if( glue_len && i != pieces_count - 1 ) {
			memcpy( result + offset, glue, glue_len );
			offset += glue_len;
		}
	}

	lua_pushlstring( L, result, total_len );
	efree( result );

	return 1;
}
/* }}} */