require( 'ustring_data' )
do
-- Metatable attached to every ustring; a value is recognised as a ustring
-- purely by carrying this exact table as its metatable.
local ustring_metatable = {}
-- Report whether the value is a ustring
function isustring( val )
    local mt = getmetatable( val )
    return mt == ustring_metatable
end
-- Construct a ustring from a usual string.
--
-- Accepted input:
--   * a ustring            -> returned unchanged
--   * a string             -> split into characters
--   * a number             -> converted with tostring(), then split
-- Anything else raises "Only strings may be converted to ustrings".
--
-- The result is a table holding one UTF-8 encoded character (as a plain
-- string) per index, tagged with the ustring metatable.
--
-- Raises "Invalid UTF-8 sequence supplied" when a lead byte is 0xC0, 0xC1 or
-- above 0xF4, when a continuation byte appears in lead position, when a
-- sequence is cut short by the end of the string, or when an expected trail
-- byte is not in 0x80-0xBF. Second-byte range restrictions (overlong forms,
-- surrogates) are NOT checked here.
do
    -- The helpers capture nothing, so they are built once here instead of on
    -- every call to u(); the enclosing do-block keeps them private.

    -- Determine the amount of trail bytes by the first one
    local function countTrail( val )
        if val < 0x80 then
            return 0
        elseif val < 0xC0 then
            -- A continuation byte cannot start a character
            error( "Invalid UTF-8 sequence supplied" )
        elseif val < 0xE0 then
            return 1
        elseif val < 0xF0 then
            return 2
        else
            return 3
        end
    end

    -- Determine whether the given byte is a trail of a character
    local function isTrail( val )
        return val >= 0x80 and val < 0xC0
    end

    -- Throws error if the illegal byte is found
    local function checkByte( val )
        if val == 0xC0 or val == 0xC1 or val > 0xF4 then
            error( "Invalid UTF-8 sequence supplied" )
        end
    end

    u = function( str )
        -- Quick return if the value is already a ustring
        if isustring( str ) then
            return str
        end
        -- Autoconvert numbers to match Lua standard string behavior
        if type(str) == "number" then
            str = tostring(str)
        end
        -- Avoid possible traps caused by supplying tables to u()
        if type(str) ~= "string" then
            error( "Only strings may be converted to ustrings" )
        end

        local result = {}
        local len = #str
        local pos, upos = 1, 1
        -- Main conversion loop: consume one character per iteration
        while pos <= len do
            local byte = str:byte( pos )
            checkByte( byte )
            -- Byte position of the last byte of the current character
            local last = pos + countTrail( byte )
            if last > len then
                error( "Invalid UTF-8 sequence supplied" )
            end
            for subpos = pos + 1, last do
                if not isTrail( str:byte( subpos ) ) then
                    error( "Invalid UTF-8 sequence supplied" )
                end
            end
            result[upos] = str:sub( pos, last )
            pos = last + 1
            upos = upos + 1
        end
        setmetatable( result, ustring_metatable )
        return result
    end
end
-- Build a new ustring by translating every character of s through casetable.
-- s is first converted with u(); characters that have no entry in casetable
-- (lookup yields nil) are copied through unchanged.
local function ustring_changecase( s, casetable )
    local source = u(s)
    local mapped = {}
    for index = 1, #source do
        local original = rawget( source, index )
        local replacement = casetable[original]
        -- Only a missing entry falls back to the original character
        if replacement == nil then
            replacement = original
        end
        mapped[index] = replacement
    end
    return setmetatable( mapped, ustring_metatable )
end
-- Table of ustring functions. Every function converts its argument with u()
-- first, so ustrings, plain strings and numbers are all accepted.
ustring = {
    -- Return the number of characters in s.
    -- The argument goes through u() like in uc/lc, so a plain string is
    -- measured in characters rather than in bytes.
    len = function( s )
        return #u(s)
    end,
    -- Return a new ustring with characters mapped through
    -- ustring_uppercase_map (presumably supplied by the 'ustring_data'
    -- module required by this file -- confirm).
    uc = function( s )
        return ustring_changecase( s, ustring_uppercase_map )
    end,
    -- Return a new ustring with characters mapped through
    -- ustring_lowercase_map (presumably supplied by the 'ustring_data'
    -- module required by this file -- confirm).
    lc = function( s )
        return ustring_changecase( s, ustring_lowercase_map )
    end,
}
-- Concatenation: both operands are converted with u(), then their characters
-- are copied, left operand first, into a fresh ustring.
ustring_metatable.__concat = function( a, b )
    local left = u(a)
    local right = u(b)
    local joined = {}
    local count = 0
    for i = 1, #left do
        count = count + 1
        joined[count] = left[i]
    end
    for i = 1, #right do
        count = count + 1
        joined[count] = right[i]
    end
    return setmetatable( joined, ustring_metatable )
end
-- Index handler. Non-numeric keys are looked up in the ustring function
-- table. Numeric keys address characters: positive ones count from the
-- start, negative ones from the end; zero and out-of-range values raise.
ustring_metatable.__index = function( s, idx )
    if type(idx) ~= "number" then
        return ustring[idx]
    end
    if idx == 0 then
        error( "Invalid ustring index supplied (zero)" )
    end
    local length = #s
    -- idx is non-zero here, so one comparison per side covers both signs
    if idx > length or idx < -length then
        error( "Invalid ustring index supplied (out of bounds)" )
    end
    if idx > 0 then
        return rawget( s, idx )
    end
    -- Negative index: -1 is the last character
    return rawget( s, length + idx + 1 )
end
-- Equality: operands are equal when they have the same length and hold the
-- same character at every position.
ustring_metatable.__eq = function( a, b )
    local length = #a
    if length ~= #b then
        return false
    end
    for i = 1, length do
        local left, right = rawget( a, i ), rawget( b, i )
        if left ~= right then
            return false
        end
    end
    return true
end
-- Convert a ustring back to a plain Lua string by joining its characters in
-- order. An empty ustring yields "".
ustring_metatable["__tostring"] = function( s )
    -- table.concat joins all characters in one pass; appending with `..`
    -- inside a loop would copy the growing string once per character.
    return table.concat( s )
end
end