ascii() and unascii() to help with broken utf-8 or binary data

This commit is contained in:
whosit 2025-04-10 09:36:42 +03:00
parent bb8c4cd070
commit a36ef944a3
3 changed files with 89 additions and 1 deletions

View File

@ -262,6 +262,27 @@ You can still resume it by typing `/eval_resume <text>` - the argument
text will be passed instead of the contents of the text area of the formspec.
### More utilities
#### Binary data
##### `ascii()` and `unascii()`
Have you ever encountered "<invalid UTF-8 string>" somewhere with no
easy way to display it?
`ascii()` will allow you to inspect these strings (which may be just
some broken unicode or arbitrary binary data).
`ascii(str)` will escape all control characters, backslashes, double
quotes and non-ASCII bytes (128-255) so you can output it normally. It
works well with broken strings, because all other printable ASCII
characters will be kept unchanged.
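The reverse of this is `unascii(str)`, which will take the escaped
sequence and turn it back into raw bytes. Besides the single-character
escapes (such as `\n` or `\\`), the only numeric form it supports is the
two-digit `\xFF` hex escape (decimal `\255`-style escapes are not
decoded), but that is enough to work together with `ascii()`.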
### Resetting your personal environment
If your environment gets messed up, or you just want to get rid of the

View File

@ -222,7 +222,9 @@ fsinput(label, text) -- show a form that will return the text you entered
core.show_formspec(player_name, "cmd_eval:input", table.concat(fs, ""))
local result = coroutine.yield(WAIT_FOR_FORMSPEC)
return result
end
end,
ascii = util.ascii,
unascii = util.unascii,
},
{
__index = function(_self, key)

View File

@ -1,3 +1,5 @@
local fmt = string.format
-- get actual eye_pos of the player (including eye_offset)
local function player_get_eye_pos(player)
@ -69,11 +71,74 @@ local function get_pointed_position(player, range, point_to_objects, point_to_li
return pointed_pos, pointed_thing
end
-- Raw character -> escaped text, for the characters that have a short
-- backslash escape. The values are written as long-bracket strings so they
-- read exactly as they will appear in the escaped output.
-- Characters that need escaping but are not listed here are rendered as a
-- hex escape by escape_nonascii().
local escapes = {
    ["\\"] = [[\\]],
    ['"'] = [[\"]],
    ["\a"] = [[\a]],
    ["\b"] = [[\b]],
    ["\f"] = [[\f]],
    ["\n"] = [[\n]],
    ["\r"] = [[\r]],
    ["\t"] = [[\t]],
    ["\v"] = [[\v]],
}
-- Escaped text -> raw character: the inverse of the `escapes` table.
-- Keys are complete two-character escape sequences (a backslash followed by
-- one character); unascii() passes this table to gsub as the replacement.
local unescapes = {
    ["\\\\"] = "\\",
    ["\\\""] = "\"",
    ["\\a"] = "\a",
    ["\\b"] = "\b",
    ["\\f"] = "\f",
    ["\\n"] = "\n",
    ["\\r"] = "\r",
    ["\\t"] = "\t",
    ["\\v"] = "\v",
}
-- gsub callback used by ascii(): turn one matched character into its
-- escaped text.
-- c: the character to escape.
-- Returns the short escape from `escapes` when there is one, otherwise a
-- two-digit lowercase hex escape of the character's byte value.
local function escape_nonascii(c)
    local short = escapes[c]
    if short then
        return short
    end
    local code = string.byte(c)
    return string.format('\\x%02x', code)
end
-- Escape a string so that it contains only printable ASCII.
-- Control characters, backslashes, double quotes and bytes 128-255 are each
-- replaced via escape_nonascii(); everything else is left untouched.
-- str: the string to escape (may hold arbitrary bytes).
-- Returns the escaped string.
local function ascii(str)
    -- gsub also returns the substitution count; binding to a single local
    -- drops it so that callers receive exactly one value.
    local escaped = str:gsub('[%c\r\n\\"\128-\255]', escape_nonascii)
    return escaped
end
-- gsub callback used by unascii() for matches of '(\\+)x(%x%x)'.
-- backslashes: the run of backslashes found directly before the 'x'.
-- hex: the two hex digits following the 'x'.
-- Returns the replacement text for the whole match.
local function unescape_hex(backslashes, hex)
    if #backslashes % 2 == 0 then
        -- An even run means the backslashes escape each other and the
        -- "xNN" is plain text. Hand everything back untouched; the
        -- backslash pairs are collapsed by the following pass.
        return backslashes .. 'x' .. hex
    end

    -- Odd run: the last backslash belongs to the hex escape, so it is
    -- dropped. The (even) remainder is left for the following pass.
    local remaining = backslashes:sub(1, -2)
    local decoded = string.char(tonumber(hex, 16))
    if decoded == '\\' then
        -- A decoded backslash is emitted in escaped form so that the
        -- following pass turns it into one literal backslash instead of
        -- pairing it with the next character ("\x5cb" --> "\\b").
        decoded = '\\\\'
    end
    return remaining .. decoded
end
-- Reverse of ascii(): turn an escaped string back into raw bytes.
-- str: text containing backslash escapes.
-- Returns the decoded string.
-- Note: decimal escapes (backslash followed by three digits) are not decoded.
local function unascii(str)
    -- Pass 1: \xNN hex escapes. Backslash runs that do not belong to a hex
    -- escape are left in place for pass 2.
    local hex_decoded = str:gsub('(\\+)x(%x%x)', unescape_hex)
    -- Pass 2: two-character escapes, looked up in `unescapes` by the whole
    -- matched sequence.
    local decoded = hex_decoded:gsub('\\[abfnrtv\\"]', unescapes)
    return decoded
end
-- Public interface of this module: every helper that other files may use is
-- attached to the table returned below.
local util = {}

-- Player view / raycasting helpers.
util.player_get_eye_pos = player_get_eye_pos
util.raycast_crosshair = raycast_crosshair
util.raycast_crosshair_to_object = raycast_crosshair_to_object
util.get_pointed_position = get_pointed_position

-- String escaping helpers for binary data or broken UTF-8.
util.ascii = ascii
util.unascii = unascii

return util