From a36ef944a3e50a8fef0353fcdfeb63fe597c8ba2 Mon Sep 17 00:00:00 2001 From: whosit Date: Thu, 10 Apr 2025 09:36:42 +0300 Subject: [PATCH] ascii() and unascii() to help with broken utf-8 or binary data --- README.md | 21 ++++++++++++++++++ init.lua | 4 +++- util.lua | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ce23f2..4cb7f8d 100644 --- a/README.md +++ b/README.md @@ -262,6 +262,27 @@ You can still resume it by typing `/eval_resume ` - the argument text will be passed instead of the context of text area of the formspec. +### More utilities + +#### Binary data + +##### `ascii()` and `unascii()` + +Have you ever encountered "" somewhere with no +easy way to display it? + +`ascii()` will allow you to inspect these strings (which may be just +some broken unicode or arbitrary binary data). + +`ascii(str)` will escape all control sequences and unprintable +characters so you can output it normally. It works well with broken +strings, because printable ASCII characters will be kept unchanged. + +The reverse of this is `unascii(str)`, which will take the escaped +sequence and turn it back into raw bytes. This supports only `\xFF` +hex escapes, but it works well enough in pair with `ascii()`. 
+ + ### Resetting your personal environment If your environment gets messed up, or you just want to get rid of the diff --git a/init.lua b/init.lua index 17eb626..3fc3294 100644 --- a/init.lua +++ b/init.lua @@ -222,7 +222,9 @@ fsinput(label, text) -- show a form that will return the text you entered core.show_formspec(player_name, "cmd_eval:input", table.concat(fs, "")) local result = coroutine.yield(WAIT_FOR_FORMSPEC) return result - end + end, + ascii = util.ascii, + unascii = util.unascii, }, { __index = function(_self, key) diff --git a/util.lua b/util.lua index 14090cf..6944106 100644 --- a/util.lua +++ b/util.lua @@ -1,3 +1,5 @@ +local fmt = string.format + -- get actual eye_pos of the player (including eye_offset) local function player_get_eye_pos(player) @@ -69,11 +71,74 @@ local function get_pointed_position(player, range, point_to_objects, point_to_li return pointed_pos, pointed_thing end +local escapes = { + ['\a'] = '\\a', + ['\b'] = '\\b', + ['\f'] = '\\f', + ['\n'] = '\\n', + ['\r'] = '\\r', + ['\t'] = '\\t', + ['\v'] = '\\v', + ['\\'] = '\\\\', + ['"'] = '\\"', +} + +local unescapes = { + ['\\a'] = '\a', + ['\\b'] = '\b', + ['\\f'] = '\f', + ['\\n'] = '\n', + ['\\r'] = '\r', + ['\\t'] = '\t', + ['\\v'] = '\v', + ['\\\\'] = '\\', + ['\\"'] = '"', +} + +local function escape_nonascii(c) + return escapes[c] or string.format('\\x%02x', string.byte(c)) +end + + +local function ascii(str) + str = str:gsub('[%c\r\n\\"\128-\255]', escape_nonascii) + return str +end + + +local function unescape_hex(backslashes, hex) + if #backslashes % 2 == 0 then + -- Even number of backslashes, they escape each other, not the digits. + -- Return them so that next step can treat them. + return backslashes .. 'x' .. hex + else + -- Remove the odd backslash, which escapes the hex code. + -- The rest will be processed by the next step. 
+ backslashes = backslashes:sub(1, -2) + end + local c = string.char(tonumber(hex, 16)) + if c == '\\' then + -- will be unescaped by the next step ("\x5cb" --> "\\b") + c = '\\\\' + end + return backslashes .. c +end + + +local function unascii(str) + --str = str:gsub('(\\+)(%d%d%d)', unescape_digits) + str = str:gsub('(\\+)x(%x%x)', unescape_hex) + str = str:gsub('\\[abfnrtv\\"]', unescapes) + return str +end + local util = { player_get_eye_pos = player_get_eye_pos, raycast_crosshair = raycast_crosshair, raycast_crosshair_to_object = raycast_crosshair_to_object, get_pointed_position = get_pointed_position, + ascii = ascii, + unascii = unascii, } return util