pyaxml_rs/
json_serde.rs

1//! JSON serialization helpers for the string-pool export/import feature.
2//!
3//! These functions are used by the CLI (`main.rs`) for the `--stringblocks-file`
4//! option.  Keeping them here (instead of in `main.rs`) makes them unit-testable.
5
6use crate::string_pool::StringPool;
7
8// ─────────────────────────────── Export ──────────────────────────────────────
9
10/// Serialize a string pool as a JSON object `{ "0": "…", "1": "…", … }`.
11///
12/// The returned string can be written to a file and later re-imported with
13/// [`import_stringblocks`].
14pub fn export_stringblocks(pool: &StringPool) -> String {
15    let count = pool.strings.len();
16    let mut json = String::from("{\n");
17    for i in 0..count {
18        let s = pool.decode(i as u32).unwrap_or_default();
19        let comma = if i + 1 < count { "," } else { "" };
20        json.push_str(&format!("  \"{}\": \"{}\"{}\n", i, json_escape(&s), comma));
21    }
22    json.push('}');
23    json
24}
25
26/// Maximum number of strings accepted from a stringblocks JSON file.
27const MAX_STRINGBLOCKS_ENTRIES: usize = 200_000;
28
29/// Populate a string pool from a JSON object previously produced by
30/// [`export_stringblocks`].
31pub fn import_stringblocks(json: &str, pool: &mut StringPool) {
32    let mut count = 0usize;
33    for line in json.lines() {
34        let line = line.trim();
35        // Each data line looks like:  "key": "value",
36        if let Some(colon) = line.find(':') {
37            let value_part = line[colon + 1..].trim().trim_end_matches(',').trim();
38            if value_part.starts_with('"') && value_part.ends_with('"') && value_part.len() >= 2 {
39                // MED-6: cap entries to prevent OOM from a malicious stringblocks file.
40                if count >= MAX_STRINGBLOCKS_ENTRIES {
41                    break;
42                }
43                let inner = &value_part[1..value_part.len() - 1];
44                pool.add(&json_unescape(inner));
45                count += 1;
46            }
47        }
48    }
49}
50
51// ──────────────────────────── JSON string helpers ─────────────────────────────
52
/// Produce the escaped form of `s` for use between JSON double quotes.
///
/// Double quotes and backslashes are backslash-escaped; newline, carriage
/// return and tab use their short forms (`\n`, `\r`, `\t`); every other
/// character below U+0020 is written as a lowercase four-digit `\uXXXX`
/// escape.  All remaining characters are copied through unchanged.
pub fn json_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Characters that have a dedicated two-character escape.
        let short_form = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if u32::from(ch) < 0x20 {
            // Remaining control characters must not appear raw inside a JSON string.
            escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
69
/// Unescape the contents of a JSON double-quoted string (inverse of [`json_escape`]).
///
/// `s` is the text *between* the surrounding quotes.  All escapes defined by
/// the JSON grammar are decoded: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`,
/// `\t` and `\uXXXX`.  A `\uXXXX` high surrogate immediately followed by a
/// `\uXXXX` low surrogate is combined into the single character the pair
/// encodes, so files written by ASCII-only JSON encoders import correctly.
///
/// The function never fails; input that is not valid JSON is handled leniently:
///
/// * an unpaired surrogate escape becomes U+FFFD (REPLACEMENT CHARACTER);
/// * `\u` not followed by exactly four hexadecimal digits is kept verbatim;
/// * an unknown escape such as `\q` is kept verbatim (backslash included);
/// * a lone backslash at the very end of the input is dropped.
pub fn json_unescape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some('/') => out.push('/'),
            Some('b') => out.push('\u{0008}'),
            Some('f') => out.push('\u{000c}'),
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some('u') => {
                // Parse on a copy of the cursor so a malformed escape consumes nothing.
                let mut after_first = chars.clone();
                match parse_hex4(&mut after_first) {
                    Some(high @ 0xD800..=0xDBFF) => {
                        // A high surrogate is only meaningful together with a
                        // directly following `\uXXXX` low surrogate.
                        let mut after_pair = after_first.clone();
                        let low = if after_pair.next() == Some('\\')
                            && after_pair.next() == Some('u')
                        {
                            parse_hex4(&mut after_pair)
                                .filter(|unit| (0xDC00..=0xDFFF).contains(unit))
                        } else {
                            None
                        };
                        match low {
                            Some(low) => {
                                let code_point =
                                    0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
                                out.push(char::from_u32(code_point).unwrap_or('\u{FFFD}'));
                                chars = after_pair;
                            }
                            None => {
                                out.push('\u{FFFD}');
                                chars = after_first;
                            }
                        }
                    }
                    Some(unit) => {
                        // `from_u32` only rejects a stray low surrogate here.
                        out.push(char::from_u32(unit).unwrap_or('\u{FFFD}'));
                        chars = after_first;
                    }
                    // Not four hex digits: keep the text as written.
                    None => out.push_str("\\u"),
                }
            }
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => break,
        }
    }
    out
}

/// Read exactly four hexadecimal digits from `chars` and return their value.
///
/// Returns `None` when fewer than four characters remain or any of them is
/// not a hex digit; in that case `chars` may have been partially advanced, so
/// callers that need to recover should pass a clone.
fn parse_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
    let mut value = 0u32;
    for _ in 0..4 {
        value = value * 16 + chars.next()?.to_digit(16)?;
    }
    Some(value)
}
102
103// ─────────────────────────────── Unit tests ──────────────────────────────────
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without special characters is emitted unchanged.
    #[test]
    fn test_json_escape_plain() {
        let escaped = json_escape("hello");
        assert_eq!(escaped, "hello");
    }

    /// Quote, backslash and newline each receive their short escape.
    #[test]
    fn test_json_escape_special() {
        let escaped = json_escape("a\"b\\c\nd");
        assert_eq!(escaped, r#"a\"b\\c\nd"#);
    }

    /// Control characters without a short form use `\uXXXX`.
    #[test]
    fn test_json_escape_control() {
        let escaped = json_escape("\x01");
        assert_eq!(escaped, "\\u0001");
    }

    /// Text without escapes is returned unchanged.
    #[test]
    fn test_json_unescape_plain() {
        let plain = json_unescape("hello");
        assert_eq!(plain, "hello");
    }

    /// Short escapes are turned back into the characters they stand for.
    #[test]
    fn test_json_unescape_special() {
        let plain = json_unescape(r#"a\"b\\c\nd"#);
        assert_eq!(plain, "a\"b\\c\nd");
    }

    /// A four-digit `\u` escape decodes to the matching character.
    #[test]
    fn test_json_unescape_unicode() {
        let plain = json_unescape("\\u0041");
        assert_eq!(plain, "A");
    }

    /// Escaping followed by unescaping yields the original text.
    #[test]
    fn test_roundtrip_escape_unescape() {
        let original = "hello \"world\"\nnewline\ttab\\backslash";
        let escaped = json_escape(original);
        assert_eq!(json_unescape(&escaped), original);
    }

    /// Exporting a pool and importing the result reproduces every string in order.
    #[test]
    fn test_export_import_roundtrip() {
        let inputs = ["hello", "world", "with \"quotes\""];

        let mut source = StringPool::new(false);
        for &text in &inputs {
            source.add(text);
        }

        let json = export_stringblocks(&source);

        let mut restored = StringPool::new(false);
        import_stringblocks(&json, &mut restored);

        assert_eq!(restored.strings.len(), inputs.len());
        for (idx, &expected) in inputs.iter().enumerate() {
            assert_eq!(restored.decode(idx as u32).unwrap(), expected);
        }
    }
}