pyaxml_rs/
arsc.rs

1//! Android Resource Table (ARSC) parser.
2//!
3//! Implements parsing of `resources.arsc` files (binary format type 0x0002)
4//! and produces the same `list_packages()` XML output as the Python `ARSC.list_packages()`.
5
6use std::collections::BTreeMap;
7use std::fmt::Write as FmtWrite;
8
9use crate::error::AxmlError;
10use crate::proto;
11use crate::string_pool::{StringBlocks, StringPool};
12use crate::typed_value;
13
14// ─────────────────────────── Chunk type constants ────────────────────────────
15
/// Chunk type of the top-level resource table; `Arsc::pack()` writes it as the first header field.
pub const RES_TABLE_TYPE: u16 = 0x0002;
/// Chunk type written at the start of every package chunk header by `pack_with_size()`.
pub(crate) const RES_TABLE_PACKAGE_TYPE: u16 = 0x0200;
/// Chunk type that `parse_res_chunks` decodes into `ArscResChunk::Spec`.
pub(crate) const RES_TABLE_TYPE_SPEC_TYPE: u16 = 0x0202;
/// Chunk type that `parse_res_chunks` decodes into `ArscResChunk::Type`.
pub(crate) const RES_TABLE_TYPE_TYPE: u16 = 0x0201;

/// Bit in `ArscEntry::flags`; presumably marks a complex (non-`Res_value`) entry.
/// TODO confirm against the entry parser, which is outside this part of the file.
const FLAG_COMPLEX: u16 = 0x0001;
22
23// ─────────────────────────────── Public types ────────────────────────────────
24
/// Parsed ARSC (resources.arsc) file.
///
/// Mirrors the Python `ARSC` class: `proto` holds the prost-generated protobuf
/// message, `stringblocks` wraps the global string pool (proto + binary).
pub struct Arsc {
    /// Prost-generated protobuf representation; populated by `update_proto()`.
    pub(crate) proto: proto::Arsc,
    /// Global string pool; wraps both the binary representation and the proto field.
    pub(crate) stringblocks: StringBlocks,
    /// Package chunks, in the order they were parsed from the file.
    pub(crate) packages: Vec<ArscPackage>,
    /// Stored ARSC total file size (bytes 4..8 of the global header).
    /// `compute()` and `add_resource()` recalculate it.
    /// NOTE(review): `Arsc::pack()` derives the size it writes from the packed
    /// content and does not read this field.
    pub(crate) total_size: u32,
    /// Stored package count (bytes 8..12 of the global header).
    /// `pack()` writes this value verbatim.  `compute()` sets it to `packages.len()`.
    pub(crate) package_count: u32,
}
41
/// One package chunk of the resource table: header fields, the two
/// package-level string pools and the type-spec / type chunks that follow them.
pub(crate) struct ArscPackage {
    /// Prost-generated protobuf representation for this package.
    pub(crate) proto: proto::AxmlResTablePackage,
    /// Package ID (header bytes 8..12); its low byte forms the `PP` part of
    /// the resource IDs returned by `add_resource`.
    pub(crate) id: u32,
    /// Package name, decoded from the 256-byte UTF-16LE header field.
    pub(crate) name: String,
    /// Pool of type names ("anim", "layout", "string", …)
    pub(crate) type_strings: StringPool,
    /// Pool of resource key names (attribute names)
    pub(crate) key_strings: StringPool,
    /// Type-spec and type chunks, in the order they were parsed.
    pub(crate) chunks: Vec<ArscResChunk>,
    /// Stored package chunk size (bytes 4..8 of the package chunk header).
    /// `ArscPackage::pack()` writes this value verbatim; `Arsc::pack()` instead
    /// passes a freshly computed size to `pack_with_size()`.
    /// `compute()` recalculates it.
    pub(crate) chunk_size: u32,
    /// lastPublicType field from package header (preserved for round-trip)
    pub(crate) last_public_type: u32,
    /// lastPublicKey field from package header (preserved for round-trip)
    pub(crate) last_public_key: u32,
}
60
/// A resource chunk found inside a package, as produced by `parse_res_chunks`.
pub(crate) enum ArscResChunk {
    /// Chunk whose type field is `RES_TABLE_TYPE_SPEC_TYPE` (0x0202).
    Spec(ArscTypeSpec),
    /// Chunk whose type field is `RES_TABLE_TYPE_TYPE` (0x0201).
    Type(ArscTypeType),
}
65
/// Body of a type-spec chunk, as decoded by `parse_type_spec`.
pub(crate) struct ArscTypeSpec {
    /// Type ID (body byte 0); `add_resource` compares it with the 1-based type ID.
    pub(crate) id: u8,
    /// Body byte 1, kept verbatim.
    pub(crate) res0: u8,
    /// Body bytes 2..4 (little-endian), kept verbatim.
    pub(crate) res1: u16,
    /// One `u32` per entry, read from body offset 8 onwards.
    /// Presumably per-entry configuration-change flags — TODO confirm.
    pub(crate) entries: Vec<u32>,
}
72
/// Body of a type chunk: the entry table of one resource type for one
/// configuration (locale), as decoded by `parse_type_type`.
pub(crate) struct ArscTypeType {
    /// 1-based type ID (used as index into type_strings: `id - 1`)
    pub(crate) id: u8,
    /// flags field from ResTable_type header (preserved verbatim)
    pub(crate) flags: u8,
    /// reserved field from ResTable_type header (preserved verbatim)
    pub(crate) reserved: u16,
    /// ISO 639-1 language code extracted from ResTable_config, empty string = default locale
    pub(crate) language: String,
    /// ISO 3166-1 alpha-2 region code extracted from ResTable_config, empty string = no region
    pub(crate) region: String,
    /// Raw ResTable_config bytes (preserved for exact round-trip; first 4 bytes = config size).
    pub(crate) config_raw: Vec<u8>,
    /// One entry per slot; `None` = absent (0xFFFFFFFF offset in the table).
    /// The slot index becomes the low 16 bits of the resource ID.
    pub(crate) tables: Vec<Option<ArscEntry>>,
}
89
/// One resource entry stored in a slot of `ArscTypeType::tables`.
pub(crate) struct ArscEntry {
    /// Size of this entry (from ResTable_entry.size field)
    pub(crate) size: u16,
    /// Flags from the original entry (needed to preserve FLAG_COMPLEX and others)
    pub(crate) flags: u16,
    /// Index into the package's key_strings pool
    pub(crate) key_index: u32,
    /// For simple (non-complex) entries: the typed value
    pub(crate) value: Option<ArscValue>,
    /// Raw entry data (everything after the 8-byte header: size + flags + key_index).
    /// This preserves complex entries and other data we don't fully parse.
    /// For entries created by `add_resource` it holds the 8 encoded `Res_value` bytes.
    pub(crate) raw_tail: Vec<u8>,
}
103
/// Android `Res_value`: the actual resource data cell.
pub(crate) struct ArscValue {
    /// `Res_value.size`; `add_resource` writes 8 here.
    pub(crate) size: u16,
    /// `Res_value.res0`; `add_resource` writes 0 here.
    pub(crate) res0: u8,
    /// Value type tag; `list_packages` compares it with `typed_value::TYPE_STRING`.
    pub(crate) data_type: u8,
    /// Raw 32-bit payload; for `TYPE_STRING` values it is an index into the
    /// global string pool (see `list_packages`).
    pub(crate) data: u32,
}
111
112// ─────────────────────────── Arsc impl ───────────────────────────────────────
113
114impl Arsc {
115    /// Parse a binary `resources.arsc` file.
116    pub fn from_axml(data: &[u8]) -> Result<Self, AxmlError> {
117        if data.len() < 12 {
118            return Err(AxmlError::UnexpectedEof);
119        }
120
121        // Global ARSC header: type(2) + header_size(2) + total_size(4) + package_count(4)
122        let header_size = u16::from_le_bytes([data[2], data[3]]) as usize;
123        let total_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
124        let package_count = u32::from_le_bytes([data[8], data[9], data[10], data[11]]);
125
126        let mut pos = header_size.max(12);
127
128        // Global string pool
129        let (string_pool, consumed) = StringPool::parse(&data[pos..])?;
130        pos += consumed;
131        let stringblocks =
132            StringBlocks::from_pool_and_proto(string_pool, proto::StringBlocks::default());
133
134        // Packages, SEC-01: cap allocation to prevent OOM from crafted package_count.
135        // Each package header is at minimum 288 bytes; use that as a lower bound.
136        let safe_pkg_cap = (package_count as usize).min(data.len() / 288).max(1);
137        let mut packages = Vec::with_capacity(safe_pkg_cap);
138        for _ in 0..package_count {
139            if pos + 8 > data.len() {
140                break;
141            }
142            let (pkg, pkg_consumed) = ArscPackage::parse(&data[pos..])?;
143            pos += pkg_consumed;
144            packages.push(pkg);
145        }
146
147        let mut arsc = Arsc {
148            proto: proto::Arsc::default(),
149            stringblocks,
150            packages,
151            total_size,
152            package_count,
153        };
154        arsc.update_proto();
155        Ok(arsc)
156    }
157
158    /// Return a reference to the cached prost-generated protobuf message.
159    /// Call `update_proto()` first if the internal state has been modified.
160    pub fn get_proto(&self) -> &proto::Arsc {
161        &self.proto
162    }
163
164    /// Serialize `self.proto` to native protobuf text format using prost-reflect.
165    pub fn to_proto_text(&self) -> String {
166        use prost_reflect::ReflectMessage;
167        self.proto.transcode_to_dynamic().to_text_format()
168    }
169
170    /// Serialize to indented proto text format (one field per line).
171    pub fn to_proto_text_pretty(&self) -> String {
172        use prost_reflect::{text_format::FormatOptions, ReflectMessage};
173        self.proto
174            .transcode_to_dynamic()
175            .to_text_format_with_options(&FormatOptions::default().pretty(true))
176    }
177
178    /// Write the binary proto representation of this ARSC file to `path`.
179    pub fn export_proto_file(&self, path: &str) -> Result<(), crate::error::AxmlError> {
180        std::fs::write(path, self.to_proto_bytes())?;
181        Ok(())
182    }
183
184    /// Load an ARSC from a binary proto file written by [`export_proto_file`].
185    pub fn from_proto_file(path: &str) -> Result<Self, crate::error::AxmlError> {
186        let data = std::fs::read(path)?;
187        Self::from_proto_bytes(&data)
188    }
189
190    /// Write the binary proto representation of package `idx` to `path`.
191    pub fn export_pkg_proto_file(
192        &self,
193        idx: usize,
194        path: &str,
195    ) -> Result<(), crate::error::AxmlError> {
196        use prost::Message as _;
197        let pkg = self.packages.get(idx).ok_or_else(|| {
198            crate::error::AxmlError::SliceError(format!(
199                "package index {} out of range (count: {})",
200                idx,
201                self.packages.len()
202            ))
203        })?;
204        std::fs::write(path, pkg.proto.encode_to_vec())?;
205        Ok(())
206    }
207
208    /// Replace package `idx` from a binary proto file written by [`export_pkg_proto_file`].
209    pub fn import_pkg_proto_file(
210        &mut self,
211        idx: usize,
212        path: &str,
213    ) -> Result<(), crate::error::AxmlError> {
214        use prost::Message as _;
215        let data = std::fs::read(path)?;
216        let pkg_proto = crate::proto::AxmlResTablePackage::decode(data.as_slice())?;
217        let count = self.packages.len();
218        let pkg = self.packages.get_mut(idx).ok_or_else(|| {
219            crate::error::AxmlError::SliceError(format!(
220                "package index {} out of range (count: {})",
221                idx, count
222            ))
223        })?;
224        *pkg = crate::proto_conv::proto_to_arsc_package(pkg_proto);
225        self.update_proto();
226        Ok(())
227    }
228
229    /// Produce an XML listing of resource entries grouped by locale.
230    /// Each group is wrapped in `<resources lang="…">`.
231    ///
232    /// If `language_filter` is `Some`, only the matching locale section is emitted.
233    /// Matching is case-insensitive ASCII. Use `"default"` to select entries with no
234    /// locale qualifier, or a language tag such as `"en"` or `"fr-FR"`.
235    pub fn list_packages(&self, language_filter: Option<&str>) -> String {
236        // Group entry lines by locale tag into a BTreeMap for sorted output.
237        let mut groups: BTreeMap<String, String> = BTreeMap::new();
238
239        for package in &self.packages {
240            for chunk in &package.chunks {
241                let ArscResChunk::Type(typetype) = chunk else {
242                    continue;
243                };
244                let tag = locale_tag(&typetype.language, &typetype.region);
245
246                // Filter by locale if requested; ASCII locale tags → case-insensitive, zero-alloc.
247                if let Some(f) = language_filter {
248                    if !tag.eq_ignore_ascii_case(f) {
249                        continue;
250                    }
251                }
252
253                let type_name = package
254                    .type_strings
255                    .decode(typetype.id as u32 - 1)
256                    .unwrap_or_default();
257
258                let buf = groups.entry(tag).or_default();
259
260                for (entry_idx, entry_opt) in typetype.tables.iter().enumerate() {
261                    let Some(entry) = entry_opt else { continue };
262
263                    let key_name = package
264                        .key_strings
265                        .decode(entry.key_index)
266                        .unwrap_or_default();
267
268                    let type_id = typetype.id as u32;
269                    let id = 0x7F00_0000u32
270                        | ((type_id & 0xFF) << 16)
271                        | (type_id & 0xFF00)
272                        | (entry_idx as u32);
273
274                    let (data_str, data_size) = entry
275                        .value
276                        .as_ref()
277                        .map(|v| {
278                            let s = if v.data_type == typed_value::TYPE_STRING {
279                                self.stringblocks.inner.decode(v.data).unwrap_or_default()
280                            } else {
281                                format!("{:#x}", v.data)
282                            };
283                            (s, v.size)
284                        })
285                        .unwrap_or_else(|| ("0x0".to_string(), 0));
286
287                    // write! directly into the group buffer, no intermediate String allocation.
288                    // SEC-03: escape all string values before embedding in XML attributes.
289                    let _ = writeln!(
290                        buf,
291                        "  <public type=\"{}\" name=\"{}\" id=\"{:#010x}\" data=\"{}\" data_size={}/>",
292                        xml_attr_escape(&type_name),
293                        xml_attr_escape(&key_name),
294                        id,
295                        xml_attr_escape(&data_str),
296                        data_size
297                    );
298                }
299            }
300        }
301
302        // Wrap all locale groups in a single root so the output is valid XML.
303        let mut out = String::from("<packages>\n");
304        for (lang, entries) in &groups {
305            let _ = write!(
306                out,
307                "<resources lang=\"{}\">\n{}</resources>\n",
308                lang, entries
309            );
310        }
311        out.push_str("</packages>");
312        out
313    }
314
315    /// Force recalculation of string-pool chunk sizes on the next `pack()`.
316    ///
317    /// When `recursive` is `true` (the default), all string pools are marked
318    /// dirty, the global pool plus every package's type and key pools.
319    /// When `false`, only the global string pool is recomputed; package-level
320    /// pools keep their current raw bytes.
321    /// Recalculate all chunk-size header fields and store them in the struct.
322    ///
323    /// When `recursive` is `true` (the default), all string pools are marked
324    /// dirty and all package `chunk_size` fields are updated.
325    /// When `false`, only the global string pool and the top-level size fields
326    /// are recomputed; package-level string pools keep their current raw bytes.
327    ///
328    /// After `compute()`, `pack()` produces a fully correct binary.
329    pub fn compute(&mut self, recursive: bool) {
330        // Always recalculate package chunk sizes from their actual content.
331        // When recursive is false, only the top-level size fields are updated.
332        // String pools that were mutated are already dirty (StringPool::add sets
333        // dirty=true) and will be re-encoded on pack().
334        if recursive {
335            for pkg in &mut self.packages {
336                pkg.chunk_size = pkg.actual_chunk_size();
337            }
338        } else {
339            // Even in non-recursive mode, recalculate package sizes so
340            // total_size is consistent with what pack() will actually write.
341            for pkg in &mut self.packages {
342                pkg.chunk_size = pkg.actual_chunk_size();
343            }
344        }
345        let global_sp = self.stringblocks.inner.pack();
346        let pkg_total: usize = self.packages.iter().map(|p| p.chunk_size as usize).sum();
347        self.total_size = (12 + global_sp.len() + pkg_total) as u32;
348        self.package_count = self.packages.len() as u32;
349    }
350
351    /// Returns the global string pool pack size (for debugging).
352    pub fn global_stringpool_size(&self) -> usize {
353        self.stringblocks.inner.pack().len()
354    }
355
356    /// Returns whether the global string pool is dirty.
357    pub fn global_stringpool_is_dirty(&self) -> bool {
358        self.stringblocks.inner.dirty
359    }
360
361    /// Returns the number of packages.
362    pub fn package_count(&self) -> usize {
363        self.packages.len()
364    }
365
366    /// Returns info about package string pools (for debugging).
367    pub fn package_pool_info(&self, idx: usize) -> Option<(usize, bool, usize, bool)> {
368        self.packages.get(idx).map(|pkg| {
369            let type_sp_size = pkg.type_strings.pack().len();
370            let type_sp_dirty = pkg.type_strings.dirty;
371            let key_sp_size = pkg.key_strings.pack().len();
372            let key_sp_dirty = pkg.key_strings.dirty;
373            (type_sp_size, type_sp_dirty, key_sp_size, key_sp_dirty)
374        })
375    }
376
377    /// Returns the count of chunks in a package (spec and type counts).
378    pub fn package_chunks_have_raw(&self, pkg_idx: usize) -> Vec<bool> {
379        self.packages
380            .get(pkg_idx)
381            .map(|pkg| pkg.chunks.iter().map(|_| false).collect())
382            .unwrap_or_default()
383    }
384
385    /// Serialize this ARSC to binary `resources.arsc` format.
386    ///
387    /// This function auto-recalculates sizes when needed (when total_size is 0).
388    /// Always produces a correct binary output.
389    pub fn pack(&self) -> Vec<u8> {
390        // Always compute sizes from actual content, no caching.
391        let global_sp = self.stringblocks.inner.pack();
392        let pkg_sizes: Vec<u32> = self
393            .packages
394            .iter()
395            .map(|p| p.actual_chunk_size())
396            .collect();
397        let pkg_total: usize = pkg_sizes.iter().map(|&s| s as usize).sum();
398        let total_size = (12 + global_sp.len() + pkg_total) as u32;
399
400        let mut out = Vec::with_capacity(total_size as usize);
401        out.extend_from_slice(&RES_TABLE_TYPE.to_le_bytes());
402        out.extend_from_slice(&12u16.to_le_bytes());
403        out.extend_from_slice(&total_size.to_le_bytes());
404        out.extend_from_slice(&self.package_count.to_le_bytes());
405        out.extend_from_slice(&global_sp);
406
407        for (pkg, &size) in self.packages.iter().zip(pkg_sizes.iter()) {
408            let pkg_data = pkg.pack_with_size(size);
409            out.extend_from_slice(&pkg_data);
410        }
411        out
412    }
413
414    /// Add a new resource entry (type_name / name → file_path string value).
415    ///
416    /// Finds or creates the type in the first package's type string pool, adds
417    /// the entry name to the key string pool, adds `file_path` to the global
418    /// string pool, then appends a new TypeSpec+TypeType pair for the default
419    /// locale.  Returns the new resource ID (0x7fTTEEEE).
420    pub fn add_resource(&mut self, type_name: &str, name: &str, file_path: &str) -> u32 {
421        let Some(pkg) = self.packages.first_mut() else {
422            return 0;
423        };
424
425        // Ensure the type exists in the type string pool (0-based index).
426        let type_idx_0 = match pkg.type_strings.find(type_name) {
427            Some(i) => i,
428            None => pkg.type_strings.add(type_name),
429        };
430        let type_id = (type_idx_0 + 1) as u8; // 1-based
431
432        // Add the entry name to the key string pool.
433        let key_idx = match pkg.key_strings.find(name) {
434            Some(i) => i,
435            None => pkg.key_strings.add(name),
436        };
437
438        // Add the file path to the global string pool.
439        let value_idx = match self.stringblocks.inner.find(file_path) {
440            Some(i) => i,
441            None => self.stringblocks.inner.add(file_path),
442        };
443
444        // Count existing entries of this type across all existing TypeType chunks.
445        let existing_count = pkg
446            .chunks
447            .iter()
448            .filter(|c| matches!(c, ArscResChunk::Type(tt) if tt.id == type_id))
449            .map(|c| {
450                if let ArscResChunk::Type(tt) = c {
451                    tt.tables.len()
452                } else {
453                    0
454                }
455            })
456            .max()
457            .unwrap_or(0);
458
459        let entry_idx = existing_count;
460        let entry_count = existing_count + 1;
461
462        // Build or extend TypeSpec for this type.
463        let spec_exists = pkg
464            .chunks
465            .iter()
466            .any(|c| matches!(c, ArscResChunk::Spec(s) if s.id == type_id));
467        if !spec_exists {
468            pkg.chunks.push(ArscResChunk::Spec(ArscTypeSpec {
469                id: type_id,
470                res0: 0,
471                res1: 0,
472                entries: vec![0u32; entry_count],
473            }));
474        } else {
475            for c in pkg.chunks.iter_mut() {
476                if let ArscResChunk::Spec(s) = c {
477                    if s.id == type_id {
478                        s.entries.push(0u32);
479                    }
480                }
481            }
482        }
483
484        // Build a new default-locale TypeType with all previous slots absent + new entry.
485        let mut tables: Vec<Option<ArscEntry>> = (0..existing_count).map(|_| None).collect();
486        let mut raw_tail = Vec::new();
487        raw_tail.extend_from_slice(&8u16.to_le_bytes()); // Res_value.size
488        raw_tail.push(0); // Res_value.res0
489        raw_tail.push(crate::typed_value::TYPE_STRING); // Res_value.data_type
490        raw_tail.extend_from_slice(&value_idx.to_le_bytes()); // Res_value.data
491        tables.push(Some(ArscEntry {
492            size: 16, // ResTable_entry header (8) + Res_value (8)
493            flags: 0, // Simple entry, not complex
494            key_index: key_idx,
495            value: Some(ArscValue {
496                size: 8,
497                res0: 0,
498                data_type: crate::typed_value::TYPE_STRING,
499                data: value_idx,
500            }),
501            raw_tail,
502        }));
503
504        // Minimal 32-byte ResTable_config for default locale (all zeros = "any" config).
505        let config_raw = {
506            let mut cfg = vec![0u8; 32];
507            cfg[0] = 32; // config_size
508            cfg
509        };
510
511        pkg.chunks.push(ArscResChunk::Type(ArscTypeType {
512            id: type_id,
513            flags: 0,
514            reserved: 0,
515            language: String::new(),
516            region: String::new(),
517            config_raw,
518            tables,
519        }));
520
521        let pkg_id = pkg.id;
522
523        // Update stored chunk sizes after modification
524        pkg.chunk_size = pkg.actual_chunk_size();
525
526        // Update total_size to reflect the new content
527        let global_sp_len = self.stringblocks.inner.pack().len();
528        let pkg_total: usize = self.packages.iter().map(|p| p.chunk_size as usize).sum();
529        self.total_size = (12 + global_sp_len + pkg_total) as u32;
530
531        self.update_proto();
532
533        // Build resource ID: 0xPPTTEEEE
534        ((pkg_id & 0xFF) << 24) | ((type_id as u32 & 0xFF) << 16) | (entry_idx as u32 & 0xFFFF)
535    }
536} // impl Arsc
537
538// ─────────────────────────── ArscPackage parsing ─────────────────────────────
539
540impl ArscPackage {
541    /// Return a reference to the cached prost-generated protobuf message for this package.
542    #[allow(dead_code)]
543    pub fn get_proto(&self) -> &proto::AxmlResTablePackage {
544        &self.proto
545    }
546
547    /// Compute the actual (correct) package chunk size from current content.
548    /// Used by `Arsc::compute()` to update `self.chunk_size`.
549    pub(crate) fn actual_chunk_size(&self) -> u32 {
550        const PKG_HEADER_SIZE: usize = 288;
551        let type_sp_len = self.type_strings.pack().len();
552        let key_sp_len = self.key_strings.pack().len();
553        let mut chunks_len = 0usize;
554        for chunk in &self.chunks {
555            let mut tmp = Vec::new();
556            match chunk {
557                ArscResChunk::Spec(spec) => pack_type_spec(spec, &mut tmp),
558                ArscResChunk::Type(tt) => pack_type_type(tt, &mut tmp),
559            }
560            chunks_len += tmp.len();
561        }
562        (PKG_HEADER_SIZE + type_sp_len + key_sp_len + chunks_len) as u32
563    }
564
565    /// Serialize this package to binary bytes.
566    ///
567    /// Bytes 4..8 of the chunk header (chunk size) come from `self.chunk_size`
568    /// verbatim.  Call `Arsc::compute()` first to write the correct size, or
569    /// set `self.chunk_size` manually for intentionally malformed output.
570    #[expect(dead_code)]
571    pub(crate) fn pack(&self) -> Vec<u8> {
572        self.pack_with_size(self.chunk_size)
573    }
574
575    pub(crate) fn pack_with_size(&self, chunk_size: u32) -> Vec<u8> {
576        const PKG_HEADER_SIZE: usize = 288;
577
578        let type_sp = self.type_strings.pack();
579        let key_sp = self.key_strings.pack();
580        let type_strings_offset = PKG_HEADER_SIZE as u32;
581        let key_strings_offset = type_strings_offset + type_sp.len() as u32;
582
583        let mut chunks_data: Vec<u8> = Vec::new();
584        for chunk in &self.chunks {
585            match chunk {
586                ArscResChunk::Spec(spec) => pack_type_spec(spec, &mut chunks_data),
587                ArscResChunk::Type(tt) => pack_type_type(tt, &mut chunks_data),
588            }
589        }
590
591        let body_len = PKG_HEADER_SIZE + type_sp.len() + key_sp.len() + chunks_data.len();
592        let mut out = Vec::with_capacity(body_len);
593
594        // 8-byte chunk header
595        out.extend_from_slice(&RES_TABLE_PACKAGE_TYPE.to_le_bytes());
596        out.extend_from_slice(&(PKG_HEADER_SIZE as u16).to_le_bytes());
597        out.extend_from_slice(&chunk_size.to_le_bytes());
598
599        // Package-specific fixed fields
600        out.extend_from_slice(&self.id.to_le_bytes());
601
602        // name: 256 bytes, null-padded UTF-16LE
603        let mut name_utf16 = self
604            .name
605            .encode_utf16()
606            .flat_map(|c| c.to_le_bytes())
607            .collect::<Vec<_>>();
608        name_utf16.resize(256, 0);
609        out.extend_from_slice(&name_utf16);
610
611        out.extend_from_slice(&type_strings_offset.to_le_bytes());
612        out.extend_from_slice(&self.last_public_type.to_le_bytes());
613        out.extend_from_slice(&key_strings_offset.to_le_bytes());
614        out.extend_from_slice(&self.last_public_key.to_le_bytes());
615        out.extend_from_slice(&0u32.to_le_bytes()); // typeIdOffset
616
617        // Current size: 8 + 4 + 256 + 4 + 4 + 4 + 4 + 4 = 288 ✓
618        debug_assert_eq!(out.len(), PKG_HEADER_SIZE);
619
620        out.extend_from_slice(&type_sp);
621        out.extend_from_slice(&key_sp);
622        out.extend_from_slice(&chunks_data);
623
624        out
625    }
626
627    fn parse(data: &[u8]) -> Result<(Self, usize), AxmlError> {
628        if data.len() < 8 {
629            return Err(AxmlError::UnexpectedEof);
630        }
631
632        // Chunk header
633        let _chunk_type = u16::from_le_bytes([data[0], data[1]]);
634        let _header_size = u16::from_le_bytes([data[2], data[3]]) as usize;
635        let chunk_size_raw = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
636        let pkg_size = chunk_size_raw as usize;
637
638        if pkg_size < 288 || pkg_size > data.len() {
639            return Err(AxmlError::SliceError(format!(
640                "Package size {} out of range (data len {})",
641                pkg_size,
642                data.len()
643            )));
644        }
645
646        let pkg = &data[..pkg_size];
647
648        // Fixed header fields (all offsets from pkg start = offset 0)
649        //   8: id
650        //  12: name (256 bytes, UTF-16LE, null-padded)
651        // 268: typeStrings offset
652        // 272: lastPublicType
653        // 276: keyStrings offset
654        // 280: lastPublicKey
655        let id = u32::from_le_bytes([pkg[8], pkg[9], pkg[10], pkg[11]]);
656        let name = decode_utf16_null(&pkg[12..268]);
657
658        let type_strings_off =
659            u32::from_le_bytes([pkg[268], pkg[269], pkg[270], pkg[271]]) as usize;
660        let last_public_type = u32::from_le_bytes([pkg[272], pkg[273], pkg[274], pkg[275]]);
661        let key_strings_off = u32::from_le_bytes([pkg[276], pkg[277], pkg[278], pkg[279]]) as usize;
662        let last_public_key = u32::from_le_bytes([pkg[280], pkg[281], pkg[282], pkg[283]]);
663
664        // Parse type string pool
665        let (type_strings, type_size) = parse_string_pool_at(pkg, type_strings_off)?;
666
667        // Parse key string pool
668        let (key_strings, key_size) = parse_string_pool_at(pkg, key_strings_off)?;
669
670        // ResType chunks start after both string pools
671        let chunks_start = (type_strings_off + type_size).max(key_strings_off + key_size);
672
673        let chunks = parse_res_chunks(&pkg[chunks_start..]);
674
675        Ok((
676            ArscPackage {
677                proto: proto::AxmlResTablePackage::default(),
678                id,
679                name,
680                type_strings,
681                key_strings,
682                chunks,
683                chunk_size: chunk_size_raw,
684                last_public_type,
685                last_public_key,
686            },
687            pkg_size,
688        ))
689    }
690}
691
692fn parse_string_pool_at(pkg: &[u8], offset: usize) -> Result<(StringPool, usize), AxmlError> {
693    if offset == 0 || offset >= pkg.len() {
694        return Ok((StringPool::default(), 0));
695    }
696    StringPool::parse(&pkg[offset..])
697}
698
699fn parse_res_chunks(data: &[u8]) -> Vec<ArscResChunk> {
700    let mut chunks = Vec::new();
701    let mut pos = 0;
702
703    while pos + 8 <= data.len() {
704        let chunk_type = u16::from_le_bytes([data[pos], data[pos + 1]]);
705        let chunk_size =
706            u32::from_le_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
707                as usize;
708
709        if chunk_size < 8 || pos + chunk_size > data.len() {
710            break;
711        }
712
713        let body = &data[pos + 8..pos + chunk_size];
714
715        match chunk_type {
716            RES_TABLE_TYPE_SPEC_TYPE => {
717                if let Ok(spec) = parse_type_spec(body) {
718                    chunks.push(ArscResChunk::Spec(spec));
719                }
720            }
721            RES_TABLE_TYPE_TYPE => {
722                if let Ok(tt) = parse_type_type(body) {
723                    chunks.push(ArscResChunk::Type(tt));
724                }
725            }
726            _ => {}
727        }
728
729        pos += chunk_size;
730    }
731
732    chunks
733}
734
735// ─────────────────────────── Chunk body parsers ──────────────────────────────
736
737fn parse_type_spec(body: &[u8]) -> Result<ArscTypeSpec, AxmlError> {
738    if body.len() < 8 {
739        return Err(AxmlError::UnexpectedEof);
740    }
741    let id = body[0];
742    let res0 = body[1];
743    let res1 = u16::from_le_bytes([body[2], body[3]]);
744    let entry_count = u32::from_le_bytes([body[4], body[5], body[6], body[7]]) as usize;
745    // Cap capacity: each entry is 4 bytes; header occupies the first 8 bytes.
746    let safe_cap = entry_count.min((body.len().saturating_sub(8)) / 4);
747    let mut entries = Vec::with_capacity(safe_cap);
748    for i in 0..entry_count {
749        let pos = 8 + i * 4;
750        if pos + 4 > body.len() {
751            break;
752        }
753        entries.push(u32::from_le_bytes([
754            body[pos],
755            body[pos + 1],
756            body[pos + 2],
757            body[pos + 3],
758        ]));
759    }
760    Ok(ArscTypeSpec {
761        id,
762        res0,
763        res1,
764        entries,
765    })
766}
767
768fn parse_type_type(body: &[u8]) -> Result<ArscTypeType, AxmlError> {
769    // body: id(1) flags(1) reserved(2) entryCount(4) entryStart(4) config(variable) offsets entries
770    if body.len() < 12 {
771        return Err(AxmlError::UnexpectedEof);
772    }
773    let id = body[0];
774    let flags = body[1];
775    let reserved = u16::from_le_bytes([body[2], body[3]]);
776    let entry_count = u32::from_le_bytes([body[4], body[5], body[6], body[7]]) as usize;
777
778    // Config starts at body offset 12; first 4 bytes of config = config size
779    if body.len() < 16 {
780        return Err(AxmlError::UnexpectedEof);
781    }
782    let config_size = u32::from_le_bytes([body[12], body[13], body[14], body[15]]) as usize;
783    if config_size < 16 || 12 + config_size > body.len() {
784        return Err(AxmlError::SliceError(format!(
785            "Invalid config size {}",
786            config_size
787        )));
788    }
789
790    // Entry offsets array immediately follows config
791    let offsets_base = 12 + config_size;
792    // SEC-02: use checked arithmetic to prevent integer overflow on 32-bit targets.
793    let offsets_size = entry_count
794        .checked_mul(4)
795        .ok_or_else(|| AxmlError::SliceError(format!("entry_count overflow: {}", entry_count)))?;
796    // Entry data immediately follows offset array
797    let entries_base = offsets_base
798        .checked_add(offsets_size)
799        .ok_or_else(|| AxmlError::SliceError("entries_base overflow".to_string()))?;
800
801    // SEC-01: cap allocation to prevent OOM from a malformed entry_count field.
802    // Each entry is at least 8 bytes (size u16 + flags u16 + key u32).
803    let safe_cap = entry_count.min(body.len() / 8);
804    let mut tables = Vec::with_capacity(safe_cap);
805    for i in 0..entry_count {
806        let off_pos = offsets_base + i * 4;
807        if off_pos + 4 > body.len() {
808            tables.push(None);
809            continue;
810        }
811        let entry_off = u32::from_le_bytes([
812            body[off_pos],
813            body[off_pos + 1],
814            body[off_pos + 2],
815            body[off_pos + 3],
816        ]);
817
818        if entry_off == 0xFFFF_FFFF {
819            tables.push(None);
820            continue;
821        }
822
823        let entry_pos = entries_base + entry_off as usize;
824        if entry_pos + 8 > body.len() {
825            tables.push(None);
826            continue;
827        }
828
829        match parse_entry(&body[entry_pos..]) {
830            Ok(e) => tables.push(Some(e)),
831            Err(_) => tables.push(None),
832        }
833    }
834
835    // Extract language and region from config if config is large enough
836    let language = if config_size >= 12 && body.len() >= 24 {
837        parse_locale_bytes(&body[20..22])
838    } else {
839        String::new()
840    };
841    let region = if config_size >= 12 && body.len() >= 26 {
842        parse_locale_bytes(&body[22..24])
843    } else {
844        String::new()
845    };
846
847    let config_raw = body[12..12 + config_size].to_vec();
848    Ok(ArscTypeType {
849        id,
850        flags,
851        reserved,
852        language,
853        region,
854        config_raw,
855        tables,
856    })
857}
858
859fn parse_entry(data: &[u8]) -> Result<ArscEntry, AxmlError> {
860    if data.len() < 8 {
861        return Err(AxmlError::UnexpectedEof);
862    }
863    let size = u16::from_le_bytes([data[0], data[1]]);
864    let flags = u16::from_le_bytes([data[2], data[3]]);
865    let key_index = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
866
867    if (flags & FLAG_COMPLEX) != 0 {
868        // Complex entry: ResTable_map_entry header (8 bytes after the 8-byte base header)
869        // followed by `count` ResTable_map values of 12 bytes each.
870        // bytes 8..12 = parent resource id
871        // bytes 12..16 = count of ResTable_map entries following
872        let count = if data.len() >= 16 {
873            u32::from_le_bytes([data[12], data[13], data[14], data[15]]) as usize
874        } else {
875            0
876        };
877        // Each ResTable_map is 12 bytes: name(4) + Res_value(8)
878        let total_size = 16 + count * 12;
879        let end = total_size.min(data.len());
880        let raw_tail = data[8..end].to_vec();
881        return Ok(ArscEntry {
882            size,
883            flags,
884            key_index,
885            value: None,
886            raw_tail,
887        });
888    }
889
890    // Simple entry: a Res_value (8 bytes) always follows the 8-byte entry header.
891    // The `size` field is the size of the ResTable_entry struct itself (always 8),
892    // NOT the total bytes on disk.  Always capture 8 trailing bytes for the value.
893    let value = if data.len() >= 16 {
894        Some(ArscValue {
895            size: u16::from_le_bytes([data[8], data[9]]),
896            res0: data[10],
897            data_type: data[11],
898            data: u32::from_le_bytes([data[12], data[13], data[14], data[15]]),
899        })
900    } else {
901        None
902    };
903
904    let end = 16usize.min(data.len());
905    let raw_tail = data[8..end].to_vec();
906
907    Ok(ArscEntry {
908        size,
909        flags,
910        key_index,
911        value,
912        raw_tail,
913    })
914}
915
916// ─────────────────────────────── Helpers ─────────────────────────────────────
917
918// ─────────────────────────── Binary pack helpers ─────────────────────────────
919
920pub(crate) fn pack_type_spec(spec: &ArscTypeSpec, out: &mut Vec<u8>) {
921    let entry_count = spec.entries.len() as u32;
922    let chunk_size = 8u32 + 8 + entry_count * 4;
923    out.extend_from_slice(&RES_TABLE_TYPE_SPEC_TYPE.to_le_bytes());
924    out.extend_from_slice(&16u16.to_le_bytes()); // headerSize = 16
925    out.extend_from_slice(&chunk_size.to_le_bytes());
926    out.push(spec.id);
927    out.push(spec.res0);
928    out.extend_from_slice(&spec.res1.to_le_bytes());
929    out.extend_from_slice(&entry_count.to_le_bytes());
930    for &e in &spec.entries {
931        out.extend_from_slice(&e.to_le_bytes());
932    }
933}
934
935pub(crate) fn pack_type_type(tt: &ArscTypeType, out: &mut Vec<u8>) {
936    pack_type_type_from_parsed(tt, out);
937}
938
939fn pack_type_type_from_parsed(tt: &ArscTypeType, out: &mut Vec<u8>) {
940    let entry_count = tt.tables.len() as u32;
941    let config_size = tt.config_raw.len();
942    // entries_start: offset from chunk start to first entry data
943    // = chunk header (8) + body header (12) + config + entry offsets array
944    let entries_start = 8u32 + 12 + config_size as u32 + entry_count * 4;
945    // Collect present entries
946    let mut entry_offsets = Vec::with_capacity(tt.tables.len());
947    let mut entries_data: Vec<u8> = Vec::new();
948    for entry_opt in &tt.tables {
949        match entry_opt {
950            None => entry_offsets.push(0xFFFF_FFFFu32),
951            Some(entry) => {
952                entry_offsets.push(entries_data.len() as u32);
953                // ResTable_entry: size(2) flags(2) key(4)
954                entries_data.extend_from_slice(&entry.size.to_le_bytes());
955                entries_data.extend_from_slice(&entry.flags.to_le_bytes());
956                entries_data.extend_from_slice(&entry.key_index.to_le_bytes());
957                // Use raw_tail to preserve all data (including complex entries)
958                entries_data.extend_from_slice(&entry.raw_tail);
959            }
960        }
961    }
962
963    // body = id(1)+flags(1)+reserved(2)+entryCount(4)+entriesStart(4)+config+offsets+entries
964    let body_size = 12 + config_size + entry_count as usize * 4 + entries_data.len();
965    let chunk_size = 8 + body_size;
966    // headerSize = 8 (chunk header) + 12 (body header) + config_size
967    let header_size = (8 + 12 + config_size) as u16;
968
969    out.extend_from_slice(&RES_TABLE_TYPE_TYPE.to_le_bytes());
970    out.extend_from_slice(&header_size.to_le_bytes());
971    out.extend_from_slice(&(chunk_size as u32).to_le_bytes());
972    // body header
973    out.push(tt.id);
974    out.push(tt.flags);
975    out.extend_from_slice(&tt.reserved.to_le_bytes());
976    out.extend_from_slice(&entry_count.to_le_bytes());
977    out.extend_from_slice(&entries_start.to_le_bytes());
978    // config
979    out.extend_from_slice(&tt.config_raw);
980    // offsets
981    for &off in &entry_offsets {
982        out.extend_from_slice(&off.to_le_bytes());
983    }
984    // entries
985    out.extend_from_slice(&entries_data);
986}
987
/// Decode a locale field (language or region) taken from the config block.
///
/// Callers pass the 2-byte field.  The result is empty when the slice holds
/// fewer than two bytes or when its first two bytes are both zero (locale not
/// set).  Otherwise every ASCII-alphabetic byte of the slice is kept, in
/// order, and every other byte is dropped.
///
/// Dropping non-letters is deliberate: control characters and XML-special
/// characters (`<`, `>`, `"`, `&`) can never reach the XML output through a
/// locale field of a crafted ARSC file.
fn parse_locale_bytes(bytes: &[u8]) -> String {
    let unset = match bytes {
        [first, second, ..] => *first == 0 && *second == 0,
        _ => true,
    };
    if unset {
        return String::new();
    }
    bytes
        .iter()
        .copied()
        .filter(u8::is_ascii_alphabetic)
        .map(char::from)
        .collect()
}
1007
/// Combine a language and a region into a single locale tag.
///
/// | language | region | result      |
/// |----------|--------|-------------|
/// | `""`     | any    | `"default"` |
/// | `"en"`   | `""`   | `"en"`      |
/// | `"en"`   | `"US"` | `"en-US"`   |
///
/// The region is ignored whenever the language is empty.
pub fn locale_tag(language: &str, region: &str) -> String {
    if language.is_empty() {
        return String::from("default");
    }
    if region.is_empty() {
        return language.to_owned();
    }
    let mut tag = String::with_capacity(language.len() + 1 + region.len());
    tag.push_str(language);
    tag.push('-');
    tag.push_str(region);
    tag
}
1019
/// Make `s` safe to embed in a double-quoted XML attribute value.
///
/// Replaces `&`, `<`, `>` and `"` with `&amp;`, `&lt;`, `&gt;` and `&quot;`.
/// Every other character, including `'`, is copied through unchanged.
fn xml_attr_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            plain => {
                escaped.push(plain);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
1034
/// Decode a NUL-terminated UTF-16LE byte buffer into a `String`.
///
/// Bytes are read as little-endian 16-bit code units; a trailing odd byte is
/// ignored.  Decoding stops at the first zero code unit (or at the end of the
/// buffer when there is none), and invalid UTF-16 such as an unpaired
/// surrogate is replaced with U+FFFD.
fn decode_utf16_null(bytes: &[u8]) -> String {
    // A zero unit is never half of a surrogate pair, so cutting the unit
    // stream at the terminator yields the same text as decoding everything
    // and trimming at the first NUL character.
    let units: Vec<u16> = bytes
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .take_while(|&unit| unit != 0)
        .collect();
    String::from_utf16_lossy(&units)
}