diff --git a/doc/sphinx/changelog.rst b/doc/sphinx/changelog.rst index 8fd37385f..8d0b80503 100644 --- a/doc/sphinx/changelog.rst +++ b/doc/sphinx/changelog.rst @@ -3,6 +3,13 @@ :fa:`solid fa-code-compare` Changelog ===================================== +0.16.3 - TBD +------------ + +:ELF: + + * Fix issue when parsing the dynamic table with an invalid offset (bug found + by :github_user:`lebr0nli`) 0.16.2 - January 1st, 2025 ---------------------------- diff --git a/include/LIEF/ELF/Parser.hpp b/include/LIEF/ELF/Parser.hpp index fef4875f6..6d6e3a75d 100644 --- a/include/LIEF/ELF/Parser.hpp +++ b/include/LIEF/ELF/Parser.hpp @@ -126,9 +126,9 @@ class LIEF_API Parser : public LIEF::Parser { template LIEF_LOCAL ok_error_t parse_segments(); - LIEF_LOCAL uint64_t get_dynamic_string_table() const; + LIEF_LOCAL uint64_t get_dynamic_string_table(BinaryStream* stream = nullptr) const; - LIEF_LOCAL result get_dynamic_string_table_from_segments() const; + LIEF_LOCAL result get_dynamic_string_table_from_segments(BinaryStream* stream = nullptr) const; LIEF_LOCAL uint64_t get_dynamic_string_table_from_sections() const; @@ -158,7 +158,7 @@ class LIEF_API Parser : public LIEF::Parser { LIEF_LOCAL result nb_dynsym_relocations() const; template - LIEF_LOCAL ok_error_t parse_dynamic_entries(uint64_t offset, uint64_t size); + LIEF_LOCAL ok_error_t parse_dynamic_entries(BinaryStream& stream); template LIEF_LOCAL ok_error_t parse_dynamic_symbols(uint64_t offset); diff --git a/src/ELF/Parser.cpp b/src/ELF/Parser.cpp index 1d154ecc7..35e0b5025 100644 --- a/src/ELF/Parser.cpp +++ b/src/ELF/Parser.cpp @@ -369,7 +369,40 @@ ok_error_t Parser::parse_symbol_version(uint64_t symbol_version_offset) { } -result Parser::get_dynamic_string_table_from_segments() const { +result Parser::get_dynamic_string_table_from_segments(BinaryStream* stream) const { + const ARCH arch = binary_->header().machine_type(); + if (const DynamicEntry* dt_str = binary_->get(DynamicEntry::TAG::STRTAB)) { + return 
binary_->virtual_address_to_offset(dt_str->value()); + } + + if (stream != nullptr) { + size_t count = 0; + ScopedStream scope(*stream); + while (*scope) { + if (++count > Parser::NB_MAX_DYNAMIC_ENTRIES) { + break; + } + if (binary_->type_ == Header::CLASS::ELF32) { + auto dt = scope->read(); + if (!dt) { + break; + } + if (DynamicEntry::from_value(dt->d_tag, arch) == DynamicEntry::TAG::STRTAB) { + return binary_->virtual_address_to_offset(dt->d_un.d_val); + } + } else { + auto dt = scope->read(); + if (!dt) { + break; + } + + if (DynamicEntry::from_value(dt->d_tag, arch) == DynamicEntry::TAG::STRTAB) { + return binary_->virtual_address_to_offset(dt->d_un.d_val); + } + } + } + } + Segment* dyn_segment = binary_->get(Segment::TYPE::DYNAMIC); if (dyn_segment == nullptr) { return 0; @@ -380,8 +413,6 @@ result Parser::get_dynamic_string_table_from_segments() const { stream_->setpos(offset); - const ARCH arch = binary_->header().machine_type(); - if (binary_->type_ == Header::CLASS::ELF32) { size_t nb_entries = size / sizeof(details::Elf32_Dyn); @@ -432,8 +463,8 @@ uint64_t Parser::get_dynamic_string_table_from_sections() const { return (*it_dynamic_string_section)->file_offset(); } -uint64_t Parser::get_dynamic_string_table() const { - if (auto res = get_dynamic_string_table_from_segments()) { +uint64_t Parser::get_dynamic_string_table(BinaryStream* stream) const { + if (auto res = get_dynamic_string_table_from_segments(stream)) { return *res; } return get_dynamic_string_table_from_sections(); diff --git a/src/ELF/Parser.tcc b/src/ELF/Parser.tcc index 4f65b8ccf..9dee70c82 100644 --- a/src/ELF/Parser.tcc +++ b/src/ELF/Parser.tcc @@ -82,16 +82,59 @@ ok_error_t Parser::parse_binary() { } } - // Parse Dynamic elements - // ====================== - - // Find the dynamic Segment + // Parse the dynamic table. To process this table, we can either process + // the content of the PT_DYNAMIC segment or process the content of the PT_LOAD + // segment that wraps the dynamic table. 
The second approach should be + // preferred since it uses a more accurate representation. + // (cf. the sample `issue_dynamic_table.elf` provided by @lebr0nli) if (const Segment* seg_dyn = binary_->get(Segment::TYPE::DYNAMIC)) { - const Elf_Off offset = seg_dyn->file_offset(); - const Elf_Off size = seg_dyn->physical_size(); + std::vector segments; + + // Find the PT_LOAD segment that wraps the PT_DYNAMIC table. + // As demonstrated in the library: ELF32_x86_library_libshellx.so + // we need to consider overlapping segments and take the "latest" one since + // this is what the loader would do. + for (const std::unique_ptr& segment : binary_->segments_) { + if (!segment->is_load()) { + continue; + } + const uint64_t dyn_start = seg_dyn->virtual_address(); + const uint64_t dyn_end = dyn_start + seg_dyn->virtual_size(); + const uint64_t load_start = segment->virtual_address(); + const uint64_t load_end = load_start + segment->virtual_size(); + if (!(load_start <= dyn_start && dyn_start < load_end)) { + continue; + } + + if (!(load_start < dyn_end && dyn_end <= load_end)) { + continue; + } + segments.push_back(segment.get()); + } + + binary_->sizing_info_->dynamic = seg_dyn->physical_size(); - parse_dynamic_entries(offset, size); - binary_->sizing_info_->dynamic = size; + // Usually #segments is 1 but we might have > 1 for overlapping segments + LIEF_DEBUG("Nb segments: {}", segments.size()); + + if (!segments.empty()) { + const Segment& load_seg = *segments.back(); + LIEF_DEBUG("Dynamic content wrapped by segment LOAD: [0x{:016x}, 0x{:016x}] " + "[0x{:016x}, 0x{:016x}]", load_seg.virtual_address(), + load_seg.virtual_address() + load_seg.virtual_size(), + load_seg.file_offset(), load_seg.file_offset() + load_seg.physical_size()); + + int64_t rel_offset = seg_dyn->virtual_address() - load_seg.virtual_address(); + assert(rel_offset >= 0); + span dynamic_content = load_seg.content().subspan(rel_offset); + SpanStream stream(dynamic_content); + 
stream.set_endian_swap(stream_->should_swap()); + parse_dynamic_entries(stream); + } else /* No PT_LOAD segment wrapping up the PT_DYNAMIC table */ { + const Elf_Off offset = seg_dyn->file_offset(); + ScopedStream scoped(*stream_, offset); + parse_dynamic_entries(*scoped); + } } process_dynamic_table(); @@ -913,37 +956,38 @@ ok_error_t Parser::parse_segments() { DataHandler::Node::SEGMENT); segment->handler_size_ = read_size; - if (segment->file_offset() > stream_->size() || (segment->file_offset() + read_size) > stream_->size()) { - LIEF_WARN("Segment #{} has a corrupted file offset (0x{:x}) ", i, segment->file_offset()); - break; - } - const Elf_Off offset_to_content = segment->file_offset(); - auto alloc = binary_->datahandler_->reserve(segment->file_offset(), read_size); - if (!alloc) { - LIEF_ERR("Can't allocate memory"); - break; - } - /* The DataHandlerStream interface references ELF data that are - * located in the ELF::DataHandler. Therefore, we can skip reading - * the data since they are already present in the data handler. - * This optimization saves memory (which is also performed in parse_sections<>(...)) - */ - if (stream_->type() != BinaryStream::STREAM_TYPE::ELF_DATA_HANDLER) { - std::vector seg_content; - if (stream_->peek_data(seg_content, offset_to_content, read_size)) { - segment->content(std::move(seg_content)); - } else { - LIEF_ERR("Unable to get the content of segment #{:d}", i); + const bool corrupted_offset = segment->file_offset() > stream_->size() || + (segment->file_offset() + read_size) > stream_->size(); + + if (!corrupted_offset) { + const Elf_Off offset_to_content = segment->file_offset(); + auto alloc = binary_->datahandler_->reserve(segment->file_offset(), read_size); + if (!alloc) { + LIEF_ERR("Can't allocate memory"); + break; + } + /* The DataHandlerStream interface references ELF data that are + * located in the ELF::DataHandler. Therefore, we can skip reading + * the data since they are already present in the data handler. 
+ * This optimization saves memory (which is also performed in parse_sections<>(...)) + */ + if (stream_->type() != BinaryStream::STREAM_TYPE::ELF_DATA_HANDLER) { + std::vector seg_content; + if (stream_->peek_data(seg_content, offset_to_content, read_size)) { + segment->content(std::move(seg_content)); + } else { + LIEF_ERR("Unable to get the content of segment #{:d}", i); + } } - } - if (segment->is_interpreter()) { - auto interpreter = stream_->peek_string_at(offset_to_content, read_size); - if (!interpreter) { - LIEF_ERR("Can't read the interpreter string"); - } else { - binary_->interpreter_ = *interpreter; - binary_->sizing_info_->interpreter = read_size; + if (segment->is_interpreter()) { + auto interpreter = stream_->peek_string_at(offset_to_content, read_size); + if (!interpreter) { + LIEF_ERR("Can't read the interpreter string"); + } else { + binary_->interpreter_ = *interpreter; + binary_->sizing_info_->interpreter = read_size; + } } } } else { @@ -1263,29 +1307,26 @@ ok_error_t Parser::parse_dynamic_symbols(uint64_t offset) { binary_->sizing_info_->dynstr = dt_strsz->value(); } return ok(); -} // build_dynamic_sybols +} template -ok_error_t Parser::parse_dynamic_entries(uint64_t offset, uint64_t size) { +ok_error_t Parser::parse_dynamic_entries(BinaryStream& stream) { using Elf_Dyn = typename ELF_T::Elf_Dyn; using uint__ = typename ELF_T::uint; using Elf_Addr = typename ELF_T::Elf_Addr; using Elf_Off = typename ELF_T::Elf_Off; - LIEF_DEBUG("== Parsing dynamic section =="); + LIEF_DEBUG("Parsing dynamic entries"); - uint32_t nb_entries = size / sizeof(Elf_Dyn); - nb_entries = std::min(nb_entries, Parser::NB_MAX_DYNAMIC_ENTRIES); + uint32_t max_nb_entries = stream.size() / sizeof(Elf_Dyn); + max_nb_entries = std::min(max_nb_entries, Parser::NB_MAX_DYNAMIC_ENTRIES); - LIEF_DEBUG(".dynamic@0x{:x}:0x{:x} #", offset, size, nb_entries); - - Elf_Off dynamic_string_offset = get_dynamic_string_table(); + Elf_Off dynamic_string_offset = 
get_dynamic_string_table(&stream); bool end_of_dynamic = false; - stream_->setpos(offset); - for (size_t dynIdx = 0; dynIdx < nb_entries; ++dynIdx) { - const auto res_entry = stream_->read(); + while (stream) { + const auto res_entry = stream.read(); if (!res_entry) { break; } diff --git a/tests/elf/test_parser.py b/tests/elf/test_parser.py index 374b2f098..fc4f508b3 100644 --- a/tests/elf/test_parser.py +++ b/tests/elf/test_parser.py @@ -206,3 +206,10 @@ def test_ebpf_relocations(): assert relocations[8].info == 1 assert relocations[8].purpose == lief.ELF.Relocation.PURPOSE.OBJECT assert relocations[8].type == lief.ELF.Relocation.TYPE.BPF_64_NODYLD32 + + +def test_issue_dynamic_table(): + elf = lief.ELF.parse(get_sample("ELF/issue_dynamic_table.elf")) + dyn_entries = list(elf.dynamic_entries) + assert len(dyn_entries) == 28 + assert dyn_entries[0].name == "libselinux.so.1"