diff --git a/NEWS.md b/NEWS.md index 3b62f6aa..72318b7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,39 @@ # News +## 3.3.3 - 2024-08-01 {#version-3-3-3} + +### Improvements + + * Added support for detecting invalid XML that has unsupported + content before root element + * GH-184 + * Patch by NAITOH Jun. + + * Added support for `REXML::Security.entity_expansion_limit=` and + `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull + parsers + * GH-187 + * Patch by NAITOH Jun. + + * Added more tests for invalid XMLs. + * GH-183 + * Patch by Watson. + + * Added more performance tests. + * Patch by Watson. + + * Improved parse performance. + * GH-186 + * Patch by tomoya ishida. + +### Thanks + + * NAITOH Jun + + * Watson + + * tomoya ishida + ## 3.3.2 - 2024-07-16 {#version-3-3-2} ### Improvements @@ -15,6 +49,9 @@ * GH-172 * GH-173 * GH-174 + * GH-175 + * GH-176 + * GH-177 * Patch by Watson. * Added support for raising a parse exception when an XML has extra diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 5688c773..44dc6580 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -124,19 +124,10 @@ class BaseParser } module Private - # Terminal requires two or more letters. - INSTRUCTION_TERM = "?>" - COMMENT_TERM = "-->" - CDATA_TERM = "]]>" - DOCTYPE_TERM = "]>" - # Read to the end of DOCTYPE because there is no proper ENTITY termination - ENTITY_TERM = DOCTYPE_TERM - - INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um - NAME_PATTERN = /\s*#{NAME}/um + NAME_PATTERN = /#{NAME}/um GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um @@ -154,6 +145,7 @@ def initialize( source ) self.stream = source @listeners = [] @prefixes = Set.new + @entity_expansion_count = 0 end def add_listener( listener ) @@ -161,6 +153,7 @@ def add_listener( listener ) end attr_reader :source + attr_reader :entity_expansion_count def stream=( source ) @source = SourceFactory.create_from( source ) @@ -248,10 +241,10 @@ def pull_event if @document_status == nil start_position = @source.position if @source.match("/um, true, term: Private::COMMENT_TERM) + md = @source.match(/(.*?)-->/um, true) if md.nil? raise REXML::ParseException.new("Unclosed comment", @source) end @@ -318,7 +311,11 @@ def pull_event raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? return [ :elementdecl, "/um, true, term: Private::COMMENT_TERM) + elsif md = @source.match(/--(.*?)-->/um, true) case md[1] when /--/, /-\z/ raise REXML::ParseException.new("Malformed comment", @source) end return [ :comment, md[1] ] if md end - elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM) + elsif match = @source.match(/(%.*?;)\s*/um, true) return [ :externalentity, match[1] ] elsif @source.match(/\]\s*>/um, true) @document_status = :after_doctype @@ -430,7 +427,7 @@ def pull_event #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md if md[0][0] == ?- - md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM) + md = @source.match(/--(.*?)-->/um, true) if md.nil? || /--|-\z/.match?(md[1]) raise REXML::ParseException.new("Malformed comment", @source) @@ -438,13 +435,13 @@ def pull_event return [ :comment, md[1] ] else - md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM) + md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) return [ :cdata, md[1] ] if md end raise REXML::ParseException.new( "Declarations can only occur "+ "in the doctype declaration.", @source) elsif @source.match("?", true) - return process_instruction(start_position) + return process_instruction else # Get the next tag md = @source.match(Private::TAG_PATTERN, true) @@ -482,11 +479,15 @@ def pull_event if text.chomp!("<") @source.position -= "<".bytesize end - if @tags.empty? and @have_root + if @tags.empty? unless /\A\s*\z/.match?(text) - raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + if @have_root + raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + else + raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source) + end end - return pull_event + return pull_event if @have_root end return [ :text, text ] end @@ -505,7 +506,9 @@ def pull_event def entity( reference, entities ) value = nil value = entities[ reference ] if entities - if not value + if value + record_entity_expansion + else value = DEFAULT_ENTITIES[ reference ] value = value[2] if value end @@ -544,12 +547,17 @@ def unnormalize( string, entities=nil, filter=nil ) } matches.collect!{|x|x[0]}.compact! if matches.size > 0 + sum = 0 matches.each do |entity_reference| unless filter and filter.include?(entity_reference) entity_value = entity( entity_reference, entities ) if entity_value re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ rv.gsub!( re, entity_value ) + sum += rv.bytesize + if sum > Security.entity_expansion_text_limit + raise "entity expansion has grown too large" + end else er = DEFAULT_ENTITIES[entity_reference] rv.gsub!( er[0], er[2] ) if er @@ -562,6 +570,14 @@ def unnormalize( string, entities=nil, filter=nil ) end private + + def record_entity_expansion + @entity_expansion_count += 1 + if @entity_expansion_count > Security.entity_expansion_limit + raise "number of entity expansions exceeded, processing aborted." + end + end + def need_source_encoding_update?(xml_declaration_encoding) return false if xml_declaration_encoding.nil? return false if /\AUTF-16\z/i =~ xml_declaration_encoding @@ -571,14 +587,14 @@ def need_source_encoding_update?(xml_declaration_encoding) def parse_name(base_error_message) md = @source.match(Private::NAME_PATTERN, true) unless md - if @source.match(/\s*\S/um) + if @source.match(/\S/um) message = "#{base_error_message}: invalid name" else message = "#{base_error_message}: name is missing" end raise REXML::ParseException.new(message, @source) end - md[1] + md[0] end def parse_id(base_error_message, @@ -647,18 +663,24 @@ def parse_id_invalid_details(accept_external_id:, end end - def process_instruction(start_position) - match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM) - unless match_data - message = "Invalid processing instruction node" - @source.position = start_position - raise REXML::ParseException.new(message, @source) + def process_instruction + name = parse_name("Malformed XML: Invalid processing instruction node") + if @source.match(/\s+/um, true) + match_data = @source.match(/(.*?)\?>/um, true) + unless match_data + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) + end + content = match_data[1] + else + content = nil + unless @source.match("?>", true) + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) + end end - if match_data[1] == "xml" + if name == "xml" if @document_status raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) end - content = match_data[2] version = VERSION.match(content) version = version[1] unless version.nil? encoding = ENCODING.match(content) @@ -673,7 +695,7 @@ def process_instruction(start_position) standalone = standalone[1] unless standalone.nil? return [ :xmldecl, version, encoding, standalone ] end - [:processing_instruction, match_data[1], match_data[2]] + [:processing_instruction, name, content] end def parse_attributes(prefixes, curr_ns) diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index f8b232a2..36b45953 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -47,6 +47,10 @@ def add_listener( listener ) @listeners << listener end + def entity_expansion_count + @parser.entity_expansion_count + end + def each while has_next? yield self.pull diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 36f98c2a..cec9d2fc 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -22,6 +22,10 @@ def source @parser.source end + def entity_expansion_count + @parser.entity_expansion_count + end + def add_listener( listener ) @parser.add_listener( listener ) end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 573d0a13..39e92a57 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -31,7 +31,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.3.2" + VERSION = "3.3.3" REVISION = "" Copyright = COPYRIGHT diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 4c30532a..ff887fc0 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -117,7 +117,7 @@ def read_until(term) def ensure_buffer end - def match(pattern, cons=false, term: nil) + def match(pattern, cons=false) if cons @scanner.scan(pattern).nil? ? nil : @scanner else @@ -204,10 +204,20 @@ def initialize(arg, block_size=500, encoding=nil) end end - def read(term = nil) + def read(term = nil, min_bytes = 1) term = encode(term) if term begin - @scanner << readline(term) + str = readline(term) + @scanner << str + read_bytes = str.bytesize + begin + while read_bytes < min_bytes + str = readline(term) + @scanner << str + read_bytes += str.bytesize + end + rescue IOError + end true rescue Exception, NameError @source = nil @@ -237,10 +247,9 @@ def ensure_buffer read if @scanner.eos? && @source end - # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats: - # - ">" - # - "XXX>" (X is any string excluding '>') - def match( pattern, cons=false, term: nil ) + def match( pattern, cons=false ) + # To avoid performance issue, we need to increase bytes to read per scan + min_bytes = 1 while true if cons md = @scanner.scan(pattern) @@ -250,7 +259,8 @@ def match( pattern, cons=false, term: nil ) break if md return nil if pattern.is_a?(String) return nil if @source.nil? - return nil unless read(term) + return nil unless read(nil, min_bytes) + min_bytes *= 2 end md.nil? ? nil : @scanner diff --git a/test/parse/test_attlist.rb b/test/parse/test_attlist.rb deleted file mode 100644 index c1b4376c..00000000 --- a/test/parse/test_attlist.rb +++ /dev/null @@ -1,17 +0,0 @@ -require "test/unit" -require "core_assertions" - -require "rexml/document" - -module REXMLTests - class TestParseAttlist < Test::Unit::TestCase - include Test::Unit::CoreAssertions - - def test_linear_performance_gt - seq = [10000, 50000, 100000, 150000, 200000] - assert_linear_performance(seq, rehearsal: 10) do |n| - REXML::Document.new(']>') - end - end - end -end diff --git a/test/parse/test_attribute_list_declaration.rb b/test/parse/test_attribute_list_declaration.rb new file mode 100644 index 00000000..43882528 --- /dev/null +++ b/test/parse/test_attribute_list_declaration.rb @@ -0,0 +1,30 @@ +require "test/unit" +require "core_assertions" + +require "rexml/document" + +module REXMLTests + class TestParseAttributeListDeclaration < Test::Unit::TestCase + include Test::Unit::CoreAssertions + + def test_linear_performance_space + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("]>") + end + end + + def test_linear_performance_tab_and_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("" * n + + "\">]>") + end + end + end +end diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb index b7892232..4475dca7 100644 --- a/test/parse/test_comment.rb +++ b/test/parse/test_comment.rb @@ -110,6 +110,18 @@ def test_after_doctype_malformed_comment_end end end + def test_before_root + parser = REXML::Parsers::BaseParser.new('') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end + def test_after_root parser = REXML::Parsers::BaseParser.new('') diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 490a27d4..99c23745 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -280,23 +280,6 @@ def test_notation_attlist doctype.children.collect(&:class)) end - def test_linear_performance_percent_gt - seq = [10000, 50000, 100000, 150000, 200000] - assert_linear_performance(seq, rehearsal: 10) do |n| - begin - REXML::Document.new('" * n + ']>') - rescue - end - end - end - - def test_linear_performance_comment_gt - seq = [10000, 50000, 100000, 150000, 200000] - assert_linear_performance(seq, rehearsal: 10) do |n| - REXML::Document.new('" * n + ' -->]>') - end - end - private def parse(internal_subset) super(<<-DOCTYPE) @@ -306,5 +289,29 @@ def parse(internal_subset) DOCTYPE end end + + def test_linear_performance_percent_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + begin + REXML::Document.new("" * n + "]>") + rescue + end + end + end + + def test_linear_performance_comment_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("" * n + " -->]>") + end + end + + def test_linear_performance_external_entity_right_bracket_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("" * n + ";]>") + end + end end end diff --git a/test/parse/test_entity_declaration.rb b/test/parse/test_entity_declaration.rb index 7d750b90..81d95b58 100644 --- a/test/parse/test_entity_declaration.rb +++ b/test/parse/test_entity_declaration.rb @@ -1,9 +1,13 @@ # frozen_string_literal: false -require 'test/unit' -require 'rexml/document' +require "test/unit" +require "core_assertions" + +require "rexml/document" module REXMLTests class TestParseEntityDeclaration < Test::Unit::TestCase + include Test::Unit::CoreAssertions + private def xml(internal_subset) <<-XML @@ -18,6 +22,487 @@ def parse(internal_subset) REXML::Document.new(xml(internal_subset)).doctype end + public + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-GEDecl + class TestGeneralEntityDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name + class TestName < self + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + invalid&name "valid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityDef + class TestEntityDefinition < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue + class TestEntityValue < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 59 +Last 80 unconsumed characters: + valid-name invalid-entity-value>]> + DETAIL + end + + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 44 +Last 80 unconsumed characters: + valid-name "% &">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + valid-name "invalid-entity-value'>]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID + class TestExternalID < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral + class TestSystemLiteral < self + def test_no_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 68 +Last 80 unconsumed characters: + valid-name SYSTEM invalid-system-literal>]> + DETAIL + end + + def test_no_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 90 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]> + DETAIL + end + + def test_mixed_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 70 +Last 80 unconsumed characters: + valid-name SYSTEM 'invalid-system-literal">]> + DETAIL + end + + def test_mixed_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" "invalid-system-literal'>]> + DETAIL + end + + def test_no_literal_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 45 +Last 80 unconsumed characters: + valid-name SYSTEM>]> + DETAIL + end + + def test_no_literal_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 67 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar + class TestPublicIDLiteral < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 90 +Last 80 unconsumed characters: + valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]> + DETAIL + end + + def test_prohibited_pubid_character + exception = assert_raise(REXML::ParseException) do + # U+3042 HIRAGANA LETTER A + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8')) +Malformed entity declaration +Line: 1 +Position: 74 +Last 80 unconsumed characters: + valid-name PUBLIC "\u3042" "valid-system-literal">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + valid-name PUBLIC "invalid-pubid-literal' "valid-system-literal">]> + DETAIL + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 45 +Last 80 unconsumed characters: + valid-name PUBLIC>]> + DETAIL + end + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NDataDecl + class TestNotationDataDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 109 +Last 80 unconsumed characters: + valid-name PUBLIC "valid-pubid-literal" "valid-system-literal" NDATA invalid&nam + DETAIL + end + end + + def test_entity_value_and_notation_data_declaration + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 83 +Last 80 unconsumed characters: + valid-name "valid-entity-value" NDATA valid-ndata-value>]> + DETAIL + end + end + + def test_no_space + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 102 +Last 80 unconsumed characters: + valid-namePUBLIC"valid-pubid-literal""valid-system-literal"NDATAvalid-name>]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDecl + class TestParsedEntityDeclaration < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name + class TestName < self + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 63 +Last 80 unconsumed characters: + % invalid&name "valid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDef + class TestParsedEntityDefinition < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue + class TestEntityValue < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 61 +Last 80 unconsumed characters: + % valid-name invalid-entity-value>]> + DETAIL + end + + def test_prohibited_character + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 46 +Last 80 unconsumed characters: + % valid-name "% &">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 63 +Last 80 unconsumed characters: + % valid-name 'invalid-entity-value">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID + class TestExternalID < self + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral + class TestSystemLiteral < self + def test_no_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 70 +Last 80 unconsumed characters: + % valid-name SYSTEM invalid-system-literal>]> + DETAIL + end + + def test_no_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + % valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]> + DETAIL + end + + def test_mixed_quote_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 72 +Last 80 unconsumed characters: + % valid-name SYSTEM "invalid-system-literal'>]> + DETAIL + end + + def test_mixed_quote_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 94 +Last 80 unconsumed characters: + % valid-name PUBLIC "valid-pubid-literal" 'invalid-system-literal">]> + DETAIL + end + + def test_no_literal_in_system + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 47 +Last 80 unconsumed characters: + % valid-name SYSTEM>]> + DETAIL + end + + def test_no_literal_in_public + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 69 +Last 80 unconsumed characters: + % valid-name PUBLIC "valid-pubid-literal">]> + DETAIL + end + end + + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral + # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar + class TestPublicIDLiteral < self + def test_no_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 92 +Last 80 unconsumed characters: + % valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]> + DETAIL + end + + def test_prohibited_pubid_character + exception = assert_raise(REXML::ParseException) do + # U+3042 HIRAGANA LETTER A + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8')) +Malformed entity declaration +Line: 1 +Position: 76 +Last 80 unconsumed characters: + % valid-name PUBLIC "\u3042" "valid-system-literal">]> + DETAIL + end + + def test_mixed_quote + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 94 +Last 80 unconsumed characters: + % valid-name PUBLIC 'invalid-pubid-literal" "valid-system-literal">]> + DETAIL + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 47 +Last 80 unconsumed characters: + % valid-name PUBLIC>]> + DETAIL + end + end + end + + def test_entity_value_and_notation_data_declaration + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 85 +Last 80 unconsumed characters: + % valid-name "valid-entity-value" NDATA valid-ndata-value>]> + DETAIL + end + end + + def test_no_space + exception = assert_raise(REXML::ParseException) do + REXML::Document.new("]>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed entity declaration +Line: 1 +Position: 67 +Last 80 unconsumed characters: + %valid-nameSYSTEM"valid-system-literal">]> + DETAIL + end + end + def test_empty exception = assert_raise(REXML::ParseException) do parse(<<-INTERNAL_SUBSET) @@ -25,18 +510,47 @@ def test_empty INTERNAL_SUBSET end assert_equal(<<-DETAIL.chomp, exception.to_s) -Malformed notation declaration: name is missing +Malformed entity declaration Line: 5 -Position: 72 +Position: 70 Last 80 unconsumed characters: - ]> +> ]> DETAIL end - def test_linear_performance_gt + def test_linear_performance_entity_value_gt + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("" * n + + "\">]>") + end + end + + def test_linear_performance_entity_value_gt_right_bracket + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("]" * n + + "\">]>") + end + end + + def test_linear_performance_system_literal_in_system_gt_right_bracket + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new("]" * n + + "\">]>") + end + end + + def test_linear_performance_system_literal_in_public_gt_right_bracket seq = [10000, 50000, 100000, 150000, 200000] assert_linear_performance(seq, rehearsal: 10) do |n| - REXML::Document.new('' * n + '">') + REXML::Document.new("]" * n + + "\">]>") end end end diff --git a/test/parse/test_processing_instruction.rb b/test/parse/test_processing_instruction.rb index 7943cd3c..ba381dc4 100644 --- a/test/parse/test_processing_instruction.rb +++ b/test/parse/test_processing_instruction.rb @@ -4,7 +4,7 @@ require "rexml/document" module REXMLTests - class TestParseProcessinInstruction < Test::Unit::TestCase + class TestParseProcessingInstruction < Test::Unit::TestCase include Test::Unit::CoreAssertions def parse(xml) @@ -17,31 +17,38 @@ def test_no_name parse("") end assert_equal(<<-DETAIL.chomp, exception.to_s) -Invalid processing instruction node +Malformed XML: Invalid processing instruction node: invalid name Line: 1 Position: 4 Last 80 unconsumed characters: - +?> DETAIL end - def test_garbage_text - # TODO: This should be parse error. - # Create test/parse/test_document.rb or something and move this to it. - doc = parse(<<-XML) -x?> - - XML - pi = doc.children[1] - assert_equal([ - "x", - "y\n?> + + XML + assert_equal([["x", "y\n"]], + [[doc.children[0].target, doc.children[0].content], + [doc.children[1].target, doc.children[1].content]]) + end + + def test_before_root + parser = REXML::Parsers::BaseParser.new('') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end + def test_after_root parser = REXML::Parsers::BaseParser.new('') @@ -74,10 +105,22 @@ def test_after_root assert_equal("abc", events[:processing_instruction]) end + def test_content_question + document = REXML::Document.new("") + assert_equal("con?tent", document.root.children.first.content) + end + def test_linear_performance_gt seq = [10000, 50000, 100000, 150000, 200000] assert_linear_performance(seq, rehearsal: 10) do |n| - REXML::Document.new('" * n + ' ?>') + REXML::Document.new("" * n + " ?>") + end + end + + def test_linear_performance_tab + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new(" ?>") end end end diff --git a/test/parse/test_text.rb b/test/parse/test_text.rb index 1acefc40..04f553ae 100644 --- a/test/parse/test_text.rb +++ b/test/parse/test_text.rb @@ -4,6 +4,23 @@ module REXMLTests class TestParseText < Test::Unit::TestCase class TestInvalid < self + def test_before_root + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('b') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Content at the start of the document (got 'b') + Line: 1 + Position: 4 + Last 80 unconsumed characters: + + DETAIL + end + def test_after_root exception = assert_raise(REXML::ParseException) do parser = REXML::Parsers::BaseParser.new('c') diff --git a/test/test_document.rb b/test/test_document.rb index 33cf4002..0764631d 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -41,7 +41,7 @@ def teardown class GeneralEntityTest < self def test_have_value - xml = < @@ -55,23 +55,24 @@ def test_have_value &a; -EOF +XML doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("entity expansion has grown too large")) do doc.root.children.first.value end + REXML::Security.entity_expansion_limit = 100 assert_equal(100, REXML::Security.entity_expansion_limit) doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end assert_equal(101, doc.entity_expansion_count) end def test_empty_value - xml = < @@ -85,23 +86,24 @@ def test_empty_value &a; -EOF +XML doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end + REXML::Security.entity_expansion_limit = 100 assert_equal(100, REXML::Security.entity_expansion_limit) doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end assert_equal(101, doc.entity_expansion_count) end def test_with_default_entity - xml = < @@ -112,14 +114,15 @@ def test_with_default_entity &a2; < -EOF +XML REXML::Security.entity_expansion_limit = 4 doc = REXML::Document.new(xml) assert_equal("\na\na a\n<\n", doc.root.children.first.value) + REXML::Security.entity_expansion_limit = 3 doc = REXML::Document.new(xml) - assert_raise(RuntimeError) do + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do doc.root.children.first.value end end diff --git a/test/test_pullparser.rb b/test/test_pullparser.rb index 096e8b7f..55205af8 100644 --- a/test/test_pullparser.rb +++ b/test/test_pullparser.rb @@ -155,5 +155,101 @@ def test_peek end assert_equal( 0, names.length ) end + + class EntityExpansionLimitTest < Test::Unit::TestCase + def setup + @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + end + + def teardown + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + end + + class GeneralEntityTest < self + def test_have_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + while parser.has_next? + parser.pull + end + end + end + + def test_empty_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + while parser.has_next? + parser.pull + end + end + + REXML::Security.entity_expansion_limit = 100 + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + while parser.has_next? + parser.pull + end + end + assert_equal(101, parser.entity_expansion_count) + end + + def test_with_default_entity + source = <<-XML + + + +]> + +&a; +&a2; +< + + XML + + REXML::Security.entity_expansion_limit = 4 + parser = REXML::Parsers::PullParser.new(source) + while parser.has_next? + parser.pull + end + + REXML::Security.entity_expansion_limit = 3 + parser = REXML::Parsers::PullParser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + while parser.has_next? + parser.pull + end + end + end + end + end end end diff --git a/test/test_sax.rb b/test/test_sax.rb index 5a3f5e4e..5e3ad75b 100644 --- a/test/test_sax.rb +++ b/test/test_sax.rb @@ -99,6 +99,92 @@ def test_sax2 end end + class EntityExpansionLimitTest < Test::Unit::TestCase + def setup + @default_entity_expansion_limit = REXML::Security.entity_expansion_limit + end + + def teardown + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + end + + class GeneralEntityTest < self + def test_have_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + sax.parse + end + end + + def test_empty_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + sax.parse + end + + REXML::Security.entity_expansion_limit = 100 + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + sax.parse + end + assert_equal(101, sax.entity_expansion_count) + end + + def test_with_default_entity + source = <<-XML + + + +]> + +&a; +&a2; +< + + XML + + REXML::Security.entity_expansion_limit = 4 + sax = REXML::Parsers::SAX2Parser.new(source) + sax.parse + + REXML::Security.entity_expansion_limit = 3 + sax = REXML::Parsers::SAX2Parser.new(source) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + sax.parse + end + end + end + end + # used by test_simple_doctype_listener # submitted by Jeff Barczewski class SimpleDoctypeListener