| 1 | # coding: binary |
|---|
| 2 | require "test/unit/testcase" |
|---|
| 3 | |
|---|
| 4 | require 'rexml/source' |
|---|
| 5 | |
|---|
| 6 | class EncodingTester < Test::Unit::TestCase |
|---|
| 7 | include REXML |
|---|
| 8 | |
|---|
| 9 | TEST_DIR="test/data" |
|---|
| 10 | |
|---|
| 11 | def setup |
|---|
| 12 | @encoded = "<?xml version='1.0' encoding='ISO-8859-3'?>"+ |
|---|
| 13 | "<a><b>\346</b></a>" |
|---|
| 14 | @not_encoded = "<a><b>Ä</b></a>" |
|---|
| 15 | end |
|---|
| 16 | |
|---|
| 17 | # Given an encoded document, try to write out to that encoding |
|---|
| 18 | def test_encoded_in_encoded_out |
|---|
| 19 | doc = Document.new( @encoded ) |
|---|
| 20 | doc.write( out="" ) |
|---|
| 21 | out.force_encoding('binary') if out.respond_to? :force_encoding |
|---|
| 22 | assert_equal( @encoded, out ) |
|---|
| 23 | end |
|---|
| 24 | |
|---|
| 25 | # Given an encoded document, try to change the encoding and write it out |
|---|
| 26 | def test_encoded_in_change_out |
|---|
| 27 | doc = Document.new( @encoded ) |
|---|
| 28 | doc.xml_decl.encoding = "UTF-8" |
|---|
| 29 | assert_equal( doc.encoding, "UTF-8" ) |
|---|
| 30 | REXML::Formatters::Default.new.write( doc.root, out="" ) |
|---|
| 31 | out.force_encoding('binary') if out.respond_to? :force_encoding |
|---|
| 32 | assert_equal( @not_encoded, out ) |
|---|
| 33 | char = XPath.first( doc, "/a/b/text()" ).to_s |
|---|
| 34 | char.force_encoding('binary') if char.respond_to? :force_encoding |
|---|
| 35 | assert_equal( "Ä", char ) |
|---|
| 36 | end |
|---|
| 37 | |
|---|
| 38 | # * Given an encoded document, try to write it to a different encoding |
|---|
| 39 | def test_encoded_in_different_out |
|---|
| 40 | doc = Document.new( @encoded ) |
|---|
| 41 | REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) ) |
|---|
| 42 | out.force_encoding('binary') if out.respond_to? :force_encoding |
|---|
| 43 | assert_equal( @not_encoded, out ) |
|---|
| 44 | end |
|---|
| 45 | |
|---|
| 46 | # * Given a non-encoded document, change the encoding |
|---|
| 47 | def test_in_change_out |
|---|
| 48 | doc = Document.new( @not_encoded ) |
|---|
| 49 | doc.xml_decl.encoding = "ISO-8859-3" |
|---|
| 50 | assert_equal( doc.encoding, "ISO-8859-3" ) |
|---|
| 51 | doc.write( out="" ) |
|---|
| 52 | out.force_encoding('binary') if out.respond_to? :force_encoding |
|---|
| 53 | assert_equal( @encoded, out ) |
|---|
| 54 | end |
|---|
| 55 | |
|---|
| 56 | # * Given a non-encoded document, write to a different encoding |
|---|
| 57 | def test_in_different_out |
|---|
| 58 | doc = Document.new( @not_encoded ) |
|---|
| 59 | doc.write( Output.new( out="", "ISO-8859-3" ) ) |
|---|
| 60 | out.force_encoding('binary') if out.respond_to? :force_encoding |
|---|
| 61 | assert_equal( @encoded, out ) |
|---|
| 62 | end |
|---|
| 63 | |
|---|
| 64 | # * Given an encoded document, accessing text and attribute nodes |
|---|
| 65 | # should provide UTF-8 text. |
|---|
| 66 | def test_in_different_access |
|---|
| 67 | doc = Document.new <<-EOL |
|---|
| 68 | <?xml version='1.0' encoding='ISO-8859-1'?> |
|---|
| 69 | <a a="ÿ">ÿ</a> |
|---|
| 70 | EOL |
|---|
| 71 | expect = "\303\277" |
|---|
| 72 | expect.force_encoding('UTF-8') if expect.respond_to? :force_encoding |
|---|
| 73 | assert_equal( expect, doc.elements['a'].attributes['a'] ) |
|---|
| 74 | assert_equal( expect, doc.elements['a'].text ) |
|---|
| 75 | end |
|---|
| 76 | |
|---|
| 77 | |
|---|
| 78 | def test_ticket_89 |
|---|
| 79 | doc = Document.new <<-EOL |
|---|
| 80 | <?xml version="1.0" encoding="CP-1252" ?> |
|---|
| 81 | <xml><foo></foo></xml> |
|---|
| 82 | EOL |
|---|
| 83 | |
|---|
| 84 | REXML::Document.new doc |
|---|
| 85 | end |
|---|
| 86 | |
|---|
| 87 | def test_ticket_110 |
|---|
| 88 | utf16 = REXML::Document.new(File.new(File.join(TEST_DIR,"ticket_110_utf16.xml"))) |
|---|
| 89 | assert_equal( "UTF-16", utf16.encoding ) |
|---|
| 90 | assert( utf16[0].kind_of?(REXML::XMLDecl)) |
|---|
| 91 | end |
|---|
| 92 | end |
|---|