/usr/share/perl5/XML/SAX/PurePerl/EncodingDetect.pm is in libxml-sax-perl 0.99+dfsg-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# $Id$
package XML::SAX::PurePerl; # NB, not ::EncodingDetect!
use strict;
# encoding_detect($parser, $reader)
#
# Inspects the leading bytes returned by $reader->data and compares them
# against a fixed list of byte-order marks and encoded forms of "<" / "<?".
#
#   $parser - first argument as passed by the caller; not used in this sub.
#   $reader - object that must provide data(), move_along($n) and
#             set_encoding($name).
#
# On the first matching signature:
#   * if the signature is a byte-order mark, $reader->move_along() is called
#     with the BOM length (presumably to step over it -- confirm against the
#     reader implementation);
#   * if the signature implies an encoding, $reader->set_encoding() is called
#     with that encoding's name;
#   * input starting with a plain "<" (or XML whitespace followed by "<" and
#     a character other than "?") is accepted without touching the reader.
#
# If nothing matches, a warning is emitted.  Always returns nothing.
sub encoding_detect {
    my ($parser, $reader) = @_;

    my $data = $reader->data;

    # Ordered table of [ pattern, BOM bytes to skip, encoding name ].
    #
    # Order is significant: the four-byte signatures have to be tried before
    # the two-byte BOMs that are prefixes of them (FF FE 00 00 before FF FE,
    # FE FF 00 00 before FE FF), and the multi-byte encodings of "<" before
    # the bare "<" entry.
    #
    # A skip count of 0 means move_along() is not called at all; an undefined
    # encoding means set_encoding() is not called at all.
    my @signatures = (
        # With a byte-order mark, four-byte zero-prefixed forms.
        [ qr/^\x00\x00\xFE\xFF/, 4, 'UCS-4BE'    ],
        [ qr/^\x00\x00\xFF\xFE/, 4, 'UCS-4-2143' ],

        # No BOM: "<" (0x3C) or "<?" laid out in a multi-byte encoding.
        [ qr/^\x00\x00\x00\x3C/, 0, 'UCS-4BE'    ],
        [ qr/^\x00\x00\x3C\x00/, 0, 'UCS-4-2143' ],
        [ qr/^\x00\x3C\x00\x00/, 0, 'UCS-4-3412' ],
        [ qr/^\x00\x3C\x00\x3F/, 0, 'UTF-16BE'   ],

        # Remaining byte-order marks, longest first.
        [ qr/^\xFF\xFE\x00\x00/, 4, 'UCS-4LE'    ],
        [ qr/^\xFF\xFE/,         2, 'UTF-16LE'   ],
        [ qr/^\xFE\xFF\x00\x00/, 4, 'UCS-4-3412' ],
        [ qr/^\xFE\xFF/,         2, 'UTF-16BE'   ],
        [ qr/^\xEF\xBB\xBF/,     3, 'UTF-8'      ],

        # No BOM, little-endian layouts of "<" / "<?".
        [ qr/^\x3C\x00\x00\x00/, 0, 'UCS-4LE'    ],
        [ qr/^\x3C\x00\x3F\x00/, 0, 'UTF-16LE'   ],

        # Plain single-byte "<": recognised, reader left as it is.  This one
        # entry also covers input starting with "<?", "<?x" and "<?xm".
        [ qr/^\x3C/,                             0, undef ],

        # XML whitespace, then "<" followed by anything except "?".
        [ qr/^[\x20\x09\x0A\x0D]+\x3C[^\x3F]/,   0, undef ],

        [ qr/^\x4C\x6F\xA7\x94/, 0, 'EBCDIC'     ],
    );

    for my $signature (@signatures) {
        my ($pattern, $bom_length, $encoding) = @$signature;
        next unless $data =~ $pattern;

        # Same call order as before: step over the BOM first, then switch
        # the encoding.
        $reader->move_along($bom_length) if $bom_length;
        $reader->set_encoding($encoding) if defined $encoding;
        return;
    }

    warn("Unable to recognise encoding of this document");
    return;
}
1;
|