/usr/share/perl5/Gscan2pdf/Ocropus.pm is in gscan2pdf 1.2.3-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package Gscan2pdf::Ocropus;
use 5.008005;
use strict;
use warnings;
use Carp;
use File::Temp; # To create temporary files
use File::Basename;
use HTML::Entities;
use Encode;
use English qw( -no_match_vars ); # for $PROCESS_ID
our $VERSION = '1.2.3';
my ( $exe, $installed, $setup, $logger );
# Detect whether a usable OCRopus installation is present.
#
# Called as a class method: Gscan2pdf::Ocropus->setup($logger).
#
# Parameters:
#   $class      - invocant (unused).
#   $new_logger - optional object providing info() and warn() methods.
#
# Returns $installed: 1 when ocroscript and one of its recognition scripts
# were found, otherwise undef. Detection runs only once; subsequent calls
# return the cached result.
sub setup {
    my ( $class, $new_logger ) = @_;

    # Adopt a logger only when one is actually supplied. Assigning
    # unconditionally would let a later argument-less call wipe out the
    # logger that hocr() relies on.
    if ( defined $new_logger ) { $logger = $new_logger }

    return $installed if $setup;

    if ( system("which ocroscript > /dev/null 2> /dev/null") == 0 ) {
        my $env = $ENV{OCROSCRIPTS};
        if ( not defined($env) ) {

            # There is deliberately no early exit here: when both
            # directories exist, the later prefix (/usr/local) wins.
            for (qw(/usr /usr/local)) {
                if ( -d "$_/share/ocropus/scripts" ) {
                    $env = "$_/share/ocropus/scripts";
                }
            }
        }
        if ( defined $env ) {

            # Prefer recognize.lua; fall back to rec-tess.lua.
            my $script;
            if ( -f "$env/recognize.lua" ) {
                $script = 'recognize';
            }
            elsif ( -f "$env/rec-tess.lua" ) {
                $script = 'rec-tess';
            }
            if ( defined $script ) {
                $exe       = "ocroscript $script";
                $installed = 1;
                if ( defined $logger ) {
                    $logger->info("Using ocroscript with $script.");
                }
            }
            elsif ( defined $logger ) {
                $logger->warn(
                    "Found ocroscript, but no recognition scripts. Disabling.");
            }
        }
        elsif ( defined $logger ) {
            $logger->warn("Found ocroscript, but not its scripts. Disabling.");
        }
    }

    # Mark detection as done even when nothing was found, so the probe is
    # not repeated on every call.
    $setup = 1;
    return $installed;
}
# Wrap a string in single quotes for the shell, escaping any embedded single
# quote, so that the value is passed through as one literal word.
sub _shell_quote {
    my ($string) = @_;
    $string =~ s/'/'\\''/gxsm;
    return "'$string'";
}

# Run ocroscript on an image and return its output as decoded text.
#
# Called as a class method:
#   Gscan2pdf::Ocropus->hocr($file, $language, $loggr, $pidfile).
#
# Parameters:
#   $class    - invocant (unused).
#   $file     - path of the image. Anything not ending in .png, .jpg or .pnm
#               is first converted to a temporary PNG with Image::Magick.
#   $language - optional; exported to the command as tesslanguage.
#   $loggr    - logger handed to setup() if setup() has not run yet; also
#               used for logging here when no logger has been stored.
#   $pidfile  - optional; when given, the shell writes the ID of the current
#               Perl process to this file before running the command.
#
# Returns the command's standard output with HTML entities decoded, or an
# empty string when no ocroscript executable was found by setup().
sub hocr {
    my ( $class, $file, $language, $loggr, $pidfile ) = @_;
    if ( not $setup ) { Gscan2pdf::Ocropus->setup($loggr) }

    # Prefer the logger stored by setup(); fall back to the one passed in so
    # that a missing stored logger does not abort the OCR run.
    my $log = defined $logger ? $logger : $loggr;

    # Without an executable the command line would begin with the image
    # path, and the shell would try to execute the image itself.
    if ( not defined $exe ) {
        my $message = 'ocroscript is not available; skipping OCR.';
        if   ( defined $log ) { $log->warn($message) }
        else                  { carp($message) }
        return q{};
    }

    my $png;
    if ( $file !~ /\.(?:png|jpg|pnm)$/xsm ) {

        # Nothing in this file loads Image::Magick, so load it on demand.
        require Image::Magick;

        # Temporary filename for new file. The object must stay in scope
        # until the command has finished, as File::Temp removes the file
        # when the object is destroyed.
        $png = File::Temp->new( SUFFIX => '.png' );
        my $image = Image::Magick->new;

        # PerlMagick methods report problems through their return value,
        # which stringifies to a non-empty message on warning or error.
        my $status = $image->Read($file);
        if ( "$status" and defined $log ) {
            $log->warn("Reading $file: $status");
        }
        $status = $image->Write( filename => $png->filename );
        if ( "$status" and defined $log ) {
            $log->warn("Writing $png: $status");
        }
    }
    else {
        $png = $file;
    }

    # The command is run through the shell, so every interpolated value is
    # quoted to survive spaces and shell metacharacters.
    my $cmd = "$exe " . _shell_quote("$png");
    if ($language) {
        $cmd = 'tesslanguage=' . _shell_quote($language) . " $cmd";
    }
    if ( defined $log ) { $log->info($cmd) }

    my $output;
    if ( defined $pidfile ) {
        ( $output, undef ) =
          Gscan2pdf::Document::open_three(
            "echo $PROCESS_ID > " . _shell_quote($pidfile) . ";$cmd" );
    }
    else {
        ( $output, undef ) = Gscan2pdf::Document::open_three($cmd);
    }

    # decode html->utf8
    my $decoded = decode_entities($output);

    # Unfortunately, there seems to be a case (tested in t/31_ocropus_utf8.t)
    # where decode_entities doesn't work cleanly, so encode/decode to finally
    # get good UTF-8
    return decode_utf8( encode_utf8($decoded) );
}
1;
__END__
|