This file is indexed.

/usr/share/perl5/Gscan2pdf/Cuneiform.pm is in gscan2pdf 1.2.3-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package Gscan2pdf::Cuneiform;

use 5.008005;
use strict;
use warnings;
use Carp;
use File::Temp;             # To create temporary files
use Gscan2pdf::Document;    # for slurp
use version;

our $VERSION = '1.2.3';

# File-scoped state shared by the subs below:
#   %languages - cache of language code => display name, filled by languages()
#   $version   - version string captured from the cuneiform banner by setup()
#   $setup     - true once setup() has examined the cuneiform banner
#   $logger    - logger object handed to setup(); its info() method is used
my ( %languages, $version, $setup, $logger );

# Locate the cuneiform executable and determine its version.
#
# Called as a class method: Gscan2pdf::Cuneiform->setup($logger).
# A defined $logger argument is stored in the file-scoped $logger so that
# languages() and hocr() can log through it; calling without a logger keeps
# whichever logger was stored before.
#
# Returns the version string captured from the "Cuneiform for Linux x.y.z"
# banner. Returns nothing if `which cuneiform` produced no output, in which
# case the lookup is retried on the next call. Once the banner has been
# examined the result is cached, so later calls return the stored version
# (undef if the banner was not recognised) without running cuneiform again.
sub setup {
 my ( $class, $log ) = @_;

 # Do not clobber an already stored logger with undef
 $logger = $log if defined $log;
 return $version if $setup;

 my ( $out, $err ) = Gscan2pdf::Document::open_three('which cuneiform');
 return if ( not defined($out) or $out eq '' );

 ( $out, $err ) = Gscan2pdf::Document::open_three('cuneiform');

 # open_three() may hand back undef (see the check above), so test before
 # matching to avoid an "uninitialized value" warning
 if ( defined $out and $out =~ /^Cuneiform\ for\ Linux\ ([\d\.]+)/x ) {
  $version = $1;
 }

 $setup = 1;
 return $version;
}

# Return a reference to a hash mapping the language codes reported by
# `cuneiform -l` to display names.
#
# Codes listed in the table below are mapped to their English names; any other
# code reported by cuneiform maps to itself. The hash is built on the first
# call and reused afterwards. If the output of cuneiform is not recognised,
# the hash stays empty and the command is run again on the next call.
#
# Logging goes through the logger stored by setup(); if none has been stored
# yet, the messages are skipped rather than dying on an undefined logger.
sub languages {
 unless (%languages) {

  # cuneiform language codes
  my %lang = (
   eng    => 'English',
   ger    => 'German',
   fra    => 'French',
   rus    => 'Russian',
   swe    => 'Swedish',
   spa    => 'Spanish',
   ita    => 'Italian',
   ruseng => 'Russian+English',
   ukr    => 'Ukrainian',
   srp    => 'Serbian',
   hrv    => 'Croatian',
   pol    => 'Polish',
   dan    => 'Danish',
   por    => 'Portuguese',
   dut    => 'Dutch',
   cze    => 'Czech',
   rum    => 'Romanian',
   hun    => 'Hungarian',
   bul    => 'Bulgarian',
   slo    => 'Slovak',
   slv    => 'Slovenian',
   lav    => 'Latvian',
   lit    => 'Lithuanian',
   est    => 'Estonian',
   tur    => 'Turkish',
  );

  # Dig out supported languages
  my $cmd = 'cuneiform -l';
  $logger->info($cmd) if defined $logger;
  my ($output) = Gscan2pdf::Document::open_three($cmd);

  # Treat missing output like empty output, so that neither the match nor the
  # log message below trips over an undefined value
  $output = '' if not defined $output;

  if ( $output =~ /Supported\ languages:\ (.*)\./x ) {
   my $langs = $1;
   for my $code ( split " ", $langs ) {
    $languages{$code} = defined $lang{$code} ? $lang{$code} : $code;
   }
  }
  elsif ( defined $logger ) {
   $logger->info("Unrecognised output from cuneiform: $output");
  }
 }
 return \%languages;
}

# Run cuneiform on an image and return its hOCR output.
#
# Called as a class method:
#   Gscan2pdf::Cuneiform->hocr( $file, $language, $logger, $pidfile )
#
#   $file     - image to recognise
#   $language - cuneiform language code, passed to -l
#   $loggr    - logger handed to setup() if setup() has not completed yet
#   $pidfile  - optional; if defined, the PID of this perl process is written
#               to it before cuneiform is started
#
# cuneiform versions before 1.1.0 are given a BMP: any file not ending in
# .bmp is first converted to a temporary TrueColor BMP with Image::Magick.
# If the version could not be determined, the same conversion is done, as
# BMP is the one format that is passed to every version.
#
# Returns whatever Gscan2pdf::Document::slurp() reads from the temporary
# output file.
sub hocr {
 my ( $class, $file, $language, $loggr, $pidfile ) = @_;
 Gscan2pdf::Cuneiform->setup($loggr) unless $setup;

 # Temporary filename for output
 my $txt = File::Temp->new( SUFFIX => '.txt' );

 # setup() leaves $version undefined if the banner was not recognised, and
 # version->parse('v') would die, so only compare a known version
 my $needs_bmp = ( not defined $version
    or version->parse("v$version") < version->parse('v1.1.0') );

 my $bmp;
 if ( $needs_bmp and $file !~ /\.bmp$/x ) {

  # This file never loads Image::Magick itself, so make sure it is there
  require Image::Magick;

  # Temporary filename for new file
  $bmp = File::Temp->new( SUFFIX => '.bmp' );
  my $image  = Image::Magick->new;
  my $status = $image->Read($file);
  $logger->info("Error reading $file: $status")
    if ( "$status" and defined $logger );

# Force TrueColor, as this produces DirectClass, which is what cuneiform expects.
# Without this, PseudoClass is often produced, for which cuneiform gives
# "PUMA_XFinalrecognition failed" warnings
  $status = $image->Write( filename => "$bmp", type => 'TrueColor' );
  $logger->info("Error writing $bmp: $status")
    if ( "$status" and defined $logger );
 }
 else {
  $bmp = $file;
 }

 # Build the command as a list, so that file names containing spaces or shell
 # metacharacters reach cuneiform intact and are never seen by a shell
 my @cmd =
   ( 'cuneiform', '-l', $language, '-f', 'hocr', '-o', "$txt", "$bmp" );
 $logger->info("@cmd") if defined $logger;

 if ( defined $pidfile ) {

  # Record the PID of this process. A failure to write the file is logged,
  # but cuneiform is still run.
  if ( open my $fh, '>', "$pidfile" ) {
   print {$fh} "$$\n";
   close $fh;
  }
  elsif ( defined $logger ) {
   $logger->info("Unable to write $pidfile: $!");
  }
 }

 my $rc = system(@cmd);
 if ( $rc != 0 and defined $logger ) {
  $logger->info(
   $rc == -1
   ? "Unable to run cuneiform: $!"
   : 'cuneiform exited with status ' . ( $rc >> 8 )
  );
 }
 return Gscan2pdf::Document::slurp($txt);
}

1;

__END__