This file is indexed.

/usr/share/perl5/Gscan2pdf/Ocropus.pm is in gscan2pdf 1.2.3-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package Gscan2pdf::Ocropus;

use 5.008005;
use strict;
use warnings;
use Carp;
use File::Temp;    # To create temporary files
use File::Basename;
use HTML::Entities;
use Encode;
use English qw( -no_match_vars );    # for $PROCESS_ID

our $VERSION = '1.2.3';

# File-scoped state shared by setup() and hocr():
#  $exe       - command string used to run ocroscript with a recognition script
#  $installed - set to 1 by setup() once ocroscript and a script were found
#  $setup     - set to 1 once setup() has run its probe
#  $logger    - logger object handed to setup()
my ( $exe, $installed, $setup, $logger );

# Probe for the ocroscript executable and one of its recognition scripts.
#
# Arguments: the class name (unused) and a logger object providing info()
# and warn().  A defined logger is stored for later use by this module; an
# undefined one never replaces a logger that has already been stored.
#
# The probe runs only on the first call; later calls return the cached
# result.  Returns 1 if ocroscript and either recognize.lua or rec-tess.lua
# were found, otherwise undef.
sub setup {
 my ( $class, $new_logger ) = @_;

 # Only replace the stored logger when one was actually supplied, so that a
 # bare setup() call cannot leave hocr() without anything to log to.
 if ( defined $new_logger ) { $logger = $new_logger }
 return $installed if $setup;
 if ( system("which ocroscript > /dev/null 2> /dev/null") == 0 ) {
  my $env = $ENV{OCROSCRIPTS};

  if ( not defined $env ) {

   # No early exit: if both directories exist, the last one listed wins.
   for my $prefix (qw(/usr /usr/local)) {
    my $dir = "$prefix/share/ocropus/scripts";
    if ( -d $dir ) { $env = $dir }
   }
  }
  if ( defined $env ) {
   my $script;
   if ( -f "$env/recognize.lua" ) {
    $script = 'recognize';
   }
   elsif ( -f "$env/rec-tess.lua" ) {
    $script = 'rec-tess';
   }
   if ( defined $script ) {
    $exe       = "ocroscript $script";
    $installed = 1;
    if ( defined $logger ) {
     $logger->info("Using ocroscript with $script.");
    }
   }
   elsif ( defined $logger ) {
    $logger->warn("Found ocroscript, but no recognition scripts. Disabling.");
   }
  }
  elsif ( defined $logger ) {
   $logger->warn("Found ocroscript, but not its scripts. Disabling.");
  }
 }
 $setup = 1;
 return $installed;
}

# Run ocropus on an image and return the text it prints.
#
# Arguments:
#  $class    - class name (unused)
#  $file     - path of the image to recognise
#  $language - optional; if true, passed to the command as tesslanguage=...
#  $loggr    - logger, handed to setup() if setup has not run yet
#  $pidfile  - optional; if defined, the shell writes this process's ID to
#              that file before running the command
#
# Files whose names do not end in .png, .jpg or .pnm are first converted to
# a temporary PNG with Image::Magick.  Returns the command output with HTML
# entities decoded, or nothing if ocropus is not available.
sub hocr {
 my ( $class, $file, $language, $loggr, $pidfile ) = @_;
 my ( $png, $cmd );
 if ( not $setup ) { Gscan2pdf::Ocropus->setup($loggr) }

 # Prefer the logger stored by setup(); fall back to the one passed in.
 my $log = defined $logger ? $logger : $loggr;

 # Without a recognition command $exe is undefined, and the image path on
 # its own would be handed to the shell as the command to execute.
 if ( not $installed ) {
  if ( defined $log ) {
   $log->warn('ocropus is not available; skipping recognition.');
  }
  return;
 }

 if ( $file !~ /\.(?:png|jpg|pnm)$/xsm ) {

  # Temporary filename for new file
  $png = File::Temp->new( SUFFIX => '.png' );
  my $image = Image::Magick->new;

  # PerlMagick reports failures through the stringified return value.
  my $status = $image->Read($file);
  if ( "$status" and defined $log ) { $log->warn("$status") }
  $status = $image->Write( filename => $png );
  if ( "$status" and defined $log ) { $log->warn("$status") }
 }
 else {
  $png = $file;
 }

 # NOTE(review): the path is interpolated unquoted into a shell command, so
 # names containing spaces or shell metacharacters will break it - confirm
 # that callers only pass safe paths.
 if ($language) {
  $cmd = "tesslanguage=$language $exe $png";
 }
 else {
  $cmd = "$exe $png";
 }
 if ( defined $log ) { $log->info($cmd) }

 # Optionally record the process ID first, then run the recognition.
 if ( defined $pidfile ) {
  $cmd = "echo $PROCESS_ID > $pidfile;$cmd";
 }
 my ($output) = Gscan2pdf::Document::open_three($cmd);

 # decode html->utf8
 my $decoded = decode_entities($output);

 # Unfortunately, there seems to be a case (tested in t/31_ocropus_utf8.t)
 # where decode_entities doesn't work cleanly, so encode/decode to finally
 # get good UTF-8
 return decode_utf8( encode_utf8($decoded) );
}

1;

__END__