This file is indexed.

/usr/include/tesseract/unicodes.h is in libtesseract-dev 3.02.01-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/**********************************************************************
 * File:        unicodes.h
 * Description: Unicode related machinery
 * Author:      David Eger
 * Created:     Wed Jun 15 16:37:50 PST 2011
 *
 * (C) Copyright 2011, Google, Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef TESSERACT_CCUTIL_UNICODES_H__
#define TESSERACT_CCUTIL_UNICODES_H__

namespace tesseract {

extern const char *kUTF8LineSeparator;
extern const char *kUTF8ParagraphSeparator;
extern const char *kLRM;  // Left-to-Right Mark
extern const char *kRLM;  // Right-to-Left Mark
extern const char *kRLE;  // Right-to-Left Embedding
extern const char *kPDF;  // Pop Directional Formatting

// The following are confusable internal word punctuation symbols
// which we normalize to the first variant when matching in dawgs.
extern const char *kHyphenLikeUTF8[];
extern const char *kApostropheLikeUTF8[];

}  // namespace

#endif  // TESSERACT_CCUTIL_UNICODES_H__