/usr/include/CLucene/analysis/de/GermanAnalyzer.h is in libclucene-dev 2.3.3.4-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2010 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_analysis_de_GermanAnalyzer
#define _lucene_analysis_de_GermanAnalyzer
CL_NS_DEF2(analysis,de)
/**
* Analyzer for the German language. Supports an external list of stopwords (words
* that will not be indexed at all) and an external list of exclusions (words that
* will not be stemmed, but are still indexed).
* A default set of stopwords is used unless an alternative list is specified; the
* exclusion list is empty by default.
*
* Ported from the Java Lucene class of the same name (see the version tag below).
*
* @version $Id: GermanAnalyzer.java 564236 2007-08-09 15:21:19Z gsingers $
*/
class CLUCENE_CONTRIBS_EXPORT GermanAnalyzer : public CL_NS(analysis)::Analyzer {
public:
/**
* List of typical German stopwords.
*/
static CL_NS(util)::ConstValueArray<const TCHAR*> GERMAN_STOP_WORDS;
private:
/**
* Forward declaration only; the type is defined outside this header.
* NOTE(review): presumably holds the cached streams handed out by
* reusableTokenStream() -- confirm in the implementation file.
*/
class SavedStreams;
/**
* Contains the stopwords used with the StopFilter.
*/
CL_NS(analysis)::CLTCSetList* stopSet;
/**
* Contains words that should be indexed but not stemmed.
*/
CL_NS(analysis)::CLTCSetList* exclusionSet;
public:
/**
* Builds an analyzer with the default stop words
* (<code>GERMAN_STOP_WORDS</code>).
*/
GermanAnalyzer();
/**
* Builds an analyzer with the stop words given as an array of strings.
*
* @param stopWords array of stop words. No length is passed, so the array is
*        presumably NULL-terminated -- TODO confirm in the implementation.
*/
GermanAnalyzer(const TCHAR** stopWords);
/**
* Builds an analyzer with the stop words given as a set.
*
* @param stopwords set of stop words.
*        NOTE(review): whether the analyzer copies the set or takes ownership of
*        the pointer is not visible from this header -- confirm before deleting it.
*/
GermanAnalyzer(CL_NS(analysis)::CLTCSetList* stopwords);
/**
* Builds an analyzer with the stop words contained in the given file.
*
* @param stopwordsFile path of the file holding the stop words.
* @param enc name of the file's character encoding; defaults to NULL
*        (the encoding used in that case is decided by the implementation).
*/
GermanAnalyzer(const char* stopwordsFile, const char* enc = NULL);
/**
* Builds an analyzer with the stop words read from the given reader.
*
* @param stopwordsReader reader supplying the stop words.
* @param deleteReader defaults to false. Presumably, when true, the analyzer
*        deletes the reader once the words have been read -- TODO confirm.
*/
GermanAnalyzer(CL_NS(util)::Reader* stopwordsReader, const bool deleteReader = false);
/**
* Destructor. Virtual, so an analyzer may be destroyed through a base-class pointer.
*/
virtual ~GermanAnalyzer();
/**
* Builds an exclusionlist from an array of strings.
*
* @param exclusionlist array of words that must not be stemmed. No length is
*        passed, so the array is presumably NULL-terminated -- TODO confirm.
*/
void setStemExclusionTable(const TCHAR** exclusionlist);
/**
* Builds an exclusionlist from a set of words.
*
* @param exclusionlist set of words that must not be stemmed.
*        NOTE(review): copy-vs-ownership semantics are not visible from this header.
*/
void setStemExclusionTable(CL_NS(analysis)::CLTCSetList* exclusionlist);
/**
* Builds an exclusionlist from the words contained in the given file.
*
* @param exclusionlistFile path of the file holding the words that must not be stemmed.
* @param enc name of the file's character encoding; defaults to NULL
*        (the encoding used in that case is decided by the implementation).
*/
void setStemExclusionTable(const char* exclusionlistFile, const char* enc = NULL);
/**
* Builds an exclusionlist from the words read from the given reader.
*
* @param exclusionlistReader reader supplying the words that must not be stemmed.
* @param deleteReader defaults to false. Presumably, when true, the reader is
*        deleted once the words have been read -- TODO confirm.
*/
void setStemExclusionTable(CL_NS(util)::Reader* exclusionlistReader, const bool deleteReader = false);
/**
* Creates a TokenStream which tokenizes all the text in the provided Reader.
*
* @param fieldName name of the field the text belongs to.
* @param reader source of the text to tokenize.
* @return A TokenStream built from a StandardTokenizer filtered with
* StandardFilter, LowerCaseFilter, StopFilter, GermanStemFilter
*/
virtual TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
/**
* Variant of tokenStream() taking the same arguments.
* NOTE(review): going by the name and the private SavedStreams type, this
* presumably returns a cached stream that is reset onto the new reader instead
* of allocating a new filter chain per call -- confirm in the implementation,
* including who owns the returned pointer.
*
* @param fieldName name of the field the text belongs to.
* @param reader source of the text to tokenize.
*/
virtual TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
};
CL_NS_END2
#endif
|