java.lang.Object
org.apache.lucene.analysis.en.KStemmer
This class implements the Kstem algorithm.
-
Nested Class Summary
Nested Classes -
Field Summary
Fields
Modifier and Type | Field | Description
private static char[] | ation |
private static final String[][] | countryNationality |
private static final CharArrayMap<KStemmer.DictEntry> | dict_ht |
private static final String[][] | directConflations |
private static final String[] | exceptionWords |
private static char[] | ication |
private static char[] | ition |
private static char[] | ization |
private int | j |
private int | k |
(package private) KStemmer.DictEntry | matchedEntry |
private static final int | MaxWordLen |
private static final String[] | properNouns |
(package private) String | result |
private static final String[] | supplementDict |
private final OpenStringBuilder | word | caching off private int maxCacheSize; private CharArrayMap cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same
-
Constructor Summary
Constructors -
Method Summary
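Modifier and Type | Method | Description
private void | alEndings() |
(package private) CharSequence | asCharSequence() |
private void | aspect() |
(package private) String | asString() | Returns the result of the stem (assuming the word was changed) as a String.
private void | bleEndings() |
private boolean | doubleC(int i) |
private boolean | endsIn(char[] s) |
private boolean | endsIn(char a, char b) |
private boolean | endsIn(char a, char b, char c) |
private boolean | endsIn(char a, char b, char c, char d) |
private void | erAndOrEndings() |
(package private) char[] | getChars() |
(package private) int | getLength() |
(package private) String | getString() |
private void | icEndings() |
private static CharArrayMap<KStemmer.DictEntry> | initializeDictHash |
private void | ionEndings() |
private boolean | isAlpha(char ch) |
private boolean | isCons(int index) |
private void | ismEndings() |
private boolean | isVowel(int index) |
private void | ityEndings() |
private void | iveEndings() |
private void | izeEndings() |
private boolean | lookup() |
private void | lyEndings() |
private boolean | matched() |
private void | mentEndings() |
private void | nceEndings() |
private void | ncyEndings() |
private void | nessEndings() |
private void | pastTense() |
private char | penultChar() |
private void | plural() |
private void | setSuff |
private void | setSuffix |
(package private) boolean | stem(char[] term, int len) | Stems the text in the token.
(package private) String | stem |
private int | stemLength() |
private boolean | vowelInStem() |
private KStemmer.DictEntry | wordInDict |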
-
Field Details
-
MaxWordLen
private static final int MaxWordLen
- See Also: Constant Field Values
-
exceptionWords
-
directConflations
-
countryNationality
-
supplementDict
-
properNouns
-
dict_ht
-
word
private final OpenStringBuilder word
caching off private int maxCacheSize; private CharArrayMap cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same -
j
private int j -
k
private int k -
matchedEntry
KStemmer.DictEntry matchedEntry -
ization
private static char[] ization -
ition
private static char[] ition -
ation
private static char[] ation -
ication
private static char[] ication -
result
String result
-
-
Constructor Details
-
KStemmer
KStemmer()
-
-
Method Details
-
penultChar
private char penultChar() -
isVowel
private boolean isVowel(int index) -
isCons
private boolean isCons(int index) -
initializeDictHash
-
isAlpha
private boolean isAlpha(char ch) -
stemLength
private int stemLength() -
endsIn
private boolean endsIn(char[] s) -
endsIn
private boolean endsIn(char a, char b) -
endsIn
private boolean endsIn(char a, char b, char c) -
endsIn
private boolean endsIn(char a, char b, char c, char d) -
wordInDict
-
plural
private void plural() -
setSuffix
-
setSuff
-
lookup
private boolean lookup() -
pastTense
private void pastTense() -
doubleC
private boolean doubleC(int i) -
vowelInStem
private boolean vowelInStem() -
aspect
private void aspect() -
ityEndings
private void ityEndings() -
nceEndings
private void nceEndings() -
nessEndings
private void nessEndings() -
ismEndings
private void ismEndings() -
mentEndings
private void mentEndings() -
izeEndings
private void izeEndings() -
ncyEndings
private void ncyEndings() -
bleEndings
private void bleEndings() -
icEndings
private void icEndings() -
ionEndings
private void ionEndings() -
erAndOrEndings
private void erAndOrEndings() -
lyEndings
private void lyEndings() -
alEndings
private void alEndings() -
iveEndings
private void iveEndings() -
stem
-
asString
String asString()
Returns the result of the stem (assuming the word was changed) as a String. -
asCharSequence
CharSequence asCharSequence() -
getString
String getString() -
getChars
char[] getChars() -
getLength
int getLength() -
matched
private boolean matched() -
stem
boolean stem(char[] term, int len)
Stems the text in the token. Returns true if changed.
-