本文整理汇总了Java中org.apache.lucene.analysis.CharArraySet类的典型用法代码示例。如果您正苦于以下问题:Java CharArraySet类的具体用法?Java CharArraySet怎么用?Java CharArraySet使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CharArraySet类属于org.apache.lucene.analysis包,在下文中一共展示了CharArraySet类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: PatternAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link PatternAnalyzer} from index settings.
 *
 * <p>Supported settings: {@code lowercase} (default {@code true}),
 * {@code pattern} (regex, default {@code \W+}), {@code flags} (regex flags),
 * plus the shared stop-word settings handled by {@link Analysis#parseStopWords}.
 *
 * @param indexSettings settings of the owning index
 * @param env           node environment, used to resolve stop-word files
 * @param name          configured analyzer name
 * @param settings      analyzer-specific settings
 */
public PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet defaultStopwords = CharArraySet.EMPTY_SET;
    boolean lowercase =
        settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "lowercase", true, deprecationLogger);
    CharArraySet stopWords = Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, defaultStopwords);
    // A default of \W+ (PatternAnalyzer.NON_WORD_PATTERN) is always supplied, so the
    // result is never null; the former `if (sPattern == null) throw ...` guard was
    // unreachable dead code and has been removed.
    String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    Pattern pattern = Regex.compile(sPattern, settings.get("flags"));
    analyzer = new PatternAnalyzer(pattern, lowercase, stopWords);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:PatternAnalyzerProvider.java
示例2: parseStemExclusion
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Resolves the {@code stem_exclusion} setting into a {@link CharArraySet}.
 *
 * <p>A comma-separated string value takes precedence over an array value;
 * the literal {@code _none_} yields the empty set. When neither form is
 * present, {@code defaultStemExclusion} is returned.
 *
 * @param defaultStemExclusion fallback set when nothing is configured
 * @return the configured stem-exclusion set (case-sensitive)
 */
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
    final String inlineValue = settings.get("stem_exclusion");
    if (inlineValue != null) {
        if ("_none_".equals(inlineValue)) {
            return CharArraySet.EMPTY_SET;
        }
        // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
        return new CharArraySet(Strings.commaDelimitedListToSet(inlineValue), false);
    }
    final String[] arrayValue = settings.getAsArray("stem_exclusion", null);
    if (arrayValue == null) {
        return defaultStemExclusion;
    }
    // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
    return new CharArraySet(Arrays.asList(arrayValue), false);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:Analysis.java
示例3: parseWords
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Resolves a word-list setting ({@code name}) into a {@link CharArraySet}.
 *
 * <p>Resolution order: an inline comma-separated value (where {@code _none_}
 * means the empty set), then a word list loaded from a file path, then
 * {@code defaultWords}. Entries prefixed to reference {@code namedWords}
 * sets are expanded by {@code resolveNamedWords}.
 *
 * @param ignoreCase whether the resulting set matches case-insensitively
 */
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
                                      Map<String, Set<?>> namedWords, boolean ignoreCase) {
    final String inline = settings.get(name);
    if (inline != null) {
        return "_none_".equals(inline)
                ? CharArraySet.EMPTY_SET
                : resolveNamedWords(Strings.commaDelimitedListToSet(inline), namedWords, ignoreCase);
    }
    final List<String> loadedFromPath = getWordList(env, settings, name);
    if (loadedFromPath == null) {
        return defaultWords;
    }
    return resolveNamedWords(loadedFromPath, namedWords, ignoreCase);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:Analysis.java
示例4: testOverlappingAtBeginning
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
@Test
public void testOverlappingAtBeginning() throws Exception {
    // Overlapping phrases sharing a prefix at the start of the input:
    // the longest match ("new york city") should win.
    final CharArraySet phrases = new CharArraySet(
            Arrays.asList("new york", "new york city", "city of new york"), false);
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("new york city is great"));
    final AutoPhrasingTokenFilter filter = new AutoPhrasingTokenFilter(tokenizer, phrases, false);
    filter.setReplaceWhitespaceWith('_');
    final CharTermAttribute termAttr = filter.addAttribute(CharTermAttribute.class);
    filter.reset();
    for (final String expected : new String[] { "new_york_city", "is", "great" }) {
        assertTrue(filter.incrementToken());
        assertEquals(expected, termAttr.toString());
    }
}
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java
示例5: testOverlappingAtEnd
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
@Test
public void testOverlappingAtEnd() throws Exception {
    // Overlapping phrases at the end of the input: the trailing
    // "city of new york" should be emitted as a single phrase token.
    final CharArraySet phrases = new CharArraySet(
            Arrays.asList("new york", "new york city", "city of new york"), false);
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the great city of new york"));
    final AutoPhrasingTokenFilter filter = new AutoPhrasingTokenFilter(tokenizer, phrases, false);
    filter.setReplaceWhitespaceWith('_');
    final CharTermAttribute termAttr = filter.addAttribute(CharTermAttribute.class);
    filter.reset();
    for (final String expected : new String[] { "the", "great", "city_of_new_york" }) {
        assertTrue(filter.incrementToken());
        assertEquals(expected, termAttr.toString());
    }
}
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java
示例6: testIncompletePhrase
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
@Test
public void testIncompletePhrase() throws Exception {
    // "new york" is only a prefix of the configured "new york city";
    // with no complete match, the tokens must pass through unphrased.
    final CharArraySet phrases = new CharArraySet(
            Arrays.asList("big apple", "new york city", "property tax", "three word phrase"), false);
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("some new york"));
    final AutoPhrasingTokenFilter filter = new AutoPhrasingTokenFilter(tokenizer, phrases, false);
    filter.setReplaceWhitespaceWith('_');
    final CharTermAttribute termAttr = filter.addAttribute(CharTermAttribute.class);
    filter.reset();
    for (final String expected : new String[] { "some", "new", "york" }) {
        assertTrue(filter.incrementToken());
        assertEquals(expected, termAttr.toString());
    }
}
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java
示例7: RomanianAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public RomanianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new RomanianAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, RomanianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:RomanianAnalyzerProvider.java
示例8: BasqueAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BasqueAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BasqueAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:BasqueAnalyzerProvider.java
示例9: StandardHtmlStripAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link StandardHtmlStripAnalyzer}; no stop words are applied
 * unless explicitly configured.
 */
public StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet stopWords =
            Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(stopWords);
    analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:8,代码来源:StandardHtmlStripAnalyzerProvider.java
示例10: IndonesianAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public IndonesianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, IndonesianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:IndonesianAnalyzerProvider.java
示例11: ArabicAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
arabicAnalyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:ArabicAnalyzerProvider.java
示例12: SnowballAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link SnowballAnalyzer} for the configured language.
 * The {@code language} setting takes precedence over the legacy
 * {@code name} setting; both default to {@code English}.
 */
public SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final String language = settings.get("language", settings.get("name", "English"));
    // Per-language default stop words; empty set for unknown languages.
    final CharArraySet languageDefaults = DEFAULT_LANGUAGE_STOPWORDS.getOrDefault(language, CharArraySet.EMPTY_SET);
    final CharArraySet stopWords =
            Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, languageDefaults);
    analyzer = new SnowballAnalyzer(language, stopWords);
    analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:11,代码来源:SnowballAnalyzerProvider.java
示例13: StopAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link StopAnalyzer}, defaulting to Lucene's English
 * stop-word list when none is configured.
 */
public StopAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet configuredStopWords = Analysis.parseStopWords(
            env, indexSettings.getIndexVersionCreated(), settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    this.stopAnalyzer = new StopAnalyzer(configuredStopWords);
    this.stopAnalyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:8,代码来源:StopAnalyzerProvider.java
示例14: SwedishAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public SwedishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new SwedishAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, SwedishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:SwedishAnalyzerProvider.java
示例15: StandardAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link StandardAnalyzer}. Stop words default to none;
 * {@code max_token_length} defaults to the Lucene standard limit.
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet stopWords =
            Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    final int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    standardAnalyzer = new StandardAnalyzer(stopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(maxTokenLength);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:10,代码来源:StandardAnalyzerProvider.java
示例16: SpanishAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public SpanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new SpanishAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, SpanishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:SpanishAnalyzerProvider.java
示例17: WordDelimiterTokenFilterFactory
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Translates index settings into the configuration for Lucene's word-delimiter
 * filter: a flag bitmask, an optional custom character-type table
 * ({@code type_table}) and an optional set of protected words.
 *
 * @param indexSettings settings of the owning index
 * @param env           node environment, used to resolve word-list files
 * @param name          configured name of this token filter
 * @param settings      filter-specific settings
 */
public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
// Sample format for the type table (maps characters to token types):
// $ => DIGIT
// % => DIGIT
// . => DIGIT
// \u002C => DIGIT
// \u200D => ALPHANUM
List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
if (charTypeTableValues == null) {
// No custom table configured: use Lucene's default delimiter table.
this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
} else {
this.charTypeTable = parseTypes(charTypeTableValues);
}
int flags = 0;
// If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
// If set, causes number subwords to be generated: "500-42" => "500" "42"
flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
// If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
// If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
// If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
// If set, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts regardless)
flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
// If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
// If set, causes "j2se" to be three tokens: "j" "2" "se"
flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
// If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
// If not null is the set of tokens to protect from being delimited
Set<?> protectedWords = Analysis.getWordSet(env, indexSettings.getIndexVersionCreated(), settings, "protected_words");
this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
this.flags = flags;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:40,代码来源:WordDelimiterTokenFilterFactory.java
示例18: PortugueseAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public PortugueseAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new PortugueseAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, PortugueseAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:PortugueseAnalyzerProvider.java
示例19: DanishAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new DanishAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, DanishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:DanishAnalyzerProvider.java
示例20: ArmenianAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, ArmenianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:ArmenianAnalyzerProvider.java
注:本文中的org.apache.lucene.analysis.CharArraySet类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论