lucene同义词的索引-创新互联
/**
 * Lookup contract for the synonyms of a single term.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms of the given term.
     *
     * @param key the term whose synonyms are requested
     * @return the synonyms of {@code key}; what is returned for a term without
     *         synonyms is decided by the implementation
     */
    String[] getSynonyms(String key);
}
/**
 * {@link SynonymEngine} backed by a small, hard-coded, in-memory synonym table.
 */
public class SynonymEngineImpl implements SynonymEngine {

    /**
     * Term-to-synonyms table. It is filled once by the static initializer below;
     * nothing in this class modifies it afterwards.
     */
    private static final HashMap<String, String[]> SYNONYMS = new HashMap<String, String[]>();

    static {
        SYNONYMS.put("quick", new String[] {"fast", "speedy"});
        SYNONYMS.put("jumps", new String[] {"leaps", "hops"});
        SYNONYMS.put("over", new String[] {"above"});
        SYNONYMS.put("lazy", new String[] {"apathetic", "sluggish"});
        SYNONYMS.put("dog", new String[] {"canine", "pooch"});
    }

    /**
     * Returns the synonyms stored for the given term.
     *
     * @param key the term to look up; matched exactly as given
     * @return the array stored in the table for {@code key} (the stored instance,
     *         not a copy), or {@code null} when the table has no entry for it
     */
    @Override
    public String[] getSynonyms(String key) {
        return SYNONYMS.get(key);
    }
}
/**
 * Token filter that emits, after each token of the wrapped stream, the synonyms
 * that a {@link SynonymEngine} reports for that token.
 *
 * <p>Every synonym is emitted with a position increment of 0, i.e. it is stacked
 * on the position of the token it was derived from. All other attributes of the
 * synonym token are restored from the state captured for the original token.
 */
public class SynonymFilter extends TokenFilter {

    private final SynonymEngine engine;
    private final CharTermAttribute termAtt;
    private final PositionIncrementAttribute posIncrAtt;

    /** Synonyms of the most recently read token that have not been emitted yet. */
    private final Stack<String> pending;

    /** Attribute state captured for the token whose synonyms are in {@link #pending}. */
    private AttributeSource.State current;

    /**
     * @param input  the token stream to decorate
     * @param engine source of the synonyms for each token text
     */
    protected SynonymFilter(TokenStream input, SynonymEngine engine) {
        super(input);
        this.engine = engine;
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        this.pending = new Stack<String>();
    }

    /**
     * Advances to the next token: a pending synonym if there is one, otherwise
     * the next token of the wrapped stream.
     *
     * <p>Declared {@code final} because Lucene asserts that concrete token
     * streams are final or have a final {@code incrementToken()}.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted and no synonym is pending
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public final boolean incrementToken() throws IOException {
        if (!pending.isEmpty()) {
            // Start from the original token's attributes, then swap in the
            // synonym text and place it on the same position.
            restoreState(current);
            String synonym = pending.pop();
            termAtt.setEmpty();
            termAtt.append(synonym);
            posIncrAtt.setPositionIncrement(0);
            return true;
        }
        if (!input.incrementToken()) {
            return false;
        }
        if (addSynonym(termAtt.toString())) {
            // Remember this token so its synonyms can be emitted on later calls.
            current = captureState();
        }
        return true;
    }

    /**
     * Queues the synonyms of {@code name}, if any.
     *
     * @param name text of the token just read from the wrapped stream
     * @return {@code true} if at least one synonym was queued
     */
    private boolean addSynonym(String name) {
        String[] synonyms = engine.getSynonyms(name);
        if (synonyms == null || synonyms.length == 0) {
            return false;
        }
        for (String synonym : synonyms) {
            pending.push(synonym);
        }
        return true;
    }
}
/**
 * Analyzer that tokenizes with {@code StandardTokenizer}, applies
 * {@code StandardFilter}, lower-cases, removes the English stop words of
 * {@code StopAnalyzer} and finally injects synonyms through {@link SynonymFilter}.
 *
 * <p>The class is {@code final} because Lucene 3.x asserts that Analyzer
 * implementations are final (or have final {@code tokenStream()} and
 * {@code reusableTokenStream()}); a non-final subclass fails that assertion
 * when the JVM runs with assertions enabled.
 */
public final class SynonymAnalyzer extends Analyzer {

    private final SynonymEngine engine;

    /**
     * @param engine source of synonyms handed to every {@link SynonymFilter}
     *               this analyzer creates
     */
    public SynonymAnalyzer(SynonymEngine engine) {
        this.engine = engine;
    }

    /**
     * Builds the analysis chain for one piece of text.
     *
     * @param s      field name; not used by this analyzer
     * @param reader the text to analyze
     * @return the synonym-expanding token stream over {@code reader}
     */
    @Override
    public TokenStream tokenStream(String s, Reader reader) {
        // Built innermost-first; the synonym lookup runs last, so it sees
        // lower-cased tokens with stop words already removed.
        TokenStream stream = new StandardTokenizer(Version.LUCENE_35, reader);
        stream = new StandardFilter(Version.LUCENE_35, stream);
        stream = new LowerCaseFilter(Version.LUCENE_35, stream);
        stream = new StopFilter(Version.LUCENE_35, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        return new SynonymFilter(stream, engine);
    }
}
public class TestSynonym {
private RAMDirectory directory;
/**
 * Indexes one sentence with {@link SynonymAnalyzer} into an in-memory directory
 * and searches it for the term "pooch", which does not occur in the sentence
 * itself; the stored content of every hit is printed.
 *
 * <p>The test fails if the search returns no hit or if indexing/searching
 * throws, instead of silently printing a stack trace.
 */
@Test
public void init() {
    directory = new RAMDirectory();
    SynonymEngine engine = new SynonymEngineImpl();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new SynonymAnalyzer(engine));
    String content = "The quick brown fox jumps over the lazy dog";
    try {
        IndexWriter writer = new IndexWriter(directory, config);
        try {
            Document doc = new Document();
            doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Close even when indexing fails so the writer's resources are released.
            writer.close();
        }

        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                TopDocs docs = searcher.search(new TermQuery(new Term("content", "pooch")), 10);
                if (docs.scoreDocs.length == 0) {
                    throw new AssertionError("expected the synonym \"pooch\" to match the indexed document");
                }
                for (ScoreDoc sd : docs.scoreDocs) {
                    Document d = searcher.doc(sd.doc);
                    System.out.println(d.get("content"));
                }
            } finally {
                searcher.close();
            }
        } finally {
            reader.close();
        }
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOExceptions too;
        // rethrow with the cause so the test fails visibly.
        throw new IllegalStateException("Indexing or searching the in-memory index failed", e);
    }
}
}
成都网络公司-成都网站建设公司创新互联公司十年经验成就非凡,专业从事成都网站建设、成都做网站,成都网页设计,成都网页制作,软文发布平台,一元广告等。十年来已成功提供全面的成都网站建设方案,打造行业特色的成都网站建设案例,建站热线:18982081108,我们期待您的来电!
标题名称:lucene同义词的索引-创新互联
URL分享:http://azwzsj.com/article/idgoe.html
另外有需要云服务器可以了解下创新互联cdcxhl.cn,海内外云服务器15元起步,三天无理由+7*72小时售后在线,公司持有idc许可证,提供“云服务器、裸金属服务器、高防服务器、香港服务器、美国服务器、虚拟主机、免备案服务器”等云主机租用服务以及企业上云的综合解决方案,具有“安全稳定、简单易用、服务可用性高、性价比高”等特点与优势,专为企业上云打造定制,能够满足用户丰富、多元化的应用场景需求。
标题名称:lucene同义词的索引-创新互联
URL分享:http://azwzsj.com/article/idgoe.html