Lucene 同义词的索引

发布时间:2020-03-04 10:41:38 作者:mingyongyao_cto
来源:网络 阅读:562
/**
 * Lookup contract for a synonym dictionary.
 */
public interface SynonymEngine {
    /**
     * Looks up the synonyms registered for a term.
     *
     * @param key the term to look up
     * @return the synonyms of {@code key}; callers in this file treat a {@code null}
     *         or empty array as "no synonyms"
     */
    String[] getSynonyms(String key);
}
public class SynonymEngineImpl implements SynonymEngine {
    
    private static HashMap<String,String[]> map = new HashMap<String ,String[]>();

    static {
        map.put("quick",new String[]{"fast","speedy"});
        map.put("jumps",new String[]{"leaps","hops"});
        map.put("over",new String[]{"above"});
        map.put("lazy",new String[]{"apathetic","sluggish"});
        map.put("dog",new String[]{"canine","pooch"});
    }
    @Override
    public String[] getSynonyms(String key) {
        // TODO Auto-generated method stub
        return map.get(key);
    }

}
public class SynonymFilter extends TokenFilter {

    private SynonymEngine engine;
    private CharTermAttribute ct;
    private PositionIncrementAttribute pt;
    private Stack<String> stack;
    private AttributeSource.State current;
    protected SynonymFilter(TokenStream input,SynonymEngine engine) {
        super(input);
        this.engine = engine;
        ct = this.addAttribute(CharTermAttribute.class);
        pt = this.addAttribute(PositionIncrementAttribute.class);
        stack  = new Stack<String>();
    }

    @Override
    public boolean incrementToken() throws IOException {
        if(stack.size()>0) {
            this.restoreState(current);
            String p = stack.pop();
            ct.setEmpty();
            ct.append(p);
            pt.setPositionIncrement(0);
            return true;
        }
        System.out.println("++++++"+ct);
        if(!input.incrementToken()) return false;
        System.out.println("------"+ct);
        
        if(addSynonym(ct.toString())) {
            current = this.captureState();
            
        }
        
        
        
        
        return true;
    }
    
    private boolean addSynonym(String name) {
        String[] sa = engine.getSynonyms(name);
        if(sa != null && sa.length>0) {
            for(String s:sa) {
                stack.push(s);
            }
            return true;
        } else {
            return false;
        }
    }

}
public class SynonymAnalyzer extends Analyzer {

    private SynonymEngine engine;
    
    public SynonymAnalyzer(SynonymEngine engine) {
        this.engine = engine;
    }
    @Override
    public TokenStream tokenStream(String s, Reader reader) {
        // TODO Auto-generated method stub
        return new SynonymFilter(new StopFilter(Version.LUCENE_35,
                new LowerCaseFilter(Version.LUCENE_35,
                        new StandardFilter(Version.LUCENE_35,
                                new StandardTokenizer(Version.LUCENE_35,reader)))
                ,StopAnalyzer.ENGLISH_STOP_WORDS_SET),engine);
    }

}
public class TestSynonym {

    private RAMDirectory directory;
    @Test
    public void init() {
        directory = new RAMDirectory();
        SynonymEngine engine = new SynonymEngineImpl();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new SynonymAnalyzer(engine));
        String content = "The quick brown fox jumps over the lazy dog";
        
        try {
            IndexWriter writer = new IndexWriter(directory,config);
            Document doc = new Document();
            doc.add(new Field("content",content,Field.Store.YES,Field.Index.ANALYZED));
            writer.addDocument(doc);
            writer.close();
            
            IndexReader reader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(new TermQuery(new Term("content","pooch")),10);
            for(ScoreDoc sd:docs.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(d.get("content"));
            }
            
        } catch (CorruptIndexException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}


推荐阅读:
  1. 全文检索-Lucene
  2. Lucene如何实现索引和查询

免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。

同义词 lucene ce

上一篇:Exchange2016部署合集

下一篇:全量备份与增量备份实践

相关阅读

您好,登录后才能下订单哦!

密码登录
登录注册
其他方式登录
点击 登录注册 即表示同意《亿速云用户服务条款》