绑定完请刷新页面
取消
刷新

分享好友

×
取消 复制
solr查询工作原理深入内幕-3
2019-12-18 18:05:35

8.2 查询过程

  完整过程如下:IndexSearcher调用search方法

protectedvoidsearch(List leaves, Weight weight, Collector collector)

throws IOException {

// TODO: should we make this

// threaded...? the Collector could be sync'd?

// always use single thread:for(LeafReaderContext ctx : leaves) {// search each subreaderfinal LeafCollector leafCollector;

try{ leafCollector = collector.getLeafCollector(ctx);//1}catch (CollectionTerminatedException e) {

// there is no doc of interest in this reader context

// continue with the following leafcontinue; } BulkScorer scorer = weight.bulkScorer(ctx);//2

if(scorer !=null) {

try{scorer.score(leafCollector, ctx.reader().getLiveDocs());//3}catch (CollectionTerminatedException e) {

// collection was terminated prematurely

// continue with the following leaf }

}

}

}

 8.2.1 获取Collector

TopScoreDocCollector.java#SimpleTopScoreDocCollector

@Override

public LeafCollector getLeafCollector(LeafReaderContext context)

throws IOException {

finalintdocBase = context.docBase;

returnnew ScorerLeafCollector() {

@Override

publicvoidcollect(intdoc)throws IOException {

float score = scorer.score();/* Document document=context.reader().document(doc);*/// This collector cannot handle these scores:assertscore != Float.NEGATIVE_INFINITY;

assert!Float.isNaN(score);

totalHits++;

if(score <= pqTop.score) {

// Since docs are returned in-order (i.e., increasing doc Id), a document

// with equal score to pqTop.score cannot compete since HitQueue favors

// documents with lower doc Ids. Therefore reject those docs too.return;

}

pqTop.doc = doc + docBase;

pqTop.score = score;

pqTop = pq.updateTop();

}

};

}

8.2.2 调用打分score

/** * Optional method, to return a {@link BulkScorer} to

* score the query and send hits to a {@link Collector}.

* Only queries that have a different top-level approach

* need to override this; the default implementation

* pulls a normal {@link Scorer} and iterates and

* collects the resulting hits which are not marked as deleted.

*

* @param context

* the {@link org.apache.lucene.index.LeafReaderContext} for which to return the {@link Scorer}.

*

* @return a {@link BulkScorer} which scores documents and

* passes them to a collector.

* @throws IOException if there is a low-level I/O error

*/publicBulkScorer bulkScorer(LeafReaderContext context)throwsIOException {Scorer scorer = scorer(context);

if(scorer ==null) {

// No docs matchreturnnull;

}

// This impl always scores docs in order, so we can

// ignore scoreDocsInOrder:returnnew DefaultBulkScorer(scorer);

}

/** Just wraps a Scorer and performs top scoring using it.

* @lucene.internal */protectedstaticclassDefaultBulkScorerextends BulkScorer {

privatefinal Scorer scorer;

privatefinal DocIdSetIterator iterator;

privatefinal TwoPhaseIterator twoPhase;

/**
 * Sole constructor.
 *
 * @param scorer the scorer to wrap; its iterator and two-phase iterator are captured here
 * @throws NullPointerException if {@code scorer} is {@code null}
 */
public DefaultBulkScorer(Scorer scorer) {
  if (scorer == null) {
    throw new NullPointerException();
  }
  this.scorer = scorer;
  this.iterator = scorer.iterator();
  this.twoPhase = scorer.twoPhaseIterator();
}

/** Returns the cost reported by the wrapped scorer's iterator. */
@Override
public long cost() {
  return iterator.cost();
}

@Override

publicintscore(LeafCollector collector, Bits acceptDocs,intmin,intmax)throws IOException {

collector.setScorer(scorer);

if(scorer.docID() == -1 && min == 0 && max ==DocIdSetIterator.NO_MORE_DOCS) {scoreAll(collector, iterator, twoPhase, acceptDocs);

return DocIdSetIterator.NO_MORE_DOCS;

} else {

intdoc = scorer.docID();

if(doc < min) {

if(twoPhase ==null) {

doc = iterator.advance(min);

} else {

doc = twoPhase.approximation().advance(min);

}

}

return scoreRange(collector, iterator, twoPhase, acceptDocs, doc, max);

}

}

score方法在满足条件时调用scoreAll方法,遍历匹配的Document,并对每个文档执行SimpleTopScoreDocCollector的collect方法,其中包含打分逻辑(见上文8.2.1中SimpleTopScoreDocCollector的代码)。

/** Specialized method to bulk-score all hits; we

* separate this from {@link #scoreRange} to help out

* hotspot.

* See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */staticvoidscoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs)throws IOException {

if(twoPhase ==null) {

for(intdoc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {

if(acceptDocs ==null||acceptDocs.get(doc)) {collector.collect(doc);} } }else {

// The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirmfinalDocIdSetIterator approximation = twoPhase.approximation();

for(intdoc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {

if((acceptDocs ==null|| acceptDocs.get(doc)) &&twoPhase.matches()) {collector.collect(doc);} } } }

总结:

  梳理整理整个流程太累了。

分享好友

分享这个小栈给你的朋友们,一起进步吧。

凉城时光
创建时间:2019-12-04 10:57:57
朋友 我们一起聊运维
展开
订阅须知

• 所有用户可根据关注领域订阅专区或所有专区

• 付费订阅:虚拟交易,一经交易不退款;若特殊情况,可3日内客服咨询

• 专区发布评论属默认订阅所评论专区(除付费小栈外)

栈主、嘉宾

查看更多
  • 我没
    栈主

小栈成员

查看更多
  • unnamed personq
  • unnamed personq
  • bluetooth
  • amadan
戳我,来吐槽~