Solr 查询工作原理深入内幕-3

2019-12-18 18:05:35

8.2 查询过程

完整过程如下：IndexSearcher调用search方法

protectedvoidsearch(List leaves, Weight weight, Collector collector)

throws IOException {

// TODO: should we make this

// threaded...? the Collector could be sync'd?

// always use single thread:for(LeafReaderContext ctx : leaves) {// search each subreaderfinal LeafCollector leafCollector;

try{ leafCollector = collector.getLeafCollector(ctx);//1}catch (CollectionTerminatedException e) {

// there is no doc of interest in this reader context

// continue with the following leafcontinue; } BulkScorer scorer = weight.bulkScorer(ctx);//2

if(scorer !=null) {

try{scorer.score(leafCollector, ctx.reader().getLiveDocs());//3}catch (CollectionTerminatedException e) {

// collection was terminated prematurely

// continue with the following leaf }

}

8.2.1 获取Collector

TopScoreDocCollector.java#SimpleTopScoreDocCollector

@Override

public LeafCollector getLeafCollector(LeafReaderContext context)

throws IOException {

finalintdocBase = context.docBase;

returnnew ScorerLeafCollector() {

@Override

publicvoidcollect(intdoc)throws IOException {

float score = scorer.score();/* Document document=context.reader().document(doc);*/// This collector cannot handle these scores:assertscore != Float.NEGATIVE_INFINITY;

assert!Float.isNaN(score);

totalHits++;

if(score <= pqTop.score) {

// Since docs are returned in-order (i.e., increasing doc Id), a document

// with equal score to pqTop.score cannot compete since HitQueue favors

// documents with lower doc Ids. Therefore reject those docs too.return;

}

pqTop.doc = doc + docBase;

pqTop.score = score;

pqTop = pq.updateTop();

}

};

}

8.2.2 调用打分score

/** * Optional method, to return a {@link BulkScorer} to

* score the query and send hits to a {@link Collector}.

* Only queries that have a different top-level approach

* need to override this; the default implementation

* pulls a normal {@link Scorer} and iterates and

* collects the resulting hits which are not marked as deleted.

* @param context

* the {@link org.apache.lucene.index.LeafReaderContext} for which to return the {@link Scorer}.

* @return a {@link BulkScorer} which scores documents and

* passes them to a collector.

* @throws IOException if there is a low-level I/O error

*/publicBulkScorer bulkScorer(LeafReaderContext context)throwsIOException {Scorer scorer = scorer(context);

if(scorer ==null) {

// No docs matchreturnnull;

}

// This impl always scores docs in order, so we can

// ignore scoreDocsInOrder:returnnew DefaultBulkScorer(scorer);

}

/** Just wraps a Scorer and performs top scoring using it.

* @lucene.internal */protectedstaticclassDefaultBulkScorerextends BulkScorer {

privatefinal Scorer scorer;

privatefinal DocIdSetIterator iterator;

privatefinal TwoPhaseIterator twoPhase;

/**
 * Sole constructor.
 *
 * @param scorer the scorer to wrap; its iterator and two-phase iterator are captured here
 * @throws NullPointerException if {@code scorer} is {@code null}
 */
public DefaultBulkScorer(Scorer scorer) {
  if (scorer == null) {
    throw new NullPointerException();
  }
  this.scorer = scorer;
  this.iterator = scorer.iterator();
  this.twoPhase = scorer.twoPhaseIterator();
}

/**
 * Returns the cost reported by the wrapped scorer's iterator.
 *
 * @return {@code iterator.cost()}
 */
@Override
public long cost() {
  return iterator.cost();
}

@Override

publicintscore(LeafCollector collector, Bits acceptDocs,intmin,intmax)throws IOException {

collector.setScorer(scorer);

if(scorer.docID() == -1 && min == 0 && max ==DocIdSetIterator.NO_MORE_DOCS) {scoreAll(collector, iterator, twoPhase, acceptDocs);

return DocIdSetIterator.NO_MORE_DOCS;

} else {

intdoc = scorer.docID();

if(doc < min) {

if(twoPhase ==null) {

doc = iterator.advance(min);

} else {

doc = twoPhase.approximation().advance(min);

}

return scoreRange(collector, iterator, twoPhase, acceptDocs, doc, max);

}

调用scoreAll方法，遍历匹配的Document，对每个文档执行SimpleTopScoreDocCollector的collect方法，其中包含打分逻辑（见上文8.2.1中SimpleTopScoreDocCollector的代码）。

/** Specialized method to bulk-score all hits; we

* separate this from {@link #scoreRange} to help out

* hotspot.

* See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */staticvoidscoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs)throws IOException {

if(twoPhase ==null) {

for(intdoc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {

if(acceptDocs ==null||acceptDocs.get(doc)) {collector.collect(doc);} } }else {

// The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirmfinalDocIdSetIterator approximation = twoPhase.approximation();

for(intdoc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {

if((acceptDocs ==null|| acceptDocs.get(doc)) &&twoPhase.matches()) {collector.collect(doc);} } } }

总结：

　　梳理整理整个流程太累了。

分享好友

分享这个小栈给你的朋友们，一起进步吧。