Java 类org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo 实例源码

项目:search    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms, recording the term infos
 * and the merged character-offset ranges of position-adjacent terms.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<>( terms );

  // Build the offset list, merging terms at consecutive positions into a
  // single Toffs range. Iterate instead of calling terms.get(i): positional
  // access on a LinkedList is O(i), which made the original loop O(n^2).
  termsOffsets = new ArrayList<>( terms.size() );
  Toffs last = null;  // offset range most recently added
  int pos = 0;        // position of the previous term
  for( TermInfo ti : terms ){
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: extend the current range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
}
项目:search    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  // Only terminal nodes correspond to a complete query term or phrase.
  if( !terminal ) return false;

  // A single term needs no positional check.
  if( phraseCandidate.size() == 1 ) return true;

  // A phrase is valid when every gap between neighbouring term positions
  // stays within the allowed slop.
  for( int i = 1; i < phraseCandidate.size(); i++ ){
    int gap = phraseCandidate.get( i ).getPosition()
            - phraseCandidate.get( i - 1 ).getPosition() - 1;
    if( Math.abs( gap ) > slop ) return false;
  }
  return true;
}
项目:search    文件:FieldTermStackTest.java   
public void testTermInfoComparisonConsistency() {
  // Positions 0, 1, 2, 0 — text is random, so ordering must come from position.
  TermInfo p0  = new TermInfo( TestUtil.randomUnicodeString(random()), 0, 0, 0, 1 );
  TermInfo p1  = new TermInfo( TestUtil.randomUnicodeString(random()), 0, 0, 1, 1 );
  TermInfo p2  = new TermInfo( TestUtil.randomUnicodeString(random()), 0, 0, 2, 1 );
  TermInfo p0b = new TermInfo( TestUtil.randomUnicodeString(random()), 0, 0, 0, 1 );

  // every instance is equal to itself
  for ( TermInfo ti : new TermInfo[] { p0, p1, p2, p0b } ) {
    assertConsistentEquals( ti, ti );
  }
  // terms at the same position compare equal even though texts differ
  assertConsistentEquals( p0, p0b );
  // strictly increasing positions give a strict ordering
  assertConsistentLessThan( p0, p1 );
  assertConsistentLessThan( p1, p2 );
  assertConsistentLessThan( p0, p2 );
  assertConsistentLessThan( p0b, p1 );
  assertConsistentLessThan( p0b, p2 );
}
项目:NYBC    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  if( !terminal ){
    return false;  // non-terminal nodes never represent a whole term/phrase
  }
  int size = phraseCandidate.size();
  if( size == 1 ){
    return true;   // a lone term is always valid
  }
  // compare each neighbouring position gap against the allowed slop
  int prev = phraseCandidate.get( 0 ).getPosition();
  int i = 1;
  while( i < size ){
    int cur = phraseCandidate.get( i ).getPosition();
    if( Math.abs( cur - prev - 1 ) > slop ){
      return false;
    }
    prev = cur;
    i++;
  }
  return true;
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms, keeping the term infos and
 * collapsing offsets of position-adjacent terms into single Toffs ranges.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<TermInfo>( terms );

  // Single iteration pass: terms.get(i) on a LinkedList is O(i), so the
  // original indexed loop was O(n^2) overall.
  termsOffsets = new ArrayList<Toffs>( terms.size() );
  Toffs last = null;  // most recently added offset range
  int pos = 0;        // previous term's position
  for( TermInfo ti : terms ){
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: widen the current range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
}
项目:read-open-source-code    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  // check terminal: only complete terms/phrases are registered as terminal
  if( !terminal ) return false;
  // a single-term candidate is trivially valid
  if( phraseCandidate.size() == 1 ) return true;

  // walk the candidate once, testing neighbouring position gaps against slop
  boolean first = true;
  int prevPos = 0;
  for( TermInfo ti : phraseCandidate ){
    int curPos = ti.getPosition();
    if( !first && Math.abs( curPos - prevPos - 1 ) > slop ){
      return false;
    }
    prevPos = curPos;
    first = false;
  }
  return true;
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms, retaining the term infos
 * and merging character offsets of position-adjacent terms.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<TermInfo>( terms );

  // Iterate instead of indexing: LinkedList positional access is O(i),
  // which made the original indexed loop O(n^2).
  termsOffsets = new ArrayList<Toffs>( terms.size() );
  Toffs last = null;
  int pos = 0;
  for( TermInfo ti : terms ){
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: extend the last offset range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
}
项目:read-open-source-code    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  // Non-terminal trie nodes cannot end a query term or phrase.
  if( !terminal ) return false;

  // Single term: valid by definition.
  if( phraseCandidate.size() == 1 ) return true;

  // Phrase: each neighbouring position gap must be within slop.
  for( int idx = 1; idx < phraseCandidate.size(); idx++ ){
    int delta = phraseCandidate.get( idx ).getPosition()
              - phraseCandidate.get( idx - 1 ).getPosition() - 1;
    if( Math.abs( delta ) > slop ){
      return false;
    }
  }
  return true;
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms, keeping the term infos and
 * merging offsets of position-adjacent terms into single Toffs ranges.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<>( terms );

  // One iteration pass replaces indexed terms.get(i): positional access on
  // a LinkedList is O(i), making the original loop O(n^2).
  termsOffsets = new ArrayList<>( terms.size() );
  Toffs last = null;  // offset range most recently appended
  int pos = 0;        // previous term's position
  for( TermInfo ti : terms ){
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: widen the current range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
}
项目:read-open-source-code    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  if( !terminal ){
    // only terminal nodes mark a complete query term or phrase
    return false;
  }
  final int n = phraseCandidate.size();
  if( n == 1 ){
    // single terms are always valid
    return true;
  }
  // validate every neighbouring position gap against the slop
  int prev = phraseCandidate.get( 0 ).getPosition();
  for( int i = 1; i < n; i++ ){
    final int cur = phraseCandidate.get( i ).getPosition();
    if( Math.abs( cur - prev - 1 ) > slop ){
      return false;
    }
    prev = cur;
  }
  return true;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms, retaining the term infos
 * and collapsing offsets of position-adjacent terms into one Toffs range.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<TermInfo>( terms );

  // Iterate rather than index: terms.get(i) is O(i) on a LinkedList, so
  // the original indexed loop was O(n^2).
  termsOffsets = new ArrayList<Toffs>( terms.size() );
  Toffs last = null;
  int pos = 0;
  for( TermInfo ti : terms ){
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: extend the last added range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldQuery.java   
public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
  // reject immediately if this node does not terminate a term/phrase
  if( !terminal ) return false;
  // a one-term candidate is always acceptable
  if( phraseCandidate.size() == 1 ) return true;

  // single pass over the candidate, checking gaps between positions
  boolean seenFirst = false;
  int lastPos = 0;
  for( TermInfo info : phraseCandidate ){
    int p = info.getPosition();
    if( seenFirst && Math.abs( p - lastPos - 1 ) > slop ){
      return false;
    }
    lastPos = p;
    seenFirst = true;
  }
  return true;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldTermStackTest.java   
public void testTermInfoComparisonConsistency() {
  // positions 0, 1, 2, 0 — the random text must not influence ordering
  TermInfo first    = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 0, 1 );
  TermInfo second   = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 1, 1 );
  TermInfo third    = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 2, 1 );
  TermInfo firstDup = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 0, 1 );

  // reflexive equality
  assertConsistentEquals( first, first );
  assertConsistentEquals( second, second );
  assertConsistentEquals( third, third );
  assertConsistentEquals( firstDup, firstDup );
  // equal positions imply equality despite different texts
  assertConsistentEquals( first, firstDup );
  // ordering tracks position
  assertConsistentLessThan( first, second );
  assertConsistentLessThan( second, third );
  assertConsistentLessThan( first, third );
  assertConsistentLessThan( firstDup, second );
  assertConsistentLessThan( firstDup, third );
}
项目:search    文件:FieldPhraseList.java   
/**
 * Text of the match, calculated on the fly.  Use for debugging only.
 * @return the text
 */
public String getText() {
  // Debug helper: join the text of every matched term in order.
  StringBuilder buf = new StringBuilder();
  for ( TermInfo ti : termsInfos ) {
    buf.append( ti.getText() );
  }
  return buf.toString();
}
项目:search    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<>();
  List<SubInfo> realSubInfos = new ArrayList<>();
  HashSet<String> distinctTerms = new HashSet<>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.  
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  float norm = length * ( 1 / (float)Math.sqrt( length ) );

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:search    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  // Descend the term trie one candidate term at a time.
  QueryPhraseMap node = this;
  for( TermInfo ti : phraseCandidate ){
    node = node.subMap.get( ti.getText() );
    if( node == null ){
      return null; // unknown term: no match possible
    }
  }
  // All terms found; accept only if the node ends a valid term/phrase.
  return node.isValidTermOrPhrase( phraseCandidate ) ? node : null;
}
项目:search    文件:FieldQueryTest.java   
/**
 * Exercises searchPhrase() with phrase query "a b c" under each
 * phraseHighlight / fieldMatch combination used below.
 */
public void testSearchPhrase() throws Exception {
  Query query = pqF( "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  FieldQuery fq = new FieldQuery( query, true, true );

  // "a" alone is only a prefix of the phrase: no match
  List<TermInfo> phraseCandidate = new ArrayList<>();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b" is still only a prefix
  phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b c" is the complete phrase: matches on field F only
  phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNull( fq.searchPhrase( "x", phraseCandidate ) );

  // phraseHighlight = true, fieldMatch = false: field name is ignored
  fq = new FieldQuery( query, true, false );

  // "a b c" now matches regardless of field
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNotNull( fq.searchPhrase( "x", phraseCandidate ) );

  // phraseHighlight = false, fieldMatch = true: individual terms also match
  fq = new FieldQuery( query, false, true );

  // "a" alone matches in this mode
  phraseCandidate.clear();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b" is neither a single term nor the whole phrase
  phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b c" matches on field F, not on an unrelated field
  phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNull( fq.searchPhrase( "x", phraseCandidate ) );
}
项目:search    文件:FieldQueryTest.java   
/**
 * Slop handling: "a b c" with slop 0 rejects position gaps, while slop 1
 * tolerates a gap of one extra position but not two.
 */
public void testSearchPhraseSlop() throws Exception {
  // "a b c"~0
  Query query = pqF( "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  FieldQuery fq = new FieldQuery( query, true, true );

  // "a b c" w/ position-gap = 2: rejected at slop 0
  List<TermInfo> phraseCandidate = new ArrayList<>();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  phraseCandidate.add( new TermInfo( "b", 2, 3, 2, 1 ) );
  phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );

  // "a b c"~1
  query = pqF( 1F, 1, "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  fq = new FieldQuery( query, true, true );

  // "a b c" w/ position-gap = 2: accepted at slop 1
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );

  // "a b c" w/ position-gap = 3: still too far apart for slop 1
  phraseCandidate.clear();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  phraseCandidate.add( new TermInfo( "b", 2, 3, 3, 1 ) );
  phraseCandidate.add( new TermInfo( "c", 4, 5, 6, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
}
项目:search    文件:FieldQueryTest.java   
private void defgMultiTermQueryTest(Query query) throws IOException {
  // The multi-term query must expand to "defg" on field F, and not to "dog".
  FieldQuery fq = new FieldQuery( query, reader, true, true );
  assertNotNull( fq.getFieldTermMap( F, "defg" ) );
  assertNull( fq.getFieldTermMap( F, "dog" ) );

  // A single-term candidate for "defg" must be found as a phrase too.
  List<TermInfo> candidate = new ArrayList<>();
  candidate.add( new TermInfo( "defg", 0, 12, 0, 1 ) );
  assertNotNull( fq.searchPhrase( F, candidate ) );
}
项目:search    文件:IndexTimeSynonymTest.java   
/**
 * One-word synonym index, querying both synonyms: the stack holds a single
 * entry whose TermInfos are chained in a cycle via getNext().
 */
public void testFieldTermStackIndex1wSearch2terms() throws Exception {
  makeIndex1w();

  BooleanQuery bq = new BooleanQuery();
  bq.add( tq( "Mac" ), Occur.SHOULD );
  bq.add( tq( "MacBook" ), Occur.SHOULD );
  FieldQuery fq = new FieldQuery( bq, true, true );
  FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
  // both terms share one position, so only one stack entry exists
  assertEquals( 1, stack.termList.size() );
  TermInfo ti = stack.pop();
  assertEquals("Mac(11,20,3)", ti.toString());
  assertEquals("MacBook(11,20,3)", ti.getNext().toString());
  // the two-element chain loops back to the first entry
  assertSame(ti, ti.getNext().getNext());
}
项目:search    文件:IndexTimeSynonymTest.java   
/**
 * Mixed 1-word/2-word synonym index, querying a term plus a phrase:
 * "pc" and "personal" share one entry (cyclic getNext chain), with
 * "computer" as a separate entry at the following position.
 */
public void testFieldTermStackIndex1w2wSearch1term1phrase() throws Exception {
  makeIndex1w2w();

  BooleanQuery bq = new BooleanQuery();
  bq.add( tq( "pc" ), Occur.SHOULD );
  bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
  FieldQuery fq = new FieldQuery( bq, true, true );
  FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
  assertEquals( 2, stack.termList.size() );
  TermInfo ti = stack.pop();
  assertEquals( "pc(3,5,1)", ti.toString());
  assertEquals( "personal(3,5,1)", ti.getNext().toString());
  // the synonym chain cycles back to its head
  assertSame(ti, ti.getNext().getNext());
  assertEquals( "computer(3,5,2)", stack.pop().toString() );
}
项目:search    文件:IndexTimeSynonymTest.java   
/**
 * Mixed 2-word/1-word synonym index, querying a term plus a phrase:
 * like the 1w2w case but with the wider (3,20) offsets of the 2-word form.
 */
public void testFieldTermStackIndex2w1wSearch1term1phrase() throws Exception {
  makeIndex2w1w();

  BooleanQuery bq = new BooleanQuery();
  bq.add( tq( "pc" ), Occur.SHOULD );
  bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
  FieldQuery fq = new FieldQuery( bq, true, true );
  FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
  assertEquals( 2, stack.termList.size() );
  TermInfo ti = stack.pop();
  assertEquals("pc(3,20,1)", ti.toString());
  assertEquals("personal(3,20,1)", ti.getNext().toString());
  // synonym entries at the same position form a cycle
  assertSame(ti, ti.getNext().getNext());
  assertEquals( "computer(3,20,2)", stack.pop().toString() );
}
项目:NYBC    文件:FieldPhraseList.java   
/**
 * Builds a weighted phrase from the given terms: keeps the term infos,
 * concatenates the phrase text, and merges offsets of position-adjacent
 * terms into single Toffs ranges.
 *
 * @param terms  terms of the phrase, in position order (must be non-empty)
 * @param boost  score boost for this phrase
 * @param seqnum sequence number of this phrase within the field
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
  this.boost = boost;
  this.seqnum = seqnum;

  // We keep TermInfos for further operations
  termsInfos = new ArrayList<TermInfo>( terms );

  // Single iteration pass builds both the text and the offset list.
  // Iterating (instead of terms.get(i)) avoids O(n^2) positional access
  // on the LinkedList argument.
  termsOffsets = new ArrayList<Toffs>( terms.size() );
  StringBuilder sb = new StringBuilder();
  Toffs last = null;  // most recently added offset range
  int pos = 0;        // previous term's position
  for( TermInfo ti : terms ){
    sb.append( ti.getText() );
    if( last != null && ti.getPosition() - pos == 1 ){
      // consecutive position: extend the current range
      last.setEndOffset( ti.getEndOffset() );
    }
    else{
      last = new Toffs( ti.getStartOffset(), ti.getEndOffset() );
      termsOffsets.add( last );
    }
    pos = ti.getPosition();
  }
  text = sb.toString();
}
项目:NYBC    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<SubInfo>();
  HashSet<String> seenTerms = new HashSet<String>();
  float totalBoost = 0;
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );
    // each distinct term contributes its weight once; length counts every term
    for( TermInfo ti : phraseInfo.getTermsInfos() ){
      if( seenTerms.add( ti.getText() ) ){
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      }
      length++;
    }
  }

  // Fold the fragment length (term count) into the weight, dampened with a
  // sqrt normalization so that word-heavy fragments do not automatically
  // outrank more relevant ones.
  totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}
项目:NYBC    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  // follow subMap links keyed by term text, one hop per candidate term
  QueryPhraseMap cursor = this;
  for( int i = 0; i < phraseCandidate.size(); i++ ){
    cursor = cursor.subMap.get( phraseCandidate.get( i ).getText() );
    if( cursor == null ) return null;
  }
  // reached a node for every term; check it ends a valid term/phrase
  if( cursor.isValidTermOrPhrase( phraseCandidate ) ){
    return cursor;
  }
  return null;
}
项目:NYBC    文件:FieldQueryTest.java   
/**
 * Exercises searchPhrase() with phrase query "a b c" under each
 * phraseHighlight / fieldMatch combination used below.
 */
public void testSearchPhrase() throws Exception {
  Query query = pqF( "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  FieldQuery fq = new FieldQuery( query, true, true );

  // "a" alone is only a prefix of the phrase: no match
  List<TermInfo> phraseCandidate = new ArrayList<TermInfo>();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b" is still only a prefix
  phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b c" is the complete phrase: matches on field F only
  phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNull( fq.searchPhrase( "x", phraseCandidate ) );

  // phraseHighlight = true, fieldMatch = false: field name is ignored
  fq = new FieldQuery( query, true, false );

  // "a b c" now matches regardless of field
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNotNull( fq.searchPhrase( "x", phraseCandidate ) );

  // phraseHighlight = false, fieldMatch = true: individual terms also match
  fq = new FieldQuery( query, false, true );

  // "a" alone matches in this mode
  phraseCandidate.clear();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b" is neither a single term nor the whole phrase
  phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
  // "a b c" matches on field F, not on an unrelated field
  phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
  assertNull( fq.searchPhrase( "x", phraseCandidate ) );
}
项目:NYBC    文件:FieldQueryTest.java   
/**
 * Slop handling: "a b c" with slop 0 rejects position gaps, while slop 1
 * tolerates a gap of one extra position but not two.
 */
public void testSearchPhraseSlop() throws Exception {
  // "a b c"~0
  Query query = pqF( "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  FieldQuery fq = new FieldQuery( query, true, true );

  // "a b c" w/ position-gap = 2: rejected at slop 0
  List<TermInfo> phraseCandidate = new ArrayList<TermInfo>();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  phraseCandidate.add( new TermInfo( "b", 2, 3, 2, 1 ) );
  phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );

  // "a b c"~1
  query = pqF( 1F, 1, "a", "b", "c" );

  // phraseHighlight = true, fieldMatch = true
  fq = new FieldQuery( query, true, true );

  // "a b c" w/ position-gap = 2: accepted at slop 1
  assertNotNull( fq.searchPhrase( F, phraseCandidate ) );

  // "a b c" w/ position-gap = 3: still too far apart for slop 1
  phraseCandidate.clear();
  phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
  phraseCandidate.add( new TermInfo( "b", 2, 3, 3, 1 ) );
  phraseCandidate.add( new TermInfo( "c", 4, 5, 6, 1 ) );
  assertNull( fq.searchPhrase( F, phraseCandidate ) );
}
项目:NYBC    文件:FieldQueryTest.java   
private void defgMultiTermQueryTest(Query query) throws IOException {
  // The multi-term query must expand to "defg" on field F, and not to "dog".
  FieldQuery fq = new FieldQuery( query, reader, true, true );
  assertNotNull( fq.getFieldTermMap( F, "defg" ) );
  assertNull( fq.getFieldTermMap( F, "dog" ) );

  // A single-term candidate for "defg" must also be found as a phrase.
  List<TermInfo> candidate = new ArrayList<TermInfo>();
  candidate.add( new TermInfo( "defg", 0, 12, 0, 1 ) );
  assertNotNull( fq.searchPhrase( F, candidate ) );
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Text of the match, calculated on the fly.  Use for debugging only.
 * @return the text
 */
public String getText() {
  // Debug helper: concatenate the text of the matched terms in order.
  StringBuilder out = new StringBuilder();
  for ( int i = 0; i < termsInfos.size(); i++ ) {
    out.append( termsInfos.get( i ).getText() );
  }
  return out.toString();
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<SubInfo>();
  List<SubInfo> realSubInfos = new ArrayList<SubInfo>();
  HashSet<String> distinctTerms = new HashSet<String>();   
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.  
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  float norm = length * ( 1 / (float)Math.sqrt( length ) );

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:read-open-source-code    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  // Walk the trie: one subMap lookup per candidate term.
  QueryPhraseMap node = this;
  for( TermInfo ti : phraseCandidate ){
    node = node.subMap.get( ti.getText() );
    if( node == null ){
      return null; // a term is missing from the query trie
    }
  }
  // every term resolved; valid only if this node terminates a term/phrase
  return node.isValidTermOrPhrase( phraseCandidate ) ? node : null;
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Text of the match, calculated on the fly.  Use for debugging only.
 * @return the text
 */
public String getText() {
  // Join the text of all matched terms (debugging aid only).
  StringBuilder sb = new StringBuilder();
  for ( TermInfo info : termsInfos ) {
    sb.append( info.getText() );
  }
  return sb.toString();
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<SubInfo>();
  List<SubInfo> realSubInfos = new ArrayList<SubInfo>();
  HashSet<String> distinctTerms = new HashSet<String>();   
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.  
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  float norm = length * ( 1 / (float)Math.sqrt( length ) );

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:read-open-source-code    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  QueryPhraseMap cursor = this;
  // one trie hop per candidate term, keyed by the term's text
  for( int i = 0; i < phraseCandidate.size(); i++ ){
    cursor = cursor.subMap.get( phraseCandidate.get( i ).getText() );
    if( cursor == null ){
      return null;
    }
  }
  // accept only when the final node ends a valid term or phrase
  return cursor.isValidTermOrPhrase( phraseCandidate ) ? cursor : null;
}
项目:read-open-source-code    文件:FieldPhraseList.java   
/**
 * Text of the match, calculated on the fly.  Use for debugging only.
 * @return the text
 */
public String getText() {
  // Debug-only: build the phrase text by appending each term's text.
  StringBuilder result = new StringBuilder();
  for ( TermInfo term : termsInfos ) {
    result.append( term.getText() );
  }
  return result.toString();
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<>();
  List<SubInfo> realSubInfos = new ArrayList<>();
  HashSet<String> distinctTerms = new HashSet<>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.  
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  float norm = length * ( 1 / (float)Math.sqrt( length ) );

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:read-open-source-code    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  // Trace the candidate through the trie, bailing out on the first miss.
  QueryPhraseMap node = this;
  for( TermInfo term : phraseCandidate ){
    node = node.subMap.get( term.getText() );
    if( node == null ){
      return null;
    }
  }
  if( node.isValidTermOrPhrase( phraseCandidate ) ){
    return node;
  }
  return null;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldPhraseList.java   
/**
 * Text of the match, calculated on the fly.  Use for debugging only.
 * @return the text
 */
public String getText() {
  // Debugging helper that stitches together the matched terms' text.
  StringBuilder joined = new StringBuilder();
  for ( int idx = 0; idx < termsInfos.size(); idx++ ) {
    joined.append( termsInfos.get( idx ).getText() );
  }
  return joined.toString();
}
项目:Maskana-Gestor-de-Conocimiento    文件:WeightedFieldFragList.java   
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {

  float totalBoost = 0;

  List<SubInfo> subInfos = new ArrayList<SubInfo>();

  HashSet<String> distinctTerms = new HashSet<String>();

  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){

    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );

    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      if ( distinctTerms.add( ti.getText() ) )
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.  
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.  
  totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldQuery.java   
public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
  // Resolve each candidate term against the trie's subMap in turn.
  QueryPhraseMap cur = this;
  for( TermInfo info : phraseCandidate ){
    cur = cur.subMap.get( info.getText() );
    if( cur == null ) return null; // candidate contains an unknown term
  }
  // all terms resolved: valid only if this node ends a term or phrase
  return cur.isValidTermOrPhrase( phraseCandidate ) ? cur : null;
}