Java 类org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo 实例源码

项目:search    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:search    文件:WeightedFragListBuilderTest.java   
/**
 * Builds a fragment list for {@code query} over the long multi-valued test
 * index, then checks both the single fragment's string form and the sum of
 * its sub-info boosts.
 */
private void testCase( Query query, int fragCharSize, String expectedFragInfo,
    double expectedTotalSubInfoBoost ) throws Exception {
  makeIndexLongMV();

  FieldQuery fieldQuery = new FieldQuery( query, true, true );
  FieldTermStack termStack = new FieldTermStack( reader, 0, F, fieldQuery );
  FieldPhraseList phraseList = new FieldPhraseList( termStack, fieldQuery );
  WeightedFragListBuilder builder = new WeightedFragListBuilder();
  FieldFragList fragList = builder.createFieldFragList( phraseList, fragCharSize );

  assertEquals( 1, fragList.getFragInfos().size() );
  assertEquals( expectedFragInfo, fragList.getFragInfos().get( 0 ).toString() );

  // The fragment's sub-info boosts must add up to the expected total.
  float boostSum = 0;
  for ( WeightedFragInfo fragInfo : fragList.getFragInfos() ) {
    for ( SubInfo subInfo : fragInfo.getSubInfos() ) {
      boostSum += subInfo.getBoost();
    }
  }
  assertEquals( expectedTotalSubInfoBoost, boostSum, .0000001 );
}
项目:NYBC    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:read-open-source-code    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:read-open-source-code    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:read-open-source-code    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:Maskana-Gestor-de-Conocimiento    文件:BaseFragmentsBuilder.java   
/**
 * Renders the highlighted text for one fragment: walks the fragment's term
 * hits in order, wrapping each hit in its pre/post tag pair and encoding all
 * text through the supplied {@link Encoder}.
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // getFragmentSourceMSO may shift the effective start; it reports it back here.
  int[] adjustedStart = { fragStart };
  String source = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), adjustedStart );
  StringBuilder out = new StringBuilder();
  int cursor = 0; // position in `source` already emitted
  for( SubInfo hit : fragInfo.getSubInfos() ){
    for( Toffs offsets : hit.getTermsOffsets() ){
      int hitStart = offsets.getStartOffset() - adjustedStart[0];
      int hitEnd = offsets.getEndOffset() - adjustedStart[0];
      // plain text before the hit
      out.append( encoder.encodeText( source.substring( cursor, hitStart ) ) );
      // the hit itself, wrapped in the tag pair for this sub-info's sequence number
      out.append( getPreTag( preTags, hit.getSeqnum() ) );
      out.append( encoder.encodeText( source.substring( hitStart, hitEnd ) ) );
      out.append( getPostTag( postTags, hit.getSeqnum() ) );
      cursor = hitEnd;
    }
  }
  // trailing text after the last hit
  out.append( encoder.encodeText( source.substring( cursor ) ) );
  return out.toString();
}
项目:search    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo} carrying its own boost.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<>();
  float fragmentBoost = 0;
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    fragmentBoost += phrase.getBoost();
    subInfos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, fragmentBoost ) );
}
项目:search    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its IDF-style weight (scaled by the
 * phrase boost) once, and the result is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<>();
  List<SubInfo> realSubInfos = new ArrayList<>();
  HashSet<String> distinctTerms = new HashSet<>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison every boost for an empty phrase list.
  float norm = length > 0 ? length * ( 1 / (float)Math.sqrt( length ) ) : 0;

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:NYBC    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo}.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  float weight = 0;
  List<SubInfo> infos = new ArrayList<SubInfo>();
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    weight += phrase.getBoost();
    infos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, infos, weight ) );
}
项目:NYBC    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its weight (scaled by the phrase
 * boost) once, and the total is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {

  float totalBoost = 0;

  List<SubInfo> subInfos = new ArrayList<SubInfo>();

  HashSet<String> distinctTerms = new HashSet<String>();

  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){

    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );

    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison the boost for an empty phrase list.
  if ( length > 0 )
    totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo} carrying its own boost.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<SubInfo>();
  float fragmentBoost = 0;
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    fragmentBoost += phrase.getBoost();
    subInfos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, fragmentBoost ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its weight (scaled by the phrase
 * boost) once, and the result is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<SubInfo>();
  List<SubInfo> realSubInfos = new ArrayList<SubInfo>();
  HashSet<String> distinctTerms = new HashSet<String>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison every boost for an empty phrase list.
  float norm = length > 0 ? length * ( 1 / (float)Math.sqrt( length ) ) : 0;

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo} carrying its own boost.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<SubInfo>();
  float fragmentBoost = 0;
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    fragmentBoost += phrase.getBoost();
    subInfos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, fragmentBoost ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its weight (scaled by the phrase
 * boost) once, and the result is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<SubInfo>();
  List<SubInfo> realSubInfos = new ArrayList<SubInfo>();
  HashSet<String> distinctTerms = new HashSet<String>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison every boost for an empty phrase list.
  float norm = length > 0 ? length * ( 1 / (float)Math.sqrt( length ) ) : 0;

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo} carrying its own boost.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> subInfos = new ArrayList<>();
  float fragmentBoost = 0;
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    fragmentBoost += phrase.getBoost();
    subInfos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, fragmentBoost ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its weight (scaled by the phrase
 * boost) once, and the result is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  List<SubInfo> tempSubInfos = new ArrayList<>();
  List<SubInfo> realSubInfos = new ArrayList<>();
  HashSet<String> distinctTerms = new HashSet<>();
  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){
    float phraseTotalBoost = 0;
    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        phraseTotalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
    tempSubInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(),
      phraseInfo.getSeqnum(), phraseTotalBoost ) );
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison every boost for an empty phrase list.
  float norm = length > 0 ? length * ( 1 / (float)Math.sqrt( length ) ) : 0;

  float totalBoost = 0;
  for ( SubInfo tempSubInfo : tempSubInfos ) {
    float subInfoBoost = tempSubInfo.getBoost() * norm;
    realSubInfos.add( new SubInfo( tempSubInfo.getText(), tempSubInfo.getTermsOffsets(),
      tempSubInfo.getSeqnum(), subInfoBoost ));
    totalBoost += subInfoBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, realSubInfos, totalBoost ) );
}
项目:Maskana-Gestor-de-Conocimiento    文件:SimpleFieldFragList.java   
/**
 * Adds a fragment whose weight is the plain sum of its phrase boosts; each
 * matched phrase becomes one {@code SubInfo}.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  float weight = 0;
  List<SubInfo> infos = new ArrayList<SubInfo>();
  for( WeightedPhraseInfo phrase : phraseInfoList ){
    weight += phrase.getBoost();
    infos.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum() ) );
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, infos, weight ) );
}
项目:Maskana-Gestor-de-Conocimiento    文件:WeightedFieldFragList.java   
/**
 * Adds a fragment whose weight accounts for term distinctness and fragment
 * length: each distinct term contributes its weight (scaled by the phrase
 * boost) once, and the total is length-normalized by sqrt(length) so
 * word-heavy fragments do not automatically outrank more relevant ones.
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {

  float totalBoost = 0;

  List<SubInfo> subInfos = new ArrayList<SubInfo>();

  HashSet<String> distinctTerms = new HashSet<String>();

  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){

    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );

    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // Only the first occurrence of each term contributes to the boost.
      if ( distinctTerms.add( ti.getText() ) )
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Guard length == 0: the original expression 0 * (1/sqrt(0)) evaluates to
  // 0 * Infinity = NaN, which would poison the boost for an empty phrase list.
  if ( length > 0 )
    totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}