Java source examples for class org.apache.lucene.analysis.ja.dict.Dictionary

Project: elasticsearch-analysis-ja — File: ReloadableKuromojiTokenizerFactory.java
/**
 * Resets the wrapped Kuromoji tokenizer for a new input, first hot-swapping a
 * freshly loaded user dictionary into the tokenizer's private fields (via
 * reflection) when the dictionary has been updated since the tokenizer was built.
 *
 * @throws IOException if resetting the underlying tokenizer fails
 */
@Override
public void reset() throws IOException {
    // May reload the user dictionary and advance dictionaryTimestamp.
    updateUserDictionary();

    // A newer dictionary than the one the tokenizer was built with means the
    // tokenizer's internals must be swapped in place.
    if (dictionaryTimestamp > tokenizerTimestamp) {
        if (VERBOSE) {
            System.out.println("Update KuromojiTokenizer ("
                    + tokenizerTimestamp + "," + dictionaryTimestamp
                    + ")");
        }
        if (userDictionary != null) {
            try {
                tokenizerTimestamp = dictionaryTimestamp;
                // Inject the new dictionary plus its FST; the bytes reader
                // must come from the same FST instance to stay consistent.
                userDictionaryField.set(tokenizer, userDictionary);
                final TokenInfoFST userFst = userDictionary.getFST();
                userFSTField.set(tokenizer, userFst);
                userFSTReaderField.set(tokenizer,
                        userFst.getBytesReader());
                @SuppressWarnings("unchecked")
                final
                EnumMap<Type, Dictionary> dictionaryMap = (EnumMap<Type, Dictionary>) dictionaryMapField.get(tokenizer);
                // Re-point the USER entry so lookups hit the new dictionary.
                dictionaryMap.put(Type.USER, userDictionary);
            } catch (final Exception e) {
                // Reflection failures (access, type) are programming errors here.
                throw new IllegalStateException(
                        "Failed to update the tokenizer.", e);
            }
        }
    }

    // Hand any pending input to the delegate before resetting it.
    final Reader inputPending = getInputPending();
    if (inputPending != ILLEGAL_STATE_READER) {
        tokenizer.setReader(inputPending);
    }
    tokenizer.reset();
}
Project: search — File: Token.java
/**
 * Creates a token over a slice of the given surface-form buffer.
 *
 * @param wordId      dictionary word id for this token
 * @param surfaceForm backing character buffer (stored as-is, not copied)
 * @param offset      offset of the token within {@code surfaceForm}
 * @param length      length of the token within {@code surfaceForm}
 * @param type        token type (e.g. KNOWN, UNKNOWN, USER)
 * @param position    token position in the input
 * @param dictionary  dictionary that produced this token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
  this.dictionary = dictionary;
  this.type = type;
  this.wordId = wordId;
  this.position = position;
  this.surfaceForm = surfaceForm;
  this.offset = offset;
  this.length = length;
}
Project: NYBC — File: Token.java
/**
 * Creates a token over a slice of the given surface-form buffer.
 *
 * @param wordId      dictionary word id for this token
 * @param surfaceForm backing character buffer (stored as-is, not copied)
 * @param offset      offset of the token within {@code surfaceForm}
 * @param length      length of the token within {@code surfaceForm}
 * @param type        token type (e.g. KNOWN, UNKNOWN, USER)
 * @param position    token position in the input
 * @param dictionary  dictionary that produced this token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
  this.dictionary = dictionary;
  this.type = type;
  this.wordId = wordId;
  this.position = position;
  this.surfaceForm = surfaceForm;
  this.offset = offset;
  this.length = length;
}
Project: read-open-source-code — File: Token.java
/**
 * Creates a token over a slice of the given surface-form buffer.
 *
 * @param wordId      dictionary word id for this token
 * @param surfaceForm backing character buffer (stored as-is, not copied)
 * @param offset      offset of the token within {@code surfaceForm}
 * @param length      length of the token within {@code surfaceForm}
 * @param type        token type (e.g. KNOWN, UNKNOWN, USER)
 * @param position    token position in the input
 * @param dictionary  dictionary that produced this token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
  this.dictionary = dictionary;
  this.type = type;
  this.wordId = wordId;
  this.position = position;
  this.surfaceForm = surfaceForm;
  this.offset = offset;
  this.length = length;
}
Project: read-open-source-code — File: Token.java
/**
 * Creates a token over a slice of the given surface-form buffer.
 *
 * @param wordId      dictionary word id for this token
 * @param surfaceForm backing character buffer (stored as-is, not copied)
 * @param offset      offset of the token within {@code surfaceForm}
 * @param length      length of the token within {@code surfaceForm}
 * @param type        token type (e.g. KNOWN, UNKNOWN, USER)
 * @param position    token position in the input
 * @param dictionary  dictionary that produced this token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
  this.dictionary = dictionary;
  this.type = type;
  this.wordId = wordId;
  this.position = position;
  this.surfaceForm = surfaceForm;
  this.offset = offset;
  this.length = length;
}
Project: Maskana-Gestor-de-Conocimiento — File: Token.java
/**
 * Creates a token over a slice of the given surface-form buffer.
 *
 * @param wordId      dictionary word id for this token
 * @param surfaceForm backing character buffer (stored as-is, not copied)
 * @param offset      offset of the token within {@code surfaceForm}
 * @param length      length of the token within {@code surfaceForm}
 * @param type        token type (e.g. KNOWN, UNKNOWN, USER)
 * @param position    token position in the input
 * @param dictionary  dictionary that produced this token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
  this.dictionary = dictionary;
  this.type = type;
  this.wordId = wordId;
  this.position = position;
  this.surfaceForm = surfaceForm;
  this.offset = offset;
  this.length = length;
}
Project: search — File: JapaneseTokenizer.java
/**
 * Extends the Viterbi lattice with a token of {@code type} ending at
 * {@code endPos}, backed by entry {@code wordID} of {@code dict}: picks the
 * cheapest incoming path among the arcs recorded at the start position
 * (path cost + bigram connection cost), folds in the word's own cost,
 * optionally applies a search-mode length penalty, and records the best arc
 * at {@code endPos}.
 *
 * @param dict        dictionary supplying word cost and left/right ids for {@code wordID}
 * @param fromPosData lattice column at the token's start position; must have count &gt; 0
 * @param endPos      position where the token ends
 * @param wordID      id of the dictionary entry being added
 * @param type        token type; USER entries are never penalized
 * @param addPenalty  forces the length penalty even outside search mode
 * @throws IOException propagated from declared lattice/cost accesses
 */
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
  final int wordCost = dict.getWordCost(wordID);
  final int leftID = dict.getLeftId(wordID);
  int leastCost = Integer.MAX_VALUE;
  int leastIDX = -1;
  assert fromPosData.count > 0;
  // Find the cheapest predecessor arc at the start position.
  for(int idx=0;idx<fromPosData.count;idx++) {
    // Cost is path cost so far, plus word cost (added at
    // end of loop), plus bigram cost:
    final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
    if (VERBOSE) {
      System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
    }
    if (cost < leastCost) {
      leastCost = cost;
      leastIDX = idx;
      if (VERBOSE) {
        System.out.println("        **");
      }
    }
  }

  // Fold in the word's own cost now that the best predecessor is chosen.
  leastCost += wordCost;

  if (VERBOSE) {
    System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
  }

  // In search mode (when compounds are not emitted) or when explicitly
  // requested, penalize tokens by length; user-dictionary entries are exempt.
  if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
    final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
    if (VERBOSE) {
      if (penalty > 0) {
        System.out.println("        + penalty=" + penalty + " cost=" + (leastCost+penalty));
      }
    }
    leastCost += penalty;
  }

  //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
  // leftID is reused as the right id; the assert verifies they match for this entry.
  assert leftID == dict.getRightId(wordID);
  positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
Project: search — File: JapaneseTokenizer.java
/** Returns the dictionary registered for the given token type. */
Dictionary getDict(Type type) {
  final Dictionary result = dictionaryMap.get(type);
  return result;
}
Project: NYBC — File: JapaneseTokenizer.java
/**
 * Extends the Viterbi lattice with a token of {@code type} ending at
 * {@code endPos}, backed by entry {@code wordID} of {@code dict}: picks the
 * cheapest incoming path among the arcs recorded at the start position
 * (path cost + bigram connection cost), folds in the word's own cost,
 * optionally applies a search-mode length penalty, and records the best arc
 * at {@code endPos}.
 *
 * @param dict        dictionary supplying word cost and left/right ids for {@code wordID}
 * @param fromPosData lattice column at the token's start position; must have count &gt; 0
 * @param endPos      position where the token ends
 * @param wordID      id of the dictionary entry being added
 * @param type        token type; USER entries are never penalized
 * @param addPenalty  forces the length penalty even outside search mode
 * @throws IOException propagated from declared lattice/cost accesses
 */
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
  final int wordCost = dict.getWordCost(wordID);
  final int leftID = dict.getLeftId(wordID);
  int leastCost = Integer.MAX_VALUE;
  int leastIDX = -1;
  assert fromPosData.count > 0;
  // Find the cheapest predecessor arc at the start position.
  for(int idx=0;idx<fromPosData.count;idx++) {
    // Cost is path cost so far, plus word cost (added at
    // end of loop), plus bigram cost:
    final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
    if (VERBOSE) {
      System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
    }
    if (cost < leastCost) {
      leastCost = cost;
      leastIDX = idx;
      if (VERBOSE) {
        System.out.println("        **");
      }
    }
  }

  // Fold in the word's own cost now that the best predecessor is chosen.
  leastCost += wordCost;

  if (VERBOSE) {
    System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
  }

  // In search mode (when compounds are not emitted) or when explicitly
  // requested, penalize tokens by length; user-dictionary entries are exempt.
  if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
    final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
    if (VERBOSE) {
      if (penalty > 0) {
        System.out.println("        + penalty=" + penalty + " cost=" + (leastCost+penalty));
      }
    }
    leastCost += penalty;
  }

  //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
  // leftID is reused as the right id; the assert verifies they match for this entry.
  assert leftID == dict.getRightId(wordID);
  positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
Project: NYBC — File: JapaneseTokenizer.java
/** Returns the dictionary registered for the given token type. */
Dictionary getDict(Type type) {
  final Dictionary result = dictionaryMap.get(type);
  return result;
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/**
 * Extends the Viterbi lattice with a token of {@code type} ending at
 * {@code endPos}, backed by entry {@code wordID} of {@code dict}: picks the
 * cheapest incoming path among the arcs recorded at the start position
 * (path cost + bigram connection cost), folds in the word's own cost,
 * optionally applies a search-mode length penalty, and records the best arc
 * at {@code endPos}.
 *
 * @param dict        dictionary supplying word cost and left/right ids for {@code wordID}
 * @param fromPosData lattice column at the token's start position; must have count &gt; 0
 * @param endPos      position where the token ends
 * @param wordID      id of the dictionary entry being added
 * @param type        token type; USER entries are never penalized
 * @param addPenalty  forces the length penalty even outside search mode
 * @throws IOException propagated from declared lattice/cost accesses
 */
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
  final int wordCost = dict.getWordCost(wordID);
  final int leftID = dict.getLeftId(wordID);
  int leastCost = Integer.MAX_VALUE;
  int leastIDX = -1;
  assert fromPosData.count > 0;
  // Find the cheapest predecessor arc at the start position.
  for(int idx=0;idx<fromPosData.count;idx++) {
    // Cost is path cost so far, plus word cost (added at
    // end of loop), plus bigram cost:
    final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
    if (VERBOSE) {
      System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
    }
    if (cost < leastCost) {
      leastCost = cost;
      leastIDX = idx;
      if (VERBOSE) {
        System.out.println("        **");
      }
    }
  }

  // Fold in the word's own cost now that the best predecessor is chosen.
  leastCost += wordCost;

  if (VERBOSE) {
    System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
  }

  // In search mode (when compounds are not emitted) or when explicitly
  // requested, penalize tokens by length; user-dictionary entries are exempt.
  if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
    final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
    if (VERBOSE) {
      if (penalty > 0) {
        System.out.println("        + penalty=" + penalty + " cost=" + (leastCost+penalty));
      }
    }
    leastCost += penalty;
  }

  //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
  // leftID is reused as the right id; the assert verifies they match for this entry.
  assert leftID == dict.getRightId(wordID);
  positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/** Returns the dictionary registered for the given token type. */
Dictionary getDict(Type type) {
  final Dictionary result = dictionaryMap.get(type);
  return result;
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/**
 * Extends the Viterbi lattice with a token of {@code type} ending at
 * {@code endPos}, backed by entry {@code wordID} of {@code dict}: picks the
 * cheapest incoming path among the arcs recorded at the start position
 * (path cost + bigram connection cost), folds in the word's own cost,
 * optionally applies a search-mode length penalty, and records the best arc
 * at {@code endPos}.
 *
 * @param dict        dictionary supplying word cost and left/right ids for {@code wordID}
 * @param fromPosData lattice column at the token's start position; must have count &gt; 0
 * @param endPos      position where the token ends
 * @param wordID      id of the dictionary entry being added
 * @param type        token type; USER entries are never penalized
 * @param addPenalty  forces the length penalty even outside search mode
 * @throws IOException propagated from declared lattice/cost accesses
 */
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
  final int wordCost = dict.getWordCost(wordID);
  final int leftID = dict.getLeftId(wordID);
  int leastCost = Integer.MAX_VALUE;
  int leastIDX = -1;
  assert fromPosData.count > 0;
  // Find the cheapest predecessor arc at the start position.
  for(int idx=0;idx<fromPosData.count;idx++) {
    // Cost is path cost so far, plus word cost (added at
    // end of loop), plus bigram cost:
    final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
    if (VERBOSE) {
      System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
    }
    if (cost < leastCost) {
      leastCost = cost;
      leastIDX = idx;
      if (VERBOSE) {
        System.out.println("        **");
      }
    }
  }

  // Fold in the word's own cost now that the best predecessor is chosen.
  leastCost += wordCost;

  if (VERBOSE) {
    System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
  }

  // In search mode (when compounds are not emitted) or when explicitly
  // requested, penalize tokens by length; user-dictionary entries are exempt.
  if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
    final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
    if (VERBOSE) {
      if (penalty > 0) {
        System.out.println("        + penalty=" + penalty + " cost=" + (leastCost+penalty));
      }
    }
    leastCost += penalty;
  }

  //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
  // leftID is reused as the right id; the assert verifies they match for this entry.
  assert leftID == dict.getRightId(wordID);
  positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/** Returns the dictionary registered for the given token type. */
Dictionary getDict(Type type) {
  final Dictionary result = dictionaryMap.get(type);
  return result;
}
Project: Maskana-Gestor-de-Conocimiento — File: JapaneseTokenizer.java
/**
 * Extends the Viterbi lattice with a token of {@code type} ending at
 * {@code endPos}, backed by entry {@code wordID} of {@code dict}: picks the
 * cheapest incoming path among the arcs recorded at the start position
 * (path cost + bigram connection cost), folds in the word's own cost,
 * optionally applies a search-mode length penalty, and records the best arc
 * at {@code endPos}.
 *
 * @param dict        dictionary supplying word cost and left/right ids for {@code wordID}
 * @param fromPosData lattice column at the token's start position; must have count &gt; 0
 * @param endPos      position where the token ends
 * @param wordID      id of the dictionary entry being added
 * @param type        token type; USER entries are never penalized
 * @param addPenalty  forces the length penalty even outside search mode
 * @throws IOException propagated from declared lattice/cost accesses
 */
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
  final int wordCost = dict.getWordCost(wordID);
  final int leftID = dict.getLeftId(wordID);
  int leastCost = Integer.MAX_VALUE;
  int leastIDX = -1;
  assert fromPosData.count > 0;
  // Find the cheapest predecessor arc at the start position.
  for(int idx=0;idx<fromPosData.count;idx++) {
    // Cost is path cost so far, plus word cost (added at
    // end of loop), plus bigram cost:
    final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
    if (VERBOSE) {
      System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
    }
    if (cost < leastCost) {
      leastCost = cost;
      leastIDX = idx;
      if (VERBOSE) {
        System.out.println("        **");
      }
    }
  }

  // Fold in the word's own cost now that the best predecessor is chosen.
  leastCost += wordCost;

  if (VERBOSE) {
    System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
  }

  // In search mode (when compounds are not emitted) or when explicitly
  // requested, penalize tokens by length; user-dictionary entries are exempt.
  if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
    final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
    if (VERBOSE) {
      if (penalty > 0) {
        System.out.println("        + penalty=" + penalty + " cost=" + (leastCost+penalty));
      }
    }
    leastCost += penalty;
  }

  //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
  // leftID is reused as the right id; the assert verifies they match for this entry.
  assert leftID == dict.getRightId(wordID);
  positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
Project: Maskana-Gestor-de-Conocimiento — File: JapaneseTokenizer.java
/** Returns the dictionary registered for the given token type. */
Dictionary getDict(Type type) {
  final Dictionary result = dictionaryMap.get(type);
  return result;
}