Java 类java.text.Normalizer 实例源码

项目:solo-spring    文件:URICoder.java   
/**
 * URI-encodes a string after NFKC normalization and UTF-8 encoding.
 * <p>
 * Every byte of the UTF-8 form is copied through unchanged when
 * {@code isUnreserved} accepts it or when its value equals {@code e};
 * every other byte is written through {@code appendEscape}.
 *
 * @param s
 *            The string to encode; it may contain non-ASCII characters.
 * @param e
 *            An additional character that is left unescaped when found in
 *            the string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    String normalized = s;
    if (!Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }
    // convert String to UTF-8
    ByteBuffer utf8Bytes = UTF8.encode(normalized);
    // URI encode
    StringBuffer encoded = new StringBuffer();

    while (utf8Bytes.hasRemaining()) {
        // mask so the byte is handled as an unsigned value in 0..255
        int octet = utf8Bytes.get() & 0xff;
        boolean keepLiteral = isUnreserved(octet) || octet == e;

        if (keepLiteral) {
            encoded.append((char) octet);
        } else {
            appendEscape(encoded, (byte) octet);
        }
    }
    return encoded.toString();
}
项目:solo-spring    文件:URICoder.java   
/**
 * URI-encodes a string after NFKC normalization and UTF-8 encoding,
 * escaping only the bytes that {@code isLegal} rejects.
 *
 * @param s
 *            The string to encode; it may contain non-ASCII characters.
 * @return the encoded string
 */
private static String minimalEncode_UTF8(String s) {
    // TODO: Normalizer requires Java 6!
    final String text;
    if (Normalizer.isNormalized(s, Form.NFKC)) {
        text = s;
    } else {
        text = Normalizer.normalize(s, Form.NFKC);
    }
    // convert String to UTF-8
    ByteBuffer source = UTF8.encode(text);
    // URI encode
    StringBuffer target = new StringBuffer();

    while (source.hasRemaining()) {
        // mask so the byte is handled as an unsigned value in 0..255
        int unsigned = source.get() & 0xff;

        if (!isLegal(unsigned)) {
            appendEscape(target, (byte) unsigned);
            continue;
        }
        target.append((char) unsigned);
    }
    return target.toString();
}
项目:OpenJSharp    文件:NormalizerBase.java   
/**
 * Tests whether a string is in the given normalization form.
 * <p>
 * The check is delegated to the {@code quickCheck} of the mode that
 * corresponds to {@code form}; the string counts as normalized only when
 * that call reports {@code YES}.
 * NOTE(review): earlier documentation promised a definitive answer even
 * where quickCheck may report "maybe"; from this method alone only the
 * comparison against {@code YES} is visible - confirm in quickCheck.
 *
 * @param str       the input string to be checked to see if it is normalized
 * @param form      the normalization form
 * @param options   the optional features to be enabled.
 * @return {@code true} if the quick check for {@code form} yields {@code YES}
 * @throws IllegalArgumentException if {@code form} is none of the handled forms
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    switch (form) {
    case NFC: {
        char[] chars = str.toCharArray();
        return NFC.quickCheck(chars, 0, chars.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFD: {
        char[] chars = str.toCharArray();
        return NFD.quickCheck(chars, 0, chars.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFKC: {
        char[] chars = str.toCharArray();
        return NFKC.quickCheck(chars, 0, chars.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFKD: {
        char[] chars = str.toCharArray();
        return NFKD.quickCheck(chars, 0, chars.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:OpenJSharp    文件:CDataTransferer.java   
/**
 * Translates native clipboard/drag data into a Java object.
 * <p>
 * Two formats get special treatment before the superclass is consulted:
 * for {@code CF_URL} with a {@link URL} representation class the bytes are
 * decoded and returned as a URL; for {@code CF_STRING} the bytes are
 * re-encoded as NFC-normalized text and then passed on.
 *
 * @throws IOException if decoding fails or the superclass reports an error
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {

    final boolean wantsUrl = format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (wantsUrl) {
        String encoding = getDefaultTextCharset();
        final boolean encodingAdvertised =
                transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingAdvertised) {
            try {
                // the transferable carries the charset name itself as UTF-8 bytes
                byte[] rawEncoding = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String(rawEncoding, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked just above; keep the default charset
            }
        }

        String spec = new String(bytes, encoding);
        return new URL(spec);
    }

    if (format == CF_STRING) {
        String decoded = new String(bytes, "UTF8");
        bytes = Normalizer.normalize(decoded, Form.NFC).getBytes("UTF8");
    }

    return super.translateBytes(bytes, flavor, format, transferable);
}
项目:OperatieBRP    文件:ZoekCriterium.java   
/**
 * Stores the search value and derives the search flags from it.
 * <p>
 * A leading backslash marks the value as exact and a trailing asterisk as a
 * wildcard; in both cases the marker is removed from
 * {@code slimZoekenWaarde}. For non-exact values the method additionally
 * flags case-insensitivity (no A-Z letter and a string attribute) and the
 * presence of diacritics.
 * NOTE(review): the flags are only ever switched on here, never reset -
 * confirm that instances are not reused with a second value.
 *
 * @param waarde the raw search value, may be {@code null}
 */
public void setWaarde(final String waarde) {
    this.waarde = waarde;
    if (waarde == null) {
        this.slimZoekenWaarde = null;
        return;
    }

    if (waarde.startsWith("\\")) {
        this.exact = true;
        this.slimZoekenWaarde = waarde.substring(1);
    } else if (waarde.endsWith("*")) {
        this.wildcard = true;
        this.slimZoekenWaarde = waarde.substring(0, waarde.length() - 1);
    } else {
        this.slimZoekenWaarde = waarde;
    }

    if (this.exact) {
        return;
    }

    final boolean hasUpperCase = waarde.matches(".*[A-Z].*");
    if (!hasUpperCase && attribuut.isString()) {
        this.caseInsensitive = true;
    }

    // after canonical decomposition any accent shows up as a separate combining mark
    final String decomposed = Normalizer.normalize(waarde, Normalizer.Form.NFD);
    final boolean hasCombiningMark =
            Pattern.compile("\\p{InCombiningDiacriticalMarks}+").matcher(decomposed).find();
    if (hasCombiningMark) {
        this.diakriet = true;
    }
}
项目:OperatieBRP    文件:Utils.java   
/**
 * Converts a name to a Java enum constant name.
 * <p>
 * A name that starts with {@code LITERAL} is returned with that prefix
 * removed and Java escapes resolved. Any other name is decomposed (NFD),
 * has whitespace and hyphens replaced by underscores, is upper-cased,
 * stripped of everything outside {@code A-Z}, {@code 0-9} and {@code _}
 * (which also removes the combining marks left by the decomposition), and
 * finally has runs of underscores collapsed.
 *
 * @param javaNameBase the name to convert
 * @return the enum constant name
 */
public static String convertToJavaEnumName(final String javaNameBase) {
    if (javaNameBase.startsWith(LITERAL)) {
        return StringEscapeUtils.unescapeJava(javaNameBase.replaceAll(String.format("^%s", LITERAL), ""));
    }

    // Unaccent
    String result = Normalizer.normalize(javaNameBase, Normalizer.Form.NFD);
    // Replace whitespace with underscore
    result = result.replaceAll("(\\s|-)", "_");
    // Uppercase; Locale.ROOT keeps the mapping independent of the default
    // locale (a Turkish default would map 'i' to a dotted capital I, which
    // the filter below would then delete)
    result = result.toUpperCase(java.util.Locale.ROOT);
    // Remove unsupported characters
    result = result.replaceAll("[^A-Z0-9_]", "");
    // Remove duplicate separators
    result = result.replaceAll("_{2,}", "_");

    return result;
}
项目:jdk8u-jdk    文件:NormalizerBase.java   
/**
 * Tests whether a string is in the given normalization form.
 * <p>
 * The answer comes from the {@code quickCheck} of the mode matching
 * {@code form}: the string is reported as normalized only when that call
 * returns {@code YES}.
 * NOTE(review): earlier documentation promised a definitive answer even
 * where quickCheck may report "maybe"; only the comparison against
 * {@code YES} is visible in this method - confirm in quickCheck.
 *
 * @param str       the input string to be checked to see if it is normalized
 * @param form      the normalization form
 * @param options   the optional features to be enabled.
 * @return {@code true} if the quick check for {@code form} yields {@code YES}
 * @throws IllegalArgumentException if {@code form} is none of the handled forms
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    final boolean normalized;
    switch (form) {
    case NFC:
        normalized =
            NFC.quickCheck(str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
        break;
    case NFD:
        normalized =
            NFD.quickCheck(str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
        break;
    case NFKC:
        normalized =
            NFKC.quickCheck(str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
        break;
    case NFKD:
        normalized =
            NFKD.quickCheck(str.toCharArray(), 0, str.length(), false, NormalizerImpl.getNX(options)) == YES;
        break;
    default:
        throw new IllegalArgumentException("Unexpected normalization form: " +
                                           form);
    }
    return normalized;
}
项目:bibliome-java-utils    文件:Strings.java   
/**
 * Removes diacritics from the specified string.
 * <p>
 * The string is decomposed (NFD) and every character that belongs to the
 * Combining Diacritical Marks block or its supplement is dropped.
 *
 * @param s the string to process
 * @return the decomposed string without those combining marks
 */
public static final String removeDiacritics(String s) {
    String decomposed = Normalizer.normalize(s, Form.NFD);
    StringBuilder kept = new StringBuilder(decomposed.length());
    for (char ch : decomposed.toCharArray()) {
        UnicodeBlock block = UnicodeBlock.of(ch);
        boolean combiningMark = UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(block)
                || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(block);
        if (!combiningMark) {
            kept.append(ch);
        }
    }
    // nothing was dropped: hand back the decomposed string itself
    if (kept.length() == decomposed.length()) {
        return decomposed;
    }
    return kept.toString();
}
项目:mapr-music    文件:SlugUtil.java   
/**
 * Builds the slug form of a string for use in readable, SEO-friendly URLs.
 * <p>
 * The input goes through {@code transliterator}, whitespace matches become
 * dashes, the text is decomposed (NFD), {@code NONLATIN} and
 * {@code EDGESDHASHES} matches are removed, and the result is lower-cased
 * with the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    String latin = transliterator.transform(input);
    String dashed = WHITESPACE.matcher(latin).replaceAll("-");
    String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    String stripped = NONLATIN.matcher(decomposed).replaceAll("");
    return EDGESDHASHES.matcher(stripped).replaceAll("").toLowerCase(Locale.ENGLISH);
}
项目:mapr-music    文件:SlugService.java   
/**
 * Converts a string to its slug representation, suitable for readable and
 * SEO-friendly URLs.
 * <p>
 * Steps, in order: {@code transliterator.transform}, whitespace matches to
 * dashes, NFD decomposition, removal of {@code NONLATIN} matches, removal
 * of {@code EDGESDHASHES} matches, English-locale lower-casing.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public String toSlug(String input) {
    String slug = transliterator.transform(input);
    slug = WHITESPACE.matcher(slug).replaceAll("-");
    slug = Normalizer.normalize(slug, Normalizer.Form.NFD);
    slug = NONLATIN.matcher(slug).replaceAll("");
    slug = EDGESDHASHES.matcher(slug).replaceAll("");
    slug = slug.toLowerCase(Locale.ENGLISH);
    return slug;
}
项目:TensorFlowDetector-App    文件:MainActivity.java   
/**
 * Reduces the input to unaccented ASCII letters and spaces.
 * <p>
 * The text is decomposed (NFD) so accents become separate characters, then
 * everything except {@code a-z}, {@code A-Z} and the space is removed.
 *
 * @param input the text to clean
 * @return the cleaned text
 */
private String processData(String input) {
    return Normalizer.normalize(input, Normalizer.Form.NFD).replaceAll("[^a-zA-Z ]", "");
}
项目:devops-cstack    文件:AlphaNumericsCharactersCheckUtils.java   
/**
 * Strips accents and unsupported characters from a value.
 * <p>
 * After NFD decomposition the combining diacritical marks are removed, then
 * every character other than letters, digits, {@code -}, {@code _} and
 * whitespace, and finally all space characters. The value is logged at
 * debug level before and after.
 *
 * @param value the value to clean
 * @return the cleaned value
 */
public static String convertToAlphaNumerics(String value) {
    logger.debug("Before : " + value);
    String decomposed = Normalizer.normalize(value, Form.NFD);
    String unaccented = decomposed.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
    // only the plain space is removed here; other whitespace kept by the class above stays
    String cleaned = unaccented.replaceAll("[^-_a-zA-Z0-9\\s]", "").replace(" ", "");
    logger.debug("After : " + cleaned);
    return cleaned;
}
项目:devops-cstack    文件:AlphaNumericsCharactersCheckUtils.java   
/**
 * Removes accents from a value.
 * <p>
 * The value is decomposed (NFD) and every run of combining diacritical
 * marks is deleted. The value is logged at debug level before and after.
 *
 * @param value the value to process
 * @return the value without combining diacritical marks
 */
public static String deAccent(String value) {
    logger.debug("Before : " + value);
    final Pattern combiningMarks = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final String unaccented = combiningMarks.matcher(decomposed).replaceAll("");
    logger.debug("After : " + unaccented);
    return unaccented;
}
项目:devops-cstack    文件:Snapshot.java   
/**
 * Stores the tag in a reduced form: lower-cased, accents removed, and
 * limited to {@code a-z} and {@code 0-9}. A {@code null} tag is stored as
 * {@code null}.
 *
 * @param tag the raw tag, may be {@code null}
 */
public void setTag(String tag) {
    if (tag == null) {
        this.tag = null;
        return;
    }
    // Locale.ROOT: with a Turkish default locale 'I' would lower-case to a
    // dotless i, which the final filter would then delete
    String lowered = tag.toLowerCase(java.util.Locale.ROOT);
    String decomposed = Normalizer.normalize(lowered, Normalizer.Form.NFD);
    this.tag = decomposed
            .replaceAll("[\\p{InCombiningDiacriticalMarks}]", "")
            .replaceAll("[^a-z0-9]", "");
}
项目:devops-cstack    文件:Snapshot.java   
/**
 * Stores the full tag in a reduced form: lower-cased, accents removed, and
 * limited to {@code a-z}, {@code 0-9} and the hyphen. A {@code null} value
 * is stored as {@code null}.
 *
 * @param fullTag the raw full tag, may be {@code null}
 */
public void setFullTag(String fullTag) {
    if (fullTag == null) {
        this.fullTag = null;
        return;
    }
    // Locale.ROOT: with a Turkish default locale 'I' would lower-case to a
    // dotless i, which the final filter would then delete
    String lowered = fullTag.toLowerCase(java.util.Locale.ROOT);
    String decomposed = Normalizer.normalize(lowered, Normalizer.Form.NFD);
    this.fullTag = decomposed
            .replaceAll("[\\p{InCombiningDiacriticalMarks}]", "")
            .replaceAll("[^a-z0-9-]", "");
}
项目:devops-cstack    文件:AlphaNumericsCharactersCheckUtils.java   
/**
 * Reduces a value to ASCII letters and digits, falling back to a generated
 * default when nothing is left.
 * <p>
 * The value's ISO-8859-1 bytes are re-read as UTF-8, the text is decomposed
 * (NFD), non-ASCII characters are dropped, then everything except letters,
 * digits and whitespace, and finally all space characters.
 *
 * @param value    the value to clean
 * @param countApp number appended to {@code "default"} when the cleaned
 *                 value is empty
 * @return the cleaned value, or {@code "default" + countApp}
 * @throws UnsupportedEncodingException if one of the named charsets is unavailable
 */
public static String convertToAlphaNumerics(String value, Integer countApp)
        throws UnsupportedEncodingException {

    byte[] latin1Bytes = value.getBytes("ISO-8859-1");
    String decoded = new String(latin1Bytes, "UTF-8");
    String decomposed = Normalizer.normalize(decoded, Form.NFD);
    String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    String cleaned = asciiOnly.replaceAll("[^a-zA-Z0-9\\s]", "").replace(" ", "");

    return cleaned.equalsIgnoreCase("") ? "default" + countApp : cleaned;

}
项目:directory-ldap-api    文件:PrepareString.java   
/**
 * Returns the NFKC form of a String.
 * <p>
 * A value that is already in NFKC is returned as is; otherwise its
 * normalized form is returned.
 * 
 * @param value the value to normalize
 * @return The normalized value
 */
public static String normalize( String value )
{
    return Normalizer.isNormalized( value, Normalizer.Form.NFKC )
        ? value
        : Normalizer.normalize( value, Normalizer.Form.NFKC );
}
项目:alfresco-repository    文件:NameBasedUserNameGenerator.java   
/**
 * Turns a name into a lower-case token made of ASCII letters, digits and
 * underscores.
 * <p>
 * The name is trimmed and lower-cased, each whitespace run becomes a single
 * underscore, the text is decomposed (NFD) so accents become separate
 * marks, and every character outside {@code a-z}, {@code A-Z},
 * {@code 0-9} and {@code _} is removed.
 *
 * @param name the raw name
 * @return the cleansed name
 */
private String cleanseName(String name)
{
    // Replace whitespace with _ ; Locale.ROOT keeps lower-casing independent
    // of the default locale (a Turkish default would turn 'I' into a dotless
    // i, which is then stripped below)
    String result = name.trim().toLowerCase(java.util.Locale.ROOT).replaceAll("\\s+", "_");

    // Remove accents from characters and strip out non-alphanumeric chars.
    // The range must be A-Z: "A-z" also spans [ \ ] ^ and the backtick.
    return Normalizer.normalize(result, Normalizer.Form.NFD).replaceAll("[^a-zA-Z0-9_]+", "");
}
项目:chromium-net-for-android    文件:NetStringUtil.java   
/**
 * Decodes text from the given character set via {@code convertToUnicode}
 * and returns its NFC-normalized form.
 *
 * @param text ByteBuffer containing the character array to convert.
 * @param charsetName Character set the text is encoded in.
 * @return Unicode string on success, null when {@code convertToUnicode}
 *         returns null.
 */
@CalledByNative
private static String convertToUnicodeAndNormalize(
        ByteBuffer text,
        String charsetName) {
    final String decoded = convertToUnicode(text, charsetName);
    return decoded == null
            ? null
            : Normalizer.normalize(decoded, Normalizer.Form.NFC);
}
项目:idea-php-typo3-plugin    文件:Slugify.java   
/**
 * Applies the slug clean-up pipeline to the input.
 * <p>
 * The text is decomposed (NFKD); matches of
 * {@code PATTERN_NORMALIZE_NON_ASCII} are replaced by {@code EMPTY},
 * matches of {@code PATTERN_NORMALIZE_SEPARATOR} by the configured
 * separator ({@code "_"} or {@code "-"}), and matches of
 * {@code PATTERN_NORMALIZE_TRIM_DASH} by {@code EMPTY}.
 *
 * @param input the text to normalize
 * @return the normalized text
 */
private String normalize(final String input) {
    final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFKD);
    final String ascii = PATTERN_NORMALIZE_NON_ASCII.matcher(decomposed).replaceAll(EMPTY);

    final String separator = underscoreSeparator ? "_" : "-";
    final String separated = PATTERN_NORMALIZE_SEPARATOR.matcher(ascii).replaceAll(separator);

    return PATTERN_NORMALIZE_TRIM_DASH.matcher(separated).replaceAll(EMPTY);
}
项目:armadillo    文件:HkdfMessageDigest.java   
/**
 * Derives a hex string from a message and a usage name with HKDF
 * (HMAC-SHA512).
 * <p>
 * Both strings are converted to bytes with NFKD normalization; the message
 * is passed as the second argument and the usage name as the third argument
 * of {@code extractAndExpand}, together with this instance's {@code salt}
 * and {@code outLength}.
 *
 * @param providedMessage the message to derive from, must not be null
 * @param usageName       the usage label, must not be null
 * @return the derived bytes, hex-encoded
 * @throws NullPointerException if either argument is null
 */
@Override
public String derive(String providedMessage, String usageName) {
    Objects.requireNonNull(providedMessage);
    Objects.requireNonNull(usageName);

    final byte[] messageBytes = Bytes.from(providedMessage, Normalizer.Form.NFKD).array();
    final byte[] usageBytes = Bytes.from(usageName, Normalizer.Form.NFKD).array();

    return Bytes.wrap(
            HKDF.fromHmacSha512().extractAndExpand(salt, messageBytes, usageBytes, outLength))
            .encodeHex();
}
项目:armadillo    文件:DefaultEncryptionProtocol.java   
/**
 * Derives the content encryption key with HKDF (HMAC-SHA512).
 * <p>
 * The input keying material is the concatenation of the fingerprint, the
 * content salt and the NFKD-normalized content key. The preference salt is
 * passed as the first argument of {@code extractAndExpand}, the fixed label
 * {@code "DefaultEncryptionProtocol"} as the third, and
 * {@code keyLengthBit / 8} as the requested length.
 *
 * @param contentKey     the key name of the content
 * @param fingerprint    the fingerprint bytes
 * @param contentSalt    salt specific to the content
 * @param preferenceSalt salt specific to the preference store
 * @param password       optional user password, may be {@code null}
 * @return the derived key bytes
 */
private byte[] keyDerivationFunction(String contentKey, byte[] fingerprint, byte[] contentSalt, byte[] preferenceSalt, @Nullable char[] password) {
    Bytes ikm = Bytes.wrap(fingerprint).append(contentSalt).append(Bytes.from(contentKey, Normalizer.Form.NFKD));

    if (password != null) {
        // NOTE(review): the value returned by append() is discarded here, while
        // the chain above relies on it. If Bytes.append does not modify its
        // receiver, the stretched password never reaches the key material.
        // Left unchanged because a fix would alter keys derived for existing
        // data - confirm against the Bytes API and plan a migration if needed.
        ikm.append(keyStretchingFunction.stretch(contentSalt, password, 32));
    }

    // explicit charset so the label bytes do not depend on the platform default
    byte[] info = "DefaultEncryptionProtocol".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    return HKDF.fromHmacSha512().extractAndExpand(preferenceSalt, ikm.array(), info, keyLengthBit / 8);
}
项目:ARCLib    文件:Utils.java   
/**
 * Removes accents and all other non-ASCII characters from a string.
 * <p>
 * The string is decomposed (NFD) so that accents become separate
 * characters, then every non-ASCII character is deleted.
 *
 * @param s the string to process, may be {@code null}
 * @return the ASCII-only string, or {@code null} for {@code null} input
 */
public static String stripAccents(String s) {
    if (s == null) {
        return null;
    }
    return Normalizer.normalize(s, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
}
项目:CommentView    文件:Validator.java   
/**
 * Computes the length of a tweet.
 * <p>
 * The text is NFC-normalized and counted in code points. For every URL
 * reported by the extractor, the URL's own span is subtracted and the
 * configured short-URL length (the HTTPS variant when the URL starts with
 * {@code https://}, ignoring case) is added instead.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
  final String normalized = Normalizer.normalize(text, Normalizer.Form.NFC);
  int length = normalized.codePointCount(0, normalized.length());

  for (Extractor.Entity url : extractor.extractURLsWithIndices(normalized)) {
    // start - end is negative: it removes the characters the URL occupies
    length += url.start - url.end;
    final boolean secure = url.value.toLowerCase().startsWith("https://");
    if (secure) {
      length += shortUrlLengthHttps;
    } else {
      length += shortUrlLength;
    }
  }

  return length;
}
项目:CommentView    文件:Validator.java   
/**
 * Returns the tweet length of a text.
 * <p>
 * Counting is done in code points on the NFC form of the text. Each URL
 * found by the extractor contributes the configured short-URL length
 * ({@code shortUrlLengthHttps} for URLs starting with {@code https://},
 * case-insensitively; {@code shortUrlLength} otherwise) in place of its
 * actual span.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
  String composed = Normalizer.normalize(text, Normalizer.Form.NFC);
  int total = composed.codePointCount(0, composed.length());

  for (Extractor.Entity entity : extractor.extractURLsWithIndices(composed)) {
    // take out the span the URL occupies in the text
    total += entity.start - entity.end;
    // and count the shortened form instead
    String lowered = entity.value.toLowerCase();
    total += lowered.startsWith("https://") ? shortUrlLengthHttps : shortUrlLength;
  }

  return total;
}
项目:creacoinj    文件:BIP38PrivateKey.java   
/**
 * Decrypts this key with the given passphrase.
 * <p>
 * The passphrase is NFC-normalized and handed to {@code decryptEC} or
 * {@code decryptNoEC}, depending on {@code ecMultiply}. The result is
 * verified by comparing the first four bytes of the double SHA-256 of the
 * key's address string with the stored {@code addressHash}.
 *
 * @param passphrase the passphrase to try
 * @return the decrypted key
 * @throws BadPassphraseException if the address hash does not match
 */
public ECKey decrypt(String passphrase) throws BadPassphraseException {
    final String normalized = Normalizer.normalize(passphrase, Normalizer.Form.NFC);
    final ECKey key;
    if (ecMultiply) {
        key = decryptEC(normalized);
    } else {
        key = decryptNoEC(normalized);
    }

    byte[] addressBytes = key.toAddress(params).toString().getBytes(Charsets.US_ASCII);
    byte[] checksum = Arrays.copyOfRange(Sha256Hash.twiceOf(addressBytes).getBytes(), 0, 4);
    if (Arrays.equals(checksum, addressHash)) {
        return key;
    }
    throw new BadPassphraseException();
}
项目:cyberduck    文件:NFCNormalizer.java   
/**
 * Returns the NFC form of a name.
 * <p>
 * A name that is already in NFC is returned unchanged; otherwise the
 * composed form is returned and, when debug logging is enabled, the
 * conversion is logged.
 *
 * @param name the name to normalize
 * @return the name in NFC
 */
public CharSequence normalize(final CharSequence name) {
    if(Normalizer.isNormalized(name, Normalizer.Form.NFC)) {
        return name;
    }
    // Canonical decomposition followed by canonical composition (default)
    final String composed = Normalizer.normalize(name, Normalizer.Form.NFC);
    if(log.isDebugEnabled()) {
        log.debug(String.format("Normalized string %s to %s", name, composed));
    }
    return composed;
}
项目:bytes-java    文件:BytesConstructorTests.java   
/**
 * Asserts that {@code Bytes.from} encodes a string as expected.
 * <p>
 * The bytes and the round-tripped text are checked for the given charset.
 * When the charset is the {@code StandardCharsets.UTF_8} instance, the
 * NFKD-normalizing factory is checked as well; for any other charset the
 * default {@code Bytes.from(String)} is compared against UTF-8 instead.
 *
 * @param string  the text to encode
 * @param charset the charset to test with
 */
private void checkString(String string, Charset charset) {
    Bytes b = Bytes.from(string, charset);
    final byte[] expected = string.getBytes(charset);
    assertArrayEquals(expected, b.array());
    assertEquals(new String(string.getBytes(charset), charset), b.encodeCharset(charset));

    if (charset == StandardCharsets.UTF_8) {
        Bytes bNormalized = Bytes.from(string, Normalizer.Form.NFKD);
        final String normalized = Normalizer.normalize(string, Normalizer.Form.NFKD);
        assertArrayEquals(normalized.getBytes(charset), bNormalized.array());
    } else {
        Bytes bUtf8 = Bytes.from(string);
        final byte[] utf8 = string.getBytes(StandardCharsets.UTF_8);
        assertArrayEquals(utf8, bUtf8.array());
        assertEquals(new String(string.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8), bUtf8.encodeUtf8());
    }
}
项目:commons-sandbox    文件:JavaNormalization.java   
/**
 * Demo: prints whether a sample string is in NFKD and the string itself,
 * first as written and then again after normalizing it to NFKD.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final Normalizer.Form form = Normalizer.Form.NFKD;

    String text = "São Paulo";
    System.out.println(Normalizer.isNormalized(text, form));
    System.out.println(text);

    text = Normalizer.normalize(text, form);
    System.out.println(Normalizer.isNormalized(text, form));
    System.out.println(text);
    // TODO: how can I print the difference?
}
项目:OpenJSharp    文件:RegularFileObject.java   
/**
 * Checks whether this file object's name matches a simple name plus the
 * extension of the given kind.
 * <p>
 * Besides an exact match, two lenient matches are accepted: on Mac OS a
 * file name in NFD whose NFC form equals the expected NFC name (the stored
 * name is then replaced by its NFC form), and a case-insensitive match that
 * is confirmed against the canonical file name.
 *
 * @param cn   the simple name, must not be null
 * @param kind the kind whose extension is appended
 * @return {@code true} if the name is compatible
 */
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    // null check
    cn.getClass();
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }

    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }

    final boolean differentlyNormalized = isMacOS
            && Normalizer.isNormalized(name, Normalizer.Form.NFD)
            && Normalizer.isNormalized(expected, Normalizer.Form.NFC);
    if (differentlyNormalized) {
        // On Mac OS X the file name and the class name may well be normalized
        // in different ways - in that case the file name is converted to the
        // Normal Form Composed (NFC) before comparing
        String composedName = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composedName.equals(expected)) {
            this.name = composedName;
            return true;
        }
    }

    if (!name.equalsIgnoreCase(expected)) {
        return false;
    }
    try {
        // allow for Windows
        return file.getCanonicalFile().getName().equals(expected);
    } catch (IOException e) {
        // the canonical file could not be resolved; report as not compatible
        return false;
    }
}
项目:OpenJSharp    文件:Pattern.java   
/**
 * Rewrites the pattern so that it can match canonically equivalent input.
 * <p>
 * The pattern text is first converted to NFD and stored in
 * {@code normalizedPattern}. It is then scanned code point by code point:
 * a non-spacing mark that follows another code point is gathered together
 * with that preceding code point into a sequence, which is replaced by a
 * non-capturing group built from {@code produceEquivalentAlternation};
 * an opening bracket not preceded by a backslash is handed to
 * {@code normalizeCharClass}; every other code point is copied unchanged.
 * The rewritten text overwrites {@code normalizedPattern};
 * {@code patternLength} keeps the length of the NFD text.
 */
private void normalize() {
    // NOTE(review): inCharClass is assigned here and not used again in this method
    boolean inCharClass = false;
    // -1 means no code point has been processed yet
    int lastCodePoint = -1;

    // Convert pattern into normalizedD form
    normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
    patternLength = normalizedPattern.length();

    // Modify pattern to match canonical equivalences
    StringBuilder newPattern = new StringBuilder(patternLength);
    for(int i=0; i<patternLength; ) {
        int c = normalizedPattern.codePointAt(i);
        StringBuilder sequenceBuffer;
        if ((Character.getType(c) == Character.NON_SPACING_MARK)
            && (lastCodePoint != -1)) {
            // start the sequence with the preceding code point and this mark
            sequenceBuffer = new StringBuilder();
            sequenceBuffer.appendCodePoint(lastCodePoint);
            sequenceBuffer.appendCodePoint(c);
            // Step over the run of marks. Each newly read code point is appended
            // before the loop condition is checked again, so the code point that
            // ends the run is appended too.
            // NOTE(review): confirm that including it is intended.
            while(Character.getType(c) == Character.NON_SPACING_MARK) {
                i += Character.charCount(c);
                if (i >= patternLength)
                    break;
                c = normalizedPattern.codePointAt(i);
                sequenceBuffer.appendCodePoint(c);
            }
            String ea = produceEquivalentAlternation(
                                           sequenceBuffer.toString());
            // drop as many chars from the end as the preceding code point
            // occupies, so the group below takes its place
            newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
            newPattern.append("(?:").append(ea).append(")");
        } else if (c == '[' && lastCodePoint != '\\') {
            // the helper's return value becomes the scan index; it is still
            // advanced by the increment at the bottom of the loop
            i = normalizeCharClass(newPattern, i);
        } else {
            newPattern.appendCodePoint(c);
        }
        lastCodePoint = c;
        i += Character.charCount(c);
    }
    normalizedPattern = newPattern.toString();
}
项目:OpenJSharp    文件:Pattern.java   
/**
 * Tries to compose the leading part of the input one step further.
 * <p>
 * The leading part (its length is obtained from
 * {@code countChars(input, 0, 2)}) is NFC-normalized. If that changes it,
 * the composed form followed by the rest of the input is returned.
 *
 * @param input the sequence to compose
 * @return the partially composed sequence, or {@code null} if the leading
 *         part cannot be composed further
 */
private String composeOneStep(String input) {
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composed = Normalizer.normalize(head, Normalizer.Form.NFC);

    if (composed.equals(head)) {
        return null;
    }
    return composed + input.substring(headLength);
}
项目:OpenJSharp    文件:NormalizerBase.java   
/**
 * Normalizes a <code>String</code> using the given normalization form.
 * <p>
 * A string that contains only characters up to 127 is returned unchanged;
 * otherwise the work is delegated to the mode that matches {@code form}.
 *
 * @param str      the input string to be normalized.
 * @param form     the normalization form
 * @param options   the optional features to be enabled.
 * @return the normalized string
 * @throws IllegalArgumentException if {@code form} is none of the handled forms
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    final int len = str.length();
    boolean asciiOnly = true;
    if (len < 80) {
        // short input: scan in place without copying
        for (int i = 0; asciiOnly && i < len; i++) {
            asciiOnly = str.charAt(i) <= 127;
        }
    } else {
        // longer input: scan a char[] copy
        for (char ch : str.toCharArray()) {
            if (ch > 127) {
                asciiOnly = false;
                break;
            }
        }
    }

    switch (form) {
    case NFC :
        if (asciiOnly) {
            return str;
        }
        return NFC.normalize(str, options);
    case NFD :
        if (asciiOnly) {
            return str;
        }
        return NFD.normalize(str, options);
    case NFKC :
        if (asciiOnly) {
            return str;
        }
        return NFKC.normalize(str, options);
    case NFKD :
        if (asciiOnly) {
            return str;
        }
        return NFKD.normalize(str, options);
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:guereza    文件:SimpleIndexer.java   
/**
 * Splits a sentence into index words.
 * <p>
 * The sentence is split on {@code REGEX_SPACE}; each piece is lower-cased,
 * decomposed (NFD), stripped of {@code REGEX_ALPHANUM} matches and stemmed.
 * Empty results and stop words are discarded.
 * NOTE(review): lower-casing uses the default locale - confirm that this
 * is acceptable for the index.
 *
 * @param sentence the text to split
 * @return a stream of the remaining words
 */
private Stream<String> getWords(final String sentence) {
    return Stream.of(sentence.split(REGEX_SPACE))
            .map(raw -> Normalizer.normalize(raw.toLowerCase(), Normalizer.Form.NFD)
                    .replaceAll(REGEX_ALPHANUM, ""))
            .map(this::stemmed)
            .filter(word -> !word.isEmpty() && !StopWords.match(word));
}
项目:sunbird-utils    文件:Slug.java   
/**
 * Builds a slug from the input.
 * <p>
 * Steps, in order: trim, {@code urlDecode}, optional {@code transliterate},
 * whitespace matches to dashes, NFD decomposition, removal of
 * {@code NONLATIN} matches, {@code normalizeDashes},
 * {@code validateResult} against the original input, and English-locale
 * lower-casing.
 *
 * @param input         the text to convert, may be {@code null}
 * @param transliterate whether to transliterate the text first
 * @return the slug, or {@code null} when the input is {@code null}
 */
public static String makeSlug(String input, boolean transliterate) {
  // Validate the input
  if (input == null) {
    ProjectLogger.log("Provided input value is null");
    return null;
  }
  final String origInput = input;

  // Remove extra spaces, then remove URL encoding
  String slug = urlDecode(input.trim());
  if (transliterate) {
    // Transliterate & cleanup
    slug = transliterate(slug);
  }
  // Replace all whitespace with dashes
  slug = WHITESPACE.matcher(slug).replaceAll("-");
  // Decompose so accent marks become separate characters
  slug = Normalizer.normalize(slug, Form.NFD);
  // Remove all non-latin special characters
  slug = NONLATIN.matcher(slug).replaceAll("");
  // Remove any consecutive dashes
  slug = normalizeDashes(slug);
  // Validate before returning
  validateResult(slug, origInput);
  // Slug is always lowercase
  return slug.toLowerCase(Locale.ENGLISH);
}
项目:Java_CTe    文件:XmlUtil.java   
/**
 * Prepares text for XML output by removing accents and line noise.
 * <p>
 * Carriage returns, tabs and line feeds are removed, {@code &} becomes
 * {@code E}, whitespace between {@code >} and {@code <} is dropped, and
 * after NFKD decomposition all combining diacritical marks are deleted.
 *
 * @param str the text to clean, may be {@code null}
 * @return the cleaned text; the empty string for {@code null} input
 */
public static String removeAcentos(String str) {
    // the null guard has to come first: the replacements below dereference str
    if (str == null) {
        return "";
    }
    String cleaned = str
            .replace("\r", "")
            .replace("\t", "")
            .replace("\n", "")
            .replace("&", "E")
            .replaceAll(">\\s+<", "><");
    return Normalizer.normalize(cleaned, Normalizer.Form.NFKD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
项目:jdk8u-jdk    文件:Pattern.java   
/**
 * Rewrites the pattern so that it can match canonically equivalent input.
 * <p>
 * The pattern text is converted to NFD and stored in
 * {@code normalizedPattern}, then scanned code point by code point. A
 * non-spacing mark that follows another code point is collected together
 * with that preceding code point, and the collected sequence is replaced by
 * a non-capturing group produced by {@code produceEquivalentAlternation}.
 * An opening bracket that is not preceded by a backslash is delegated to
 * {@code normalizeCharClass}. Any other code point is copied as is.
 * At the end {@code normalizedPattern} holds the rewritten text, while
 * {@code patternLength} still holds the length of the NFD text.
 */
private void normalize() {
    // NOTE(review): inCharClass is assigned here and not used again in this method
    boolean inCharClass = false;
    // -1 marks "nothing processed yet"
    int lastCodePoint = -1;

    // Convert pattern into normalizedD form
    normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
    patternLength = normalizedPattern.length();

    // Modify pattern to match canonical equivalences
    StringBuilder newPattern = new StringBuilder(patternLength);
    for(int i=0; i<patternLength; ) {
        int c = normalizedPattern.codePointAt(i);
        StringBuilder sequenceBuffer;
        if ((Character.getType(c) == Character.NON_SPACING_MARK)
            && (lastCodePoint != -1)) {
            // the sequence begins with the preceding code point plus this mark
            sequenceBuffer = new StringBuilder();
            sequenceBuffer.appendCodePoint(lastCodePoint);
            sequenceBuffer.appendCodePoint(c);
            // Advance through the following marks. Because the append happens
            // before the condition is re-evaluated, the code point that ends
            // the run also lands in the buffer.
            // NOTE(review): confirm that including it is intended.
            while(Character.getType(c) == Character.NON_SPACING_MARK) {
                i += Character.charCount(c);
                if (i >= patternLength)
                    break;
                c = normalizedPattern.codePointAt(i);
                sequenceBuffer.appendCodePoint(c);
            }
            String ea = produceEquivalentAlternation(
                                           sequenceBuffer.toString());
            // truncate by the char count of the preceding code point so the
            // group appended next replaces it
            newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
            newPattern.append("(?:").append(ea).append(")");
        } else if (c == '[' && lastCodePoint != '\\') {
            // scanning resumes from the index the helper returns, after the
            // increment at the bottom of the loop
            i = normalizeCharClass(newPattern, i);
        } else {
            newPattern.appendCodePoint(c);
        }
        lastCodePoint = c;
        i += Character.charCount(c);
    }
    normalizedPattern = newPattern.toString();
}
项目:jdk8u-jdk    文件:Pattern.java   
/**
 * Attempts a single composition step on the start of the input.
 * <p>
 * The prefix whose length is given by {@code countChars(input, 0, 2)} is
 * NFC-normalized. When normalization changes the prefix, the composed
 * prefix is returned with the untouched remainder appended.
 *
 * @param input the sequence to compose
 * @return the sequence with a composed prefix, or {@code null} if the
 *         prefix cannot be composed further
 */
private String composeOneStep(String input) {
    int prefixLength = countChars(input, 0, 2);
    String prefix = input.substring(0, prefixLength);
    String composedPrefix = Normalizer.normalize(prefix, Normalizer.Form.NFC);

    boolean unchanged = composedPrefix.equals(prefix);
    return unchanged ? null : composedPrefix + input.substring(prefixLength);
}
项目:jdk8u-jdk    文件:NormalizerBase.java   
/**
 * Normalizes a <code>String</code> using the given normalization form.
 * <p>
 * The string is scanned for a character above 127 first; if none is found
 * the string itself is returned, otherwise the mode that matches
 * {@code form} performs the normalization.
 *
 * @param str      the input string to be normalized.
 * @param form     the normalization form
 * @param options   the optional features to be enabled.
 * @return the normalized string
 * @throws IllegalArgumentException if {@code form} is none of the handled forms
 */
public static String normalize(String str, Normalizer.Form form, int options) {
    int len = str.length();
    boolean nonAsciiSeen = false;
    int pos = 0;
    if (len < 80) {
        // short input: read characters straight from the string
        while (pos < len && !nonAsciiSeen) {
            nonAsciiSeen = str.charAt(pos) > 127;
            pos++;
        }
    } else {
        // longer input: read from a char[] copy
        char[] chars = str.toCharArray();
        while (pos < len && !nonAsciiSeen) {
            nonAsciiSeen = chars[pos] > 127;
            pos++;
        }
    }

    switch (form) {
    case NFC :
        return nonAsciiSeen ? NFC.normalize(str, options) : str;
    case NFD :
        return nonAsciiSeen ? NFD.normalize(str, options) : str;
    case NFKC :
        return nonAsciiSeen ? NFKC.normalize(str, options) : str;
    case NFKD :
        return nonAsciiSeen ? NFKD.normalize(str, options) : str;
    }

    throw new IllegalArgumentException("Unexpected normalization form: " +
                                       form);
}
项目:BIP39    文件:WordListHashing.java   
/**
 * Wraps a word list so that every word it returns is NFKD-normalized.
 * The space character is passed through from the wrapped list unchanged.
 *
 * @param wordList the word list to wrap
 * @return a view of {@code wordList} with normalized words
 */
private static WordList normalizeNFKD(WordList wordList) {
    final WordList normalizing = new WordList() {
        @Override
        public char getSpace() {
            return wordList.getSpace();
        }

        @Override
        public String getWord(int index) {
            // normalization happens on every lookup; nothing is cached
            return Normalizer.normalize(wordList.getWord(index), Normalizer.Form.NFKD);
        }
    };
    return normalizing;
}