public void testParseFalseEncodedFile() throws IOException { Path tempDir = createTempDir(); Path dict = tempDir.resolve("foo.dict"); Settings nodeSettings = Settings.builder() .put("foo.bar_path", dict) .put(Environment.PATH_HOME_SETTING.getKey(), tempDir).build(); try (OutputStream writer = Files.newOutputStream(dict)) { writer.write(new byte[]{(byte) 0xff, 0x00, 0x00}); // some invalid UTF-8 writer.write('\n'); } Environment env = new Environment(nodeSettings); IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> Analysis.getWordList(env, nodeSettings, "foo.bar")); assertEquals("Unsupported character encoding detected while reading foo.bar_path: " + tempDir.resolve("foo.dict").toString() + " - files must be UTF-8 encoded" , ex.getMessage()); assertTrue(ex.getCause().toString(), ex.getCause() instanceof MalformedInputException || ex.getCause() instanceof CharacterCodingException); }
public void processData() throws CannotReadException { //Skip other flags dataBuffer.position(dataBuffer.position() + VERSION_FLAG_LENGTH + OTHER_FLAG_LENGTH + RESERVED_FLAG_LENGTH); CharsetDecoder decoder = Charset.forName("ISO-8859-1").newDecoder(); try { handlerType = decoder.decode((ByteBuffer) dataBuffer.slice().limit(HANDLER_LENGTH)).toString(); } catch (CharacterCodingException cee) { //Ignore } //To getFields human readable name mediaDataType = mediaDataTypeMap.get(handlerType); }
public void processData() throws CannotReadException { //Skip other flags dataBuffer.position(dataBuffer.position() + VERSION_FLAG_LENGTH + OTHER_FLAG_LENGTH + RESERVED_FLAG_LENGTH); CharsetDecoder decoder = Charset.forName("ISO-8859-1").newDecoder(); try { handlerType = decoder.decode((ByteBuffer) dataBuffer.slice().limit(HANDLER_LENGTH)).toString(); } catch (CharacterCodingException cee) { //Ignore } //To getFields human readable name mediaDataType = mediaDataTypeMap.get( handlerType); }
/** * Read a string from buffer of fixed size(size has already been set in constructor) * * @param arr this is the buffer for the frame * @param offset this is where to start reading in the buffer for this field */ public void readByteArray(byte[] arr, int offset) throws InvalidDataTypeException { logger.config("Reading from array from offset:" + offset); try { String charSetName = getTextEncodingCharSet(); CharsetDecoder decoder = Charset.forName(charSetName).newDecoder(); //Decode buffer if runs into problems should through exception which we //catch and then set value to empty string. logger.finest("Array length is:" + arr.length + "offset is:" + offset + "Size is:" + size); if (arr.length - offset < size) { throw new InvalidDataTypeException("byte array is to small to retrieve string of declared length:" + size); } String str = decoder.decode(ByteBuffer.wrap(arr, offset, size)).toString(); if (str == null) { throw new NullPointerException("String is null"); } value = str; } catch (CharacterCodingException ce) { logger.severe(ce.getMessage()); value = ""; } logger.config("Read StringFixedLength:" + value); }
private void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException { // Need to find numKeyFields separators int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator); for(int k=1; k<numKeyFields && pos!=-1; k++) { pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator); } try { if (pos == -1) { key.set(line, 0, length); val.set(""); } else { StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length); } } catch (CharacterCodingException e) { throw new IOException(StringUtils.stringifyException(e)); } }
public static String decode(ByteBuffer buffer) { //buffer.flip(); // FIXME: Setting the charset somewhere CharsetDecoder decoder = charset.newDecoder(); CharBuffer charBuffer; try { charBuffer = decoder.decode(buffer); String message = charBuffer.toString(); message = message.split("\n")[0]; message = message.split("\r")[0]; return message; } catch(CharacterCodingException cce) { cce.printStackTrace(); } return ""; }
public static void decode(CharsetDecoder charsetDecoder, ByteBuffer byteBuf, CharBuffer charByte) { try { CoderResult cr = charsetDecoder.decode(byteBuf, charByte, true); if (!cr.isUnderflow()) { cr.throwException(); } cr = charsetDecoder.flush(charByte); if (!cr.isUnderflow()) { cr.throwException(); } } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new JSONException("utf8 decode error, " + x.getMessage(), x); } }
private char getCharAt(int index) throws IndexOutOfBoundsException { if(sink.buffer.scope.isBefore(index)) { reset(); } while(!sink.buffer.scope.isInside(index)) { boolean hasNext; try { hasNext = sink.next(); } catch (CharacterCodingException e) { throw new SourceIOException(e); } if (listener != null && sink.buffer.scope.start > maxReadOffset) { maxReadOffset = sink.buffer.scope.start; listener.fileContentMatchingProgress(source.fo.getPath(), maxReadOffset); } if(!hasNext) { throw new IndexOutOfBoundsException("index is " + index + " > lenght"); // NOI18N } } return sink.charAt(index); }
/**
 * Converts the provided String to bytes using the encoder obtained from
 * {@code ENCODER_FACTORY}. If <code>replace</code> is true, then malformed or unmappable input
 * is replaced by the encoder's replacement value. Otherwise the method throws a
 * {@link CharacterCodingException} on such input.
 *
 * <p>The encoder's error actions are always set back to {@code REPORT} before returning when
 * {@code replace} is true, including when encoding fails with an exception.
 *
 * @param string  the text to encode
 * @param replace whether to replace, rather than report, malformed or unmappable input
 * @return ByteBuffer: bytes stores at ByteBuffer.array()
 *                     and length is ByteBuffer.limit()
 * @throws CharacterCodingException if the input cannot be encoded and {@code replace} is false
 */
public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    } finally {
        // NOTE(review): the encoder presumably outlives this call (it comes from a factory), so
        // it must not be left in REPLACE mode even if encode() throws - confirm against the factory.
        if (replace) {
            encoder.onMalformedInput(CodingErrorAction.REPORT);
            encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
public void testEncodingOfString() throws CharacterCodingException { final PropCharsetEncoder encoder = new PropCharsetEncoder(new PropCharset()); compare(encoder.encodeStringForTests(""), new byte[] {}); compare(encoder.encodeStringForTests("a"), new byte[] {'a'}); compare(encoder.encodeStringForTests("\\"), //pending character new byte[] {'\\'}); compare(encoder.encodeStringForTests("\\\\"), new byte[] {'\\', '\\'}); compare(encoder.encodeStringForTests("\\t"), new byte[] {'\\', 't'}); compare(encoder.encodeStringForTests("key\t=value"), new byte[] {'k', 'e', 'y', '\t', '=', 'v', 'a', 'l', 'u', 'e'}); compare(encoder.encodeStringForTests("key=\tvalue"), new byte[] {'k', 'e', 'y', '=', '\t', 'v', 'a', 'l', 'u', 'e'}); }
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
/** * Decode the buffer using the CharsetDecoder. * @param byteBuf * @return false if failed because the charbuffer was not big enough * @throws RuntimeException if it fails for encoding errors */ private boolean decodeUT8(ByteBuffer byteBuf) { // We give it all of the input data in call. boolean endOfInput = true; decoder.reset(); charBuffer.rewind(); // Convert utf-8 bytes to sequence of chars CoderResult result = decoder.decode(byteBuf, charBuffer, endOfInput); if (result.isOverflow()) { // Not enough space in the charBuffer. return false; } else if (result.isError()) { // Any other error try { result.throwException(); } catch (CharacterCodingException e) { throw new RuntimeException(e); } } return true; }
/**
 * Benchmark: encodes {@code STR} with the encoder from {@code asciiencode}, using the char and
 * byte buffers supplied by {@code charbuffergenerator} and {@code bytebuffergenerator}.
 *
 * @return a new array of {@code STR.length()} bytes read from the output buffer
 * @throws RuntimeException wrapping a {@link CharacterCodingException} if the encoder reports an
 *                          error
 */
@Benchmark
public byte[] byCharsetEncoder_US_ASCII() {
    try {
        final CharsetEncoder asciiEncoder = asciiencode.get();

        // Load the input text into the char buffer and prepare it for reading.
        final CharBuffer chars = charbuffergenerator.get();
        chars.clear();
        chars.append(STR);
        chars.flip();

        final ByteBuffer bytes = bytebuffergenerator.get();
        bytes.clear();

        final CoderResult outcome = asciiEncoder.encode(chars, bytes, false);
        if (outcome.isError()) {
            outcome.throwException();
        }

        // Copy the encoded bytes out of the output buffer.
        bytes.flip();
        final byte[] encoded = new byte[STR.length()];
        bytes.get(encoded);
        return encoded;
    } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Decodes {@code buffer} with {@code charsetDecoder} and writes the resulting text to
 * {@code writer}.
 *
 * @param buffer   bytes to decode and write
 * @param callback notified through {@code onException} if decoding fails or the writer reports
 *                 an error
 * @return {@code true} if the text was written without the writer reporting an error,
 *         {@code false} after the callback has been notified of a failure
 */
private boolean writeBuffer(final ByteBuffer buffer, final IoCallback callback) {
    final String text;
    try {
        text = charsetDecoder.decode(buffer).toString();
    } catch (CharacterCodingException e) {
        callback.onException(exchange, this, e);
        return false;
    }

    writer.write(text);
    if (!writer.checkError()) {
        return true;
    }
    callback.onException(exchange, this, new IOException());
    return false;
}
/**
 * Reads a property map from {@code data}: first an int entry count, then that many key/value
 * string pairs, each read with the session's charset decoder.
 *
 * @param session session used to obtain the charset decoder
 * @param data    buffer to read from
 * @return a new map holding the decoded properties
 * @throws CharacterCodingException if a key or value cannot be decoded
 */
private Map<String, String> decodeProperties ( final IoSession session, final IoBuffer data ) throws CharacterCodingException
{
    final int entries = data.getInt ();
    final Map<String, String> properties = new HashMap<String, String> ( entries );

    final CharsetDecoder decoder = getCharsetDecoder ( session );

    int remaining = entries;
    while ( remaining > 0 )
    {
        // The key precedes its value in the buffer, so it has to be read first.
        final String name = data.getString ( decoder );
        final String content = data.getString ( decoder );
        properties.put ( name, content );
        remaining--;
    }

    return properties;
}
/**
 * Writes a property map into a new auto-expanding buffer: first the entry count as an int, then
 * for every entry the key and the value, each followed by a {@code 0x00} byte.
 *
 * @param session    session used to obtain the charset encoder
 * @param properties the properties to encode
 * @return the flipped buffer containing the encoded properties
 * @throws CharacterCodingException if a key or value cannot be encoded
 */
private IoBuffer encodeProperties ( final IoSession session, final Map<String, String> properties ) throws CharacterCodingException
{
    final IoBuffer out = IoBuffer.allocate ( 0 );
    out.setAutoExpand ( true );

    out.putInt ( properties.size () );

    final CharsetEncoder encoder = getCharsetEncoder ( session );
    for ( final Map.Entry<String, String> property : properties.entrySet () )
    {
        // key, terminator, value, terminator
        out.putString ( property.getKey (), encoder );
        out.put ( (byte)0x00 );
        out.putString ( property.getValue (), encoder );
        out.put ( (byte)0x00 );
    }

    out.flip ();
    return out;
}
/**
 * Reads up to 512 bytes from the channel, decodes them as ISO-8859-1 and appends the text to
 * {@code request}.
 *
 * @param channel channel to read from
 * @param request accumulates the text received so far
 * @return {@code false} if the channel reported end-of-stream; otherwise whether
 *         {@code request} now contains the sequence {@code "\r\n\r\n"}
 * @throws IOException if reading from the channel fails
 */
private boolean readRequest(SocketChannel channel, StringBuilder request) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocate(512);
    int num = channel.read(buffer);
    if (num == -1) {
        return false;
    }
    buffer.flip();

    CharBuffer decoded;
    try {
        decoded = ISO_8859_1.newDecoder().decode(buffer);
    } catch (CharacterCodingException e) {
        throw new UncheckedIOException(e);
    }
    request.append(decoded);

    // A plain substring search is enough for this literal; no regex needs compiling per call.
    return request.indexOf("\r\n\r\n") >= 0;
}
/**
 * Writes {@code value} into {@code data}.
 *
 * <p>A {@code null} value is written as a single {@code 0x00} byte. Any other value is written
 * with {@code putString}, using {@code this.length} and a new encoder of {@code this.charset}.
 *
 * @param data  buffer to write to
 * @param value string to write, may be {@code null}
 * @throws RuntimeException wrapping the {@link CharacterCodingException} if the value cannot be
 *                          encoded
 */
@Override
public void put ( final IoBuffer data, final String value )
{
    if ( value == null )
    {
        data.put ( (byte)0x00 );
        // Stop here: falling through would hand null to putString after the marker was written.
        // NOTE(review): this emits one byte rather than a this.length-wide field - confirm that
        // the matching reader expects exactly that for a null value.
        return;
    }

    try
    {
        data.putString ( value, this.length, this.charset.newEncoder () );
    }
    catch ( final CharacterCodingException e )
    {
        throw new RuntimeException ( e );
    }
}
/**
 * Writes a property map into {@code data}: the entry count as an unsigned short, followed by the
 * key and value of every entry as prefixed strings encoded with {@code this.defaultCharset}.
 *
 * @param data       buffer to write to
 * @param properties the properties to encode
 * @throws ProtocolCodecException wrapping the {@link CharacterCodingException} if a key or value
 *                                cannot be encoded
 */
private void encodeProperties ( final IoBuffer data, final Map<String, String> properties ) throws ProtocolCodecException
{
    final CharsetEncoder encoder = this.defaultCharset.newEncoder ();

    data.putUnsignedShort ( properties.size () );

    try
    {
        for ( final Map.Entry<String, String> property : properties.entrySet () )
        {
            data.putPrefixedString ( property.getKey (), encoder );
            data.putPrefixedString ( property.getValue (), encoder );
        }
    }
    catch ( final CharacterCodingException e )
    {
        // The first failure aborts the whole encoding.
        throw new ProtocolCodecException ( e );
    }
}
public static void decode(CharsetDecoder charsetDecoder, ByteBuffer byteBuf, CharBuffer charByte) { try { CoderResult cr = charsetDecoder.decode(byteBuf, charByte, true); if (!cr.isUnderflow()) { cr.throwException(); } cr = charsetDecoder.flush(charByte); if (!cr.isUnderflow()) { cr.throwException(); } } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new JSONException(x.getMessage(), x); } }
/**
 * Decodes a {@code WriteResult} message from {@code data} and writes it to {@code out}.
 *
 * <p>The message consists of an int operation id, an unsigned short error code and a string
 * error message, read in that order.
 *
 * @param session session passed on to {@code decodeString}
 * @param data    buffer to read from
 * @param out     receives the decoded {@code WriteResult}
 * @return {@code false} if {@code messageLength(data)} is negative and nothing was decoded,
 *         otherwise {@code true}
 * @throws ProtocolCodecException wrapping the {@link CharacterCodingException} if decoding fails
 */
private boolean processWriteResult ( final IoSession session, final IoBuffer data, final ProtocolDecoderOutput out ) throws ProtocolCodecException
{
    if ( messageLength ( data ) < 0 )
    {
        return false;
    }

    try
    {
        // Field order in the buffer: operation id, error code, error message.
        final int opId = data.getInt ();
        final int code = data.getUnsignedShort ();
        final String message = decodeString ( session, data );

        final WriteResult writeResult = new WriteResult ( opId, code, message );
        out.write ( writeResult );
    }
    catch ( final CharacterCodingException e )
    {
        throw new ProtocolCodecException ( e );
    }

    return true;
}
/** * Finds any occurence of <code>what</code> in the backing * buffer, starting as position <code>start</code>. The starting * position is measured in bytes and the return value is in * terms of byte position in the buffer. The backing buffer is * not converted to a string for this operation. * @return byte position of the first occurence of the search * string in the UTF-8 buffer or -1 if not found */ public int find(String what, int start) { try { ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); ByteBuffer tgt = encode(what); byte b = tgt.get(); src.position(start); while (src.hasRemaining()) { if (b == src.get()) { // matching first byte src.mark(); // save position in loop tgt.mark(); // save position in target boolean found = true; int pos = src.position()-1; while (tgt.hasRemaining()) { if (!src.hasRemaining()) { // src expired first tgt.reset(); src.reset(); found = false; break; } if (!(tgt.get() == src.get())) { tgt.reset(); src.reset(); found = false; break; // no match } } if (found) return pos; } } return -1; // not found } catch (CharacterCodingException e) { // can't get here e.printStackTrace(); return -1; } }
public ByteBuffer fromString(String source) { // the encoder must be reset each time it's used, hence the thread-local storage CharsetEncoder theEncoder = encoder.get(); theEncoder.reset(); try { return theEncoder.encode(CharBuffer.wrap(source)); } catch (CharacterCodingException exc) { throw new MarshalException(String.format("Invalid ASCII character in string literal: %s", exc)); } }
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException { CharsetDecoder decode = Charset.forName( "UTF8" ).newDecoder(); decode.onMalformedInput( codingErrorAction ); decode.onUnmappableCharacter( codingErrorAction ); // decode.replaceWith( "X" ); String s; try { bytes.mark(); s = decode.decode( bytes ).toString(); bytes.reset(); } catch ( CharacterCodingException e ) { throw new InvalidDataException( CloseFrame.NO_UTF8, e ); } return s; }
private CharSequence decode(ByteBuffer buf) { // Decode a byte buffer into a CharBuffer CharBuffer isodcb = null; CharsetDecoder isodecoder = charset.newDecoder(); try { isodcb = isodecoder.decode(buf); } catch (CharacterCodingException e) { log.error(e); } return (CharSequence)isodcb; }
/**
 * Converts {@code bytes} to a String using {@code charset}.
 *
 * @param bytes the bytes to decode
 * @return the decoded text
 * @throws MarshalException if the bytes are not valid in {@code charset}; the message contains
 *                          the charset and the bytes in hex
 */
public String deserialize(ByteBuffer bytes) {
    final String decoded;
    try {
        decoded = ByteBufferUtil.string(bytes, charset);
    } catch (CharacterCodingException e) {
        throw new MarshalException("Invalid " + charset + " bytes " + ByteBufferUtil.bytesToHex(bytes));
    }
    return decoded;
}
/**
 * Returns the result of {@code doReadAsString(charset)}, rethrowing a
 * {@link CharacterCodingException} through
 * {@code UncheckedException.throwAsUncheckedException} so callers need not declare it.
 *
 * @param charset charset handed to {@code doReadAsString}
 * @return the text that was read
 */
public String readAsString(Charset charset) {
    final String text;
    try {
        text = doReadAsString(charset);
    } catch (CharacterCodingException e) {
        throw UncheckedException.throwAsUncheckedException(e);
    }
    return text;
}
/**
 * Decodes an encoded folder name: the name is turned into bytes with {@code asciiCharset} and
 * those bytes are decoded with {@code modifiedUtf7Charset}.
 *
 * @param encodedFolderName the encoded folder name
 * @return the decoded folder name
 * @throws CharacterCodingException if the input is malformed (malformed input is reported, not
 *                                  replaced)
 */
public String decode(String encodedFolderName) throws CharacterCodingException {
    final CharsetDecoder utf7Decoder = modifiedUtf7Charset.newDecoder();
    utf7Decoder.onMalformedInput(CodingErrorAction.REPORT);

    final byte[] asciiBytes = encodedFolderName.getBytes(asciiCharset);
    return utf7Decoder.decode(ByteBuffer.wrap(asciiBytes)).toString();
}
/** * Handle an error thrown while file decoding. Inform search listener and * append detailed info into the IDE Log. */ protected final void handleDecodingError(SearchListener listener, FileObject file, CharsetDecoder decoder, CharacterCodingException e) { String charsetName; try { if (decoder.isAutoDetecting() && decoder.isCharsetDetected()) { Charset c = decoder.detectedCharset(); if (c != null) { charsetName = c.displayName(); } else { charsetName = decoder.charset().displayName(); } } else { charsetName = decoder.charset().displayName(); } } catch (Exception ex) { LOG.log(Level.INFO, "Failed to obtain actual charset", ex); //NOI18N charsetName = decoder == null ? "null" : decoder.toString();//NOI18N } String msg = NbBundle.getMessage(ResultView.class, "TEXT_INFO_ERROR_ENCODING", charsetName); //NOI18N listener.fileContentMatchingError(file.getPath(), new Exception(msg, e)); LOG.log(Level.INFO, "{0}; UnmappableCharacterException: {1}", //NOI18N new Object[]{file.getPath(), e.getMessage()}); }
/** * Obtains a next decoded portion of data. This method implements a * <a href= *"http://java.sun.com/javase/6/docs/api/java/nio/charset/CharsetDecoder.html#steps" * >decoding operation</a>. * @return {@code true} is successful, otherwise {@code false}. */ private boolean next() throws CharacterCodingException { if (wasEndOfInput) { return false; } CharBuffer out = buffer.clear(); boolean endOfInput = wasEndOfInput; do { if(coderResult == CoderResult.UNDERFLOW) { endOfInput = source.readNext(); } coderResult = decoder.decode(source.buffer, out, endOfInput); if (coderResult.isOverflow()) { out = buffer.growBuffer(); } // loop if underflow is reported, EOF is not reached && no chars // produced, otherwise our logic could be damaged checkError(coderResult); } while (out.position() == 0 && coderResult.isUnderflow() && !endOfInput); if(endOfInput) { while((coderResult = decoder.flush(out)) == CoderResult.OVERFLOW) { out = buffer.growBuffer(); } checkError(coderResult); } buffer.adjustScope(); wasEndOfInput = endOfInput; return !buffer.scope.isEmpty(); }
private byte[] encodeAsUTF8(String s) throws CharacterCodingException { // not using s.getBytes here so as to catch unmappable characters ByteBuffer bb = UTF_8.newEncoder().encode(CharBuffer.wrap(s)); byte[] result = new byte[bb.limit()]; bb.get(result); assertTrue(bb.remaining() == 0); return result; }
/**
 * Checks that {@code PropCharsetDecoder} passes incomplete {@code \}u escape sequences through
 * unchanged: each input is expected to decode to exactly its own characters.
 */
public void testDecodingPendingBytes() throws CharacterCodingException {
    final PropCharsetDecoder decoder = new PropCharsetDecoder(new PropCharset());

    final String[] pendingSequences = {
        "\\",
        "\\u",
        "\\u1",
        "\\u12",
        "\\u123",
    };

    for (final String sequence : pendingSequences) {
        // All samples are plain ASCII, so the input is one byte per character.
        final byte[] input = new byte[sequence.length()];
        for (int i = 0; i < input.length; i++) {
            input[i] = (byte) sequence.charAt(i);
        }
        compare(decoder.decodeBytesForTests(input), sequence.toCharArray());
    }
}
/**
 * Checks that {@code PropCharsetDecoder} leaves invalid {@code \}u escape sequences untouched:
 * each input, whose escape is interrupted by non-hex characters, is expected to decode to
 * exactly its own characters.
 */
public void testDecodingErrorRecovery() throws CharacterCodingException {
    final PropCharsetDecoder decoder = new PropCharsetDecoder(new PropCharset());

    final String[] brokenEscapes = {
        "\\uxyz",
        "\\u1xyz",
        "\\u12xyz",
        "\\u123xyz",
    };

    for (final String escape : brokenEscapes) {
        // All samples are plain ASCII, so the input is one byte per character.
        final byte[] input = new byte[escape.length()];
        for (int i = 0; i < input.length; i++) {
            input[i] = (byte) escape.charAt(i);
        }
        compare(decoder.decodeBytesForTests(input), escape.toCharArray());
    }
}
/**
 * Exercises {@code DatabaseConnectionConvertor.decodePassword}: an empty array gives a non-null,
 * empty result; UTF-8 bytes of {@code "password"} round-trip; and the bytes {@code ff ff ff} are
 * rejected with a {@link CharacterCodingException}.
 */
public void testDecodePassword() throws Exception {
    // Empty input.
    assertNotNull(DatabaseConnectionConvertor.decodePassword(new byte[0]));
    assertTrue(DatabaseConnectionConvertor.decodePassword(new byte[0]).isEmpty());

    // Well-formed input.
    final byte[] wellFormed = "password".getBytes("UTF-8");
    assertEquals("password", DatabaseConnectionConvertor.decodePassword(wellFormed));

    // Malformed input must be rejected.
    final byte[] malformed = new byte[] { (byte)0xff, (byte)0xff, (byte)0xff };
    boolean rejected = false;
    try {
        DatabaseConnectionConvertor.decodePassword(malformed);
    } catch (CharacterCodingException expected) {
        rejected = true;
    }
    if (!rejected) {
        fail();
    }
}
/**
 * Encodes {@code message} with the shared {@code encoder} and returns the buffer with its
 * position moved to its limit.
 *
 * @param message the text to encode
 * @return the encoded bytes, or {@code null} if encoding failed (the stack trace is printed)
 */
public static ByteBuffer encode(String message) {
    final ByteBuffer encoded;
    try {
        encoded = encoder.encode(CharBuffer.wrap(message));
    } catch (CharacterCodingException cce) {
        cce.printStackTrace();
        return null;
    }

    // Leave the position at the end of the encoded data.
    encoded.position(encoded.limit());
    return encoded;
}
/**
 * Decodes {@code length} bytes of {@code m_strings}, starting at {@code offset}, with
 * {@code UTF8_DECODER} when {@code m_isUTF8} is set and with {@code UTF16LE_DECODER} otherwise.
 *
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded text, or {@code null} if decoding failed (a warning is logged)
 */
private String decodeString(int offset, int length) {
    final CharsetDecoder decoder = m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER;
    try {
        final ByteBuffer raw = ByteBuffer.wrap(m_strings, offset, length);
        return decoder.decode(raw).toString();
    } catch (CharacterCodingException ex) {
        LOGGER.log(Level.WARNING, null, ex);
        return null;
    }
}
/** * Finds any occurrence of <code>what</code> in the backing * buffer, starting as position <code>start</code>. The starting * position is measured in bytes and the return value is in * terms of byte position in the buffer. The backing buffer is * not converted to a string for this operation. * @return byte position of the first occurrence of the search * string in the UTF-8 buffer or -1 if not found */ public int find(String what, int start) { try { ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); ByteBuffer tgt = encode(what); byte b = tgt.get(); src.position(start); while (src.hasRemaining()) { if (b == src.get()) { // matching first byte src.mark(); // save position in loop tgt.mark(); // save position in target boolean found = true; int pos = src.position()-1; while (tgt.hasRemaining()) { if (!src.hasRemaining()) { // src expired first tgt.reset(); src.reset(); found = false; break; } if (!(tgt.get() == src.get())) { tgt.reset(); src.reset(); found = false; break; // no match } } if (found) return pos; } } return -1; // not found } catch (CharacterCodingException e) { // can't get here e.printStackTrace(); return -1; } }
private ByteBuffer encode(String str) { // Encode a string into a byte buffer ByteBuffer bb = null; CharsetEncoder isoencoder = charset.newEncoder(); try { bb = isoencoder.encode(CharBuffer.wrap(str)); } catch (CharacterCodingException e) { log.error(e); } return bb; }