@Test public void escape() throws UnsupportedEncodingException { String unicode = new UnicodeEscaper().translate("中国"); System.out.println(unicode); // \u4E2D\u56FD String chinese = new UnicodeUnescaper().translate(unicode); System.out.println(chinese); // 中国 System.out.println(StringEscapeUtils.escapeJava(chinese)); // \u4E2D\u56FD System.out.println(StringEscapeUtils.unescapeJava(unicode)); // 中国 System.out.println(StringEscapeUtils.unescapeJson(unicode)); // 中国 String ns = new String(unicode.getBytes("UTF-8"), "UTF-8"); System.out.println(ns); }
/**
 * Escapes {@code str} with a chain of translators: first a lookup that
 * prefixes double quotes and backslashes with a backslash, then the lookup
 * table supplied by {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()}, and,
 * when requested, a {@code UnicodeEscaper} built with
 * {@code outsideOf(32, 127)}.
 *
 * @param str     the text to escape
 * @param unicode {@code true} to append the Unicode escaper to the chain
 * @return whatever the assembled translator produces for {@code str}
 */
private String escapeJava(String str, boolean unicode) {
    // Double quote and backslash each gain a leading backslash.
    final String[][] quoteAndBackslash = {
        { "\"", "\\\"" },
        { "\\", "\\\\" }
    };

    final CharSequenceTranslator basic = new LookupTranslator(quoteAndBackslash)
            .with(new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()));

    // The Unicode escaper goes last so the lookups above are consulted first.
    final CharSequenceTranslator chosen = unicode
            ? basic.with(UnicodeEscaper.outsideOf(32, 127))
            : basic;

    return chosen.translate(str);
}
@Override @Asynchronous public void importTrainingDataForClassifier(Long targetCollectionId, Long sourceCollectionId, Long nominalAttributeId) { try { List<Long> nominalLabelIds = remoteNominalLabelEJB.getNominalLabelIdsByAttributeID(nominalAttributeId); List<DocumentDTO> documentDTOs = remoteDocumentEJB.getDocumentForNominalLabelAndCrisis(nominalLabelIds, sourceCollectionId); CollectionDTO collectionDTO = remoteCrisisEJB.findCrisisByID(targetCollectionId); CollectionDTO sourceCollection = remoteCrisisEJB.findCrisisByID(sourceCollectionId); // save model family ModelFamilyDTO modelFamilyDTO = new ModelFamilyDTO(); modelFamilyDTO.setCrisisDTO(collectionDTO); NominalAttributeDTO attributeDTO = new NominalAttributeDTO(); attributeDTO.setNominalAttributeId(nominalAttributeId); modelFamilyDTO.setNominalAttributeDTO(attributeDTO); modelFamilyDTO.setIsActive(true); boolean success = modelFamilyResourceFacade.addCrisisAttribute(modelFamilyDTO); if(success) { // iterate through each tagged document for(DocumentDTO documentDTO : documentDTOs) { DocumentDTO documentToSave = new DocumentDTO(); documentToSave.setCrisisDTO(collectionDTO); documentToSave.setData(UnicodeEscaper.outsideOf(32, 0x7f).translate(documentDTO.getData())); documentToSave.setGeoFeatures(documentDTO.getGeoFeatures()); documentToSave.setDoctype(documentDTO.getDoctype()); documentToSave.setHasHumanLabels(true); documentToSave.setLanguage(documentDTO.getLanguage()); documentToSave.setWordFeatures(UnicodeEscaper.outsideOf(32, 0x7f).translate(documentDTO.getWordFeatures())); documentToSave.setValueAsTrainingSample(documentDTO.getValueAsTrainingSample()); documentToSave.setIsEvaluationSet(documentDTO.getIsEvaluationSet()); documentToSave.setReceivedAt(documentDTO.getReceivedAt()); documentToSave.setSourceCollection(sourceCollection); // save new document DocumentDTO newDocument = remoteDocumentEJB.addDocument(documentToSave); // fetch document nominal label for existing doc List<DocumentNominalLabelDTO> 
documentNominalLabelDTOs = remoteDocumentNominalLabelEJB.findLabeledDocumentListByID(documentDTO.getDocumentID()); // add new document labels if(documentNominalLabelDTOs != null) { for(DocumentNominalLabelDTO documentNominalLabelDTO : documentNominalLabelDTOs) { DocumentNominalLabelDTO labelDTOToSave = new DocumentNominalLabelDTO(); labelDTOToSave.setDocumentDTO(newDocument); labelDTOToSave.setNominalLabelDTO(documentNominalLabelDTO.getNominalLabelDTO()); labelDTOToSave.setIdDTO(new DocumentNominalLabelIdDTO(newDocument.getDocumentID(), documentNominalLabelDTO.getIdDTO().getNominalLabelId(), documentNominalLabelDTO.getIdDTO().getUserId())); this.saveDocumentNominalLabel(labelDTOToSave); } } // fetch task answers for existing doc List<TaskAnswerDTO> answers = remoteTaskAnswerEJB.getTaskAnswer(documentDTO.getDocumentID()); // save task answers for(TaskAnswerDTO answer : answers) { TaskAnswerDTO answerToSave = new TaskAnswerDTO(); answerToSave.setAnswer(answer.getAnswer()); answerToSave.setDocumentID(newDocument.getDocumentID()); answerToSave.setUserID(answer.getUserID()); answerToSave.setTimestamp(new Date()); remoteTaskAnswerEJB.insertTaskAnswer(answerToSave); } } } } catch (Exception e) { logger.error("Error in importing training data for collection id : " + sourceCollectionId + " and attribute : " + nominalAttributeId, e); } }