Python jieba 模块,add_word() 实例源码

我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用jieba.add_word()

项目:entity-linker    作者:seucs    | 项目源码 | 文件源码
def getData(Mentions,S,E,contextMention,contextEntity, id):
    """Register mention names with jieba and collect mention/candidate data.

    For each mention: add its name to the jieba dictionary (so the segmenter
    keeps it as one token), append the name to S, append a '-' placeholder
    into id, and record its context in contextMention.  Then, per mention,
    collect up to 100 candidate titles into E and record each candidate's
    context (contextEntity) and id (id).

    All of S, E, contextMention, contextEntity and id are mutated in place.
    NOTE(review): the parameter ``id`` shadows the builtin; kept as-is for
    interface compatibility.
    """
    for mention in Mentions:
        jieba.add_word(mention.name)

        S.append(mention.name)
        id.append('-')  # placeholder id for the mention row itself
        contextMention[mention.name] = mention.context
    for item in Mentions:
        temp = []
        cnt = 0
        for candidate in item.candidates:
            # Cap at 100 candidates per mention.  The original `cnt > 100`
            # was an off-by-one that let 101 candidates through.
            if cnt >= 100:
                break
            cnt += 1
            temp.append(candidate.title)
            contextEntity[candidate.title] = candidate.context

            id.append(candidate.id)
        E.append(temp)

# element{mention/entity}  (NOTE: original Chinese comment lost to mojibake encoding)
项目:KnowledgeGraph    作者:SilverHelmet    | 项目源码 | 文件源码
def test():
    """Smoke-test dataset generation end to end (Python 2: uses print statements).

    Loads a DatasetFinder from on-disk caches, splits a sample sentence, and
    prints the datasets generated for the first two sentences.
    NOTE(review): the original Chinese text in the string literals below was
    lost to encoding (mojibake '?' runs); preserved byte-for-byte.
    """
    # NOTE(review): this first assignment is dead — immediately overwritten.
    x = u"??????????????????Nintendo???2006-11-02???????????????????????????3???????????????????????????????????????"
    x = u'???????????????'
    sentences = split_sentences(x)
    # jieba.add_word(u'????????', 5, 'baike')
    # jieba.add_word(u'Nintendo', 5, 'baike')
    # jieba.add_word(u'????', 5, 'baike')
    # jieba.add_word(u'???', 5, 'baike')
    # jieba.add_word(u'????', 5, 'baike')

    # Smaller sample caches, kept for quick local runs:
    # name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.sample.cache')
    # fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.sample.cache')
    name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.cache')
    fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.cache')
    finder = DatasetFinder.load_from_cache(name2fb_path, fb_ttls_path)
    # Print generated datasets for the first two sentences, separated by a rule.
    for x in gen_dataset(sentences[0], finder):
        print x
    print '-' * 50
    for x in gen_dataset(sentences[1], finder):
        print x
项目:HtmlExtract-Python    作者:xinyi-spark    | 项目源码 | 文件源码
def add_word_dict(word, freq=None, tag=None):
    '''
    Add a word to jieba's in-memory dictionary.

    :param word: the word to register with the segmenter
    :param freq: optional frequency; None lets jieba pick a suitable value
    :param tag: optional part-of-speech tag

    Bug fix: the original passed the literals ``freq=None, tag=None`` to
    jieba.add_word, silently discarding whatever the caller supplied;
    forward the actual parameters instead.
    '''
    jieba.add_word(word, freq=freq, tag=tag)
项目:PTTChatBot_DL2017    作者:thisray    | 项目源码 | 文件源码
def jiebaCustomSetting(self, dict_path, usr_dict_path):
    """Point jieba at a custom main dictionary and register extra user words.

    :param dict_path: path to the main dictionary file for jieba
    :param usr_dict_path: UTF-8 text file with one user word per line
    """
    jieba.set_dictionary(dict_path)
    with open(usr_dict_path, 'r', encoding='utf-8') as handle:
        for line in handle:
            jieba.add_word(line.strip('\n'))
项目:PTTChatBot_DL2017    作者:thisray    | 项目源码 | 文件源码
def TaibaCustomSetting(self, usr_dict):
    """Register user-defined words with the Taiba segmenter.

    :param usr_dict: UTF-8 text file with one word per line
    """
    with open(usr_dict, 'r', encoding='utf-8') as handle:
        for line in handle:
            Taiba.add_word(line.strip('\n'))
项目:FineGrainedOpinionMining    作者:chaoming0625    | 项目源码 | 文件源码
def __init():
    """Load the user dictionary and tune jieba's segmentation preferences.

    Bug fix: ``jieba.suggest_freq`` defaults to ``tune=False``, in which case
    it only *computes* a suggested frequency and changes nothing; pass
    ``tune=True`` so the suggested segmentations actually take effect.
    NOTE(review): the Chinese word literals below were lost to mojibake and
    are preserved byte-for-byte.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word(u"??", 10000)
    jieba.suggest_freq((u"?", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"??"), tune=True)
    jieba.suggest_freq((u"??", u"?"), tune=True)
项目:FineGrainedOpinionMining    作者:chaoming0625    | 项目源码 | 文件源码
def __init():
    """Load the user dictionary and tune jieba's segmentation preferences.

    Bug fix: ``jieba.suggest_freq`` defaults to ``tune=False``, in which case
    it only *computes* a suggested frequency and changes nothing; pass
    ``tune=True`` so the suggested segmentations actually take effect.
    NOTE(review): the Chinese word literals below were lost to mojibake and
    are preserved byte-for-byte.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word("??", 10000)
    jieba.suggest_freq(("?", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "?"), tune=True)
项目:Neural-Headline-Generator-CN    作者:QuantumLiu    | 项目源码 | 文件源码
def cut(text,custom_words=['FLOAT','TIME','DATE','EOS']):
    """Tokenize *text* with jieba, first registering placeholder tokens.

    :param text: string to segment
    :param custom_words: extra tokens to protect from being split
    :return: list of tokens
    """
    jieba.enable_parallel(32)  # multi-process segmentation
    for token in custom_words:
        jieba.add_word(token)
    return jieba.lcut(text)
项目:slack_emoji_bot    作者:linnil1    | 项目源码 | 文件源码
def __init__(self, slack, custom):
        """Set up the midnight-food bot: locate its channel, load the jieba
        dictionary, and apply persisted add/del word customizations.

        :param slack: Slack client exposing ``api_call``
        :param custom: dict with 'data' (persisted run data) and 'colorPrint'
        """
        self.slack = slack
        self.rundata = custom['data']
        self.colorPrint = custom['colorPrint']

        # Data files: stored food entries and the big jieba dictionary.
        self.food_dir = "data/midnight.json"
        self.food_dic = "data/dict.txt.big"

        # find midnight channel
        self.nochannel = False
        rep = self.slack.api_call("channels.list")
        self.channel_id = ""
        for c in rep['channels']:
            if c['name'].lower() == custom['food_channelname']:
                self.channel_id = c['id']
                break
        if not self.channel_id:
            # Without the channel the bot cannot operate; flag and bail out
            # early so main() becomes a no-op.
            self.colorPrint(
                "No midnight channel",
                "Restart when midnight channel can use",
                color="FAIL")
            self.nochannel = True
            return

        # Load the dictionary eagerly so the first query isn't slow.
        jieba.set_dictionary(self.food_dic)
        jieba.initialize()

        # add and del words persisted from previous runs
        for word in self.rundata.get('FOOD_addword'):
            jieba.add_word(word)
        for word in self.rundata.get('FOOD_delword'):
            jieba.del_word(word)

        self.init()
项目:TextClassification    作者:mosu027    | 项目源码 | 文件源码
def __init__(self):
    """Initialize sentiment-analysis resources and register user-defined
    words with jieba.

    Fix: the Windows path literal previously relied on unrecognized escape
    sequences (``\\w``, ``\\s`` — a DeprecationWarning in Python 3); a raw
    string produces the identical value without the warning.
    """
    self.negative = []       # negation words
    self.adverb = []         # degree adverbs
    self.questionMark = []   # question punctuation
    self.rootPath = r"E:\workout\data\senitment_data"
    # presumably returns (word -> type, word -> freq) dicts — verify against
    # UserDefineLibrary's definition
    self.wordtypeDict, self.wordfreqDict = self.UserDefineLibrary()
    for word in self.wordfreqDict.keys():
        jieba.add_word(str(word))
    self.initialize()

    # @staticmethod
项目:PTT-Chat-Generator    作者:zake7749    | 项目源码 | 文件源码
def jiebaCustomSetting(self, dict_path, usr_dict_path):
    """Configure jieba with a custom base dictionary plus user words.

    :param dict_path: main dictionary file passed to jieba.set_dictionary
    :param usr_dict_path: UTF-8 file, one extra word per line
    """
    jieba.set_dictionary(dict_path)
    with open(usr_dict_path, 'r', encoding='utf-8') as user_words:
        for entry in user_words:
            jieba.add_word(entry.strip('\n'))
项目:PTT-Chat-Generator    作者:zake7749    | 项目源码 | 文件源码
def TaibaCustomSetting(self, usr_dict):
    """Feed every line of *usr_dict* (UTF-8, one word per line) to Taiba."""
    with open(usr_dict, 'r', encoding='utf-8') as user_words:
        for entry in user_words:
            Taiba.add_word(entry.strip('\n'))
项目:WaiMaiOpinionMiner    作者:chaoming0625    | 项目源码 | 文件源码
def __init():
    """Load the user dictionary and tune jieba's segmentation preferences.

    Bug fix: ``jieba.suggest_freq`` defaults to ``tune=False``, in which case
    it only *computes* a suggested frequency and changes nothing; pass
    ``tune=True`` so the suggested segmentations actually take effect.
    NOTE(review): the Chinese word literals below were lost to mojibake and
    are preserved byte-for-byte.
    """
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word("??", 10000)
    jieba.suggest_freq(("?", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "??"), tune=True)
    jieba.suggest_freq(("??", "?"), tune=True)
项目:slack_emoji_bot    作者:linnil1    | 项目源码 | 文件源码
def main(self, datadict):
    """Handle one Slack event for the midnight-food bot.

    Routes:
      * file_share in the food channel -> index the uploaded image
      * "food <query>"    -> search and post the result (or a not-found reply)
      * "foodadd <word>"  -> add word to jieba, persist, re-init
      * "fooddel <word>"  -> remove word from jieba, persist, re-init

    Fix: the original caught ``BaseException``, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to ``Exception``.
    """
    if self.nochannel:
        return  # __init__ found no usable channel; bot is disabled
    if datadict['type'] == 'message' and \
            datadict.get('subtype') == "file_share" and \
            datadict.get('channel') == self.channel_id:
        self.imageAdd(datadict['file'])
    # Only plain messages (no subtype) are treated as commands.
    if not datadict['type'] == 'message' or 'subtype' in datadict:
        return
    if datadict['text'].startswith("food "):
        text = re.search(
            r"(?<=food ).*", datadict['text'], re.DOTALL).group().strip()

        payload = {
            "username": "?? Midnight",
            "icon_emoji": ":_e9_a3_9f:",
            # Reply in-thread when the trigger was threaded.
            "thread_ts": datadict.get("thread_ts") or '',
            "channel": datadict['channel']}

        try:
            ans = self.wordSearch(text)

            self.slack.api_call("chat.postMessage",
                                attachments=[self.wordParse(ans)],
                                **payload
                                )
        except Exception:
            # Best-effort: any lookup/parse failure becomes a polite reply.
            self.slack.api_call("chat.postMessage",
                                text="Sorry Not Found",
                                **payload
                                )

    elif datadict['text'].startswith("foodadd "):
        text = re.search(r"(?<=foodadd ).*",
                         datadict['text']).group().strip()
        jieba.add_word(text)
        self.rundata.append("FOOD_addword", text)  # persist across restarts
        self.init()
    elif datadict['text'].startswith("fooddel "):
        text = re.search(r"(?<=fooddel ).*",
                         datadict['text']).group().strip()
        jieba.del_word(text)
        self.rundata.append("FOOD_delword", text)  # persist across restarts
        self.init()