Python enchant 模块,DictWithPWL() 实例源码

我们从Python开源项目中,提取了以下15个代码示例,用于说明如何使用enchant.DictWithPWL()

项目:palladio    作者:slipguru    | 项目源码 | 文件源码
def __init__(self, lang, suggest, word_list_filename, filters=None):
        """Set up the dictionary and tokenizer used by this checker.

        Args:
            lang: Language tag handed to both ``enchant.DictWithPWL`` and
                ``get_tokenizer``.
            suggest: Stored unchanged on ``self.suggest``.
            word_list_filename: Personal word list file handed to
                ``enchant.DictWithPWL``.
            filters: Forwarded as the second positional argument of
                ``get_tokenizer``. Defaults to a new empty list.
        """
        # Use a fresh list per call instead of a shared mutable default, so
        # that nothing downstream can leak state between instances.
        if filters is None:
            filters = []
        self.dictionary = enchant.DictWithPWL(lang, word_list_filename)
        # NOTE(review): if get_tokenizer is enchant.tokenize.get_tokenizer,
        # newer pyenchant releases take ``chunkers`` as the second positional
        # parameter -- confirm that ``filters`` lands where intended.
        self.tokenizer = get_tokenizer(lang, filters)
        # Keep a second reference to the tokenizer created above.
        self.original_tokenizer = self.tokenizer
        self.suggest = suggest
项目:Taigabot    作者:FrozenPigs    | 项目源码 | 文件源码
def test_pwl(self):
        """Run the checker loop over a text using a DictWithPWL dictionary.

        The first reported error must be "sme" and the second "cheked"; the
        second one is added through the checker, and the loop is expected to
        finish with exactly two reported errors (last index 1).
        """
        from enchant import DictWithPWL
        pwl_dict = DictWithPWL("en_US", None, None)
        sample = "I am sme text to be cheked with personal list of cheked words"
        checker = SpellChecker(pwl_dict, sample)
        for n, error in enumerate(checker):
            if n == 0:
                self.assertEqual(error.word, "sme")
            elif n == 1:
                self.assertEqual(error.word, "cheked")
                # Add the word via the checker while still iterating.
                checker.add()
        # Index of the last error seen.
        self.assertEqual(n, 1)
项目:dalila    作者:slipguru    | 项目源码 | 文件源码
def __init__(self, lang, suggest, word_list_filename, filters=None):
        """Set up the dictionary and tokenizer used by this checker.

        Args:
            lang: Language tag handed to both ``enchant.DictWithPWL`` and
                ``get_tokenizer``.
            suggest: Stored unchanged on ``self.suggest``.
            word_list_filename: Personal word list file handed to
                ``enchant.DictWithPWL``.
            filters: Forwarded as the second positional argument of
                ``get_tokenizer``. Defaults to a new empty list.
        """
        # Use a fresh list per call instead of a shared mutable default, so
        # that nothing downstream can leak state between instances.
        if filters is None:
            filters = []
        self.dictionary = enchant.DictWithPWL(lang, word_list_filename)
        # NOTE(review): if get_tokenizer is enchant.tokenize.get_tokenizer,
        # newer pyenchant releases take ``chunkers`` as the second positional
        # parameter -- confirm that ``filters`` lands where intended.
        self.tokenizer = get_tokenizer(lang, filters)
        # Keep a second reference to the tokenizer created above.
        self.original_tokenizer = self.tokenizer
        self.suggest = suggest
项目:Chat-Bot    作者:FredLoh    | 项目源码 | 文件源码
def even_or_odd(self, message=None, match=None, to=None):
        """Play one round of an even/odd guessing game and build the reply.

        The parity of the length of the ``evenOrOdd`` group captured in
        ``match`` is compared with the parity of a random integer from 1 to
        10; the round is won when both parities agree.

        Args:
            message: Incoming message; the reply is addressed to
                ``message.getFrom()``.
            match: Match object exposing a named group ``evenOrOdd``.
            to: Not used by this method.

        Returns:
            A ``TextMessageProtocolEntity`` carrying the drawn number and the
            outcome text.
        """
        guess_is_odd = bool(len(match.group("evenOrOdd")) % 2)
        num = random.randint(1, 10)
        draw_is_odd = bool(num % 2)

        # Same parity on both sides means the player wins.
        if guess_is_odd == draw_is_odd:
            template = "[%d]\nYou win."
        else:
            template = "[%d]\nYou lose!"
        return TextMessageProtocolEntity(template % num, to=message.getFrom())

    # def beban_spell_checker(self, message=None, match=None, to=None):
    #     print(message.getBody())
    #     correctionList = ""
    #     text = message.getBody()
    #     d = enchant.DictWithPWL("es_MX","wordList.txt")
    #     d_en = enchant.Dict("en_US")

    #     wordList = text.split()
    #     for word in wordList:
    #       if(word.isalnum() == True):
    #         print(word)
    #         if(d.check(word) == False):
    #             # if(d_en.check(word) == False):
    #           solutions = d.suggest(word)
    #           print(solutions)
    #           sol = str(solutions[0])
    #           if(sol.isalnum() == False):
    #             correctionList += sol + "* "
    #     if (correctionList != ""):
    #         print(correctionList)
    #         return TextMessageProtocolEntity(correctionList, to=message.getFrom())
项目:icing    作者:slipguru    | 项目源码 | 文件源码
def __init__(self, lang, suggest, word_list_filename, filters=None):
        """Set up the dictionary and tokenizer used by this checker.

        Args:
            lang: Language tag handed to both ``enchant.DictWithPWL`` and
                ``get_tokenizer``.
            suggest: Stored unchanged on ``self.suggest``.
            word_list_filename: Personal word list file handed to
                ``enchant.DictWithPWL``.
            filters: Forwarded as the second positional argument of
                ``get_tokenizer``. Defaults to a new empty list.
        """
        # Use a fresh list per call instead of a shared mutable default, so
        # that nothing downstream can leak state between instances.
        if filters is None:
            filters = []
        self.dictionary = enchant.DictWithPWL(lang, word_list_filename)
        # NOTE(review): if get_tokenizer is enchant.tokenize.get_tokenizer,
        # newer pyenchant releases take ``chunkers`` as the second positional
        # parameter -- confirm that ``filters`` lands where intended.
        self.tokenizer = get_tokenizer(lang, filters)
        # Keep a second reference to the tokenizer created above.
        self.original_tokenizer = self.tokenizer
        self.suggest = suggest
项目:python-translate    作者:caspartse    | 项目源码 | 文件源码
def suggest(self):
        """Return spelling suggestions for ``self.word``.

        Returns:
            ``None`` when ``self.word`` contains any character other than
            ASCII letters, digits, apostrophe, hyphen, dot or whitespace;
            otherwise whatever the dictionary's ``suggest`` returns.
        """
        # Anything left after stripping the allowed characters means the word
        # is not something we want to spell-check.
        if re.sub(r'[a-zA-Z\d\'\-\.\s]', '', self.word):
            return None
        import enchant
        try:
            d = enchant.DictWithPWL(
                'en_US', path + '/data/spell-checker/american-english-large')
        except Exception:
            # Best-effort fallback to the plain dictionary when the one with
            # the personal word list cannot be created. ``Exception`` rather
            # than a bare ``except`` so that KeyboardInterrupt/SystemExit
            # still propagate.
            d = enchant.Dict('en_US')
        return d.suggest(self.word)
项目:data_programming    作者:kep1616    | 项目源码 | 文件源码
def __init__(self):
        """Create the stemmer, vectorizer-derived helpers and spell checker.

        Sets ``stemmer``, ``vectorizer``, ``tokenizer``, ``preprocessor`` and
        ``spellchecker`` on the instance.
        """
        self.stemmer = LancasterStemmer()

        # The vectorizer converts text documents into a token-count matrix;
        # strip_accents='ascii' removes accents during preprocessing.
        vectorizer = CountVectorizer(strip_accents='ascii')
        self.vectorizer = vectorizer

        # Reuse the vectorizer's own tokenizer and preprocessor callables.
        self.tokenizer = vectorizer.build_tokenizer()
        self.preprocessor = vectorizer.build_preprocessor()

        # en_US dictionary combined with the configured personal word list.
        personal_words = path_config.PERSONAL_WORD_DICTIONARY_FILE
        self.spellchecker = enchant.DictWithPWL("en_US", pwl=personal_words)
项目:Chromium_DepotTools    作者:p07r0457    | 项目源码 | 文件源码
def open(self):
        """Prepare the spelling state from ``self.config``.

        ``initialized`` stays ``False`` (and nothing else is set up) when
        ``enchant`` is ``None`` or no spelling dictionary name is configured.
        Otherwise this builds the ignore list, creates the dictionary, opens
        the private dictionary file for appending when one is configured,
        creates the unknown-words set when requested, compiles the
        punctuation regex and finally sets ``initialized`` to ``True``.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        dict_name = self.config.spelling_dict
        if not dict_name:
            return

        ignored = []
        for word in self.config.spelling_ignore_words.split(","):
            ignored.append(word.strip())
        # Always ignored: "param" shows up in docstring parameter
        # descriptions and "pylint" in pylint pragma comments.
        ignored += ["param", "pylint"]
        self.ignore_list = ignored

        private_dict_path = self.config.spelling_private_dict_file
        if not private_dict_path:
            self.spelling_dict = enchant.Dict(dict_name)
        else:
            self.spelling_dict = enchant.DictWithPWL(
                dict_name, private_dict_path)
            # Held open in append mode on the instance.
            self.private_dict_file = open(private_dict_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Regex matching every punctuation sign except ' and _, which are
        # treated in a special way.
        puncts = "".join(
            char for char in string.punctuation if char not in ("'", "_"))
        self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
        self.initialized = True
项目:node-gn    作者:Shouqun    | 项目源码 | 文件源码
def open(self):
        """Prepare the spelling state from ``self.config``.

        ``initialized`` stays ``False`` (and nothing else is set up) when
        ``enchant`` is ``None`` or no spelling dictionary name is configured.
        Otherwise this builds the ignore list, creates the dictionary, opens
        the private dictionary file for appending when one is configured,
        creates the unknown-words set when requested, compiles the
        punctuation regex and finally sets ``initialized`` to ``True``.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        dict_name = self.config.spelling_dict
        if not dict_name:
            return

        ignored = []
        for word in self.config.spelling_ignore_words.split(","):
            ignored.append(word.strip())
        # Always ignored: "param" shows up in docstring parameter
        # descriptions and "pylint" in pylint pragma comments.
        ignored += ["param", "pylint"]
        self.ignore_list = ignored

        private_dict_path = self.config.spelling_private_dict_file
        if not private_dict_path:
            self.spelling_dict = enchant.Dict(dict_name)
        else:
            self.spelling_dict = enchant.DictWithPWL(
                dict_name, private_dict_path)
            # Held open in append mode on the instance.
            self.private_dict_file = open(private_dict_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Regex matching every punctuation sign except ' and _, which are
        # treated in a special way.
        puncts = "".join(
            char for char in string.punctuation if char not in ("'", "_"))
        self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
        self.initialized = True
项目:bslint    作者:sky-uk    | 项目源码 | 文件源码
def get_new_dictionary(dictionary_lang="en_GB"):
    """Create a dictionary for ``dictionary_lang`` with the personal word list.

    The personal word list is ``personal-words-list.txt`` under
    ``CONFIG_PATH``.

    Args:
        dictionary_lang: Language tag handed to ``enchant.DictWithPWL``.

    Returns:
        The newly created ``enchant.DictWithPWL`` instance.
    """
    return enchant.DictWithPWL(
        dictionary_lang,
        os.path.join(CONFIG_PATH, 'personal-words-list.txt'),
    )
项目:depot_tools    作者:webrtc-uwp    | 项目源码 | 文件源码
def open(self):
        """Prepare the spelling state from ``self.config``.

        ``initialized`` stays ``False`` (and nothing else is set up) when
        ``enchant`` is ``None`` or no spelling dictionary name is configured.
        Otherwise this builds the ignore list, creates the dictionary, opens
        the private dictionary file for appending when one is configured,
        creates the unknown-words set when requested, compiles the
        punctuation regex and finally sets ``initialized`` to ``True``.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        dict_name = self.config.spelling_dict
        if not dict_name:
            return

        ignored = []
        for word in self.config.spelling_ignore_words.split(","):
            ignored.append(word.strip())
        # Always ignored: "param" shows up in docstring parameter
        # descriptions and "pylint" in pylint pragma comments.
        ignored += ["param", "pylint"]
        self.ignore_list = ignored

        private_dict_path = self.config.spelling_private_dict_file
        if not private_dict_path:
            self.spelling_dict = enchant.Dict(dict_name)
        else:
            self.spelling_dict = enchant.DictWithPWL(
                dict_name, private_dict_path)
            # Held open in append mode on the instance.
            self.private_dict_file = open(private_dict_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Regex matching every punctuation sign except ' and _, which are
        # treated in a special way.
        puncts = "".join(
            char for char in string.punctuation if char not in ("'", "_"))
        self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
        self.initialized = True
项目:wuye.vim    作者:zhaoyingnan911    | 项目源码 | 文件源码
def open(self):
        """Prepare the spelling state from ``self.config``.

        ``initialized`` stays ``False`` (and nothing else is set up) when
        ``enchant`` is ``None`` or no spelling dictionary name is configured.
        Otherwise this builds the ignore list, creates the dictionary, opens
        the private dictionary file for appending when one is configured,
        creates the unknown-words set when requested, compiles the
        punctuation regex and finally sets ``initialized`` to ``True``.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        dict_name = self.config.spelling_dict
        if not dict_name:
            return

        ignored = []
        for word in self.config.spelling_ignore_words.split(","):
            ignored.append(word.strip())
        # Always ignored: "param" shows up in docstring parameter
        # descriptions and "pylint" in pylint pragma comments.
        ignored += ["param", "pylint"]
        self.ignore_list = ignored

        private_dict_path = self.config.spelling_private_dict_file
        if not private_dict_path:
            self.spelling_dict = enchant.Dict(dict_name)
        else:
            self.spelling_dict = enchant.DictWithPWL(
                dict_name, private_dict_path)
            # Held open in append mode on the instance.
            self.private_dict_file = open(private_dict_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Regex matching every punctuation sign except ' and _, which are
        # treated in a special way.
        puncts = "".join(
            char for char in string.punctuation if char not in ("'", "_"))
        self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
        self.initialized = True
项目:calm    作者:cygwin    | 项目源码 | 文件源码
def spellcheck_hints(args, packages):
    """Spell-check package hint texts and print a misspelling summary.

    Words from ``words.txt`` and the components of every package name are
    added to the dictionary first. Then the ``sdesc``, ``ldesc`` and
    ``message`` hints of every package (except ``*-debuginfo`` ones) are
    checked, and each reported word is printed with its occurrence count,
    most frequent first.

    Args:
        args: Not used by this function.
        packages: Mapping from package name to an object with a ``hints``
            mapping.
    """
    spelldict = DictWithPWL('en-US')
    chkr = SpellChecker(spelldict, filters=[DescFilter])
    misspellings = {}

    # add technical words not in spell-checking dictionary
    wordlist = set()
    with open('words.txt') as f:
        for w in f:
            # strip any trailing comment
            w = re.sub(r'#.*$', '', w)
            # strip any whitespace
            w = w.strip()
            # blank and comment-only lines carry no word to add
            if not w:
                continue
            spelldict.add(w)
            wordlist.add(w.lower())
            # XXX: for the moment, to reduce the set of errors, ignore the fact
            # that words.txt gives a canonical capitalization, and accept any
            # capitalization
            spelldict.add(w.lower())
            spelldict.add(w.capitalize())

    # add all package names as valid words
    for p in packages:
        for w in re.split('[_-]', p):
            # remove punctuation characters
            w = re.sub(r'[+]', '', w)
            # strip off any trailing numbers
            w = re.sub(r'[\d.]*$', '', w)

            # both with and without any lib prefix
            for w1 in (w, re.sub(r'^lib', '', w)):
                # Stripping can leave nothing behind (e.g. a purely numeric
                # component, or a bare "lib").
                if not w1:
                    continue
                # add the name variant unless it exists in the list above,
                # which will give a canonical capitalization
                if w1.lower() not in wordlist:
                    spelldict.add(w1.lower())
                    spelldict.add(w1)
                    spelldict.add(w1.capitalize())

    # for each package
    for p in sorted(packages.keys()):
        # debuginfo packages have uninteresting, auto-generated text which
        # contains the package name
        if p.endswith('-debuginfo'):
            continue

        # spell-check the spell-checkable keys
        for k in ['sdesc', 'ldesc', 'message']:
            if k in packages[p].hints:
                chkr.set_text(packages[p].hints[k])
                # XXX: this is doing all the work to generate suggestions, which
                # we then ignore, so could be written much more efficiently
                for err in chkr:
                    # logging.error("package '%s', hint '%s': Is '%s' a word?" % (p, k, err.word))
                    misspellings.setdefault(err.word, 0)
                    misspellings[err.word] += 1

    # summarize
    for c in sorted(misspellings, key=misspellings.get, reverse=True):
        print('%16s: %4d' % (c, misspellings[c]))
项目:meme_get    作者:memegen    | 项目源码 | 文件源码
def tesseract_ocr_helper(base_image, config="Default"):
    """Run OCR on an image through pyocr and spell-correct the result.

    The first available pyocr tool and its first available language are
    used. Every space-separated token of the recognized text that the
    dictionary rejects is replaced by its first suggestion, when there is
    one.

    Args:
        base_image: Image handed to the tool's ``image_to_string``.
        config: When not ``"Default"``, set as the only entry of the
            builder's ``tesseract_configs``.

    Returns:
        The corrected tokens joined with single spaces.

    Note:
        Prints progress information, and calls ``sys.exit(1)`` when pyocr
        reports no available tool.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)

    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    print("Will use lang '%s'" % (lang))

    custom_builder = pyocr.builders.TextBuilder()
    if config != "Default":
        custom_builder.tesseract_configs = [config]

    txt = tool.image_to_string(
        base_image,
        lang=lang,
        builder=custom_builder
    )

    # Spell correct
    dict_path = os.path.join(os.path.dirname(__file__), "dict/urban_dict.txt")
    d = enchant.DictWithPWL("en_US", dict_path)
    # Pad newlines with spaces so they survive the split as their own tokens.
    tokens = txt.replace('\n', ' \n ').split(" ")
    corrected = []

    for token in tokens:
        replacement = token
        # Empty tokens and newline markers are passed through untouched.
        if token and token != '\n' and d.check(token) is False:
            # Ask for suggestions once per token and reuse the result.
            suggestions = d.suggest(token)
            if suggestions:
                replacement = suggestions[0]
        corrected.append(replacement)

    return " ".join(corrected)
项目:check_modulemd    作者:fedora-modularity    | 项目源码 | 文件源码
def _init_spell_checker(self):
        """Initialize the spell checker and its dictionary.

        The jargon file is taken from the ``jargonfile`` test parameter or,
        failing that, from the ``JARGONFILE`` environment variable. When no
        dictionary could be built from a jargon file, a plain ``en_US``
        dictionary is created and the module repository's ``jargon.txt`` is
        fetched; on a 2xx response its non-empty lines are added for the
        session. The ``-``-separated parts of the module name are always
        added for the session.

        Returns:
            A ``SpellChecker`` built on the prepared dictionary.
        """

        default_dict = "en_US"
        spell_dict = None

        jargonfile = self.params.get('jargonfile')
        if not jargonfile:
            jargonfile = os.environ.get('JARGONFILE')
        if jargonfile is not None:
            try:
                jargonfile = str(jargonfile)
                spell_dict = DictWithPWL(default_dict, jargonfile)
            except Exception:
                self.error(
                    "Could not initialize dictionary using %s file" % jargonfile)

        if not spell_dict:
            try:
                spell_dict = DictWithPWL(default_dict)
            except Exception:
                self.error(
                    "Could not initialize spell checker with dictionary %s" % default_dict)

            # Check if there is jargonfile on module repo
            url = ("https://src.fedoraproject.org/cgit/modules/%s.git/plain/jargon.txt" %
                   self.mmd.name)
            resp = requests.get(url)
            if 200 <= resp.status_code < 300:
                # resp.text is decoded text on both Python 2 and 3, whereas
                # resp.content is bytes on Python 3 and cannot be split on a
                # str separator.
                for w in resp.text.splitlines():
                    w = w.strip()
                    if w:
                        spell_dict.add_to_session(w)

        # add words from module name as jargon
        for w in self.mmd.name.split('-'):
            spell_dict.add_to_session(w)

        # NOTE(review): the code below relies on self.error() raising;
        # otherwise ``chkr`` would be unbound at the return -- confirm.
        try:
            chkr = SpellChecker(spell_dict)
        except Exception:
            self.error("Could not initialize spell checker")

        return chkr