Python string module: string.punctuation example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use string.punctuation.
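
For reference, string.punctuation is a constant string of ASCII punctuation characters rather than a function, and the most frequent pattern in the examples below is deleting those characters with a translation table. A minimal sketch (Python 3):

import string

print(string.punctuation)   # !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

# build a table mapping every punctuation character to None,
# then strip those characters from a sample sentence
table = str.maketrans('', '', string.punctuation)
print('Hello, world!'.translate(table))   # Hello world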

Project: xpandas    Author: alan-turing-institute    | project source | file source
def test_bag_of_words_for_series():
    dataset = fetch_20newsgroups(shuffle=True, random_state=1,
                                 remove=('headers', 'footers', 'quotes'))

    series = XSeries(dataset.data[:10])
    assert series.data_type == str

    translator = str.maketrans('', '', string.punctuation)
    tokenizer_transformer = XSeriesTransformer(
        transform_function=lambda text: text.lower().translate(translator).strip().split()
    )

    transformed_series = tokenizer_transformer.fit_transform(series)
    # print(transformed_series)

    bag_transform = BagOfWordsTransformer()

    transformed_series = bag_transform.fit_transform(transformed_series)

    # print(transformed_series)

    assert type(transformed_series) == XDataFrame
Project: lang-reps    Author: chaitanyamalaviya    | project source | file source
def __iter__(self):
        """
        Read a file where each line is of the form "word1 word2 ..."
        Yields lists of the form [word1, word2, ...]
        """
        if os.path.isdir(self.fname):
            filenames = [os.path.join(self.fname,f) for f in os.listdir(self.fname)]
        else:
            filenames = [self.fname]
        for filename in filenames:
            # with io.open(filename, encoding='utf-8') as f:
            with open(filename) as f:
                doc = f.read()
                for line in doc.split("\n"):
                    #if not line:  continue
                    sent = "".join([ch for ch in line.lower() if ch not in string.punctuation]).strip().split()
                    # sent = [word for word in line.strip().split()]
                    sent = [self.begin] + sent + [self.end]
                    yield sent
Project: lang-reps    Author: chaitanyamalaviya    | project source | file source
def __iter__(self):
        """
        Read a file where each line is of the form "word1 word2 ..."
        Yields lists of the form [word1, word2, ...]
        """
        if os.path.isdir(self.fname):
            filenames = [os.path.join(self.fname, f) for f in os.listdir(self.fname)]
        else:
            filenames = [self.fname]

        for filename in filenames:
            with open(filename) as f:
                doc = f.read()
                for line in doc.split("\n"):
                    #if not line:  continue
                    sent = "".join([ch for ch in line.lower() if ch not in string.punctuation]).strip().split()
                    # sent = [word for word in line.strip().split()]
                    sent = [self.begin] + sent + [self.end]
                    yield sent
Project: newsreap    Author: caronc    | project source | file source
def hexdump(src, length=16, sep='.'):
    """
    Displays a hex output of the content it is passed.

    This was based on https://gist.github.com/7h3rAm/5603718 with some
    minor modifications
    """
    allowed = digits + ascii_letters + punctuation + ' '

    print_map = ''.join(((x if x in allowed else '.')
                        for x in map(chr, range(256))))
    lines = []

    for c in xrange(0, len(src), length):
        chars = src[c:c + length]
        hex = ' '.join(["%02x" % ord(x) for x in chars])
        if len(hex) > 24:
            hex = "%s %s" % (hex[:24], hex[24:])
        printable = ''.join(["%s" % (
            (ord(x) <= 127 and print_map[ord(x)]) or sep) for x in chars])
        lines.append("%08x:  %-*s  |%s|" % (c, length * 3, hex, printable))
    return '\n'.join(lines)
Project: PyFlooder    Author: D4Vinci    | project source | file source
def attack():

    ip = socket.gethostbyname( host )
    global n
    msg=str(string.letters+string.digits+string.punctuation)
    data="".join(random.sample(msg,5))
    dos = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    try:
        n+=1
        dos.connect((ip, port))
        dos.send( "GET /%s HTTP/1.1\r\n" % data )
        print "\n "+time.ctime().split(" ")[3]+" "+"["+str(n)+"] #-#-# Hold Your Tears #-#-#"

    except socket.error:
        print "\n [ No connection! Server maybe down ] "

    dos.close()
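
Several snippets in this collection (the newsreap hexdump and PyFlooder attack above, plus hostapd-mana, nfcpy and PandasDataFrameGUI below) are Python 2 code: string.letters, string.maketrans(from, to), xrange and the print statement no longer exist in Python 3. A small sketch of the Python 3 equivalents, offered for orientation rather than as part of any quoted project:

import random
import string

# Python 2 string.letters          -> Python 3 string.ascii_letters
# Python 2 string.maketrans(a, b)  -> Python 3 str.maketrans(a, b)
# Python 2 xrange(n)               -> Python 3 range(n)
msg = string.ascii_letters + string.digits + string.punctuation
data = ''.join(random.sample(msg, 5))   # five distinct random characters
print(data)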
Project: scientific-paper-summarisation    Author: EdCo95    | project source | file source
def removeCommonWords(self, sentence, common_words, tokenized=False):
        """Takes a sentence and list of stopwords and removes the stopwords from the sentence."""
        if not tokenized:
            words = sentence.split(' ')
        else:
            words = sentence
        final_sentence = []

        for word in words:
            word = word.translate(string.maketrans("", ""), string.punctuation)
            word = word.lower()
            if word in common_words:
                continue
            else:
                final_sentence.append(word)

        return final_sentence
Project: code    Author: ActiveState    | project source | file source
def ex3(argv):

    password = ''

    for i in range(len(argv)):
        for j in range(int(argv[i])):
            if i == 0:
                password += string.uppercase[random.randint(0,len(string.uppercase)-1)]
            elif i == 1:
                password += string.lowercase[random.randint(0,len(string.lowercase)-1)]
            elif i == 2:
                password += string.digits[random.randint(0,len(string.digits)-1)]
            elif i == 3:
                password += string.punctuation[random.randint(0,len(string.punctuation)-1)]

    return ''.join(random.sample(password,len(password)))
Project: CopyNet    Author: MultiPath    | project source | file source
def mark(line):
    tmp_line = ''
    for c in line:
        if c in string.punctuation:
            if c != "'":
                tmp_line += ' ' + c + ' '
            else:
                tmp_line += ' ' + c
        else:
            tmp_line += c
    tmp_line = tmp_line.lower()
    words = [w for w in tmp_line.split() if len(w) > 0]
    for w in words:
        if w not in word2freq:
            word2freq[w]  = 1
        else:
            word2freq[w] += 1
    return words
Project: deeppavlov    Author: deepmipt    | project source | file source
def _normalize_answer(s):
    """Normalize string to score answers according to SQuAD dataset scoring rules.

    Remove articles, remove punctuation, fix multiple whitespaces in string, and convert all characters to lowercase.
    """

    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
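
This SQuAD-style normalizer recurs almost verbatim in several projects below (allennlp, jack, MachineComprehension, Question-Answering, dict_based_learning, bi-att-flow). Assuming the function above is in scope, a quick usage sketch:

print(_normalize_answer('The Quick,  Brown Fox!'))   # -> quick brown fox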
Project: KATE    Author: hugochan    | project source | file source
def tiny_tokenize(text, stem=False, stop_words=[]):
    words = []
    for token in wordpunct_tokenize(re.sub('[%s]' % re.escape(string.punctuation), ' ', \
            text.decode(encoding='UTF-8', errors='ignore'))):
        if not token.isdigit() and not token in stop_words:
            if stem:
                try:
                    w = EnglishStemmer().stem(token)
                except Exception as e:
                    w = token
            else:
                w = token
            words.append(w)

    return words

    # return [EnglishStemmer().stem(token) if stem else token for token in wordpunct_tokenize(
    #                     re.sub('[%s]' % re.escape(string.punctuation), ' ', text.decode(encoding='UTF-8', errors='ignore'))) if
    #                     not token.isdigit() and not token in stop_words]
Project: regex_extraction    Author: aforsyth    | project source | file source
def _extract_values_from_rpdr_notes(
        rpdr_notes, phrase_type, phrases, ignore_punctuation,
        show_n_words_context_before, show_n_words_context_after):
    """Return a list of NotePhraseMatches for each note in rpdr_notes."""
    note_phrase_matches = []
    if ignore_punctuation:
        logging.info('ignore_punctuation is True, so we will also ignore '
                     'any punctuation in the entered phrases.')
        phrases = [_remove_punctuation(phrase) for phrase in phrases]
    match_contexts = PhraseMatchContexts(
        show_n_words_context_before, show_n_words_context_after)
    for rpdr_note in rpdr_notes:
        if ignore_punctuation:
            rpdr_note.remove_punctuation_from_note()
        phrase_matches = _extract_phrase_from_notes(phrase_type, phrases,
                                                    rpdr_note, match_contexts)
        note_phrase_matches.append(phrase_matches)
    match_contexts.print_ordered_contexts()
    return note_phrase_matches
Project: crypto-sentiment    Author: codingupastorm    | project source | file source
def _words_plus_punc(self):
        """
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
        """
        no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text)
        # removes punctuation (but loses emoticons & contractions)
        words_only = no_punc_text.split()
        # remove singletons
        words_only = set( w for w in words_only if len(w) > 1 )
        # the product gives ('cat', ',') and (',', 'cat')
        punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)}
        punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)}
        words_punc_dict = punc_before
        words_punc_dict.update(punc_after)
        return words_punc_dict
Project: hostapd-mana    Author: adde88    | project source | file source
def ping(self, user, text = None):
        """Measure round-trip delay to another IRC client.
        """
        if self._pings is None:
            self._pings = {}

        if text is None:
            chars = string.letters + string.digits + string.punctuation
            key = ''.join([random.choice(chars) for i in range(12)])
        else:
            key = str(text)
        self._pings[(user, key)] = time.time()
        self.ctcpMakeQuery(user, [('PING', key)])

        if len(self._pings) > self._MAX_PINGRING:
            # Remove some of the oldest entries.
            byValue = [(v, k) for (k, v) in self._pings.items()]
            byValue.sort()
            excess = len(self._pings) - self._MAX_PINGRING
            for i in xrange(excess):
                del self._pings[byValue[i][1]]
Project: Python-Scripts-Repo-on-Data-Science    Author: qalhata    | project source | file source
def __init__(self, min_cut=0.1, max_cut=0.9):
        # indentation changes - we are inside the constructor
        # here we set up the behaviour
        # this is called each time an object of the frequency summarizer
        # class is created or instantiated
        self._min_cut = min_cut    # self = keyword that refers to the instance
        self._max_cut = max_cut
        # we save the values of the two parameters passed by assigning them to
        # two member variables - the 'self.' prefix identifies them as part
        # of the self argument - using underscore as first char.
        self._stopwords = set(stopwords.words('english') + list(punctuation))
        # this is a list of all common words and punctuation symbols

    # indentation changes - we are out of the constructor here
    # This is still the body of the class
    # Defining a var here (outside a member function) but within the class
    # makes the member var STATIC. This means it belongs to the class, and not
    # to any specific individual instance (object) of the class
Project: allennlp    Author: allenai    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: glassdoor-analysis    Author: THEdavehogue    | project source | file source
def lemmatize_text(text, stop_words=STOPLIST, keep_pos=KEEP_POS):
    '''
    Function to lemmatize a single document of the corpus

    INPUT:
        text: string, text of review
        stop_words: words to remove from text, default STOPLIST defined above
        keep_pos: parts of speech to keep in text, default KEEP_POS def above

    OUTPUT:
        lemmatized text
    '''
    x = nlp(text)
    words = [tok.lemma_.strip(punctuation) for tok in x if (
        tok.pos_ in keep_pos) and (tok.lemma_.strip(punctuation) not in STOPLIST)]
    words.extend(['boss' for tok in x if tok.lemma_ == 'bos'])
    return ' '.join(words)
Project: python-tutorial    Author: Akuli    | project source | file source
def header_link(title):
    """Return a github-style link target for a title.

    >>> header_link('Hello there!')
    'hello-there'
    """
    # This doesn't do the-title-1, the-title-2 etc. with multiple titles
    # with same text, but usually this doesn't matter.
    result = ''
    for character in title:
        if character in string.whitespace:
            result += '-'
        elif character in string.punctuation:
            pass
        else:
            result += character.lower()
    return result
Project: python-tutorial    Author: Akuli    | project source | file source
def header_link(title):
    """Return a github-style link target for a title.

    >>> header_link('Hello there!')
    'hello-there'
    """
    # This doesn't handle multiple titles with the same text in the
    # same file, but usually that's not a problem. GitHub makes
    # links like the-title, the-title-1, the-title-2 etc.
    result = ''
    for character in title:
        if character in string.whitespace:
            result += '-'
        elif character in string.punctuation:
            pass
        else:
            result += character.lower()
    return result
Project: Luna    Author: Moonlington    | project source | file source
def _bots(self, ctx, amount: int=100):
        """Clears bots and bot calls."""
        def check(m):
            if m.author.bot:
                return True
            for mem in m.mentions:
                if mem.bot:
                    return True
            if m.content.startswith(tuple(i for i in string.punctuation)) and not bool(re.search(r'^<@!?(\d+)>', m.content)):
                return True
            return False
        messages = await self.bot.purge_from(ctx.message.channel, limit=amount, before=ctx.message, check=check)
        await self.bot.delete_message(ctx.message)
        send = await self.bot.say("Successfully cleared **{}** messages".format(len(messages)))
        await asyncio.sleep(3)
        await self.bot.delete_message(send)
Project: ChemDataExtractor    Author: mcs07    | project source | file source
def could_be(self, other):
        """Return True if the other PersonName is not explicitly inconsistent."""
        # TODO: Some suffix and title differences should be allowed
        if type(other) is not type(self):
            return NotImplemented
        if self == other:
            return True
        for attr in ['title', 'firstname', 'middlename', 'nickname', 'prefix', 'lastname', 'suffix']:
            if attr not in self or attr not in other:
                continue
            puncmap = dict((ord(char), None) for char in string.punctuation)
            s = self[attr].lower().translate(puncmap)
            o = other[attr].lower().translate(puncmap)
            if s == o:
                continue
            if attr in {'firstname', 'middlename', 'lastname'}:
                if (({len(comp) for comp in s.split()} == {1} and [el[0] for el in o.split()] == s.split()) or
                        ({len(comp) for comp in o.split()} == {1} and [el[0] for el in s.split()] == o.split())):
                    continue
            return False
        return True
Project: Twitter-Sentiment-Analysis-For-Birthday-Celebrities    Author: vishal-tiwari    | project source | file source
def posNegCount(self, tweet):

        pos = 0
        neg = 0

        for p in list(punctuation):
            tweet = tweet.replace(p, '')

        tweet = tweet.lower()
        words = tweet.split(' ')
        word_count = len(words)

        for word in words:
            if word in self.positive_words:
                pos = pos + 1
            elif word in self.negative_words:
                neg = neg + 1

        return pos, neg
Project: jack    Author: uclmr    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: LDA-REST    Author: valentinarho    | project source | file source
def LemNormalize(text):
    # convert non ascii characters
    text = text.encode('ascii', 'replace').decode()
    # remove punctuation and digits
    remove_punct_and_digits = dict([(ord(punct), ' ') for punct in string.punctuation + string.digits])
    transformed = text.lower().translate(remove_punct_and_digits)
    # shortword = re.compile(r'\W*\b\w{1,2}\b')
    # transformed = shortword.sub('', transformed)

    # tokenize the transformed string
    tokenized = nltk.word_tokenize(transformed)

    # remove short words (3 characters or fewer)
    tokenized = [w for w in tokenized if len(w) > 3]
    tokenizer = LemTokens(tokenized)

    return tokenizer
Project: LDA-REST    Author: valentinarho    | project source | file source
def LemNormalizeIt(text):

    # convert non ascii characters
    text = text.encode('ascii', 'replace').decode()
    # remove punctuation and digits
    remove_punct_and_digits = dict([(ord(punct), ' ') for punct in string.punctuation + string.digits])
    transformed = text.lower().translate(remove_punct_and_digits)

    # tokenize the transformed string
    tokenized = nltk.word_tokenize(transformed)

    # apply lemming with morph it
    morph_it = load_morph_it()
    tokenized = [morph_it.get(w, w) for w in tokenized if len(w) > 3]

    return tokenized
Project: LDA_RecEngine    Author: easonchan1213    | project source | file source
def preprocessing(content):
    # full-width Chinese punctuation marks; the original characters were
    # garbled to '?' in the source, so this list is a best-guess reconstruction
    remove_punc = ('， 。 、 ； ： ？ ！ “ ” —').split(' ')
    ## preprocessing #1 : remove XXenglishXX and numbers
    preprocessing_1 = re.compile(r'\d*',re.L)  ## only substitute numbers
    #preprocessing_1 = re.compile(r'\w*',re.L)  ## substitute number & English
    content = preprocessing_1.sub("",content)
    ## preprocessing #2 : remove punctuation
    preprocessing_2 = re.compile('[%s]' % re.escape(string.punctuation))
    content = preprocessing_2.sub("",content)
    ## preprocessing #3 : remove Chinese punctuation and multiple whitspaces
    content = content.replace('\n','')
    for punc in remove_punc:
        content = content.replace(punc,'')
    try:
        content = parsing.strip_multiple_whitespaces(content)
    except:
        print 'Warning : failed to strip whitespaces @ '   

    return content
Project: tsubasa-reddit-bot    Author: ArmandSyah    | project source | file source
def get_anilist_links(title):
    """Iterates through all search methods until link is constructed"""
    exclude = set(string.punctuation)
    title = ''.join(ch for ch in title if ch not in exclude)
    title = title.lower().split(' ')
    if 'season' in title:
        title.remove('season')
    title = ' '.join(title)
    anilist_regex = re.compile(r'http(s)?://anilist.co/anime/([0-9]){1,5}(/.*)?')
    link_dispatcher = {'api': _get_anilist_link_by_api}

    for _, v in link_dispatcher.items():
        anilist_url = v(title)
        if anilist_url is None:
            continue
        if re.match(anilist_regex, anilist_url) is not None:
            return anilist_url

    return
Project: tsubasa-reddit-bot    Author: ArmandSyah    | project source | file source
def search_crunchyroll(anime):
    """Searches if anime exists on Crunchyroll and returns a link"""
    try:
        exclude = set(string.punctuation)
        anime = ''.join(ch for ch in anime if ch not in exclude)
        keywords = anime.split(' ')
        crunchy_api = MetaApi()
        crunchyroll_listing = []
        while len(keywords) > 0:
            crunchyroll_listing = list(crunchy_api.search_anime_series(' '.join(keywords)))
            if len(crunchyroll_listing) <= 0:
                print('No crunchyroll listings found')
                keywords.pop()
                continue
            else:
                break
    except:
        print('Crunchyroll url couldn\'t be retrieved')
        return

    return crunchyroll_listing[0].url if len(crunchyroll_listing) > 0 else None
Project: tsubasa-reddit-bot    Author: ArmandSyah    | project source | file source
def search_funimation(anime):
    """Checks if anime exists on Funimation website and returns a link"""
    try:
        exclude = set(string.punctuation)
        anime = ''.join(ch for ch in anime if ch not in exclude)
        keywords = anime.split(' ')
        funi_url = None
        while len(keywords) > 0:
            show_slug = '-'.join(keywords).lower()
            funi_url = f'https://www.funimation.com/shows/{show_slug}/'
            funi_url = utilities.make_get_request(funi_url)
            if funi_url is None:
                keywords.pop()
                continue
            else:
                break
    except:
        print('Funimation url couldn\'t be retrieved')
        return
    return funi_url.url if funi_url is not None else None
Project: tsubasa-reddit-bot    Author: ArmandSyah    | project source | file source
def search_animelab(anime):
    """Checks if anime title exists on AnimeLab website and returns a link"""
    try:
        exclude = set(string.punctuation)
        anime = ''.join(ch for ch in anime if ch not in exclude)
        keywords = anime.split(' ')
        animelab_url = None
        while len(keywords) > 0:
            show_slug = '-'.join(keywords).lower()
            animelab_url = f'https://www.animelab.com/shows/{show_slug}'
            animelab_url = utilities.make_get_request(animelab_url)
            if animelab_url is None:
                keywords.pop()
                return
            else:
                break
    except:
        print('Animelab url couldn\'t be retrieved')
        return
    return animelab_url.url
Project: dupandas    Author: shivam5992    | project source | file source
def __init__(self, clean_config = None):
        self.cc = {
            'lower' : False,
            'punctuation' : False,
            'whitespace' : False,
            'digit' : False,
        }

        # Override clean config and validation check
        if clean_config != None:
            for key, value in clean_config.iteritems():
                if key in self.cc:
                    if value not in [True, False,1,0]:
                        print ("Invalid: Incorrect boolean value: "+str(value)+" for key: " + str(key))
                    else:
                        self.cc[key] = value
                else:
                    print ("Invalid: Cleaner not recognized: " + str(key) + ", available Cleaners: " +
                                                                     ", ".join(self.cc.keys()))

        cleaners_applied = [key for key in self.cc if self.cc[key]]
        if cleaners_applied:
            print ("Applying Cleaners: " + ", ".join(cleaners_applied))
        else:
            print ("Warning: No cleaners in config")
Project: dupandas    Author: shivam5992    | project source | file source
def clean_text(self, txt):
        """
        function to clean a text on the basis of configurations mentioned in clean config.
        """

        txt = str(txt)

        if self.cc['lower']:
            txt = txt.lower()

        if self.cc['punctuation']:
            txt = "".join([x for x in txt if x not in punctuations])

        if self.cc['whitespace']:
            txt = "".join(txt.split()).strip()

        if self.cc['digit']:
            txt = "".join(x for x in txt if x not in "0987654321")

        return txt
Project: dict_based_learning    Author: tombosc    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: bedrock-core    Author: Bedrock-py    | project source | file source
def list_returns(fileToCheck, desiredInterface):
    returnsList = []
    newLine = ""
    with open(fileToCheck, 'r') as pyFile:
        for line in pyFile:
            if line.find("#") == -1:
                newFront = line.find("return")
                if newFront != -1:
                    possibleErrorMessageCheck1 = line.find("'")
                    bracketBefore = line.find("{")
                    lastBracket = line.find("}")
                    newLine = line[possibleErrorMessageCheck1:]
                    possibleErrorMessageCheck2 = newLine.find(" ")
                    if possibleErrorMessageCheck2 == -1:
                        line = line[newFront + 7:]
                        line.split()
                        line = [word.strip(punctuation) for word in line.split()]
                        returnsList.extend(line)
                    elif possibleErrorMessageCheck1 == bracketBefore + 1:
                        line = line[newFront + 7:lastBracket + 1]
                        line.split()
                        returnsList.append(line)
    return returnsList
Project: MachineComprehension    Author: sa-j    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: nfcpy    Author: nfcpy    | project source | file source
def make_wifipassword(args):
    import random, string, hashlib
    if args.password is None:
        printable = string.digits + string.letters + string.punctuation
        args.password = ''.join([random.choice(printable) for i in xrange(32)])
    if args.password_id is None:
        args.password_id = random.randint(0x0010, 0xFFFF)
    pkhash = hashlib.sha256(args.pubkey.read()).digest()[0:20]

    record = nfc.ndef.WifiPasswordRecord()
    record.password['public-key-hash'] = pkhash
    record.password['password-id'] = args.password_id
    record.password['password'] = args.password

    message = nfc.ndef.Message(record)
    if args.outfile.name == "<stdout>":
        args.outfile.write(str(message).encode("hex"))
    else:
        args.outfile.write(str(message))
Project: Question-Answering    Author: MurtyShikhar    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: PandasDataFrameGUI    Author: bluenote10    | project source | file source
def _insert(self, x, y, text):
        """ Insert text at given x, y coordinates --- used with drag-and-drop. """

        # Clean text.
        import string
        text = filter(lambda x: x in (string.letters + string.digits + string.punctuation + ' '), text)

        # Find insertion point.
        index, flags = self.HitTest((x, y))

        if index == wx.NOT_FOUND:
            if flags & wx.LIST_HITTEST_NOWHERE:
                index = self.GetItemCount()
            else:
                return

        # Get bounding rectangle for the item the user is dropping over.
        rect = self.GetItemRect(index)

        # If the user is dropping into the lower half of the rect, we want to insert _after_ this item.
        if y > rect.y + rect.height/2:
            index += 1

        self.InsertStringItem(index, text)
Project: miptnews    Author: fiztehradio    | project source | file source
def public_posts(self):
        now = datetime.now()
        # posts from the DB that still have message_id=0, plus RSS feed items published within the last day
        posts_from_db = self.db.get_post_without_message_id()
        today_news = [i for i in self.src.news if (
            now - datetime.fromtimestamp(i.date)).days < 1]
        # the posts to publish are the intersection of the two sets
        for_publishing = list(set(today_news) & set(posts_from_db))
        for_publishing = sorted(for_publishing, key=lambda news: news.date)
        # for_publishing = sorted(today_news, key=lambda news: news.date)
        # publish them one at a time
        for post in tqdm(for_publishing, desc="Posting news"):
            header = base64.b64decode(post.text).decode('utf8')
            header = ''.join(c for c in header if c not in set(punctuation + '—«»'))
            header = '#' + '_'.join(header.lower().split())
            text = '%s %s' % (header,
                              self.bit_ly.short_link(base64.b64decode(post.link).decode('utf8')))
            a = self.send_message(
                chat_id=self.chat_id, text=text)  # , parse_mode=telegram.ParseMode.HTML)
            message_id = a.message_id
            chat_id = a['chat']['id']
            self.db.update(post.link, chat_id, message_id)
            logging.info(u'Public: %s;%s;' %
                         (str(post), message_id))
            time.sleep(self.delay_between_messages)
Project: PyperGrabber    Author: pykong    | project source | file source
def rem_whitespace(string):
    """ careful to keep this order of patterns or duplicate whitespace created in first round
    will not be removed
    """
    unwanted_chars = punctuation + whitespace

    pat_l = [r'[' + unwanted_chars + ']',
             r'\s+',
             r'  ',
             r' \\',
             r' \ '
             ]

    for p in pat_l:
        rx = re.compile(p)
        string = re.sub(rx, ' ', string)

    return string.strip()
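
Assuming the helper above is in scope (with punctuation and whitespace imported from the string module, and re imported), a quick check of its behaviour:

print(rem_whitespace('foo,,  bar!!  baz'))   # -> foo bar baz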
Project: coquery    Author: gkunter    | project source | file source
def get_tag_translate(self, tag):
        translate_dict = {
            "p": "p",
            "punctuation": "",
            "heading": "span style='font-style: bold'",
            #"heading": "span style='font-style: bold; font-size:150%'",
            #"h1": "span style='font-style: bold; font-size:150%'",
            "boldface": "b",
            "italics": "i",
            "underline": "u",
            "superscript": "sup",
            "subscript": "sup",
            "object": "object",
            "text": "html"}

        if tag in translate_dict:
            return translate_dict[tag]
        else:
            print("unsupported tag: ", tag)
            return tag
Project: coquery    Author: gkunter    | project source | file source
def add_token(self, token_string, token_pos=None):
        # get lemma string:
        if all(x in string.punctuation for x in token_string):
            token_pos = "PUNCT"
            lemma = token_string
        else:
            try:
                # use the current lemmatizer to assign the token to a lemma:
                lemma = self._lemmatize(token_string, self._pos_translate(token_pos)).lower()
            except Exception:
                lemma = token_string.lower()

        # get word id, and create new word if necessary:
        word_dict = {self.word_lemma: lemma, self.word_label: token_string}
        if token_pos and self.arguments.use_nltk:
            word_dict[self.word_pos] = token_pos
        word_id = self.table(self.word_table).get_or_insert(word_dict, case=True)

        # store new token in corpus table:
        return self.add_token_to_corpus(
            {self.corpus_word_id: word_id,
             self.corpus_sentence: self._sentence_id,
             self.corpus_file_id: self._file_id})
Project: bi-att-flow    Author: allenai    | project source | file source
def normalize_answer(self, s):
        """Lower text and remove punctuation, articles and extra whitespace."""
        def remove_articles(text):
            return re.sub(r'\b(a|an|the)\b', ' ', text)

        def white_space_fix(text):
            return ' '.join(text.split())

        def remove_punc(text):
            exclude = set(string.punctuation)
            return ''.join(ch for ch in text if ch not in exclude)

        def lower(text):
            return text.lower()

        return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: bi-att-flow    Author: allenai    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: bi-att-flow    Author: allenai    | project source | file source
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
Project: TensorFlow-Machine-Learning-Cookbook    Author: PacktPublishing    | project source | file source
def normalize_text(texts, stops):
    # Lower case
    texts = [x.lower() for x in texts]

    # Remove punctuation
    texts = [''.join(c for c in x if c not in string.punctuation) for x in texts]

    # Remove numbers
    texts = [''.join(c for c in x if c not in '0123456789') for x in texts]

    # Remove stopwords
    texts = [' '.join([word for word in x.split() if word not in (stops)]) for x in texts]

    # Trim extra whitespace
    texts = [' '.join(x.split()) for x in texts]

    return(texts)


# Build dictionary of words
Project: TensorFlow-Machine-Learning-Cookbook    Author: PacktPublishing    | project source | file source
def normalize_text(texts, stops):
    # Lower case
    texts = [x.lower() for x in texts]

    # Remove punctuation
    texts = [''.join(c for c in x if c not in string.punctuation) for x in texts]

    # Remove numbers
    texts = [''.join(c for c in x if c not in '0123456789') for x in texts]

    # Remove stopwords
    texts = [' '.join([word for word in x.split() if word not in (stops)]) for x in texts]

    # Trim extra whitespace
    texts = [' '.join(x.split()) for x in texts]

    return(texts)
Project: IntroPython2016    Author: UWPCE-PythonCert    | project source | file source
def strip_punctuation(text):
    """
    strips the punctuation from a bunch of text
    """
    # build a translation table for string.translate:
    # there are other ways to do this:

    # create a translation table to replace all punctuation with spaces
    #    -- then split() will remove the extra spaces
    punctuation = string.punctuation
    punctuation = punctuation.replace("'", "")  # keep apostropies
    punctuation = punctuation.replace("-", "")  # keep hyphenated words
    # building a translation table
    table = {}
    for c in punctuation:
        table[ord(c)] = ' '
    # remove punctuation with the translation table
    text = text.translate(table)
    # remove "--" -- can't do multiple characters with translate
    text = text.replace("--", " ")

    return text
Project: IntroPython2016    Author: UWPCE-PythonCert    | project source | file source
def make_words(text):

    """
    make a list of words from a large bunch of text

    strips all the punctuation and other stuff from a string
    """
    text = strip_punctuation(text)

    # lower-case everything to remove that complication:
    text = text.lower()

    # split into words
    words = text.split()

    # remove the bare single quotes: "'" is both a quote and an apostrophe
    # and capitalize "i"
    words2 = []
    for word in words:
        if word != "'":  # remove quote by itself
            # "i" by itself should be capitalized
            words2.append("I" if word == 'i' else word)
    # could be done with list comprehension too -- next week!
    # words2 = [("I" if word == 'i' else word) for word in words if word != "'"]
    return words2
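
Assuming both helpers above (strip_punctuation and make_words) are in scope, a quick sketch of the pipeline:

text = "Hello there -- I'm here!"
print(make_words(text))   # -> ['hello', 'there', "i'm", 'here']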
Project: DogeGen    Author: MemeTrash    | project source | file source
def _get_base_doge_words(self, eng_text):
        """
        Get all base words from text to make doge phrases from.
        eg. 'Hello there, I am happy' -> ['hello', 'are', 'happy']

        Args:
            eng_text (str): Text to get words from.

        Returns:
            list[str]: List of lower case words to use from text.
        """
        phrase_no_punct = "".join([ch for ch in eng_text if ch not in string.punctuation])
        tagged_words = nltk.pos_tag([w.lower() for w in phrase_no_punct.split(' ') if w.isalpha()])
        chosen_words = []
        for word, tag in tagged_words:
            if tag[0] in ['N', 'V', 'J']:
                # make noun singular
                if tag[0] == 'N':
                    word = self._lemmatizer.lemmatize(word, pos='n')
                # make verb infinitive
                elif tag[0] == 'V':
                    word = self._lemmatizer.lemmatize(word, pos='v')
                chosen_words.append(word.encode('ascii', 'ignore'))  # lemmatize makes word unicode
        return list(set(chosen_words))
Project: Personal_AI_Assistant    Author: PratylenClub    | project source | file source
def str2index(str_):

    # clean white space
    str_ = ' '.join(str_.split())
    # remove punctuation and make lower case
    str_ = str_.translate(None, string.punctuation).lower()

    res = []
    for ch in str_:
        try:
            res.append(byte2index[ch])
        except KeyError:
            # drop OOV
            pass
    return res


# convert index list to string