Python nltk.tokenize module: line_tokenize() example source code

The following 9 code examples, extracted from open-source Python projects, illustrate how to use nltk.tokenize.line_tokenize().
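
Before the project examples, here is a minimal usage sketch (assuming only that NLTK is installed): line_tokenize() splits a raw string into a list of lines, discarding blank lines by default. The sample text is made up for illustration.

from nltk.tokenize import line_tokenize

text = "# comment\nalpha\n\nbeta\n"

# Split the raw string on newlines; blank lines are dropped by default
# (the blanklines parameter defaults to 'discard').
print(line_tokenize(text))
# ['# comment', 'alpha', 'beta']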

Project: hate-to-hugs    Author: sdoran35
def words(self, lang=None, fileids=None, ignore_lines_startswith='#'):
        """
        Return a list of nonbreaking prefixes for the specified
        language(s).

        >>> from nltk.corpus import nonbreaking_prefixes as nbp
        >>> nbp.words('en')[:10] == [u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H', u'I', u'J']
        True
        >>> nbp.words('ta')[:5] == [u'\u0b85', u'\u0b86', u'\u0b87', u'\u0b88', u'\u0b89']
        True

        :return: a list of words for the specified language(s).
        """
        # If *lang* is in the list of available languages, map it to the
        # corresponding fileid.  Otherwise, when fileids is None, return
        # the non-breaking prefixes for all languages.
        if lang in self.available_langs:
            lang = self.available_langs[lang]
            fileids = ['nonbreaking_prefix.'+lang]
        return [line for line in line_tokenize(self.raw(fileids))
                if not line.startswith(ignore_lines_startswith)]
Project: Price-Comparator    Author: Thejas-1
def words(self, fileids=None):
        # Return the corpus contents as a flat list, one entry per line.
        return line_tokenize(self.raw(fileids))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
Project: neighborhood_mood_aws    Author: jarrellmark
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
Project: hate-to-hugs    Author: sdoran35
def words(self, fileids=None, ignore_lines_startswith='\n'):
        # Same as above, but drop lines that begin with the ignore prefix.
        return [line for line in line_tokenize(self.raw(fileids))
                if not line.startswith(ignore_lines_startswith)]
Project: FancyWord    Author: EastonLee
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
Project: beepboop    Author: nicolehe
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
Project: kind2anki    Author: prz3m
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
Project: but_sentiment    Author: MixedEmotions
def words(self, fileids=None):
        return line_tokenize(self.raw(fileids))
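
All of the one-line words() implementations above come from word-list corpus readers: the underlying corpus file stores one entry per line, and words() simply returns line_tokenize(self.raw(fileids)). Below is a minimal sketch of that pattern using NLTK's WordListCorpusReader; the file name and contents are hypothetical, created in a temporary directory purely for illustration.

import os
import tempfile

from nltk.corpus.reader import WordListCorpusReader

# Hypothetical one-word-per-line corpus file, written only for this example.
root = tempfile.mkdtemp()
with open(os.path.join(root, "colors.txt"), "w", encoding="utf-8") as f:
    f.write("red\ngreen\nblue\n")

reader = WordListCorpusReader(root, ["colors.txt"])
# words() reads the raw file contents and splits them with line_tokenize(),
# yielding one list entry per line of the file.
print(reader.words("colors.txt"))
# ['red', 'green', 'blue']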