Python nltk 模块,BigramTagger() 实例源码

我们从Python开源项目中,提取了以下9个代码示例,用于说明如何使用nltk.BigramTagger()

项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def train(self):
        train_data = nltk.corpus.brown.tagged_sents(categories='news')
        regexp_tagger = nltk.RegexpTagger([
            (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),
            (r'(-|:|;)$', ':'),
            (r'\'*$', 'MD'),
            (r'(The|the|A|a|An|an)$', 'AT'),
            (r'.*able$', 'JJ'),
            (r'^[A-Z].*$', 'NNP'),
            (r'.*ness$', 'NN'),
            (r'.*ly$', 'RB'),
            (r'.*s$', 'NNS'),
            (r'.*ing$', 'VBG'),
            (r'.*ed$', 'VBD'),
            (r'.*', 'NN'),
            ])
        unigram_tagger = nltk.UnigramTagger(train_data, backoff=regexp_tagger)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
        return None
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def train(self):
        train_data = nltk.corpus.brown.tagged_sents(categories='news')
        regexp_tagger = nltk.RegexpTagger([
            (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),
            (r'(-|:|;)$', ':'),
            (r'\'*$', 'MD'),
            (r'(The|the|A|a|An|an)$', 'AT'),
            (r'.*able$', 'JJ'),
            (r'^[A-Z].*$', 'NNP'),
            (r'.*ness$', 'NN'),
            (r'.*ly$', 'RB'),
            (r'.*s$', 'NNS'),
            (r'.*ing$', 'VBG'),
            (r'.*ed$', 'VBD'),
            (r'.*', 'NN'),
            ])
        unigram_tagger = nltk.UnigramTagger(train_data, backoff=regexp_tagger)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
        return None
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def train(self):
        train_data = nltk.corpus.brown.tagged_sents(categories='news')
        regexp_tagger = nltk.RegexpTagger([
            (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),
            (r'(-|:|;)$', ':'),
            (r'\'*$', 'MD'),
            (r'(The|the|A|a|An|an)$', 'AT'),
            (r'.*able$', 'JJ'),
            (r'^[A-Z].*$', 'NNP'),
            (r'.*ness$', 'NN'),
            (r'.*ly$', 'RB'),
            (r'.*s$', 'NNS'),
            (r'.*ing$', 'VBG'),
            (r'.*ed$', 'VBD'),
            (r'.*', 'NN'),
            ])
        unigram_tagger = nltk.UnigramTagger(train_data, backoff=regexp_tagger)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
        return None
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def train(self):
        train_data = nltk.corpus.brown.tagged_sents(categories='news')
        regexp_tagger = nltk.RegexpTagger([
            (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),
            (r'(-|:|;)$', ':'),
            (r'\'*$', 'MD'),
            (r'(The|the|A|a|An|an)$', 'AT'),
            (r'.*able$', 'JJ'),
            (r'^[A-Z].*$', 'NNP'),
            (r'.*ness$', 'NN'),
            (r'.*ly$', 'RB'),
            (r'.*s$', 'NNS'),
            (r'.*ing$', 'VBG'),
            (r'.*ed$', 'VBD'),
            (r'.*', 'NN'),
            ])
        unigram_tagger = nltk.UnigramTagger(train_data, backoff=regexp_tagger)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
        return None
项目:topicModelling    作者:balikasg    | 项目源码 | 文件源码
def __init__(self, train_sents): # [_code-unigram-chunker-constructor]
        train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)]
                      for sent in train_sents]
        self.tagger = nltk.BigramTagger(train_data) # [_code-unigram-chunker-buildit]
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def train(self):
        '''Train the Chunker on the ConLL-2000 corpus.'''
        train_data = [[(t, c) for _, t, c in nltk.chunk.tree2conlltags(sent)]
                      for sent in
                      nltk.corpus.conll2000.chunked_sents('train.txt',
                                                    chunk_types=['NP'])]
        unigram_tagger = nltk.UnigramTagger(train_data)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def train(self):
        '''Train the Chunker on the ConLL-2000 corpus.'''
        train_data = [[(t, c) for _, t, c in nltk.chunk.tree2conlltags(sent)]
                      for sent in
                      nltk.corpus.conll2000.chunked_sents('train.txt',
                                                    chunk_types=['NP'])]
        unigram_tagger = nltk.UnigramTagger(train_data)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def train(self):
        '''Train the Chunker on the ConLL-2000 corpus.'''
        train_data = [[(t, c) for _, t, c in nltk.chunk.tree2conlltags(sent)]
                      for sent in
                      nltk.corpus.conll2000.chunked_sents('train.txt',
                                                    chunk_types=['NP'])]
        unigram_tagger = nltk.UnigramTagger(train_data)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def train(self):
        '''Train the Chunker on the ConLL-2000 corpus.'''
        train_data = [[(t, c) for _, t, c in nltk.chunk.tree2conlltags(sent)]
                      for sent in
                      nltk.corpus.conll2000.chunked_sents('train.txt',
                                                    chunk_types=['NP'])]
        unigram_tagger = nltk.UnigramTagger(train_data)
        self.tagger = nltk.BigramTagger(train_data, backoff=unigram_tagger)
        self._trained = True