Python nltk 模块,trigrams() 实例源码

我们从 Python 开源项目中提取了以下 4 个代码示例,用于说明如何使用 nltk.trigrams()。

项目:codenn    作者:sriniiyer    | 项目源码 | 文件源码
def tokenize(text):
    """Normalise *text* to lower-case ASCII and return its stemmed tokens.

    Characters that cannot be represented in ASCII are replaced with ``?``
    (the ``'replace'`` error handler). The normalised text is split into
    word tokens -- which may contain apostrophes and hyphens, so that words
    like "don't" stay whole -- and single punctuation characters. Every
    token is then passed through ``porter.stem``.

    Stop words are not removed.

    NOTE(review): ``porter`` is defined outside this block; presumably an
    NLTK Porter stemmer instance -- confirm.
    NOTE(review): this is written for Python 2 strings. On Python 3,
    ``encode`` returns ``bytes``, which the ``str`` regex below rejects --
    confirm the target interpreter before porting.

    :param text: the raw text (byte string or unicode string).
    :returns: list of stemmed tokens, in order of appearance.
    """
    try:
        text = text.decode('utf-8').encode('ascii', 'replace').strip().lower()
    except (UnicodeError, AttributeError):
        # decode() fails with a UnicodeError on a Python 2 unicode object
        # holding non-ASCII characters (and on invalid UTF-8 bytes), and with
        # AttributeError on objects that have no decode(); in both cases try
        # encoding the value directly.
        text = text.encode('ascii', 'replace').strip().lower()

    # Split punctuation into separate tokens, but do not split on single
    # quotes or hyphens inside words such as "don't".
    return [porter.stem(w) for w in re.findall(r"[\w'-]+|[^\s\w]", text)]
项目:aueb.twitter.sentiment    作者:nlpaueb    | 项目源码 | 文件源码
def posTrigramsScore(trigrams,category,pos_tags_trigrams,labels):
    """Build a dictionary mapping each trigram to its score for *category*.

    ``subList(pos_tags_trigrams, labels, category)`` is evaluated once, and
    its result is handed to ``score`` together with the full
    *pos_tags_trigrams* for every entry of *trigrams*.

    NOTE(review): ``subList`` and ``score`` are defined outside this block;
    presumably ``subList`` keeps only the POS-tag trigrams whose label
    equals *category* -- confirm against their definitions.

    :param trigrams: iterable of hashable trigrams to score.
    :param category: the category passed to ``subList`` and ``score``.
    :param pos_tags_trigrams: POS-tag trigrams for the whole data set.
    :param labels: labels passed to ``subList`` alongside *pos_tags_trigrams*.
    :returns: dict of ``trigram -> score``.
    """
    # keep the POS-tag trigrams of the requested category only
    category_trigrams = subList(pos_tags_trigrams, labels, category)

    # calculate the score of every trigram
    return {
        tri: score(tri, category, category_trigrams, pos_tags_trigrams)
        for tri in trigrams
    }

#calculate bigram's f1 score
项目:aueb.twitter.sentiment    作者:nlpaueb    | 项目源码 | 文件源码
def getBigrams(l):
    """Return a list holding, for every item of *l*, the list of its bigrams.

    Each item is passed to ``bigrams`` and the result is materialised with
    ``list``; the output keeps the order of *l*.
    """
    return [list(bigrams(item)) for item in l]

#calculate trigrams of every item of the list l
项目:aueb.twitter.sentiment    作者:nlpaueb    | 项目源码 | 文件源码
def getTrigrams(l):
    """Return a list holding, for every item of *l*, the list of its trigrams.

    Each item is passed to ``trigrams`` and the result is materialised with
    ``list``; the output keeps the order of *l*.
    """
    return [list(trigrams(item)) for item in l]

#calculate pos tag score