Python nltk 模块,classify() 实例源码

我们从Python开源项目中,提取了以下39个代码示例,用于说明如何使用nltk.classify()

项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def demo_sent_subjectivity(text):
    """
    Print the subjectivity label predicted for one sentence.

    The analyzer is obtained with ``load('sa_subjectivity.pickle')``; when
    that raises LookupError a new one is built by calling
    ``demo_subjectivity`` with the NaiveBayesClassifier trainer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    splitter = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Split on whitespace, then lower-case each token before classifying.
    lowered_tokens = [token.lower() for token in splitter.tokenize(text)]
    label = analyzer.classify(lowered_tokens)
    print(label)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Fit the wrapped NLTK classifier on ``self.train_features``.

        Positional and keyword arguments are forwarded unchanged to
        ``self.nltk_class.train``. The trained classifier is stored on
        ``self.classifier`` and returned.

        .. versionadded:: 0.6.2

        :rtype: A classifier
        :raises ValueError: if an AttributeError occurs while training,
            e.g. because ``nltk_class`` is None.
        """
        message = ("NLTKClassifier must have a nltk_class"
                   " variable that is not None.")
        try:
            self.classifier = self.nltk_class.train(
                self.train_features, *args, **kwargs)
        except AttributeError:
            raise ValueError(message)
        else:
            return self.classifier
项目:Political-Opinion-Finder    作者:philhabell    | 项目源码 | 文件源码
def ment(text):
        """Classify ``text`` with the pickled voting ensemble.

        The pickles under ``pickle/`` are loaded; if ``doc.pickle`` cannot be
        loaded, ``trainClassifier().train()`` is run first and the load is
        retried. The six classifiers are wrapped in ``sent`` and asked to
        classify the features that ``sent().featureFind`` extracts.

        :param text: document handed to ``featureFind``.
        :return: tuple ``(label, confidence)`` where ``confidence`` is
            ``vote.conf(feats) * 100``.
        """
        def _load(name):
            # Read one pickle and close the file handle deterministically.
            # NOTE: unpickling can execute arbitrary code; these files must
            # come from a trusted source.
            with open("pickle/" + name + ".pickle", "rb") as handle:
                return pickle.load(handle)

        # doc.pickle is only used as a probe for "have the pickles been built?".
        try:
            _load("doc")
        except Exception:
            print("Pickles missing!                                           ")
            print("Program will now constuct pickles, this may take some time.")
            trainClassifier().train()
            _load("doc")

        wordFeat = _load("wordFeat")
        # featSet.pickle was loaded by the previous version but never used,
        # so it is no longer read here.
        classifier_names = ("ONB", "MNB", "BNB", "LR", "LSVC", "SGDC")
        vote = sent(*[_load(name) for name in classifier_names])

        feats = sent().featureFind(text, wordFeat)
        out = vote.conf(feats) * 100
        return vote.classify(feats), out
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Train the wrapped NLTK class on ``self.train_features``.

        Any extra positional or keyword arguments go straight through to
        ``self.nltk_class.train``. The resulting classifier is saved as
        ``self.classifier`` and also returned.

        .. versionadded:: 0.6.2

        :rtype: A classifier
        :raises ValueError: when the training call raises AttributeError,
            for instance if ``nltk_class`` is None.
        """
        error_text = ("NLTKClassifier must have a nltk_class"
                      " variable that is not None.")
        try:
            self.classifier = self.nltk_class.train(
                self.train_features, *args, **kwargs)
        except AttributeError:
            raise ValueError(error_text)
        else:
            return self.classifier
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def demo_sent_subjectivity(text):
    """
    Classify one sentence with a stored SentimentAnalyzer and print the
    resulting label.

    ``load('sa_subjectivity.pickle')`` supplies the analyzer; on LookupError
    a replacement is trained through ``demo_subjectivity`` using
    ``NaiveBayesClassifier.train``.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    whitespace_tokenizer = regexp.WhitespaceTokenizer()
    try:
        stored_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        stored_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-delimited token.
    words = whitespace_tokenizer.tokenize(text)
    normalized = [w.lower() for w in words]
    print(stored_analyzer.classify(normalized))
项目:OpinionMining728    作者:stasi009    | 项目源码 | 文件源码
def classify(self,features,threshold=0.8):
        """Return an aspect label for ``features``.

        The most probable label from ``self.classifier.prob_classify`` is
        returned when its probability reaches ``threshold``. Otherwise the
        features are scanned for business keywords, then cleanliness
        keywords, falling back to ``common.AspectUnknown``.

        :param features: container tested with ``in`` for each keyword and
            passed to ``prob_classify``.
        :param threshold: minimum probability needed to trust the classifier.
        """
        distribution = self.classifier.prob_classify(features)
        top_label = distribution.max()

        # Confident prediction: trust the classifier.
        if distribution.prob(top_label) >= threshold:
            return top_label

        # Low confidence: fall back to keyword spotting, business first.
        for business_word in ["business","wi","fi","wifi","internet","wireless"]:
            if business_word in features:
                return common.AspectBusiness

        for clean_word in ['clean']:
            if clean_word in features:
                return common.AspectClean

        return common.AspectUnknown
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Run ``self.nltk_class.train`` on ``self.train_features``.

        All received arguments are handed on to the wrapped ``train`` call.
        The classifier it produces is kept on ``self.classifier`` and
        returned to the caller.

        .. versionadded:: 0.6.2

        :rtype: A classifier
        :raises ValueError: if the training call raises AttributeError
            (e.g. ``nltk_class`` is None).
        """
        failure = ("NLTKClassifier must have a nltk_class"
                   " variable that is not None.")
        try:
            self.classifier = self.nltk_class.train(
                self.train_features, *args, **kwargs)
        except AttributeError:
            raise ValueError(failure)
        else:
            return self.classifier
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Build the classifier from ``self.train_features``.

        Delegates to ``self.nltk_class.train``, passing along every extra
        argument, then records the result on ``self.classifier`` and
        returns it.

        .. versionadded:: 0.6.2

        :rtype: A classifier
        :raises ValueError: if AttributeError is raised during the training
            call, such as when ``nltk_class`` is None.
        """
        complaint = ("NLTKClassifier must have a nltk_class"
                     " variable that is not None.")
        try:
            self.classifier = self.nltk_class.train(
                self.train_features, *args, **kwargs)
        except AttributeError:
            raise ValueError(complaint)
        else:
            return self.classifier
项目:but_sentiment    作者:MixedEmotions    | 项目源码 | 文件源码
def demo_sent_subjectivity(text):
    """
    Print whether a single sentence is classified as subjective or
    objective by a stored SentimentAnalyzer.

    If ``load('sa_subjectivity.pickle')`` raises LookupError, an analyzer
    is trained on the spot via ``demo_subjectivity``.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        sa = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        sa = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Tokens are lower-cased before being handed to the analyzer.
    lower_tokens = [piece.lower() for piece in tokenizer.tokenize(text)]
    prediction = sa.classify(lower_tokens)
    print(prediction)
项目:GDG-IIIT-BHUBANESWAR    作者:shivank01    | 项目源码 | 文件源码
def classify(self, features):
        """Return the label most classifiers in ``self._classifiers`` chose.

        Each classifier votes by classifying ``features``; the winner is
        ``mode`` of those votes.
        """
        ballots = [voter.classify(features) for voter in self._classifiers]
        return mode(ballots)
项目:GDG-IIIT-BHUBANESWAR    作者:shivank01    | 项目源码 | 文件源码
def confidence(self, features):
        """Return the fraction of classifiers agreeing with the winning vote.

        Every classifier in ``self._classifiers`` classifies ``features``;
        the count of votes equal to ``mode`` of the votes is divided by the
        number of votes.
        """
        ballots = [voter.classify(features) for voter in self._classifiers]
        winner = mode(ballots)
        return ballots.count(winner) / len(ballots)
项目:NLP-Sentiment-Analysis-Twitter    作者:aalind0    | 项目源码 | 文件源码
def classify(self, features):
        """Classify ``features`` by majority vote.

        Collects one prediction per entry of ``self._classifiers`` and
        returns ``mode`` of the collected predictions.
        """
        predictions = [member.classify(features) for member in self._classifiers]
        return mode(predictions)
项目:NLP-Sentiment-Analysis-Twitter    作者:aalind0    | 项目源码 | 文件源码
def confidence(self, features):
        """Return how strongly the classifiers agree on ``features``.

        The value is (votes for the most common label) / (total votes),
        where each classifier in ``self._classifiers`` casts one vote.
        """
        predictions = [member.classify(features) for member in self._classifiers]
        agreeing = predictions.count(mode(predictions))
        return agreeing / len(predictions)
项目:NLP-Sentiment-Analysis-Twitter    作者:aalind0    | 项目源码 | 文件源码
def sentiment(text):
    """Return ``(label, confidence)`` from ``voted_classifier`` for ``text``.

    Features are produced by ``find_features(text)``.
    """
    features = find_features(text)
    label = voted_classifier.classify(features)
    certainty = voted_classifier.confidence(features)
    return label, certainty
项目:NLP-Sentiment-Analysis-Twitter    作者:aalind0    | 项目源码 | 文件源码
def __init__(self, *classifiers):
        """Store the given classifiers for later use.

        :param classifiers: any number of positional arguments; they are
            kept, as a tuple, on ``self._classifiers``.
        """
        self._classifiers = classifiers

    #Creating our own classify method.
    #After iterating we return mode(votes), which just returns the most popular vote.
项目:NLP-Sentiment-Analysis-Twitter    作者:aalind0    | 项目源码 | 文件源码
def classify(self, features):
        """Return the most popular label among the wrapped classifiers.

        One vote is gathered from each classifier in ``self._classifiers``
        and ``mode`` picks the winner.
        """
        tally = [each.classify(features) for each in self._classifiers]
        return mode(tally)

    #Defining another parameter, confidence.
    #Since we have algorithms voting, we can tally the votes for and against the winning vote, and call this "confidence".
项目:twitter_trolls    作者:merqurio    | 项目源码 | 文件源码
def is_positive(sentence):
    """Return 1 if ``classifier`` labels ``sentence`` as "pos", else 0.

    Features come from ``find_features(sentence, all_features)``.
    """
    features = find_features(sentence, all_features)
    predicted = classifier.classify(features)
    return int(predicted == "pos")
项目:twitter-sentiment    作者:words-sdsc    | 项目源码 | 文件源码
def classify(self, features):
        """Return the majority label for ``features``.

        Asks every classifier in ``self._classifiers`` for a label and
        returns ``mode`` of the answers.
        """
        answers = [model.classify(features) for model in self._classifiers]
        return mode(answers)
项目:twitter-sentiment    作者:words-sdsc    | 项目源码 | 文件源码
def confidence(self, features):
        """Return the share of votes won by the majority label.

        Each classifier in ``self._classifiers`` votes once on
        ``features``; the result is the winning label's vote count divided
        by the total number of votes.
        """
        answers = [model.classify(features) for model in self._classifiers]
        majority = mode(answers)
        return answers.count(majority) / len(answers)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def classify(self, text):
        """Placeholder for classifying a string of text.

        :raises NotImplementedError: always; this implementation does no
            classification itself.
        """
        message = 'Must implement a "classify" method.'
        raise NotImplementedError(message)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def classify(self, text):
        """Return the label ``self.classifier`` assigns to ``text``.

        :param str text: A string of text; it is converted with
            ``self.extract_features`` before classification.
        """
        features = self.extract_features(text)
        model = self.classifier
        return model.classify(features)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def accuracy(self, test_set, format=None):
        """Return the accuracy of ``self.classifier`` on ``test_set``.

        :param test_set: A list of tuples of the form ``(text, label)``, or
            a file-like object that is read through ``self._read_data``.
        :param format: The file format, e.g. ``"csv"`` or ``"json"``.
            NOTE(review): this value is not forwarded to ``_read_data``
            here -- confirm whether that is intended.
        """
        test_data = self._read_data(test_set) if is_filelike(test_set) else test_set
        test_features = []
        for document, label in test_data:
            test_features.append((self.extract_features(document), label))
        return nltk.classify.accuracy(self.classifier, test_features)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Train the classifier with labeled and unlabeled feature sets and
        return the classifier.

        ``self.positive_features``, ``self.unlabeled_features`` and
        ``self.positive_prob_prior`` are always passed first; any extra
        positional or keyword arguments are forwarded to
        ``self.nltk_class.train`` after them. They used to be accepted but
        silently dropped, contrary to the documented purpose of this method.

        :rtype: A classifier
        """
        self.classifier = self.nltk_class.train(self.positive_features,
                                                self.unlabeled_features,
                                                self.positive_prob_prior,
                                                *args, **kwargs)
        return self.classifier
项目:Political-Opinion-Finder    作者:philhabell    | 项目源码 | 文件源码
def classify(self, features):
        """Return the label chosen by most of the wrapped classifiers.

        Each classifier in ``self._classifiers`` classifies ``features``;
        the votes are kept on ``self.votes`` (as before) and ``mode`` of
        them is returned.

        :param features: feature set handed to each classifier's ``classify``.
        """
        # The loop variable and per-vote scratch value are locals now; they
        # were previously written to ``self.i`` / ``self.j`` on every call.
        self.votes = [clf.classify(features) for clf in self._classifiers]
        return mode(self.votes)

    # find the confidence of results
    # must be handed:
    #     *featured words
项目:Political-Opinion-Finder    作者:philhabell    | 项目源码 | 文件源码
def conf(self, features):
        """Return the share of classifiers that voted for the winning label.

        Every classifier in ``self._classifiers`` classifies ``features``;
        the result is the number of votes equal to ``mode`` of the votes
        divided by the total number of votes. ``self.votes`` and
        ``self.choice_votes`` are still updated as before.

        :param features: feature set handed to each classifier's ``classify``.
        :return: fraction of votes that agree with the most common label.
        """
        self.votes = [clf.classify(features) for clf in self._classifiers]
        self.choice_votes = self.votes.count(mode(self.votes))
        # Do not store the ratio on ``self.conf``: that instance attribute
        # would shadow this method and break a second ``obj.conf(...)`` call.
        return self.choice_votes / len(self.votes)

    # find the features of document
    # must be handed:
    #     *document to find feature of
    #     *word features
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def classify(self, text):
        """Stub for classifying a string of text.

        :raises NotImplementedError: unconditionally.
        """
        reason = 'Must implement a "classify" method.'
        raise NotImplementedError(reason)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def classify(self, text):
        """Classify ``text`` with ``self.classifier``.

        :param str text: A string of text, turned into features by
            ``self.extract_features`` first.
        """
        extracted = self.extract_features(text)
        trained = self.classifier
        return trained.classify(extracted)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def accuracy(self, test_set, format=None):
        """Compute the accuracy of ``self.classifier`` on a test set.

        :param test_set: A list of tuples of the form ``(text, label)``, or
            a file-like object, which is loaded via ``self._read_data``.
        :param format: The file format, e.g. ``"csv"`` or ``"json"``.
            NOTE(review): the value is currently not passed on to
            ``_read_data`` -- confirm whether that is intended.
        """
        if is_filelike(test_set):
            samples = self._read_data(test_set)
        else:
            # Already a list of (text, label) tuples.
            samples = test_set
        labeled_features = []
        for text, label in samples:
            labeled_features.append((self.extract_features(text), label))
        return nltk.classify.accuracy(self.classifier, labeled_features)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Train the classifier with labeled and unlabeled feature sets and
        return the classifier.

        The wrapped ``self.nltk_class.train`` always receives
        ``self.positive_features``, ``self.unlabeled_features`` and
        ``self.positive_prob_prior``; extra positional and keyword arguments
        given to this method are appended to that call. Earlier they were
        accepted but never forwarded.

        :rtype: A classifier
        """
        self.classifier = self.nltk_class.train(self.positive_features,
                                                self.unlabeled_features,
                                                self.positive_prob_prior,
                                                *args, **kwargs)
        return self.classifier
项目:Data-Mining-From-Twitter    作者:N1h1l1sT    | 项目源码 | 文件源码
def DoClassify(CurClassifier, topicResultsTxt, topicTweetsLDATxt):
    """Write the majority sentiment of each batch of 100 lines to a file.

    Lines of ``topicTweetsLDATxt`` are classified one by one with
    ``CurClassifier.classify(extract_features(line.split()))``. Once 100
    lines have been counted, the next line triggers a flush: the label with
    the highest count is written to ``topicResultsTxt`` and counting
    restarts.

    :param CurClassifier: object exposing ``classify(features)``.
    :param topicResultsTxt: path of the output file (overwritten).
    :param topicTweetsLDATxt: path of the input file, read line by line.
    """
    from collections import Counter

    batch_size = 100
    sentiment_counts = Counter()
    counted = 0

    # Both files are managed by ``with`` so the output file is closed even
    # if classification raises part-way through.
    with open(topicResultsTxt, 'w') as topicResult, \
            open(topicTweetsLDATxt) as topicFile:
        for line in topicFile:
            if counted != batch_size:
                label = CurClassifier.classify(extract_features(line.split()))
                sentiment_counts[label] += 1
                counted += 1
                continue

            # NOTE(review): the line that triggers the flush is not itself
            # classified, and a trailing batch with no following line is
            # never written. Kept as in the original; confirm the input
            # really has a separator line after every 100 tweets.
            major_sentiment, votes = sentiment_counts.most_common(1)[0]
            if votes <= 1:
                # The original seeded the search with a 'Dummy' entry of
                # count 1, so a label needs at least 2 votes to win.
                major_sentiment = 'Dummy'
            topicResult.write(major_sentiment + '\n')
            sentiment_counts.clear()
            counted = 0

#Extracting the features of the tweet without term frequencies with the format as needed by the classifier
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def classify(self, text):
        """Unimplemented hook for classifying a string of text.

        :raises NotImplementedError: every time it is called.
        """
        explanation = 'Must implement a "classify" method.'
        raise NotImplementedError(explanation)
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def classify(self, text):
        """Return the classification of ``text``.

        :param str text: A string of text. Its features are computed with
            ``self.extract_features`` and passed to ``self.classifier``.
        """
        feature_set = self.extract_features(text)
        clf = self.classifier
        return clf.classify(feature_set)
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def accuracy(self, test_set, format=None):
        """Measure ``self.classifier`` against a test set.

        :param test_set: A list of tuples of the form ``(text, label)``, or
            a file-like object to be parsed by ``self._read_data``.
        :param format: The file format, e.g. ``"csv"`` or ``"json"``.
            NOTE(review): ``format`` is not handed to ``_read_data`` in this
            method -- confirm whether that is intended.
        """
        pairs = self._read_data(test_set) if is_filelike(test_set) else test_set
        featurized = []
        for raw_text, expected in pairs:
            featurized.append((self.extract_features(raw_text), expected))
        return nltk.classify.accuracy(self.classifier, featurized)
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Train the classifier with labeled and unlabeled feature sets and
        return the classifier.

        Calls ``self.nltk_class.train`` with ``self.positive_features``,
        ``self.unlabeled_features`` and ``self.positive_prob_prior``,
        followed by whatever extra positional and keyword arguments this
        method receives. Those extras were previously ignored even though
        the method exists to pass them through.

        :rtype: A classifier
        """
        self.classifier = self.nltk_class.train(self.positive_features,
                                                self.unlabeled_features,
                                                self.positive_prob_prior,
                                                *args, **kwargs)
        return self.classifier
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def classify(self, text):
        """Label ``text`` using ``self.classifier``.

        :param str text: A string of text; ``self.extract_features`` turns
            it into the features that get classified.
        """
        text_feats = self.extract_features(text)
        underlying = self.classifier
        return underlying.classify(text_feats)
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def accuracy(self, test_set, format=None):
        """Compute accuracy of ``self.classifier`` over ``test_set``.

        :param test_set: A list of tuples of the form ``(text, label)``, or
            a file-like object that ``self._read_data`` converts to one.
        :param format: The file format, e.g. ``"csv"`` or ``"json"``.
            NOTE(review): never forwarded to ``_read_data`` by this method
            -- confirm whether that is intended.
        """
        if is_filelike(test_set):
            records = self._read_data(test_set)
        else:
            # Caller supplied the (text, label) tuples directly.
            records = test_set
        feature_label_pairs = []
        for item, gold in records:
            feature_label_pairs.append((self.extract_features(item), gold))
        return nltk.classify.accuracy(self.classifier, feature_label_pairs)
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def train(self, *args, **kwargs):
        """Train the classifier with labeled and unlabeled feature sets and
        return the classifier.

        ``self.positive_features``, ``self.unlabeled_features`` and
        ``self.positive_prob_prior`` are supplied to
        ``self.nltk_class.train``; additional positional and keyword
        arguments are now forwarded after them instead of being discarded.

        :rtype: A classifier
        """
        self.classifier = self.nltk_class.train(self.positive_features,
                                                self.unlabeled_features,
                                                self.positive_prob_prior,
                                                *args, **kwargs)
        return self.classifier
项目:chatbot    作者:minggli    | 项目源码 | 文件源码
def classify(query,
             engine=engine,
             threshold=.85,
             limit=5):
    """Return the conversation output for the classes predicted from ``query``.

    ``query`` is joined with spaces and preprocessed, ``engine.prob_classify``
    scores it, and the ``(label, probability)`` pairs are handed to
    ``feed_conversation`` together with ``limit`` and ``threshold``.
    """
    words = preprocess(' '.join(query))
    print('understanding {}...'.format(words))
    distribution = engine.prob_classify(words)
    labels = list(distribution.samples())
    samples = []
    for label in labels:
        samples.append((label, distribution.prob(label)))
    return feed_conversation(samples, limit, threshold)
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def classify(self, text):
        """Base version of classifying a string of text; not implemented.

        :raises NotImplementedError: on every call.
        """
        notice = 'Must implement a "classify" method.'
        raise NotImplementedError(notice)