Python nltk 模块,induce_pcfg() 实例源码

我们从Python开源项目中,提取了以下16个代码示例,用于说明如何使用nltk.induce_pcfg()

项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=float(pcount[p]) / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=float(pcount[p]) / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:but_sentiment    作者:MixedEmotions    | 项目源码 | 文件源码
def induce_pcfg(start, productions):
    """
    Induce a PCFG grammar from a list of productions.

    The probability of a production A -> B C in a PCFG is:

    |                count(A -> B C)
    |  P(B, C | A) = ---------------       where \* is any right hand side
    |                 count(A -> \*)

    :param start: The start symbol
    :type start: Nonterminal
    :param productions: The list of productions that defines the grammar
    :type productions: list(Production)
    """
    # Production count: the number of times a given production occurs
    pcount = {}

    # LHS-count: counts the number of times a given lhs occurs
    lcount = {}

    for prod in productions:
        lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
        pcount[prod]       = pcount.get(prod,       0) + 1

    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
                                prob=pcount[p] / lcount[p.lhs()])
             for p in pcount]
    return PCFG(start, prods)


#################################################################
# Helper functions for reading productions
#################################################################
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)
项目:but_sentiment    作者:MixedEmotions    | 项目源码 | 文件源码
def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use .replace(...) is to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
    print()

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    item = treebank._fileids[0]
    for tree in treebank.parsed_sents(item)[:3]:
        # perform optional tree transformations, e.g.:
        tree.collapse_unary(collapsePOS = False)
        tree.chomsky_normal_form(horzMarkov = 2)

        productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents(item)[0].leaves()
    print(sent)
    for parse in parser.parse(sent):
        print(parse)