Python nltk 模块,Tree() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用nltk.Tree()

项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:tetre    作者:aoldoni    | 项目源码 | 文件源码
def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Tranforms a Spacy dependency tree into an NLTK tree, with certain spacy tree node attributes serving
    as parts of the NLTK tree node label content for uniqueness.

    Args:
        node: The starting node from the tree in which the transformation will occur.
        attr_list: Which attributes from the Spacy nodes will be included in the NLTK node label.
        level: The maximum depth of the tree.

    Returns:
        A NLTK Tree (nltk.tree)
    """

    # transforms attributes in a node representation
    value_list = [getattr(node, attr) for attr in attr_list]
    node_representation = "/".join(value_list)

    if level == 0:
        return node_representation

    if node.n_lefts + node.n_rights > 0:
        return Tree(node_representation, [to_nltk_tree_general(child, attr_list, level-1) for child in node.children])
    else:
        return node_representation
项目:tetre    作者:aoldoni    | 项目源码 | 文件源码
def get_node_representation(tetre_format, token):
    """Given a format and a SpaCy node (spacy.token), returns this node representation using the NLTK tree (nltk.tree).
    It recursivelly builds a NLTK tree and returns it, not only the node itself.

    Args:
        tetre_format: The attributes of this node that will be part of its string representation.
        token: The SpaCy node itself (spacy.token).

    Returns:
        A NLTK Tree (nltk.tree)
    """

    params = tetre_format.split(",")
    node_representation = token.pos_

    if token.n_lefts + token.n_rights > 0:
        tree = Tree(node_representation,
                    [to_nltk_tree_general(child, attr_list=params, level=0) for child in token.children])
    else:
        tree = Tree(node_representation, [])

    return tree
项目:tetre    作者:aoldoni    | 项目源码 | 文件源码
def nltk_tree_to_qtree(tree):
    """Transforms a NLTK Tree in a QTREE. A QTREE is a string representation of a tree.

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    Args:
        tree: The NLTK Tree (nltk.tree).

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    self_result = " [ "

    if isinstance(tree, Tree):
        self_result += " " + tree.label() + " "

        if len(tree) > 0:
            self_result += " ".join([nltk_tree_to_qtree(node) for node in sorted(tree)])

    else:
        self_result += " " + str(tree) + " "

    self_result += " ] "

    return self_result
项目:koalaNLP    作者:yuchenz    | 项目源码 | 文件源码
def rightBinarize(tr):
    children = []
    for child in tr:
        children.append(child)

    tmpNode = children[-1] 
    i = len(children) - 2
    while i > 0:
        tmpNode2 = nltk.Tree("(X)")
        tmpNode2.append(children[i])
        tmpNode2.append(tmpNode)
        tmpNode = tmpNode2
        i -= 1

    while len(tr) > 1:
        tr.pop()

    tr.append(tmpNode)
项目:koalaNLP    作者:yuchenz    | 项目源码 | 文件源码
def leftBinarize(tr):
    children = []
    for child in tr:
        children.append(child)

    tmpNode = children[0] 
    i = 1
    while i < len(children) - 1:
        tmpNode2 = nltk.Tree("(X)")
        tmpNode2.append(tmpNode)
        tmpNode2.append(children[i])
        tmpNode = tmpNode2
        i += 1

    while len(tr) > 1:
        tr.pop(0)

    tr.insert(0, tmpNode)
项目:koalaNLP    作者:yuchenz    | 项目源码 | 文件源码
def vvBinarize(tr):
    children = []
    vvIndex = None
    for i, child in enumerate(tr):
        children.append(child)
        if child.node in vvTags: 
            vvIndex = i

    if vvIndex == None:
        print >> sys.stderr, "no vv in the children!!!",
        output(tr)
        return

    tmpNode = nltk.Tree("(X)")
    for i in xrange(vvIndex, len(tr)):
        tmpNode.append(children[i])
    leftBinarize(tmpNode)

    while len(tr) > vvIndex:
        tr.pop()
    tr.append(tmpNode)
    rightBinarize(tr)
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:Lango    作者:ayoungprogrammer    | 项目源码 | 文件源码
def match_rules_context(tree, rules, parent_context={}):
    """Recursively matches a Tree structure with rules and returns context

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call
    Returns:
        dict: Context matched dictionary of matched rules or
        None if no match
    """
    for template, match_rules in rules.items():
        context = parent_context.copy()
        if match_template(tree, template, context):
            for key, child_rules in match_rules.items():
                child_context = match_rules_context(context[key], child_rules, context)
                if child_context:
                    for k, v in child_context.items():
                        context[k] = v
                else:
                    return None
            return context
    return None
项目:Lango    作者:ayoungprogrammer    | 项目源码 | 文件源码
def match_rules_context_multi(tree, rules, parent_context={}):
    """Recursively matches a Tree structure with rules and returns context

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call
    Returns:
        dict: Context matched dictionary of matched rules or
        None if no match
    """
    all_contexts = []
    for template, match_rules in rules.items():
        context = parent_context.copy()
        if match_template(tree, template, context):
            child_contextss = []
            if not match_rules:
                all_contexts += [context]
            else:
                for key, child_rules in match_rules.items():
                    child_contextss.append(match_rules_context_multi(context[key], child_rules, context))
                all_contexts += cross_context(child_contextss)    
    return all_contexts
项目:Lango    作者:ayoungprogrammer    | 项目源码 | 文件源码
def match_template(tree, template, args=None):
    """Check if match string matches Tree structure

    Args:
        tree (Tree): Parsed Tree structure of a sentence
        template (str): String template to match. Example: "( S ( NP ) )"
    Returns:
        bool: If they match or not
    """
    tokens = get_tokens(template.split())
    cur_args = {}
    if match_tokens(tree, tokens, cur_args):
        if args is not None:
            for k, v in cur_args.items():
                args[k] = v
        logger.debug('MATCHED: {0}'.format(template))
        return True
    else:
        return False
项目:Lango    作者:ayoungprogrammer    | 项目源码 | 文件源码
def get_object(tree):
    """Get the object in the tree object.

    Method should remove unnecessary letters and words::

        the
        a/an
        's

    Args:
        tree (Tree): Parsed tree structure
    Returns:
        Resulting string of tree ``(Ex: "red car")``
    """
    if isinstance(tree, Tree):
        if tree.label() == 'DT' or tree.label() == 'POS':
            return ''
        words = []
        for child in tree:
            words.append(get_object(child))
        return ' '.join([_f for _f in words if _f])
    else:
        return tree
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:keras-textgen    作者:kenoma    | 项目源码 | 文件源码
def get_continuous_chunks(self, text):
         chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
         prev = None
         continuous_chunk = []
         current_chunk = []
         for i in chunked:
                 if type(i) == nltk.Tree:
                         current_chunk.append(" ".join([token for token, pos in i.leaves()]))
                 elif current_chunk:
                         named_entity = " ".join(current_chunk)
                         if named_entity not in continuous_chunk:
                                 continuous_chunk.append(named_entity)
                                 current_chunk = []
                 else:
                         continue
         return continuous_chunk
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:props    作者:gabrielStanovsky    | 项目源码 | 文件源码
def _to_nltk_format(self):
        from nltk import Tree

        return Tree(self.parent_relation,
                   [Tree(self.pos,
                         [self.word] + [c._to_nltk_format() for c in self.children]  )])

#         from nltk import Tree
#         label = "({0}) {1} ({2})".format(self.parent_relation,self.word,self.pos)
#         if not self.children:
#             return label
#         return Tree(label,[c._to_nltk_format() for c in self.children])


    # Feature functions, should conform to naming _(PREDICATE/ARGUMENT)_FEATURE_(feature_name)
    # and return a tuple of (value,span)

    #return the head of the
项目:props    作者:gabrielStanovsky    | 项目源码 | 文件源码
def find_tree_matches(tree,pat):
    """
    Get all subtrees matching pattern

    @type  tree: DepTree
    @param tree: tree in which to search for matches

    @type  pat: nltk.Tree
    @param pat: a pattern to match against tree

    @rtype:  list [unification of pat]
    @return: all possible unification of pat in tree
    """


    ret = []
    curMatch = tree.match(pat)
    if curMatch:
        ret.append(curMatch)
    for c in tree.children:
        ret.extend(find_tree_matches(c,pat))
    return ret
项目:kind2anki    作者:prz3m    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:but_sentiment    作者:MixedEmotions    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct
项目:nlp-demo    作者:SaptakS    | 项目源码 | 文件源码
def bft(tree):
    """ Perform a breadth-first traversal of a tree.
    Return the nodes in a list in level-order.

    Args:
        tree: a tree node
    Returns:
        lst: a list of tree nodes in left-to-right level-order
    """
    lst = []
    queue = Queue.Queue()
    queue.put(tree)
    while not queue.empty():
        node = queue.get()
        lst.append(node)
        for child in node:
            if isinstance(child, nltk.Tree):
                queue.put(child)
    return lst
项目:nlp-demo    作者:SaptakS    | 项目源码 | 文件源码
def traverse_tree(tree, pro):
    """ Traverse a tree in a left-to-right, breadth-first manner,
    proposing any NP encountered as an antecedent. Returns the 
    tree and the position of the first possible antecedent.

    Args:
        tree: the tree being searched
        pro: the pronoun being resolved (string)
    """
    # Initialize a queue and enqueue the root of the tree
    queue = Queue.Queue()
    queue.put(tree)
    while not queue.empty():
        node = queue.get()
        # if the node is an NP, return it as a potential antecedent
        if "NP" in node.label() and match(tree, get_pos(tree,node), pro):
            return tree, get_pos(tree, node)
        for child in node:
            if isinstance(child, nltk.Tree):
                queue.put(child)
    # if no antecedent is found, return None
    return None, None
项目:nlp-demo    作者:SaptakS    | 项目源码 | 文件源码
def calc(param):

    p = ["He", "he", "Him", "him", "She", "she", "Her",
        "her", "It", "it", "They", "they"]
    r = ["Himself", "himself", "Herself", "herself",
        "Itself", "itself", "Themselves", "themselves"]
    fname = param[1]
    pro = param[2]
    with open(fname) as f:
        sents = f.readlines()
    trees = [Tree.fromstring(s) for s in sents]
    pos = get_pos(trees[-1], pro)
    pos = pos[:-1]
    if pro in p:
        tree, pos = hobbs(trees, pos)
        #for t in trees:
        #    print t, '\n'        
        #print "Proposed antecedent for '"+pro+"':", tree[pos]
        return tree, tree[pos]
    elif pro in r:
        tree, pos = resolve_reflexive(trees, pos)
        #for t in trees:
        #    print t, '\n'
        #print "Proposed antecedent for '"+pro+"':", tree[pos] 
        return tree, tree[pos]
项目:cognitive-system-postagger    作者:made2591    | 项目源码 | 文件源码
def create_xsvversion_of_tree(t):
    founded, sub_tree = find_deepleftfirst_verb(t)
    if sub_tree != None:
        t._label = "S1"
        temp = Tree("S", [sub_tree]+[t])
        return temp
    return t
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Use this alternating list to create the chunkstruct.
        pieces = []
        index = 0
        piece_in_chunk = 0
        for piece in re.split('[{}]', self._str):

            # Find the list of tokens contained in this piece.
            length = piece.count('<')
            subsequence = self._pieces[index:index+length]

            # Add this list of tokens to our pieces.
            if piece_in_chunk:
                pieces.append(Tree(chunk_label, subsequence))
            else:
                pieces += subsequence

            # Update index, piece_in_chunk
            index += length
            piece_in_chunk = not piece_in_chunk

        return Tree(self._root_label, pieces)
项目:Price-Comparator    作者:Thejas-1    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_label)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Use this alternating list to create the chunkstruct.
        pieces = []
        index = 0
        piece_in_chunk = 0
        for piece in re.split('[{}]', self._str):

            # Find the list of tokens contained in this piece.
            length = piece.count('<')
            subsequence = self._pieces[index:index+length]

            # Add this list of tokens to our pieces.
            if piece_in_chunk:
                pieces.append(Tree(chunk_label, subsequence))
            else:
                pieces += subsequence

            # Update index, piece_in_chunk
            index += length
            piece_in_chunk = not piece_in_chunk

        return Tree(self._root_label, pieces)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_label)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Use this alternating list to create the chunkstruct.
        pieces = []
        index = 0
        piece_in_chunk = 0
        for piece in re.split('[{}]', self._str):

            # Find the list of tokens contained in this piece.
            length = piece.count('<')
            subsequence = self._pieces[index:index+length]

            # Add this list of tokens to our pieces.
            if piece_in_chunk:
                pieces.append(Tree(chunk_label, subsequence))
            else:
                pieces += subsequence

            # Update index, piece_in_chunk
            index += length
            piece_in_chunk = not piece_in_chunk

        return Tree(self._root_label, pieces)
项目:neighborhood_mood_aws    作者:jarrellmark    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_label)
项目:tetre    作者:aoldoni    | 项目源码 | 文件源码
def to_nltk_tree(node):
    """Creates a fixed representation of a Spacy dependency tree as a NLTK tree. This fixed representation
    will be formed by the Spacy's node attributes: dep_, orth_ and pos_.

    Args:
        node: The starting node from the tree in which the transformation will occur.

    Returns:
        A NLTK Tree (nltk.tree)
    """
    if node.n_lefts + node.n_rights > 0:
        return Tree(node.dep_+"/"+node.orth_+"/"+node.pos_, [to_nltk_tree(child) for child in node.children])
    else:
        return node.dep_+"/"+node.orth_+"/"+node.pos_
项目:koalaNLP    作者:yuchenz    | 项目源码 | 文件源码
def isLegalTree(line, i):
    try:
        t = nltk.Tree(line)
        pt = nltk.ParentedTree(line)
    except ValueError:
        print >> sys.stderr, "illegal tree!!! #" + str(i)
        print >> sys.stderr, line
        exit(1)
项目:koalaNLP    作者:yuchenz    | 项目源码 | 文件源码
def binarize(line, lan = "en"):
    assert lan in ['en', 'ch'], "illegal language (en or ch): %s" % lan

    root = nltk.Tree(line)
    stack = [root]
    while stack:
        curNode = stack.pop()
        if len(curNode) > 2:
            if curNode.node == 'NP':
                rightBinarize(curNode)
            elif curNode.node == 'VP':
                if lan == 'en':
                    vvBinarize(curNode)
                elif lan == 'ch':
                    if curNode[0].node in vvTags: 
                        leftBinarize(curNode)
                    elif curNode[-1].node in vvTags: 
                        rightBinarize(curNode)
                    else:
                        vvBinarize(curNode)

        for child in curNode:
            #print >> sys.stderr, child
            if child.height() > 2:
                stack.append(child)
        continue

    return ' '.join(root.pprint().split()) + '\n'
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Use this alternating list to create the chunkstruct.
        pieces = []
        index = 0
        piece_in_chunk = 0
        for piece in re.split('[{}]', self._str):

            # Find the list of tokens contained in this piece.
            length = piece.count('<')
            subsequence = self._pieces[index:index+length]

            # Add this list of tokens to our pieces.
            if piece_in_chunk:
                pieces.append(Tree(chunk_label, subsequence))
            else:
                pieces += subsequence

            # Update index, piece_in_chunk
            index += length
            piece_in_chunk = not piece_in_chunk

        return Tree(self._root_label, pieces)
项目:hate-to-hugs    作者:sdoran35    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_label)
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Use this alternating list to create the chunkstruct.
        pieces = []
        index = 0
        piece_in_chunk = 0
        for piece in re.split('[{}]', self._str):

            # Find the list of tokens contained in this piece.
            length = piece.count('<')
            subsequence = self._pieces[index:index+length]

            # Add this list of tokens to our pieces.
            if piece_in_chunk:
                pieces.append(Tree(chunk_label, subsequence))
            else:
                pieces += subsequence

            # Update index, piece_in_chunk
            index += length
            piece_in_chunk = not piece_in_chunk

        return Tree(self._root_label, pieces)
项目:FancyWord    作者:EastonLee    | 项目源码 | 文件源码
def parse(self, chunk_struct, trace=None):
        """
        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_label)
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def __init__(self, chunk_struct, debug_level=1):
        """
        Construct a new ``ChunkString`` that encodes the chunking of
        the text ``tagged_tokens``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            We recommend you use at least level 1.  You should
            probably use level 3 if you use any non-standard
            subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        tags = [self._tag(tok) for tok in self._pieces]
        self._str = '<' + '><'.join(tags) + '>'
        self._debug = debug_level
项目:beepboop    作者:nicolehe    | 项目源码 | 文件源码
def _tag(self, tok):
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.label()
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')