Source code for grammarinator.runtime.rule

# Copyright (c) 2017-2025 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

from __future__ import annotations

from copy import deepcopy
from itertools import zip_longest
from math import inf
from textwrap import indent
from typing import Any, ClassVar, Iterator


[docs] class RuleSize: """ Size information about a grammar rule or a (sub)tree (generated based on a grammar rule), expressed using different metrics (both vertical and horizontal). This class can be used to describe various size-related concepts, e.g.: - The minimum depth of derivation of a grammar rule and the minimum number of tokens generated by a grammar rule. - The actual depth of derivation of a grammar rule and the actual number of tokens generated by a grammar rule. - The maximum allowed depth of derivation in a tree and the maximum number of tokens allowed in a tree. - The depth of derivation reaching a node from the root of the tree and the number of tokens in the tree outside the subtree of the node. The class supports additive arithmetics (``+``, ``+=``, ``-``, ``-=``) and comparisons (``==``, ``<=``, ``<``). Note, however, that the type is not totally but partially ordered, i.e., a size object is less than another size object iff all its metrics compare less than the corresponding metrics of the other size object. """ __slots__ = ('depth', 'tokens') max: ClassVar[RuleSize] # pylint: disable=declare-non-slot """Maxium possible size (``(inf, inf)``).""" def __init__(self, depth: int | float = 0, tokens: int | float = 0) -> None: """ :param depth: Derivation length (default: 0). :param tokens: Token count (default: 0). """ self.depth: int | float = depth #: Derivation length. self.tokens: int | float = tokens #: Token count. def __add__(self, other: RuleSize) -> RuleSize: return RuleSize(depth=self.depth + other.depth, tokens=self.tokens + other.tokens) def __iadd__(self, other: RuleSize) -> RuleSize: self.depth += other.depth self.tokens += other.tokens return self def __sub__(self, other: RuleSize) -> RuleSize: return RuleSize(depth=self.depth - other.depth, tokens=self.tokens - other.tokens) def __isub__(self, other: RuleSize) -> RuleSize: self.depth -= other.depth self.tokens -= other.tokens return self def __eq__(self, other: object) -> bool: if not isinstance(other, RuleSize): return NotImplemented return self.depth == other.depth and self.tokens == other.tokens def __le__(self, other: RuleSize) -> bool: # This defines a partial order (i.e., reflexive, antisymmetric, and transitive). # Not every pair of objects are comparable. return self.depth <= other.depth and self.tokens <= other.tokens def __lt__(self, other: RuleSize) -> bool: # This defines a strict partial order (i.e., irreflexive, asymmetric, and transitive). # Not every pair of objects are comparable. return self.depth < other.depth and self.tokens < other.tokens def __repr__(self) -> str: return f'{self.__class__.__name__}(depth={self.depth!r}, tokens={self.tokens!r})'
RuleSize.max = RuleSize(inf, inf)
[docs] class Rule: """ Abstract base class of tree nodes. Tree nodes support deep copying via :func:`copy.deepcopy` (but no shallow copying via :func:`copy.copy`). A deep copy of a node is the copy of the whole subtree rooted at that node. However, the parent of a copy node is always None, even if the original node had a parent. Tree nodes support various string representations: - The "informal" representation of a node consists of the concatenation of the string contents of all tokens found in the tree rooted at the node in depth-first order. No extra whitespace is inserted for separation. - The "official" representation of a node is an (almost) valid Python expression to recreate the tree rooted at the node. If the node has any children, the representation consists of multiple lines. - The "debug" representation of a node is a multi-line string with the most important attributes of the tree rooted at the node, using vertical bars for visual guidance. The builtins :class:`str` and :func:`repr` can be used to compute the "informal" and "official" representations, respectively, while :func:`format` can produce all of the above. When string formatting is used, the following format specifiers are recognized: ================= ========================== Specifier Meaning ================= ========================== ``''`` or ``'s'`` "Informal" representation. ``'r'`` "Official" representation. ``'|'`` "Debug" representation. ================= ========================== Thus, if ``n`` is a node, the following expressions give equivalent results: - ``str(n)``, ``f'{n}'``, ``f'{n!s}'``, ``f'{n:s}'``, ``'{}'.format(n)``, ``'{!s}'.format(n)``, and ``'{:s}'.format(n)`` - ``repr(n)``, ``f'{n!r}'``, ``f'{n:r}'``, ``'{!r}'.format(n)``, and ``'{:r}'.format(n)`` - ``f'{n:|}'`` and ``'{:|}'.format(n)`` """ __slots__ = ('name', 'parent') def __init__(self, *, name: str) -> None: """ :param name: Name of the node, i.e., name of the corresponding parser or lexer rule in the grammar. """ self.name: str = name #: Name of the node, i.e., name of the corresponding parser or lexer rule in the grammar. self.parent: ParentRule | None = None #: Parent node object. @property def left_sibling(self) -> Rule | None: """ Get the left sibling of the node if any. Return ``None`` if the node has no parent or is the leftmost child of its parent. :return: The left sibling of the current node or ``None``. """ if not self.parent: return None self_idx = self.parent.children.index(self) if self_idx == 0: return None return self.parent.children[self_idx - 1] @property def right_sibling(self) -> Rule | None: """ Get the right sibling of the node if any. Return ``None`` if the node has no parent or is the rightmost child of its parent. :return: The right sibling of the current node or ``None``. """ if not self.parent: return None self_idx = self.parent.children.index(self) if self_idx == len(self.parent.children) - 1: return None return self.parent.children[self_idx + 1] @property def root(self) -> Rule: """ Get the root of the node, i.e., the node at the top of the parent chain. :return: The root of the current node. """ node = self while node.parent: node = node.parent return node
[docs] def replace(self, node: Rule) -> Rule: """ Replace the current node with ``node`` among its siblings. :param node: The replacement node. :return: ``node`` """ node.remove() if self.parent and node is not self: self.parent.children[self.parent.children.index(self)] = node node.parent = self.parent self.parent = None return node
[docs] def remove(self) -> None: """ Remove the current node from its parent. """ if self.parent: self.parent.children.remove(self) self.parent = None
[docs] def equals(self, other: Any) -> bool: """ Compare two nodes (potentially including any children) for equality. The comparison is not implemented within ``__eq__`` to ensure that nodes can be added to collections based on identity. :param other: The node to compare the current node to. :return: Whether the two nodes are equal. """ return self.__class__ == other.__class__ and self.name == other.name
[docs] def equalTokens(self, other: Rule) -> bool: """ Compare the tokens in the sub-trees of two nodes. :param other: The node to compare the current node to. :return: Whether the two nodes are equal. """ return all(self_token == other_token for self_token, other_token in zip_longest(self.tokens(), other.tokens()))
[docs] def tokens(self) -> Iterator[str]: """ Generator method to iterate over the (non-empty) tokens (i.e., strings) of the sub-tree of the node. :return: Iterator over token string contents. """ raise NotImplementedError()
def _dbg_(self) -> str: """ Called by :meth:`__format__` to compute the "debug" string representation of a node. """ raise NotImplementedError() def __format__(self, format_spec: str) -> str: if format_spec in ['', 's']: return str(self) if format_spec == 'r': return repr(self) if format_spec == '|': return self._dbg_() raise TypeError def __copy__(self) -> Rule: raise TypeError('shallow copy not supported') def __deepcopy__(self, memo: dict[int, Any]) -> Rule: raise NotImplementedError()
[docs] class ParentRule(Rule): """ Abstract base class of tree nodes that can have children. """ __slots__ = ('children',) def __init__(self, *, name: str, children: list[Rule] | None = None) -> None: """ :param name: Name of the corresponding parser rule in the grammar. :param children: Children of the rule (default: no children). """ super().__init__(name=name) self.children: list[Rule] = [] #: Children of the rule. if children: self.add_children(children) @property def last_child(self) -> Rule | None: """ Get the last child of the current node if any. Return ``None`` if the node has no children. """ return self.children[-1] if self.children else None
[docs] def insert_child(self, idx: int, node: Rule) -> None: """ Insert node as child at position. :param idx: Index of position to insert ``node`` at. :param node: Node to be inserted. """ if not node: return node.remove() self.children.insert(idx, node) node.parent = self
[docs] def add_child(self, node: Rule) -> None: """ Add node to the end of the list of the children. :param node: Node to be added to children. """ if node is None: return node.remove() self.children.append(node) node.parent = self
[docs] def add_children(self, nodes: list[Rule]) -> None: """ Add mulitple nodes to the end of the list of the children. :param nodes: List of nodes to be added to children. """ for node in nodes: self.add_child(node)
[docs] def __iadd__(self, item: Rule | list[Rule]) -> ParentRule: """ Support for ``+=`` operation to add one or more children to the current node. An alias to :meth:`add_child` or :meth:`add_children` depending on the type of ``child``. :param item: The node(s) to be added as child. :return: The current node with extended children. """ if isinstance(item, list): self.add_children(item) else: self.add_child(item) return self
[docs] def equals(self, other: Any) -> bool: return super().equals(other) and len(self.children) == len(other.children) and all(child.equals(other.children[i]) for i, child in enumerate(self.children))
[docs] def tokens(self) -> Iterator[str]: for child in self.children: yield from child.tokens()
def __str__(self) -> str: return ''.join(str(child) for child in self.children) def _repr_children(self) -> str: return 'children=[\n{children}]'.format(children=indent(',\n'.join(repr(child) for child in self.children), ' ')) def _dbg_children(self) -> str: return '\n' + indent('\n'.join(child._dbg_() for child in self.children), '| ') if self.children else ""
[docs] class UnparserRule(ParentRule): """ Tree node representing a parser rule. It can have zero or more :class:`UnparserRule`, :class:`UnlexerRule`, :class:`UnparserRuleQuantifier` or :class:`UnparserRuleAlternative` children. """ def __getattr__(self, item: str) -> Rule | list[Rule]: # This check is needed to avoid infinite recursions when loading a tree # with pickle. In such cases, the loaded instance is prepared by # creating an empty object with the expected ``__class__`` and by # restoring the saved attributes (without calling ``__init__``). # During this operation, the ``__set_state__`` method of the target # class is tried to be called, if it exists. Otherwise, ``__getattr__`` # throws an ``AttributeError``. However, if the instantiation of this # error object tries to access any field that is not yet added by # pickle, then it throws another ``AttributeError``, causing an # infinite recursion. Filtering for the field names, that are used # later in this method, eliminates the issue. if item in ['name', 'children']: raise AttributeError() result = [] worklist = list(reversed(self.children)) while worklist: child = worklist.pop() if isinstance(child, ParentRule) and not isinstance(child, UnparserRule): worklist.extend(reversed(child.children)) elif child.name == item: result.append(child) if not result: raise AttributeError(f'[{self.name}] No child with name {item!r} {[child.name for child in self.children]}.') return result[0] if len(result) == 1 else result def __repr__(self) -> str: parts = [ f'name={self.name!r}', ] if self.children: parts.append(self._repr_children()) return f'{self.__class__.__name__}({", ".join(parts)})' def _dbg_(self) -> str: return f'{self.name}{self._dbg_children()}' def __deepcopy__(self, memo: dict[int, Any]) -> UnparserRule: return UnparserRule(name=deepcopy(self.name, memo), children=[deepcopy(child, memo) for child in self.children])
[docs] class UnlexerRule(Rule): """ Tree node representing a lexer rule or token. It has a string constant set in its ``src`` field. """ __slots__ = ('src', 'size', 'immutable') def __init__(self, *, name: str, src: str = '', size: RuleSize | None = None, immutable: bool = False) -> None: """ :param name: Name of the corresponding lexer rule in the grammar. :param src: String content of the lexer rule (default: ""). :param size: Size of the lexer rule (default: (1,1) if ``src`` is not empty, (0,0) otherwise). :param immutable: Boolean to mark literal Unlexer nodes as immutable. """ super().__init__(name=name) self.src: str = src #: String content of the lexer rule. self.size: RuleSize = size or (RuleSize(depth=1, tokens=1) if src else RuleSize(depth=0, tokens=0)) #: Size of the lexer rule, aggregated from the token fragments it is composed of. self.immutable: bool = immutable #: Whether the lexer rule is immutable, i.e., its source is defined as a literal and cannot be changed.
[docs] def equals(self, other: Any) -> bool: return super().equals(other) and self.src == other.src and self.immutable == other.immutable
[docs] def tokens(self) -> Iterator[str]: if self.src: yield self.src
def __str__(self) -> str: return self.src def __repr__(self) -> str: parts = [] if self.name: parts.append(f'name={self.name!r}') if self.src: parts.append(f'src={self.src!r}') if (self.src and self.size != RuleSize(1, 1)) or (not self.src and self.size != RuleSize(0, 0)): parts.append(f'size={self.size!r}') if self.immutable: parts.append(f'immutable={self.immutable}') return f'{self.__class__.__name__}({", ".join(parts)})' def _dbg_(self) -> str: return f'{self.name or ""}{":" if self.name else ""}{self.src!r}{" (immutable)" if self.immutable else ""}' def __deepcopy__(self, memo: dict[int, Any]) -> UnlexerRule: return UnlexerRule(name=deepcopy(self.name, memo), src=deepcopy(self.src, memo), size=deepcopy(self.size, memo), immutable=deepcopy(self.immutable, memo))
[docs] class UnparserRuleQuantifier(ParentRule): """ Tree node representing the root of a quantified sub-tree. It can have one or more :class:`UnparserRuleQuantified` children. """ __slots__ = ('idx', 'start', 'stop') def __init__(self, *, idx: int, start: int, stop: int | float, children: list[Rule] | None = None) -> None: """ :param idx: Index of the quantifier in the parent rule. :param start: Minimum number of expected items in the sub-tree. :param stop: Maximum number of expected items in the sub-tree. :param children: Children of the quantifier (default: no children). """ super().__init__(name='', children=children) self.idx: int = idx #: Index of the quantifier in the parent rule. self.start: int = start #: Minimum number of expected items in the sub-tree. self.stop: int | float = stop #: Maximum number of expected items in the sub-tree.
[docs] def equals(self, other: Any) -> bool: return super().equals(other) and self.idx == other.idx and self.start == other.start and self.stop == other.stop
def __repr__(self) -> str: parts = [ f'idx={self.idx}', f'start={self.start}', f'stop={self.stop}', ] if self.children: parts.append(self._repr_children()) return f'{self.__class__.__name__}({", ".join(parts)})' def _dbg_(self) -> str: return f'{self.__class__.__name__}:[{self.idx}]{self._dbg_children()}' def __deepcopy__(self, memo: dict[int, Any]) -> UnparserRuleQuantifier: return UnparserRuleQuantifier(idx=deepcopy(self.idx, memo), start=deepcopy(self.start, memo), stop=deepcopy(self.stop, memo), children=[deepcopy(child, memo) for child in self.children])
[docs] class UnparserRuleQuantified(ParentRule): """ Tree node representing a single instance of quantified sub-tree. It can have one or more :class:`UnparserRule`, :class:`UnlexerRule`, :class:`UnparserRuleQuantifier` or :class:`UnparserRuleAlternative` children. """ def __init__(self, *, children: list[Rule] | None = None) -> None: """ :param children: Children of the quantified rule (default: no children). """ super().__init__(name='', children=children) def __repr__(self) -> str: return f'{self.__class__.__name__}({self._repr_children() if self.children else ""})' def _dbg_(self) -> str: return f'{self.__class__.__name__}{self._dbg_children()}' def __deepcopy__(self, memo: dict[int, Any]) -> UnparserRuleQuantified: return UnparserRuleQuantified(children=[deepcopy(child, memo) for child in self.children])
[docs] class UnparserRuleAlternative(ParentRule): """ Tree node representing a sub-tree of an alternative. It can have zero or more :class:`UnparserRule`, :class:`UnlexerRule`, :class:`UnparserRuleQuantifier` or :class:`UnparserRuleAlternative` children. """ __slots__ = ('alt_idx', 'idx') def __init__(self, *, alt_idx: int, idx: int, children: list[Rule] | None = None) -> None: """ :param alt_idx: Index of the alternation in the parent rule. :param idx: Index of the alternative in the parent alternation. :param children: Children of the alternative (default: no children). """ super().__init__(name='', children=children) self.alt_idx: int = alt_idx #: Index of the alternation in the parent rule. self.idx: int = idx #: Index of the alternative in the parent alternation.
[docs] def equals(self, other: Any) -> bool: return super().equals(other) and self.alt_idx == other.alt_idx and self.idx == other.idx
def __repr__(self) -> str: parts = [ f'alt_idx={self.alt_idx}', f'idx={self.idx}', ] if self.children: parts.append(self._repr_children()) return f'{self.__class__.__name__}({", ".join(parts)})' def _dbg_(self) -> str: return f'{self.__class__.__name__}:[{self.alt_idx}/{self.idx}]{self._dbg_children()}' def __deepcopy__(self, memo: dict[int, Any]) -> UnparserRuleAlternative: return UnparserRuleAlternative(alt_idx=deepcopy(self.alt_idx, memo), idx=deepcopy(self.idx, memo), children=[deepcopy(child, memo) for child in self.children])