pytermgui.highlighters

This module provides the Highlighter class, and some pre-configured instances.

  1"""This module provides the `Highlighter` class, and some pre-configured instances."""
  2
  3from __future__ import annotations
  4
  5import builtins
  6import keyword
  7import re
  8from dataclasses import dataclass, field
  9from functools import lru_cache
 10from typing import TYPE_CHECKING, Callable, Generator, Match, Pattern, Protocol
 11
 12from .markup import Token, consume_tag, escape
 13from .regex import RE_MARKUP
 14
 15if TYPE_CHECKING:
 16    from .fancy_repr import FancyYield
 17
 18__all__ = [
 19    "Highlighter",
 20    "RegexHighlighter",
 21    "highlight_tim",
 22    "highlight_python",
 23]
 24
 25
 26class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
 27    """The protocol for highlighters."""
 28
 29    def __call__(self, text: str, cache: bool = True) -> str:
 30        """Highlights the given text.
 31
 32        Args:
 33            text: The text to highlight.
 34            cache: If set (default), results will be stored, keyed by their respective
 35                inputs, and retrieved the next time the same key is given.
 36        """
 37
 38
 39@dataclass
 40class RegexHighlighter:
 41    """A class to highlight strings using regular expressions.
 42
 43    This class must be provided with a list of styles. These styles are really just a
 44    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
 45    in the instance use the same prefix, it can be given under the `prefix` key and
 46    ommitted from the style names.
 47
 48    On construction, the instance will combine all of its patterns into a monster regex
 49    including named capturing groups. The general format is something like:
 50
 51        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
 52
 53    Calling this instance will then replace all matches, going in the order of
 54    definition, with style-injected versions. These follow the format:
 55
 56        [{prefix?}{name}]{content}[/{prefix}{name}]
 57
 58    Oddities to keep in mind:
 59    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
 60        groups cannot match the same text.
 61    - Because of how capturing groups work, everything within the patterns will be
 62        matched. To look for context around a match, look-around assertions can be used.
 63    """
 64
 65    styles: list[tuple[str, str]]
 66    """A list of tuples of (style_alias, pattern_str)."""
 67
 68    prefix: str = ""
 69    """Some string to insert before each style alias."""
 70
 71    pre_formatter: Callable[[str], str] | None = None
 72    """A callable that formats the input string, before any highlighting is done to it."""
 73
 74    match_formatter: Callable[[Match, str], str] | None = None
 75    """A callable of (match, content) that gets called on every match.
 76
 77    Its return value will be used as the content that the already set highlighting will apply
 78    to. Useful to trim text, or apply other transformations before inserting it back.
 79    """
 80
 81    re_flags: int = 0
 82    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""
 83
 84    _pattern: Pattern = field(init=False)
 85    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)
 86
 87    def __post_init__(self) -> None:
 88        """Combines all styles into one pattern."""
 89
 90        pattern = ""
 91        names: list[str] = []
 92        for name, ptrn in self.styles:
 93            pattern += f"(?P<{name}>{ptrn})|"
 94            names.append(name)
 95
 96        pattern = pattern[:-1]
 97
 98        self._pattern = re.compile(pattern, flags=self.re_flags)
 99
100    def __call__(self, text: str, cache: bool = True) -> str:
101        """Highlights the given text, using the combined regex pattern."""
102
103        if self.pre_formatter is not None:
104            text = self.pre_formatter(text)
105
106        if cache and text in self._highlight_cache:
107            return self._highlight_cache[text]
108
109        cache_key = text
110
111        def _insert_style(matchobj: Match) -> str:
112            """Returns the match inserted into a markup style."""
113
114            groups = matchobj.groupdict()
115
116            name = matchobj.lastgroup
117            content = groups.get(str(name), None)
118
119            if self.match_formatter is not None:
120                content = self.match_formatter(matchobj, content)
121
122                if content == "":
123                    return ""
124
125            tag = f"{self.prefix}{name}"
126            style = f"[{tag}]{{}}[/{tag}]"
127
128            return style.format(content)
129
130        text = self._pattern.sub(_insert_style, text)
131        self._highlight_cache[cache_key] = text
132
133        return text
134
135    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
136        """Yields some fancy looking repr text."""
137
138        preview = self("highlight_python()") + "\x1b[0m"
139        pattern = self._pattern.pattern
140
141        if len(pattern) > 40:
142            pattern = pattern[:38] + "..."
143
144        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
145        yield {"text": str(preview), "highlight": False}
146
147        yield ">"
148
149
def _highlight_tim_uncached(txt: str) -> str:
    """Builds the highlighted form of some TIM text, without any caching.

    Every match of `RE_MARKUP` is re-emitted as a bracketed group containing the
    `prettified_markup` of each of its tags. Plain text between matches is
    prefixed with the markup of all tokens that are active at that point.

    Args:
        txt: The TIM text to highlight.

    Returns:
        The highlighted text.
    """

    output = ""
    cursor = 0
    active_tokens: list[Token] = []

    def _get_active_markup() -> str:
        """Returns the currently active tokens as one markup group, or ''."""

        active_markup = " ".join(tkn.markup for tkn in active_tokens)

        if active_markup == "":
            return ""

        return f"[{active_markup}]"

    for matchobj in RE_MARKUP.finditer(txt):
        start, end = matchobj.span()

        # Plain text sits between the previous tag group and this one.
        if cursor < start:
            # Close the previously opened tag group first.
            if cursor > 0:
                output += "]"

            output += _get_active_markup()
            output += f"{txt[cursor:start]}[/]"

        *_, tags = matchobj.groups()

        output += "["
        for tag in tags.split():
            token = consume_tag(tag)
            output += f"{token.prettified_markup} "

            if Token.is_clear(token):
                # Drop every active token that this clearer targets.
                active_tokens = [
                    tkn for tkn in active_tokens if not token.targets(tkn)
                ]

            else:
                active_tokens.append(token)

        # Remove the separator left behind by the last tag.
        output = output.rstrip()
        cursor = end

    # Emit whatever follows the last tag group. Comparing against `len(txt)`
    # (and not `len(txt) - 1`) keeps a single trailing character from being lost.
    if cursor < len(txt):
        if cursor > 0:
            output += "]"

        output += _get_active_markup()
        output += f"{txt[cursor:]}"

        if active_tokens:
            output += "[/]"

    # A tag group at the very end of the text is still left open at this point.
    if output.count("[") != output.count("]"):
        output += "]"

    return output


# Created once at import time, so results persist between `highlight_tim` calls.
_highlight_tim_cached = lru_cache(1048)(_highlight_tim_uncached)


def highlight_tim(text: str, cache: bool = True) -> str:
    """Highlights some TIM code.

    Args:
        text: The TIM text to highlight.
        cache: If set (default), results are stored in a module-wide LRU cache keyed
            by `text`, and retrieved the next time the same text is given. When
            unset, the cache is bypassed completely.

    Returns:
        The highlighted text.
    """

    if cache:
        return _highlight_tim_cached(text)

    return _highlight_tim_uncached(text)
214
215
# Alternations of non-capturing groups, one per name, to be embedded into the
# `builtin` and `keyword` patterns below.
_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(
    f"(?:{keyw})" for keyw in [*keyword.kwlist, "builtin", "function", "module"]
)

# Pre-configured highlighter for Python source. The input is passed through
# `escape` first, and every style alias is prefixed with `code.`.
highlight_python = RegexHighlighter(
    pre_formatter=escape,
    prefix="code.",
    styles=[
        ("multiline_str", r"([frbu]*)\"{3}([\s\S]*?)(?<!\\)\"{3}"),
        (
            "str",
            r"([frbu]*(\".*?(?<!\\)\")|(\'.*?(?<!\\)\'))",
        ),
        ("comment", "(#.*)"),
        # No quantifier after the alternation: a trailing `+` would bind to the last
        # alternative only, and match things like `modulemodule` as a keyword. The
        # empty group is kept so the numbering of the following groups is unchanged.
        ("keyword", rf"\b(?<![\.\-])()({_KEYWORD_NAMES})\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.=]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"(?<=\b)((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural type describing any callable usable as a highlighter.

    Anything callable as `(text, cache=True) -> str` satisfies this protocol.
    """

    def __call__(self, text: str, cache: bool = True) -> str:
        """Returns a highlighted version of the given text.

        Args:
            text: The text to highlight.
            cache: When set (the default), results are stored keyed by their
                respective inputs, and handed back the next time the same input
                is given.
        """

The protocol for highlighters.

Highlighter(*args, **kwargs)
def _no_init_or_replace_init(self, *args, **kwargs):
    """Stand-in `__init__` that rejects protocols and replaces itself on other classes.

    If the class of `self` is flagged as a protocol, instantiation is refused.
    Otherwise the MRO is searched for the first `__init__` that is not this
    function; it is installed on the class and then called with the given
    arguments.

    Raises:
        TypeError: If `type(self)._is_protocol` is truthy.
    """
    cls = type(self)

    if cls._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # Already using a custom `__init__`. No need to calculate correct
    # `__init__` to call. This can lead to RecursionError. See bpo-45121.
    if cls.__init__ is not _no_init_or_replace_init:
        return

    # Initially, `__init__` of a protocol subclass is set to `_no_init_or_replace_init`.
    # The first instantiation of the subclass will call `_no_init_or_replace_init` which
    # searches for a proper new `__init__` in the MRO. The new `__init__`
    # replaces the subclass' old `__init__` (ie `_no_init_or_replace_init`). Subsequent
    # instantiation of the protocol subclass will thus use the new
    # `__init__` and no longer call `_no_init_or_replace_init`.
    for base in cls.__mro__:
        # Classes without their own `__init__` fall back to the sentinel and are skipped.
        init = base.__dict__.get('__init__', _no_init_or_replace_init)
        if init is not _no_init_or_replace_init:
            cls.__init__ = init
            break
    else:
        # should not happen
        cls.__init__ = object.__init__

    cls.__init__(self, *args, **kwargs)
@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    - Style names are used as regex group names, so they must be valid Python
        identifiers.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern of alternated, named groups."""

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is first passed through `pre_formatter`,
                if one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. When unset, the
                cache is neither read nor written.

        Returns:
            The text with every match wrapped as `[{prefix}{name}]...[/{prefix}{name}]`.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # No named group took part in the match (e.g. no styles were defined, so
            # the pattern is empty); leave the matched text untouched instead of
            # injecting a bogus `[None]` tag.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

                # An empty result from the formatter removes the match entirely.
                if content == "":
                    return ""

            tag = f"{self.prefix}{name}"

            # Built directly rather than through `str.format`, so that braces in the
            # prefix cannot be mistaken for replacement fields.
            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repr text.

        The output consists of the (possibly truncated) combined pattern, followed by
        a preview of this instance highlighting a sample string.
        """

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"

A class to highlight strings using regular expressions.

This class must be provided with a list of styles. These styles are really just tuples of a markup alias name and its associated RE pattern. If all aliases in the instance use the same prefix, it can be given under the prefix key and omitted from the style names.

On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:

(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:

[{prefix?}{name}]{content}[/{prefix}{name}]

Oddities to keep in mind:

  • Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
  • Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
RegexHighlighter( styles: list[tuple[str, str]], prefix: str = '', pre_formatter: Optional[Callable[[str], str]] = None, match_formatter: Optional[Callable[[Match, str], str]] = None, re_flags: int = 0)
styles: list[tuple[str, str]]

A list of tuples of (style_alias, pattern_str).

prefix: str = ''

Some string to insert before each style alias.

pre_formatter: Optional[Callable[[str], str]] = None

A callable that formats the input string, before any highlighting is done to it.

match_formatter: Optional[Callable[[Match, str], str]] = None

A callable of (match, content) that gets called on every match.

Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.

re_flags: int = 0

All regex flags to apply when compiling the generated pattern, OR-d (|) together.

def _highlight_tim_uncached(txt: str) -> str:
    """Builds the highlighted form of some TIM text, without any caching.

    Every match of `RE_MARKUP` is re-emitted as a bracketed group containing the
    `prettified_markup` of each of its tags. Plain text between matches is
    prefixed with the markup of all tokens that are active at that point.

    Args:
        txt: The TIM text to highlight.

    Returns:
        The highlighted text.
    """

    output = ""
    cursor = 0
    active_tokens: list[Token] = []

    def _get_active_markup() -> str:
        """Returns the currently active tokens as one markup group, or ''."""

        active_markup = " ".join(tkn.markup for tkn in active_tokens)

        if active_markup == "":
            return ""

        return f"[{active_markup}]"

    for matchobj in RE_MARKUP.finditer(txt):
        start, end = matchobj.span()

        # Plain text sits between the previous tag group and this one.
        if cursor < start:
            # Close the previously opened tag group first.
            if cursor > 0:
                output += "]"

            output += _get_active_markup()
            output += f"{txt[cursor:start]}[/]"

        *_, tags = matchobj.groups()

        output += "["
        for tag in tags.split():
            token = consume_tag(tag)
            output += f"{token.prettified_markup} "

            if Token.is_clear(token):
                # Drop every active token that this clearer targets.
                active_tokens = [
                    tkn for tkn in active_tokens if not token.targets(tkn)
                ]

            else:
                active_tokens.append(token)

        # Remove the separator left behind by the last tag.
        output = output.rstrip()
        cursor = end

    # Emit whatever follows the last tag group. Comparing against `len(txt)`
    # (and not `len(txt) - 1`) keeps a single trailing character from being lost.
    if cursor < len(txt):
        if cursor > 0:
            output += "]"

        output += _get_active_markup()
        output += f"{txt[cursor:]}"

        if active_tokens:
            output += "[/]"

    # A tag group at the very end of the text is still left open at this point.
    if output.count("[") != output.count("]"):
        output += "]"

    return output


# Created once at import time, so results persist between `highlight_tim` calls.
_highlight_tim_cached = lru_cache(1048)(_highlight_tim_uncached)


def highlight_tim(text: str, cache: bool = True) -> str:
    """Highlights some TIM code.

    Args:
        text: The TIM text to highlight.
        cache: If set (default), results are stored in a module-wide LRU cache keyed
            by `text`, and retrieved the next time the same text is given. When
            unset, the cache is bypassed completely.

    Returns:
        The highlighted text.
    """

    if cache:
        return _highlight_tim_cached(text)

    return _highlight_tim_uncached(text)

Highlights some TIM code.

highlight_python = RegexHighlighter(styles=[('multiline_str', '([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3}'), ('str', '([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\'))'), ('comment', '(#.*)'), ('keyword', '\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:True)|(?:and)|(?:as)|(?:assert)|(?:async)|(?:await)|(?:break)|(?:class)|(?:continue)|(?:def)|(?:del)|(?:elif)|(?:else)|(?:except)|(?:finally)|(?:for)|(?:from)|(?:global)|(?:if)|(?:import)|(?:in)|(?:is)|(?:lambda)|(?:nonlocal)|(?:not)|(?:or)|(?:pass)|(?:raise)|(?:return)|(?:try)|(?:while)|(?:with)|(?:yield)|(?:builtin)|(?:function)|(?:module)+)\\b'), ('builtin', '\\b(?<!\\.)((?:ArithmeticError)|(?:AssertionError)|(?:AttributeError)|(?:BaseException)|(?:BlockingIOError)|(?:BrokenPipeError)|(?:BufferError)|(?:BytesWarning)|(?:ChildProcessError)|(?:ConnectionAbortedError)|(?:ConnectionError)|(?:ConnectionRefusedError)|(?:ConnectionResetError)|(?:DeprecationWarning)|(?:EOFError)|(?:Ellipsis)|(?:EncodingWarning)|(?:EnvironmentError)|(?:Exception)|(?:False)|(?:FileExistsError)|(?:FileNotFoundError)|(?:FloatingPointError)|(?:FutureWarning)|(?:GeneratorExit)|(?:IOError)|(?:ImportError)|(?:ImportWarning)|(?:IndentationError)|(?:IndexError)|(?:InterruptedError)|(?:IsADirectoryError)|(?:KeyError)|(?:KeyboardInterrupt)|(?:LookupError)|(?:MemoryError)|(?:ModuleNotFoundError)|(?:NameError)|(?:None)|(?:NotADirectoryError)|(?:NotImplemented)|(?:NotImplementedError)|(?:OSError)|(?:OverflowError)|(?:PendingDeprecationWarning)|(?:PermissionError)|(?:ProcessLookupError)|(?:RecursionError)|(?:ReferenceError)|(?:ResourceWarning)|(?:RuntimeError)|(?:RuntimeWarning)|(?:StopAsyncIteration)|(?:StopIteration)|(?:SyntaxError)|(?:SyntaxWarning)|(?:SystemError)|(?:SystemExit)|(?:TabError)|(?:TimeoutError)|(?:True)|(?:TypeError)|(?:UnboundLocalError)|(?:UnicodeDecodeError)|(?:UnicodeEncodeError)|(?:UnicodeError)|(?:UnicodeTranslateError)|(?:UnicodeWarning)|(?:UserWarning)|(?:ValueError)|(?:Warning)|(?:ZeroDivisionError)|(?:__build_class__)|(?:__debug__
)|(?:__doc__)|(?:__import__)|(?:__loader__)|(?:__name__)|(?:__package__)|(?:__spec__)|(?:abs)|(?:aiter)|(?:all)|(?:anext)|(?:any)|(?:ascii)|(?:bin)|(?:bool)|(?:breakpoint)|(?:bytearray)|(?:bytes)|(?:callable)|(?:chr)|(?:classmethod)|(?:compile)|(?:complex)|(?:copyright)|(?:credits)|(?:delattr)|(?:dict)|(?:dir)|(?:divmod)|(?:enumerate)|(?:eval)|(?:exec)|(?:exit)|(?:filter)|(?:float)|(?:format)|(?:frozenset)|(?:getattr)|(?:globals)|(?:hasattr)|(?:hash)|(?:help)|(?:hex)|(?:id)|(?:input)|(?:int)|(?:isinstance)|(?:issubclass)|(?:iter)|(?:len)|(?:license)|(?:list)|(?:locals)|(?:map)|(?:max)|(?:memoryview)|(?:min)|(?:next)|(?:object)|(?:oct)|(?:open)|(?:ord)|(?:pow)|(?:print)|(?:property)|(?:quit)|(?:range)|(?:repr)|(?:reversed)|(?:round)|(?:set)|(?:setattr)|(?:slice)|(?:sorted)|(?:staticmethod)|(?:str)|(?:sum)|(?:super)|(?:tuple)|(?:type)|(?:vars)|(?:zip))\\b'), ('identifier', '([^ \\.=]+)(?=\\()'), ('global', '(?<=\\b)([A-Z]\\w+)'), ('number', '(?<=\\b)((?:0x[\\da-zA-Z]+)|(?:\\d+))')], prefix='code.', pre_formatter=<function escape>, match_formatter=None, re_flags=0, _pattern=re.compile('(?P<multiline_str>([frbu]*)\\"{3}([\\s\\S]*?)(?<!\\\\)\\"{3})|(?P<str>([frbu]*(\\".*?(?<!\\\\)\\")|(\\\'.*?(?<!\\\\)\\\')))|(?P<comment>(#.*))|(?P<keyword>\\b(?<![\\.\\-])()((?:False)|(?:None)|(?:Tru), _highlight_cache={})