Skip to content

highlighters

This module provides the Highlighter class, and some pre-configured instances.

Highlighter

Bases: Protocol

The protocol for highlighters.

Source code in pytermgui/highlighters.py
26
27
28
29
30
31
32
33
34
35
36
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """The protocol for highlighters.

    A highlighter is any callable that accepts a string (plus an optional `cache`
    flag) and returns a string. Being a `Protocol`, it is matched structurally by
    type checkers; implementations do not need to inherit from this class.
    """

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text.

        Args:
            text: The text to highlight.
            cache: If set (default), results will be stored, keyed by their respective
                inputs, and retrieved the next time the same key is given.

        Returns:
            The highlighted text.
        """

__call__(text, cache=True)

Highlights the given text.

Parameters:

Name Type Description Default
text str

The text to highlight.

required
cache bool

If set (default), results will be stored, keyed by their respective inputs, and retrieved the next time the same key is given.

True
Source code in pytermgui/highlighters.py
29
30
31
32
33
34
35
36
def __call__(self, text: str, cache: bool = True) -> str:
    """Highlights the given text.

    Args:
        text: The text to highlight.
        cache: If set (default), results will be stored, keyed by their respective
            inputs, and retrieved the next time the same key is given.

    Returns:
        The highlighted text.
    """

RegexHighlighter dataclass

A class to highlight strings using regular expressions.

This class must be provided with a list of styles. These styles are really just a tuple of the markup alias name, and their associated RE patterns. If all aliases in the instance use the same prefix, it can be given under the prefix key and omitted from the style names.

On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:

(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:

[{prefix?}{name}]{content}[/{prefix}{name}]

Oddities to keep in mind: - Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text. - Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.

Source code in pytermgui/highlighters.py
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    - An instance with no styles compiles to an empty pattern; calling it returns the
        (pre-formatted) text unchanged.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern.

        Every `(name, pattern)` style becomes a named group, and the groups are joined
        as alternatives in definition order. The result is compiled with `re_flags`.
        """

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                when one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. If unset, the cache
                is neither read nor written.

        Returns:
            The text with every match wrapped as `[{prefix}{name}]...[/{prefix}{name}]`.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # No named group took part in the match. This only happens when the
            # combined pattern is empty (no styles), so leave the text untouched
            # instead of emitting a bogus `[None]` tag.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

                # An empty result from the formatter drops the match entirely.
                if content == "":
                    return ""

            tag = f"{self.prefix}{name}"

            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        # Only remember results when the caller opted into caching.
        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repr text.

        The output consists of a header string containing the class name and the
        combined pattern (shortened when longer than 40 characters), a dictionary
        holding a highlighted preview with `highlight` set to False, and a closing `>`.
        """

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"

match_formatter: Callable[[Match, str], str] | None = None class-attribute instance-attribute

A callable of (match, content) that gets called on every match.

Its return value will be used as the content that the already set highlighting will apply to. Useful to trim text, or apply other transformations before inserting it back.

pre_formatter: Callable[[str], str] | None = None class-attribute instance-attribute

A callable that formats the input string, before any highlighting is done to it.

prefix: str = '' class-attribute instance-attribute

Some string to insert before each style alias.

re_flags: int = 0 class-attribute instance-attribute

All regex flags to apply when compiling the generated pattern, OR-d (|) together.

styles: list[tuple[str, str]] instance-attribute

A list of tuples of (style_alias, pattern_str).

__call__(text, cache=True)

Highlights the given text, using the combined regex pattern.

Source code in pytermgui/highlighters.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def __call__(self, text: str, cache: bool = True) -> str:
    """Highlights the given text, using the combined regex pattern.

    Args:
        text: The text to highlight. It is passed through `pre_formatter` first,
            when one is set.
        cache: If set (default), the result is looked up in, and stored into, the
            instance's cache, keyed by the pre-formatted text. If unset, the cache
            is neither read nor written.

    Returns:
        The text with every match wrapped as `[{prefix}{name}]...[/{prefix}{name}]`.
    """

    if self.pre_formatter is not None:
        text = self.pre_formatter(text)

    if cache and text in self._highlight_cache:
        return self._highlight_cache[text]

    def _insert_style(matchobj: Match) -> str:
        """Returns the match inserted into a markup style."""

        name = matchobj.lastgroup

        # No named group took part in the match. This only happens when the
        # combined pattern is empty, so leave the text untouched instead of
        # emitting a bogus `[None]` tag.
        if name is None:
            return matchobj.group(0)

        content = matchobj.group(name)

        if self.match_formatter is not None:
            content = self.match_formatter(matchobj, content)

            # An empty result from the formatter drops the match entirely.
            if content == "":
                return ""

        tag = f"{self.prefix}{name}"

        return f"[{tag}]{content}[/{tag}]"

    highlighted = self._pattern.sub(_insert_style, text)

    # Only remember results when the caller opted into caching.
    if cache:
        self._highlight_cache[text] = highlighted

    return highlighted

__fancy_repr__()

Yields some fancy looking repr text.

Source code in pytermgui/highlighters.py
135
136
137
138
139
140
141
142
143
144
145
146
147
def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
    """Produces the parts of a fancy repr for this highlighter.

    Yields, in order: a header string with the class name and the combined pattern
    (cut to 38 characters plus an ellipsis when it is longer than 40), a dictionary
    carrying a highlighted sample with `highlight` set to False, and a closing `>`.
    """

    # Highlight a sample through the instance itself, then append `\x1b[0m`.
    sample = self("highlight_python()") + "\x1b[0m"

    shown = self._pattern.pattern
    too_long = len(shown) > 40
    if too_long:
        shown = f"{shown[:38]}..."

    class_name = type(self).__name__
    yield f"<{class_name} pattern: {shown!r}, preview: "

    yield {"text": str(sample), "highlight": False}

    yield ">"

__post_init__()

Combines all styles into one pattern.

Source code in pytermgui/highlighters.py
87
88
89
90
91
92
93
94
95
96
97
98
def __post_init__(self) -> None:
    """Combines all styles into one pattern.

    Every `(name, pattern)` style becomes a named group, `(?P<name>pattern)`, and the
    groups are joined as alternatives in definition order. The result is compiled
    with `re_flags` and stored on the instance as `_pattern`. With no styles, the
    compiled pattern is the empty pattern.
    """

    pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

    self._pattern = re.compile(pattern, flags=self.re_flags)

highlight_tim(text, cache=True)

Highlights some TIM code.

Source code in pytermgui/highlighters.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
@lru_cache(1048)
def _highlight_tim_cached(txt: str) -> str:
    """Builds the highlighted version of some TIM code.

    This lives at module level so that its LRU cache persists between calls to
    `highlight_tim`; a cached function defined inside `highlight_tim` would be
    re-created, with an empty cache, on every call.

    NOTE(review): caching across calls assumes `consume_tag` returns equivalent
    tokens for the same tag string every time -- confirm.
    """

    output = ""
    cursor = 0
    active_tokens: list[Token] = []

    def _get_active_markup() -> str:
        """Returns one markup tag group holding all currently active tokens."""

        active_markup = " ".join(tkn.markup for tkn in active_tokens)

        if active_markup == "":
            return ""

        return f"[{active_markup}]"

    for matchobj in RE_MARKUP.finditer(txt):
        start, end = matchobj.span()

        # Plain text between the previous tag group and this one.
        if cursor < start:
            # Close the bracket left open by the previous tag group.
            if cursor > 0:
                output += "]"

            output += _get_active_markup()
            output += f"{txt[cursor:start]}[/]"

        *_, tags = matchobj.groups()

        # The tag group is opened here; its closing bracket is added once the
        # following text (or the end of the input) is handled.
        output += "["
        for tag in tags.split():
            token = consume_tag(tag)
            output += f"{token.prettified_markup} "

            if Token.is_clear(token):
                # Drop every active token the clearer targets.
                active_tokens = [
                    tkn for tkn in active_tokens if not token.targets(tkn)
                ]

            else:
                active_tokens.append(token)

        output = output.rstrip()
        cursor = end

    # Text left after the last tag group. Comparing against `len(txt)` (and not
    # `len(txt) - 1`) keeps a single trailing character from being dropped.
    if cursor < len(txt):
        if cursor > 0:
            output += "]"

        output += _get_active_markup()
        output += f"{txt[cursor:]}"

        if len(active_tokens) > 0:
            output += "[/]"

    # Balance the bracket of a tag group that ended the input.
    if output.count("[") != output.count("]"):
        output += "]"

    return output


def highlight_tim(text: str, cache: bool = True) -> str:
    """Highlights some TIM code.

    Args:
        text: The TIM markup text to highlight.
        cache: If set (default), results are stored in an LRU cache keyed by `text`
            and retrieved the next time the same text is given. If unset, the
            result is computed without reading or writing that cache.

    Returns:
        The highlighted text.
    """

    if cache:
        return _highlight_tim_cached(text)

    return _highlight_tim_cached.__wrapped__(text)