Skip to content

regex

This module contains all of the regex-related names and utilities.

escape_markup(text)

Escapes any potential markup to avoid double-parsing.

Use this when treating already parsed markup.

Source code in pytermgui/regex.py
77
78
79
80
81
82
83
84
85
86
87
88
def escape_markup(text: str) -> str:
    """Backslash-escapes markup found in `text` so it is not parsed again.

    Intended for text whose markup has already been parsed once.

    Args:
        text: The text that may contain markup.

    Returns:
        A copy of `text` where, inside every `RE_MARKUP` match, each `[` of the
        first captured group has been replaced by `\\[`.
    """

    def _backslash_brackets(match: Match) -> str:
        # Only the first captured group is used; the rest are discarded.
        tag_text, *_unused = match.groups()
        escaped = tag_text.replace("[", r"\[")

        return escaped

    return RE_MARKUP.sub(_backslash_brackets, text)

has_open_sequence(text) cached

Figures out if the given text has any unclosed ANSI sequences.

It supports standard SGR (\x1b[1mHello), OSC (\x1b]0;title\x1b\\) and Kitty APC codes (\x1b_Garguments;hex_data\x1b\\). It also recognizes incorrect syntax; it only considers a tag closed when it is using the right closing sequence, e.g. m or H for SGR, \x1b\\ for OSC and APC types.

Parameters:

Name Type Description Default
text str

The text to test.

required

Returns:

Type Description
bool

True if there is at least one tag that hasn't been closed, False otherwise.

Source code in pytermgui/regex.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
@lru_cache(maxsize=1024)
def has_open_sequence(text: str) -> bool:
    """Figures out if the given text has any unclosed ANSI sequences.

    It supports standard SGR (`\\x1b[1mHello`), OSC (`\\x1b]0;title\\x1b\\\\`) and Kitty APC
    codes (`\\x1b_Garguments;hex_data\\x1b\\\\`). It also recognizes incorrect syntax; it only
    considers a tag closed when it is using the right closing sequence, e.g. `m` or `H` for
    SGR, `\\x1b\\\\` for OSC and APC types.

    The type of a sequence is determined from the prefix of the sequence currently being
    read, so the type of an earlier, already closed sequence never influences which closer
    is accepted for a later one.

    Args:
        text: The text to test.

    Returns:
        True if there is at least one tag that hasn't been closed, False otherwise.
    """

    open_count = 0
    sequence = ""

    for char in text:
        if char == "\x1b":
            open_count += 1
            sequence += char
            continue

        # Characters outside of any sequence are irrelevant.
        if not sequence:
            continue

        # Ignore OSC and APC closers as new openers: the escape character of the
        # `\x1b\\` terminator was counted as an opener above, so undo that here.
        if char == "\\" and sequence[-1] == "\x1b":
            open_count -= 1

        # Classify using the sequence as it was *before* this character, and only
        # from the current sequence, so state cannot leak between sequences.
        is_osc = sequence.startswith("\x1b]")
        is_sgr = sequence.startswith("\x1b[")
        is_apc = sequence.startswith("\x1b_G")

        sequence += char

        if (is_osc or is_apc) and sequence.endswith("\x1b\\"):
            sequence = ""
            open_count -= 1

        elif is_sgr and char in {"m", "H"}:
            sequence = ""
            open_count -= 1

    return bool(sequence) or open_count != 0

real_length(text) cached

Gets the display-length of text.

This length means no ANSI sequences are counted. It is computed as the wcswidth of strip_ansi(text), clamped so that it is never negative.

Parameters:

Name Type Description Default
text str

The text to calculate the length of.

required

Returns:

Type Description
int

The display-length of text.

Source code in pytermgui/regex.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@lru_cache(maxsize=None)
def real_length(text: str) -> int:
    """Gets the display-length of text.

    ANSI sequences are removed with `strip_ansi` first, then the remaining text is
    measured with `wcswidth`. A negative width reported by `wcswidth` is clamped to 0.

    Args:
        text: The text to calculate the length of.

    Returns:
        The display-length of text, never less than 0.
    """

    visible = strip_ansi(text)
    width = wcswidth(visible)

    return max(width, 0)

strip_ansi(text) cached

Removes ANSI sequences from text.

Parameters:

Name Type Description Default
text str

A string or bytes object containing 0 or more ANSI sequences.

required

Returns:

Type Description
str

The text without any ANSI sequences.

Source code in pytermgui/regex.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@lru_cache()
def strip_ansi(text: str) -> str:
    """Removes ANSI sequences from text.

    Objects exposing a `plain` attribute are short-circuited: that attribute is
    returned as-is and no substitution is performed.

    Args:
        text: The text containing 0 or more ANSI sequences, or an object with a
            `plain` attribute.

    Returns:
        The text without any ANSI sequences.
    """

    if not hasattr(text, "plain"):
        # Regular case: drop everything RE_ANSI matches.
        return RE_ANSI.sub("", text)

    return text.plain  # type: ignore

strip_markup(text) cached

Removes markup tags from text.

Parameters:

Name Type Description Default
text str

A string or bytes object containing 0 or more markup tags.

required

Returns:

Type Description
str

The text without any markup tags.

Source code in pytermgui/regex.py
46
47
48
49
50
51
52
53
54
55
56
57
@lru_cache()
def strip_markup(text: str) -> str:
    """Removes markup tags from text.

    Every match of `RE_MARKUP` in `text` is replaced with an empty string.

    Args:
        text: The text containing 0 or more markup tags.

    Returns:
        The text without any markup tags.
    """

    without_tags = RE_MARKUP.sub("", text)

    return without_tags