pytermgui.regex

This module contains all of the regex-related names and utilities.

  1"""This module contains all of the regex-related names and utilities."""
  2
  3import re
  4from functools import lru_cache
  5
   # Terminal hyperlink: ESC ]8;; <payload> ESC \ <text> ESC ]8;; ESC \.
   # Group 1 is the payload after "8;;" (presumably the URI), group 2 the text between
   # the two terminators; neither group may contain a backslash.
  6RE_LINK = re.compile(r"(?:\x1b\]8;;([^\\]*)\x1b\\([^\\]*)\x1b\]8;;\x1b\\)")
   # Alternation of three forms: ESC [ ... m|H (groups 1-2), the RE_LINK pattern
   # (groups 3-4) and ESC _G ... ESC \ (groups 5-6).
  7RE_ANSI_NEW = re.compile(rf"(\x1b\[(.*?)[mH])|{RE_LINK.pattern}|(\x1b_G(.*?)\x1b\\)")
   # Alternation of ESC [ ... m|H, ESC ] ... ESC \ and ESC _G ... ESC \, each capturing
   # only its payload. This is the pattern `strip_ansi` substitutes away.
  8RE_ANSI = re.compile(r"(?:\x1b\[(.*?)[mH])|(?:\x1b\](.*?)\x1b\\)|(?:\x1b_G(.*?)\x1b\\)")
   # "!name" (group 1), optionally followed by a parenthesized argument (group 2).
  9RE_MACRO = re.compile(r"(![a-z0-9_\-]+)(?:\(([\w\/\.?\-=:]+)\))?")
   # A "[...]" tag preceded by any number of backslashes: group 2 is the backslash run,
   # group 3 the text between the brackets. This is the pattern `strip_markup` removes.
 10RE_MARKUP = re.compile(r"((\\*)\[([^\[\]]+)\])")
   # ESC [ <digits> ; <digits> H, where the second number is optional.
 11RE_POSITION = re.compile(r"\x1b\[(\d*?)(?:;(\d*))?H")
   # ESC [ 4 ; <number> ; <number> t -- presumably a pixel-size report; the meaning and
   # order of the two numbers is not visible here, verify against the caller.
 12RE_PIXEL_SIZE = re.compile(r"\x1b\[4;([\d]+);([\d]+)t")
 13
   # A whole string made of 1 to 3 digits (anchored at both ends).
 14RE_256 = re.compile(r"^([\d]{1,3})$")
   # Six hex digits with an optional leading "#"; not anchored.
 15RE_HEX = re.compile(r"#?([0-9a-fA-F]{6})")
   # Three 1-3 digit numbers separated by semicolons; not anchored.
 16RE_RGB = re.compile(r"(\d{1,3};\d{1,3};\d{1,3})")
 17
   # NOTE(review): `has_open_sequence` is defined in this module but not listed here --
   # confirm whether that omission is intentional.
 18__all__ = [
 19    "strip_ansi",
 20    "strip_markup",
 21    "real_length",
 22]
 23
 24
 25@lru_cache()
 26def strip_ansi(text: str) -> str:
 27    """Removes ANSI sequences from text.
 28
 29    Args:
 30        text: A string or bytes object containing 0 or more ANSI sequences.
 31
 32    Returns:
 33        The text without any ANSI sequences.
 34    """
 35
 36    if hasattr(text, "plain"):
 37        return text.plain  # type: ignore
 38
 39    return RE_ANSI.sub("", text)
 40
 41
 42@lru_cache()
 43def strip_markup(text: str) -> str:
 44    """Removes markup tags from text.
 45
 46    Args:
 47        text: A string or bytes object containing 0 or more markup tags.
 48
 49    Returns:
 50        The text without any markup tags.
 51    """
 52
 53    return RE_MARKUP.sub("", text)
 54
 55
 56@lru_cache(maxsize=None)
 57def real_length(text: str) -> int:
 58    """Gets the display-length of text.
 59
 60    This length means no ANSI sequences are counted. This method is a convenience wrapper
 61    for `len(strip_ansi(text))`.
 62
 63    Args:
 64        text: The text to calculate the length of.
 65
 66    Returns:
 67        The display-length of text.
 68    """
 69
 70    return len(strip_ansi(text))
 71
 72
 73@lru_cache(maxsize=1024)
 74def has_open_sequence(text: str) -> bool:
 75    """Figures out if the given text has any unclosed ANSI sequences.
 76
 77    It supports standard SGR (`\\x1b[1mHello`), OSC (`\\x1b[30;2ST\\x1b\\\\`) and Kitty APC codes
 78    (`\x1b_Garguments;hex_data\\x1b\\\\`). It also recognizes incorrect syntax; it only considers
 79    a tag closed when it is using the right closing sequence, e.g. `m` or `H` for SGR, `\\x1b\\\\`
 80    for OSC and APC types.
 81
 82    Args:
 83        text: The text to test.
 84
 85    Returns:
 86        True if there is at least one tag that hasn't been closed, False otherwise.
 87    """
 88
 89    is_osc = False
 90    is_sgr = False
 91    is_apc = False
 92
 93    open_count = 0
 94    sequence = ""
 95
 96    for char in text:
 97        if char == "\x1b":
 98            open_count += 1
 99            sequence += char
100            continue
101
102        if len(sequence) == 0:
103            continue
104
105        # Ignore OSC and APC closers as new openers
106        if char == "\\" and sequence[-1] == "\x1b":
107            open_count -= 1
108
109        is_osc = is_osc or sequence[:2] == "\x1b]"
110        is_sgr = is_sgr or sequence[:2] == "\x1b["
111        is_apc = is_apc or sequence[:3] == "\x1b_G"
112
113        sequence += char
114        if (is_osc or is_apc) and sequence[-2:] == "\x1b\\":
115            sequence = ""
116            open_count -= 1
117
118        elif is_sgr and char in {"m", "H"}:
119            sequence = ""
120            open_count -= 1
121
122    return len(sequence) != 0 or open_count != 0
@lru_cache()
def strip_ansi(text: str) -> str:
@lru_cache()
def strip_ansi(text: str) -> str:
    """Removes ANSI sequences from text.

    Results are memoized with `lru_cache`, so `text` has to be hashable.

    Args:
        text: The string to clean. An object exposing a `plain` attribute is
            accepted as well; that attribute is then returned unchanged.

    Returns:
        The text without any ANSI sequences.
    """

    try:
        # Objects that carry a pre-computed plain version short-circuit the regex.
        return text.plain  # type: ignore
    except AttributeError:
        return RE_ANSI.sub("", text)

Removes ANSI sequences from text.

Args
  • text: A string containing 0 or more ANSI sequences. An object exposing a plain attribute is also accepted; that attribute is returned unchanged.
Returns

The text without any ANSI sequences.

@lru_cache()
def strip_markup(text: str) -> str:
@lru_cache()
def strip_markup(text: str) -> str:
    """Removes markup tags from text.

    Every match of `RE_MARKUP` is deleted from the input. Results are memoized
    with `lru_cache`.

    Args:
        text: A string containing 0 or more markup tags.

    Returns:
        The text without any markup tags.
    """

    without_tags = RE_MARKUP.sub("", text)
    return without_tags

Removes markup tags from text.

Args
  • text: A string containing 0 or more markup tags.
Returns

The text without any markup tags.

@lru_cache(maxsize=None)
def real_length(text: str) -> int:
@lru_cache(maxsize=1024)
def real_length(text: str) -> int:
    """Gets the display-length of text.

    This length means no ANSI sequences are counted. This function is a convenience
    wrapper for `len(strip_ansi(text))`, so the result is a count of characters as
    reported by `len`.

    The cache is bounded: every distinct string passed in becomes a cache key, and an
    unbounded cache would keep all of them alive for the lifetime of the process.

    Args:
        text: The text to calculate the length of.

    Returns:
        The display-length of text.
    """

    return len(strip_ansi(text))

Gets the display-length of text.

This length means no ANSI sequences are counted. This function is a convenience wrapper for len(strip_ansi(text)).

Args
  • text: The text to calculate the length of.
Returns

The display-length of text.