Skip to content

daft.functions.regexp_count#

regexp_count #

regexp_count(expr: Expression, pattern: str | Expression) -> Expression

Counts the number of times a regex pattern appears in a string.

Parameters:

Name Type Description Default
expr Expression

The string expression to search for matches.

required
pattern str | Expression

The regex pattern to search for as a string or as a column to pick values from.

required

Returns:

Name Type Description
Expression Expression

A UInt64 expression with the count of regex matches for each string.

Examples:

1
2
3
4
>>> import daft
>>> from daft.functions import regexp_count
>>> df = daft.from_pydict({"x": ["hello world", "foo bar baz", "test123test456"]})
>>> df.with_column("word_count", regexp_count(df["x"], r"\w+")).collect()
╭────────────────┬────────────╮
│ x              ┆ word_count │
│ ---            ┆ ---        │
│ String         ┆ UInt64     │
╞════════════════╪════════════╡
│ hello world    ┆ 2          │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ foo bar baz    ┆ 3          │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ test123test456 ┆ 1          │
╰────────────────┴────────────╯
(Showing first 3 of 3 rows)
1
>>> df.with_column("digit_count", regexp_count(df["x"], r"\d+")).collect()
╭────────────────┬─────────────╮
│ x              ┆ digit_count │
│ ---            ┆ ---         │
│ String         ┆ UInt64      │
╞════════════════╪═════════════╡
│ hello world    ┆ 0           │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ foo bar baz    ┆ 0           │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ test123test456 ┆ 2           │
╰────────────────┴─────────────╯
(Showing first 3 of 3 rows)
Source code in daft/functions/str.py
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
def regexp_count(
    expr: Expression,
    pattern: str | Expression,
) -> Expression:
    r"""Count how many times a regex pattern matches within each string.

    This is a thin wrapper that forwards both arguments to the builtin scalar
    function registered under the name ``"regexp_count"``.

    Args:
        expr: The string expression to search.
        pattern: The regex to look for, given either as a literal string or as
            an expression (column) supplying the pattern values.

    Returns:
        Expression: A UInt64 expression holding the number of regex matches
        found in each string.

    Note:
        As the examples show, a quantified pattern such as ``\d+`` counts each
        run as a single match (``"test123test456"`` yields 2, not 6).

    Examples:
        >>> import daft
        >>> from daft.functions import regexp_count
        >>> df = daft.from_pydict({"x": ["hello world", "foo bar baz", "test123test456"]})
        >>> df.with_column("word_count", regexp_count(df["x"], r"\w+")).collect()
        ╭────────────────┬────────────╮
        │ x              ┆ word_count │
        │ ---            ┆ ---        │
        │ String         ┆ UInt64     │
        ╞════════════════╪════════════╡
        │ hello world    ┆ 2          │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ foo bar baz    ┆ 3          │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ test123test456 ┆ 1          │
        ╰────────────────┴────────────╯
        <BLANKLINE>
        (Showing first 3 of 3 rows)

        >>> df.with_column("digit_count", regexp_count(df["x"], r"\d+")).collect()
        ╭────────────────┬─────────────╮
        │ x              ┆ digit_count │
        │ ---            ┆ ---         │
        │ String         ┆ UInt64      │
        ╞════════════════╪═════════════╡
        │ hello world    ┆ 0           │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ foo bar baz    ┆ 0           │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
        │ test123test456 ┆ 2           │
        ╰────────────────┴─────────────╯
        <BLANKLINE>
        (Showing first 3 of 3 rows)

    """
    # Name under which the builtin scalar function is looked up.
    builtin_name = "regexp_count"
    counted = Expression._call_builtin_scalar_fn(builtin_name, expr, pattern)
    return counted