Skip to content

daft.functions.hamming_distance_str#

hamming_distance_str #

hamming_distance_str(left: Expression, right: Expression) -> Expression

Compute the character-level Hamming distance between two strings.

The Hamming distance is the number of positions at which the corresponding characters are different.

Parameters:

Name Type Description Default
left Expression

The left string expression to compare.

required
right Expression

The right string expression to compare against.

required

Returns:

Type Description
Expression

The Hamming distance for each pair of strings. Returns null when either input is null or the two strings have different lengths.

Examples:

1
2
3
4
5
>>> import daft
>>> from daft.functions import hamming_distance_str
>>> df = daft.from_pydict({"x": ["ronald", "ronald", "ronald"], "y": ["ronald", "renuld", "ronaldo"]})
>>> df = df.with_column("distance", hamming_distance_str(df["x"], df["y"]))
>>> df.collect()
╭────────┬─────────┬──────────╮
│ x      ┆ y       ┆ distance │
│ ---    ┆ ---     ┆ ---      │
│ String ┆ String  ┆ Int64    │
╞════════╪═════════╪══════════╡
│ ronald ┆ ronald  ┆ 0        │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ ronald ┆ renuld  ┆ 2        │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ ronald ┆ ronaldo ┆ None     │
╰────────┴─────────┴──────────╯
(Showing first 3 of 3 rows)
Source code in daft/functions/str.py
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
def hamming_distance_str(left: Expression, right: Expression) -> Expression:
    """Count the character positions at which two strings differ.

    This is the character-level Hamming distance: the strings are compared
    position by position, and every position holding different characters
    adds one to the result.

    Args:
        left: String expression supplying the first operand of the comparison.
        right: String expression supplying the second operand of the comparison.

    Returns:
        An expression holding the Hamming distance of each pair of strings.
        The result is null for a pair when either string is null or when the
        two strings do not have the same length.

    Examples:
        >>> import daft
        >>> from daft.functions import hamming_distance_str
        >>> df = daft.from_pydict({"x": ["ronald", "ronald", "ronald"], "y": ["ronald", "renuld", "ronaldo"]})
        >>> df = df.with_column("distance", hamming_distance_str(df["x"], df["y"]))
        >>> df.collect()
        ╭────────┬─────────┬──────────╮
        │ x      ┆ y       ┆ distance │
        │ ---    ┆ ---     ┆ ---      │
        │ String ┆ String  ┆ Int64    │
        ╞════════╪═════════╪══════════╡
        │ ronald ┆ ronald  ┆ 0        │
        ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
        │ ronald ┆ renuld  ┆ 2        │
        ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
        │ ronald ┆ ronaldo ┆ None     │
        ╰────────┴─────────┴──────────╯
        <BLANKLINE>
        (Showing first 3 of 3 rows)
    """
    # The work is delegated to the built-in scalar function looked up by this
    # name; operand order (left, then right) is passed through unchanged.
    builtin_name = "hamming_distance_str"
    operands = (left, right)
    return Expression._call_builtin_scalar_fn(builtin_name, *operands)