Skip to content

daft.functions.cosine_distance#

cosine_distance #

cosine_distance(left: Expression, right: Expression) -> Expression

Compute the cosine distance between two embeddings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `left` | FixedSizeList or Embedding Expression | The left vector | required |
| `right` | FixedSizeList or Embedding Expression | The right vector | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `Expression` | Float64 Expression | An expression with the cosine distance between the two vectors. |

Examples:

1
2
3
4
5
6
7
>>> import daft
>>> from daft.functions import cosine_distance
>>>
>>> df = daft.from_pydict({"e1": [[1, 2, 3], [1, 2, 3]], "e2": [[1, 2, 3], [-1, -2, -3]]})
>>> dtype = daft.DataType.fixed_size_list(daft.DataType.float32(), 3)
>>> df = df.with_column("dist", cosine_distance(df["e1"].cast(dtype), df["e2"].cast(dtype)))
>>> df.show()
╭─────────────┬──────────────┬─────────╮
│ e1          ┆ e2           ┆ dist    │
│ ---         ┆ ---          ┆ ---     │
│ List[Int64] ┆ List[Int64]  ┆ Float64 │
╞═════════════╪══════════════╪═════════╡
│ [1, 2, 3]   ┆ [1, 2, 3]    ┆ 0       │
├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ [1, 2, 3]   ┆ [-1, -2, -3] ┆ 2       │
╰─────────────┴──────────────┴─────────╯
(Showing first 2 of 2 rows)
Source code in daft/functions/distance.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def cosine_distance(left: Expression, right: Expression) -> Expression:
    """Build an expression for the cosine distance between two vector expressions.

    This is a thin wrapper: both operands are handed, in order, to
    ``Expression._call_builtin_scalar_fn`` under the name ``"cosine_distance"``.

    Args:
        left (FixedSizeList or Embedding Expression): The first vector operand
        right (FixedSizeList or Embedding Expression): The second vector operand

    Returns:
        Expression (Float64 Expression): the cosine distance between ``left`` and ``right``.

    Examples:
        Identical vectors are at distance 0, while exactly opposite vectors are at distance 2.

        >>> import daft
        >>> from daft.functions import cosine_distance
        >>>
        >>> df = daft.from_pydict({"e1": [[1, 2, 3], [1, 2, 3]], "e2": [[1, 2, 3], [-1, -2, -3]]})
        >>> dtype = daft.DataType.fixed_size_list(daft.DataType.float32(), 3)
        >>> df = df.with_column("dist", cosine_distance(df["e1"].cast(dtype), df["e2"].cast(dtype)))
        >>> df.show()
        ╭─────────────┬──────────────┬─────────╮
        │ e1          ┆ e2           ┆ dist    │
        │ ---         ┆ ---          ┆ ---     │
        │ List[Int64] ┆ List[Int64]  ┆ Float64 │
        ╞═════════════╪══════════════╪═════════╡
        │ [1, 2, 3]   ┆ [1, 2, 3]    ┆ 0       │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
        │ [1, 2, 3]   ┆ [-1, -2, -3] ┆ 2       │
        ╰─────────────┴──────────────┴─────────╯
        <BLANKLINE>
        (Showing first 2 of 2 rows)

    """
    # Operand order is preserved: left first, then right.
    operands = (left, right)
    return Expression._call_builtin_scalar_fn("cosine_distance", *operands)