Compute the cosine distance between two embeddings.
Parameters:
| Name | Type | Description | Default |
|------|------|-------------|---------|
| left | FixedSizeList or Embedding Expression | The left vector | required |
| right | FixedSizeList or Embedding Expression | The right vector | required |
Returns:
| Name | Type | Description |
|------|------|-------------|
| Expression | Float64 Expression | An expression with the cosine distance between the two vectors. |
Examples:
```python
>>> import daft
>>> from daft.functions import cosine_distance
>>>
>>> df = daft.from_pydict({"e1": [[1, 2, 3], [1, 2, 3]], "e2": [[1, 2, 3], [-1, -2, -3]]})
>>> dtype = daft.DataType.fixed_size_list(daft.DataType.float32(), 3)
>>> df = df.with_column("dist", cosine_distance(df["e1"].cast(dtype), df["e2"].cast(dtype)))
>>> df.show()
╭─────────────┬──────────────┬─────────╮
│ e1          ┆ e2           ┆ dist    │
│ ---         ┆ ---          ┆ ---     │
│ List[Int64] ┆ List[Int64]  ┆ Float64 │
╞═════════════╪══════════════╪═════════╡
│ [1, 2, 3]   ┆ [1, 2, 3]    ┆ 0       │
├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ [1, 2, 3]   ┆ [-1, -2, -3] ┆ 2       │
╰─────────────┴──────────────┴─────────╯
(Showing first 2 of 2 rows)
```
Source code in daft/functions/distance.py
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def cosine_distance(left: Expression, right: Expression) -> Expression:
    """Compute the cosine distance between two embeddings.

    This is a thin wrapper: both operands are forwarded, in order, to the
    built-in scalar function registered under the name ``"cosine_distance"``.
    In the example below, identical vectors give a distance of 0 and exactly
    opposite vectors give a distance of 2.

    Args:
        left (FixedSizeList or Embedding Expression): The left vector
        right (FixedSizeList or Embedding Expression): The right vector

    Returns:
        Expression (Float64 Expression): an expression with the cosine distance between the two vectors.

    Examples:
        >>> import daft
        >>> from daft.functions import cosine_distance
        >>>
        >>> df = daft.from_pydict({"e1": [[1, 2, 3], [1, 2, 3]], "e2": [[1, 2, 3], [-1, -2, -3]]})
        >>> dtype = daft.DataType.fixed_size_list(daft.DataType.float32(), 3)
        >>> df = df.with_column("dist", cosine_distance(df["e1"].cast(dtype), df["e2"].cast(dtype)))
        >>> df.show()
        ╭─────────────┬──────────────┬─────────╮
        │ e1          ┆ e2           ┆ dist    │
        │ ---         ┆ ---          ┆ ---     │
        │ List[Int64] ┆ List[Int64]  ┆ Float64 │
        ╞═════════════╪══════════════╪═════════╡
        │ [1, 2, 3]   ┆ [1, 2, 3]    ┆ 0       │
        ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
        │ [1, 2, 3]   ┆ [-1, -2, -3] ┆ 2       │
        ╰─────────────┴──────────────┴─────────╯
        <BLANKLINE>
        (Showing first 2 of 2 rows)
    """
    # Name the builtin and its operands up front so the dispatch line reads
    # as "call <builtin> with <operands>".
    builtin_name = "cosine_distance"
    operands = (left, right)
    return Expression._call_builtin_scalar_fn(builtin_name, *operands)
|