Reference#

class tca.pipeline.SearchIndex(embeddings, metadata, config)[source]#

Parameters:

embeddings (np.ndarray)
metadata (list[MetadataRecord])
config (TurboQuantConfig)

classmethod from_embeddings(embeddings, metadata, config=None)[source]#

Parameters:

embeddings (ndarray)
metadata (list[dict[str, Any]])
config (TurboQuantConfig | None)

Return type:

SearchIndex

search(query, filters=None, *, query_mode='auto')[source]#

Parameters:

query (ndarray)
filters (dict[str, Any] | None)
query_mode (str)

Return type:

SearchResults

search_exact(query, filters=None)[source]#

Parameters:

query (ndarray)
filters (dict[str, Any] | None)

Return type:

SearchResults

to_manifest()[source]#

Return type: dict[str, Any]

class tca.config.TurboQuantConfig(bit_width: int = 3, candidate_k: int = 128, rerank_k: int = 20, oversample: int = 2, seed: int = 0, quantizer_kind: str = 'prod', lloyd_max_iter: int = 100, lloyd_tol: float = 1e-06, monte_carlo_samples: int = 20000, store_original_embeddings: bool = True, auto_score_gap_threshold: float = 0.06, auto_score_spread_threshold: float = 0.015, max_candidate_k: int = 2048, max_oversample: int = 8)[source]#

Parameters:

bit_width (int)
candidate_k (int)
rerank_k (int)
oversample (int)
seed (int)
quantizer_kind (str)
lloyd_max_iter (int)
lloyd_tol (float)
monte_carlo_samples (int)
store_original_embeddings (bool)
auto_score_gap_threshold (float)
auto_score_spread_threshold (float)
max_candidate_k (int)
max_oversample (int)

bit_width: int#

candidate_k: int#

rerank_k: int#

oversample: int#

seed: int#

quantizer_kind: str#

lloyd_max_iter: int#

lloyd_tol: float#

monte_carlo_samples: int#

store_original_embeddings: bool#

auto_score_gap_threshold: float#

auto_score_spread_threshold: float#

max_candidate_k: int#

max_oversample: int#

validate()[source]#

Return type: TurboQuantConfig

to_dict()[source]#

Return type: dict[str, Any]

to_json(path)[source]#

Parameters: path (str | Path)
Return type: None

classmethod from_json(path)[source]#

Parameters: path (str | Path)
Return type: TurboQuantConfig

tca.quantization.fit_scalar_codebook(bit_width, dimension, n_samples=20000, seed=0, max_iter=100, tol=1e-06)[source]#

Parameters:

bit_width (int)
dimension (int)
n_samples (int)
seed (int)
max_iter (int)
tol (float)

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

class tca.quantization.EncodedMSE(indices: IntArray)[source]#

Parameters: indices (ndarray[tuple[Any, ...], dtype[int32]])

indices: ndarray[tuple[Any, ...], dtype[int32]]#

class tca.quantization.TurboQuantMSE(dimension, bit_width, *, seed=0, monte_carlo_samples=20000, lloyd_max_iter=100, lloyd_tol=1e-06)[source]#

Parameters:

dimension (int)
bit_width (int)
seed (int)
monte_carlo_samples (int)
lloyd_max_iter (int)
lloyd_tol (float)

encode(x)[source]#

Parameters: x (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
Return type: EncodedMSE

decode(encoded)[source]#

Parameters: encoded (EncodedMSE | _Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
Return type: ndarray[tuple[Any, ...], dtype[float32]]

prepare_query(query)[source]#

Parameters: query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
Return type: ndarray[tuple[Any, ...], dtype[float32]]

approximate_inner_products(query, encoded)[source]#

Parameters:

query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
encoded (EncodedMSE | _Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

class tca.quantization.EncodedProd(indices: IntArray, signs: SignArray, residual_norms: FloatArray)[source]#

Parameters:

indices (ndarray[tuple[Any, ...], dtype[int32]])
signs (ndarray[tuple[Any, ...], dtype[int8]])
residual_norms (ndarray[tuple[Any, ...], dtype[float32]])

indices: ndarray[tuple[Any, ...], dtype[int32]]#

signs: ndarray[tuple[Any, ...], dtype[int8]]#

residual_norms: ndarray[tuple[Any, ...], dtype[float32]]#

class tca.quantization.TurboQuantProd(dimension, bit_width, *, seed=0, monte_carlo_samples=20000, lloyd_max_iter=100, lloyd_tol=1e-06)[source]#

Parameters:

dimension (int)
bit_width (int)
seed (int)
monte_carlo_samples (int)
lloyd_max_iter (int)
lloyd_tol (float)

encode(x)[source]#

Parameters: x (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
Return type: EncodedProd

decode(encoded)[source]#

Parameters: encoded (EncodedProd)
Return type: ndarray[tuple[Any, ...], dtype[float32]]

prepare_query(query)[source]#

Parameters: query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
Return type: tuple[ndarray[tuple[Any, ...], dtype[float32]], ndarray[tuple[Any, ...], dtype[float32]]]

approximate_inner_products(query, encoded)[source]#

Parameters:

query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
encoded (EncodedProd)

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

tca.quantization.exact_topk(query, bank, top_k, ids=None)[source]#

Parameters:

query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
bank (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])
top_k (int)
ids (Iterable[int] | None)

Return type:

tuple[ndarray, ndarray]

tca.cli.build_parser()[source]#

Return type: ArgumentParser

tca.cli.main()[source]#

Return type: None